From 1fc30d387c1691f3eccbc9427620050d8fe6f6e4 Mon Sep 17 00:00:00 2001 From: dzerik Date: Tue, 5 May 2026 01:18:06 +0300 Subject: [PATCH 1/9] feat(seccomp): Syscall newtype with checked constructor Closes the footgun where ExtraHandler::new(-5, h) or future add_handler patterns would compile but silently never fire because the cBPF filter cannot emit a JEQ for arch-unknown nr. Syscall(i64) + Syscall::checked(nr) -> Result (Negative / UnknownForArch variants) + TryFrom via checked. Adds: - crates/sandlock-core/src/seccomp/syscall.rs (78 lines, 4 unit tests) - crates/sandlock-core/src/arch.rs: MAX_SYSCALL_NR per arch + pub fn is_known_syscall(nr) for the checked() validator - async-trait dependency for the Handler trait that the next commit introduces --- Cargo.lock | 12 ++++ crates/sandlock-core/Cargo.toml | 1 + crates/sandlock-core/src/arch.rs | 12 ++++ crates/sandlock-core/src/seccomp/mod.rs | 1 + crates/sandlock-core/src/seccomp/syscall.rs | 78 +++++++++++++++++++++ 5 files changed, 104 insertions(+) create mode 100644 crates/sandlock-core/src/seccomp/syscall.rs diff --git a/Cargo.lock b/Cargo.lock index 4d62edb..2a2e79f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -150,6 +150,17 @@ dependencies = [ "pin-project-lite", ] +[[package]] +name = "async-trait" +version = "0.1.89" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9035ad2d096bed7955a320ee7e2230574d28fd3c3a0f186cbea1ff3c7eed5dbb" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "atomic-waker" version = "1.1.2" @@ -1576,6 +1587,7 @@ dependencies = [ name = "sandlock-core" version = "0.7.0" dependencies = [ + "async-trait", "bincode", "goblin", "hudsucker", diff --git a/crates/sandlock-core/Cargo.toml b/crates/sandlock-core/Cargo.toml index c02de58..4043f34 100644 --- a/crates/sandlock-core/Cargo.toml +++ b/crates/sandlock-core/Cargo.toml @@ -9,6 +9,7 @@ readme = "../../README.md" description = "Lightweight process sandbox using Landlock, seccomp-bpf, and seccomp user notification" [dependencies] +async-trait = "0.1" libc = "0.2" nix = { version = "0.29", features = ["process", "signal", "fs", "ioctl", "poll"] } tokio = { version = "1", features = ["rt", "net", "time", "sync", "macros", "io-util"] } diff --git a/crates/sandlock-core/src/arch.rs b/crates/sandlock-core/src/arch.rs index 14bd7f1..89969e4 100644 --- a/crates/sandlock-core/src/arch.rs +++ b/crates/sandlock-core/src/arch.rs @@ -3,6 +3,7 @@ #[cfg(target_arch = "x86_64")] mod imp { pub const AUDIT_ARCH: u32 = 0xC000_003E; + pub const MAX_SYSCALL_NR: i64 = 462; pub const SYS_SECCOMP: i64 = 317; pub const SYS_MEMFD_CREATE: i64 = 319; pub const SYS_PIDFD_OPEN: i64 = 434; @@ -45,6 +46,7 @@ mod imp { #[cfg(target_arch = "aarch64")] mod imp { pub const AUDIT_ARCH: u32 = 0xC000_00B7; + pub const MAX_SYSCALL_NR: i64 = 463; pub const SYS_SECCOMP: i64 = 277; pub const SYS_MEMFD_CREATE: i64 = 279; pub const SYS_PIDFD_OPEN: i64 = 434; @@ -82,6 +84,16 @@ mod imp { pub use imp::*; +/// True if `nr` is plausibly a syscall number on the current architecture. +/// Used by [`crate::seccomp::syscall::Syscall::checked`] to reject foot-gun +/// cases like negative or arch-mismatched numbers. +/// +/// Conservative: validates `0 <= nr <= MAX_SYSCALL_NR`. Doesn't enumerate +/// every nr — kernel's seccomp filter rejects unknowns at JEQ stage anyway. +pub fn is_known_syscall(nr: i64) -> bool { + nr >= 0 && nr <= imp::MAX_SYSCALL_NR +} + pub fn push_optional_syscall(v: &mut Vec, nr: Option) { if let Some(nr) = nr { v.push(nr as u32); diff --git a/crates/sandlock-core/src/seccomp/mod.rs b/crates/sandlock-core/src/seccomp/mod.rs index 4f22b80..6afa622 100644 --- a/crates/sandlock-core/src/seccomp/mod.rs +++ b/crates/sandlock-core/src/seccomp/mod.rs @@ -3,3 +3,4 @@ pub mod ctx; pub mod dispatch; pub mod notif; pub mod state; +pub mod syscall; diff --git a/crates/sandlock-core/src/seccomp/syscall.rs b/crates/sandlock-core/src/seccomp/syscall.rs new file mode 100644 index 0000000..86fc232 --- /dev/null +++ b/crates/sandlock-core/src/seccomp/syscall.rs @@ -0,0 +1,78 @@ +//! `Syscall` — checked syscall number newtype. +//! +//! Closes the footgun where `add_handler(-5, h)` would compile but +//! silently never fire because the cBPF filter cannot emit a JEQ for +//! an architecture-unknown syscall number. + +use thiserror::Error; + +#[derive(Debug, Error, PartialEq, Eq)] +pub enum SyscallError { + #[error("syscall number {0} is negative")] + Negative(i64), + #[error("syscall number {0} is unknown for the current architecture")] + UnknownForArch(i64), +} + +#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)] +pub struct Syscall(i64); + +impl Syscall { + /// Validates that `nr` is non-negative and known on the current architecture. + pub fn checked(nr: i64) -> Result { + if nr < 0 { + return Err(SyscallError::Negative(nr)); + } + if !crate::arch::is_known_syscall(nr) { + return Err(SyscallError::UnknownForArch(nr)); + } + Ok(Self(nr)) + } + + pub fn raw(self) -> i64 { + self.0 + } +} + +impl TryFrom for Syscall { + type Error = SyscallError; + fn try_from(nr: i64) -> Result { + Self::checked(nr) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn checked_accepts_valid_openat() { + let s = Syscall::checked(libc::SYS_openat).expect("openat is valid"); + assert_eq!(s.raw(), libc::SYS_openat); + } + + #[test] + fn checked_rejects_negative() { + match Syscall::checked(-5) { + Err(SyscallError::Negative(-5)) => {} + other => panic!("expected Negative(-5), got {:?}", other), + } + } + + #[test] + fn checked_rejects_arch_unknown() { + // 99_999 is above any reasonable MAX_SYSCALL_NR. + match Syscall::checked(99_999) { + Err(SyscallError::UnknownForArch(99_999)) => {} + other => panic!("expected UnknownForArch(99_999), got {:?}", other), + } + } + + #[test] + fn try_from_i64_delegates_to_checked() { + let s: Syscall = libc::SYS_openat.try_into().expect("openat valid"); + assert_eq!(s.raw(), libc::SYS_openat); + let bad: Result = (-1i64).try_into(); + assert!(matches!(bad, Err(SyscallError::Negative(-1)))); + } +} From 989daf1153a9d0f9f3b225a82bee9ebde82aa7ec Mon Sep 17 00:00:00 2001 From: dzerik Date: Tue, 5 May 2026 01:37:22 +0300 Subject: [PATCH 2/9] feat(seccomp): Handler trait + IntoIterator-shaped run_with_extra_handlers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replaces the closure-typedef + struct-wrapper extension API with a proper trait, and migrates all 21 builtin handlers + the public entry points onto it. No deprecation cycle: the old `ExtraHandler` / `HandlerFn` types are removed outright. API surface ----------- New public types in `seccomp::dispatch`: - `pub trait Handler { async fn handle(&self, &HandlerCtx<'_>) -> NotifAction; }` (via `async_trait` to match the maintainer's PR #20 review sketch). - `pub struct HandlerCtx<'a>` — borrowed `notif`, `&Arc`, `notif_fd`. - Blanket `impl Handler for F where F: Fn(&HandlerCtx<'_>) -> Fut` so closure-shaped handlers compose with the trait without ceremony. - `impl Handler for Arc` so callers can erase concrete closure types when collecting differently-shaped handlers in one iterator. - `pub enum HandlerError { InvalidSyscall(SyscallError), OnDenySyscall { syscall_nr } }` added as a `#[from]` variant on `SandlockError::Handler`. Public entry points reshaped (no rename, no deprecation): - `Sandbox::run_with_extra_handlers(policy, cmd, extra_handlers)` where `I: IntoIterator`, `S: TryInto`, `H: Handler`. Old: run_with_extra_handlers(p, c, vec![ExtraHandler::new(nr, fn)]) New: run_with_extra_handlers(p, c, [(libc::SYS_openat, openat_h)]) - `Sandbox::run_interactive_with_extra_handlers(...)` — same shape, interactive stdio. - `Sandbox::run` and `Sandbox::run_interactive` keep their pre-existing signatures and are now self-contained (no longer route through the extras path with an empty Vec). Removed ------- - `pub struct ExtraHandler` and `ExtraHandler::new(nr, fn)`. - `pub type HandlerFn` (the `Box, RawFd) -> Pin>>` typedef). - The internal `HandlerFnAdapter` shim that wrapped `HandlerFn` into the new trait during the proposed deprecation cycle. Internal -------- - `DispatchTable` backing storage migrated to `Vec>`. - `DispatchTable::register` accepts any handler; `register_arc` for pre-`Arc`'d handlers shared across syscalls. - All 21 builtin handler closures (fork/clone, wait, memory, network, random, time, openat virtualisation, etc.) rewritten onto the new shape using closures via blanket impl. No behaviour change. - `validate_extras_against_policy` renamed to `validate_handler_syscalls_against_policy` and now takes `&[i64]`. - `Sandbox.extra_handlers` field type changed from `Vec` to `Vec<(i64, Arc)>`. - `notif::supervisor` parameter follows the same shape change. Tests ----- - 8 carry-over integration tests from PR #20 rewritten to use the new closure-via-blanket-impl shape and the `[(syscall, handler)]` array literal at the call site. Coverage unchanged: deny-list rejection, ordering boundary, builtin short-circuit, Continue passthrough. - 6 new integration tests: * `handler_via_blanket_impl_dispatches_in_sandbox` (closure shape) * `struct_handler_state_persists_across_sandbox_calls` (struct + `impl Handler` with `Arc` self state observed across multiple `uname` notifications) * `run_with_extra_handlers_rejects_negative_syscall` / `..._rejects_arch_unknown_syscall` — surface `HandlerError::InvalidSyscall(SyscallError::*)`. * `run_with_extra_handlers_preserves_insertion_order_in_sandbox_chain` — `Arc` erasure pattern observed end-to-end. * `run_with_extra_handlers_rejects_handler_on_default_deny_syscall` — surfaces `HandlerError::OnDenySyscall`. - 2 dispatch-level unit tests (closure-via-blanket and struct-impl) exercising the new trait through `DispatchTable::register` + `dispatch()`. 234 lib unit tests + 14 integration extra-handler tests pass. --- crates/sandlock-core/src/error.rs | 3 + crates/sandlock-core/src/sandbox.rs | 142 ++-- crates/sandlock-core/src/seccomp/dispatch.rs | 744 +++++++++++-------- crates/sandlock-core/src/seccomp/notif.rs | 8 +- 4 files changed, 559 insertions(+), 338 deletions(-) diff --git a/crates/sandlock-core/src/error.rs b/crates/sandlock-core/src/error.rs index f929c23..463c8dc 100644 --- a/crates/sandlock-core/src/error.rs +++ b/crates/sandlock-core/src/error.rs @@ -11,6 +11,9 @@ pub enum SandlockError { #[error("memory protection error: {0}")] MemoryProtect(String), + + #[error("handler error: {0}")] + Handler(#[from] crate::seccomp::dispatch::HandlerError), } #[derive(Debug, Error)] diff --git a/crates/sandlock-core/src/sandbox.rs b/crates/sandlock-core/src/sandbox.rs index af053ae..7600081 100644 --- a/crates/sandlock-core/src/sandbox.rs +++ b/crates/sandlock-core/src/sandbox.rs @@ -109,9 +109,10 @@ pub struct Sandbox { /// Optional callback invoked when a port bind is recorded. #[allow(clippy::type_complexity)] on_bind: Option) + Send + Sync>>, - /// User-supplied extra syscall handlers. Taken on spawn and - /// appended to the dispatch table after all builtin handlers. - extra_handlers: Vec, + /// User-supplied extra syscall handlers as `(syscall_nr, Arc)` + /// pairs. Taken on spawn and appended to the dispatch table after + /// all builtin handlers. + extra_handlers: Vec<(i64, Arc)>, } impl Sandbox { @@ -182,7 +183,9 @@ impl Sandbox { name: Option<&str>, cmd: &[&str], ) -> Result { - Self::run_with_extra_handlers(policy, name, cmd, Vec::new()).await + let mut sb = Self::new(policy, name)?; + sb.do_spawn(cmd, true).await?; + sb.wait().await } /// Run a sandboxed process with inherited stdio (interactive mode). @@ -198,69 +201,90 @@ impl Sandbox { /// One-shot run with user-supplied syscall handlers. /// - /// `extra_handlers` are registered in the dispatch table **after** all - /// builtin handlers for the same syscall. They observe the post-builtin - /// view (e.g. [`chroot`]-normalized paths on `openat`) and cannot be used - /// to bypass builtin confinement. See - /// [`crate::seccomp::dispatch::ExtraHandler`] for the ordering contract. + /// `extra_handlers` is any `IntoIterator` over `(syscall, handler)` pairs + /// where: + /// + /// * `syscall: S` is anything implementing `TryInto` — `i64`/`u32` + /// raw numbers (validated through + /// [`crate::seccomp::syscall::Syscall::checked`]), or a pre-validated + /// [`crate::seccomp::syscall::Syscall`]. + /// * `handler: H` is anything implementing + /// [`crate::seccomp::dispatch::Handler`] — a struct with explicit + /// `impl Handler` for stateful handlers, or a closure of shape + /// `Fn(&HandlerCtx) -> impl Future` via the + /// blanket impl. /// - /// When called with an empty vector, this function is identical to - /// [`Self::run`]. + /// Handlers are registered in the dispatch table **after** all builtin + /// handlers for the same syscall, so they observe the post-builtin view + /// (e.g. `chroot`-normalized paths on `openat`) and cannot bypass builtin + /// confinement. + /// + /// Validation happens up-front (before fork): each `syscall` is checked + /// through `Syscall::checked`, and the deny-list contract is enforced via + /// [`crate::seccomp::dispatch::validate_handler_syscalls_against_policy`]. /// /// # Example /// /// ```ignore /// use sandlock_core::{Policy, Sandbox}; - /// use sandlock_core::seccomp::dispatch::{ExtraHandler, HandlerFn}; /// use sandlock_core::seccomp::notif::NotifAction; /// /// # tokio_test::block_on(async { /// let policy = Policy::builder().fs_read("/usr").build().unwrap(); /// - /// let audit: HandlerFn = Box::new(|notif, _ctx, _fd| { - /// Box::pin(async move { - /// eprintln!("openat from pid {}", notif.data.pid); + /// let audit = |cx: &sandlock_core::HandlerCtx<'_>| { + /// let pid = cx.notif.data.pid; + /// async move { + /// eprintln!("openat from pid {}", pid); /// NotifAction::Continue - /// }) - /// }); + /// } + /// }; /// /// let result = Sandbox::run_with_extra_handlers( /// &policy, /// Some("audit"), /// &["/usr/bin/true"], - /// vec![ExtraHandler::new(libc::SYS_openat, audit)], + /// [(libc::SYS_openat, audit)], /// ).await.unwrap(); /// # }); /// ``` - pub async fn run_with_extra_handlers( + pub async fn run_with_extra_handlers( policy: &Policy, name: Option<&str>, cmd: &[&str], - extra_handlers: Vec, - ) -> Result { - // Reject extras that would weaken confinement (e.g. one registered - // on a default-deny syscall). See - // [`crate::seccomp::dispatch::validate_extras_against_policy`] for the - // rationale. Done before fork so the caller gets a clear error - // instead of a silently-broken sandbox. - if let Err(nr) = - crate::seccomp::dispatch::validate_extras_against_policy(&extra_handlers, policy) - { - return Err(SandboxError::Child(format!( - "ExtraHandler on syscall {} conflicts with the deny list \ - (DEFAULT_DENY_SYSCALLS or policy.deny_syscalls) and would let \ - user code bypass it via SECCOMP_USER_NOTIF_FLAG_CONTINUE", - nr - )) - .into()); - } - + extra_handlers: I, + ) -> Result + where + I: IntoIterator, + S: TryInto, + H: crate::seccomp::dispatch::Handler, + { + let pending = collect_extra_handlers(extra_handlers, policy)?; let mut sb = Self::new(policy, name)?; - sb.extra_handlers = extra_handlers; + sb.extra_handlers = pending; sb.do_spawn(cmd, true).await?; sb.wait().await } + /// Interactive-stdio counterpart of [`Self::run_with_extra_handlers`]. + pub async fn run_interactive_with_extra_handlers( + policy: &Policy, + name: Option<&str>, + cmd: &[&str], + extra_handlers: I, + ) -> Result + where + I: IntoIterator, + S: TryInto, + H: crate::seccomp::dispatch::Handler, + { + let pending = collect_extra_handlers(extra_handlers, policy)?; + let mut sb = Self::new(policy, name)?; + sb.extra_handlers = pending; + sb.do_spawn(cmd, false).await?; + sb.wait().await + } + /// Dry-run: spawn, wait, collect filesystem changes, then abort. /// Returns the run result plus a list of changes that would have been /// committed. The workdir is left unchanged. @@ -939,7 +963,7 @@ impl Sandbox { let extra_syscalls: Vec = self .extra_handlers .iter() - .map(|h| h.syscall_nr as u32) + .map(|h| h.0 as u32) .collect(); // This never returns. @@ -1036,8 +1060,8 @@ impl Sandbox { // argv reads TOCTOU-safe. argv_safety_required: self.policy.policy_fn.is_some() || self.extra_handlers.iter().any(|h| { - h.syscall_nr == libc::SYS_execve - || h.syscall_nr == libc::SYS_execveat + h.0 == libc::SYS_execve + || h.0 == libc::SYS_execveat }), time_offset: time_offset_val, num_cpus: self.policy.num_cpus, @@ -1246,6 +1270,40 @@ impl Sandbox { } } +// ============================================================ +// Helpers +// ============================================================ + +/// Convert a user-supplied iterator of `(syscall, handler)` pairs into +/// the internal `Vec<(i64, Arc)>` shape used by the +/// supervisor, validating each syscall up-front against the deny list. +fn collect_extra_handlers( + extra_handlers: I, + policy: &Policy, +) -> Result)>, SandlockError> +where + I: IntoIterator, + S: TryInto, + H: crate::seccomp::dispatch::Handler, +{ + use crate::seccomp::dispatch::{Handler, HandlerError}; + + let pending: Vec<(i64, Arc)> = extra_handlers + .into_iter() + .map(|(syscall, handler)| { + let nr = syscall.try_into().map_err(HandlerError::from)?.raw(); + let h: Arc = Arc::new(handler); + Ok::<_, HandlerError>((nr, h)) + }) + .collect::>()?; + + let nrs: Vec = pending.iter().map(|(nr, _)| *nr).collect(); + crate::seccomp::dispatch::validate_handler_syscalls_against_policy(&nrs, policy) + .map_err(|nr_u| HandlerError::OnDenySyscall { syscall_nr: nr_u as i64 })?; + + Ok(pending) +} + // ============================================================ // Drop — kill and reap child if still running // ============================================================ diff --git a/crates/sandlock-core/src/seccomp/dispatch.rs b/crates/sandlock-core/src/seccomp/dispatch.rs index 3420a7f..56fe181 100644 --- a/crates/sandlock-core/src/seccomp/dispatch.rs +++ b/crates/sandlock-core/src/seccomp/dispatch.rs @@ -17,76 +17,104 @@ // them approve a syscall based on user-memory contents. use std::collections::HashMap; -use std::future::Future; use std::os::unix::io::RawFd; -use std::pin::Pin; use std::sync::Arc; use super::ctx::SupervisorCtx; use super::notif::{NotifAction, NotifPolicy}; use super::state::ResourceState; +use super::syscall::SyscallError; use crate::arch; use crate::sys::structs::SeccompNotif; +use thiserror::Error; use tokio::sync::Mutex; // ============================================================ // Types // ============================================================ -/// An async handler function. Receives the notification, the supervisor -/// context, and the notif fd. Returns a `NotifAction`. -pub type HandlerFn = Box< - dyn Fn(SeccompNotif, Arc, RawFd) -> Pin + Send>> - + Send - + Sync, ->; +// ============================================================ +// Handler trait — the new public extension API. +// ============================================================ -/// A user-supplied handler bound to a specific syscall number. -/// -/// Passed to [`crate::Sandbox::run_with_extra_handlers`]; appended to the -/// dispatch table **after** all builtin handlers for the same syscall. -/// -/// # Ordering and security boundary +/// Public extension trait for sandlock seccomp-notif handlers. /// -/// Within a syscall's chain, handlers run in registration order and the -/// first non-[`NotifAction::Continue`] result wins. Builtin handlers are -/// registered first (for example `chroot` path-normalization on `openat`), -/// so an `ExtraHandler` observes the post-builtin view of each syscall. -/// This ordering is fixed and cannot be changed by downstream crates — -/// it is the security boundary that prevents user handlers from bypassing -/// sandlock confinement. +/// Each implementor is registered against a [`crate::seccomp::syscall::Syscall`] +/// through [`crate::Sandbox::run_with_extra_handlers`] / +/// [`crate::Sandbox::run_interactive_with_extra_handlers`]. Receives +/// `&HandlerCtx` borrowed for the call; cannot outlive the dispatch +/// invocation. /// -/// # Example -/// -/// ```ignore -/// use sandlock_core::seccomp::dispatch::{ExtraHandler, HandlerFn}; -/// use sandlock_core::seccomp::notif::NotifAction; -/// -/// let audit: HandlerFn = Box::new(|notif, _ctx, _fd| { -/// Box::pin(async move { -/// eprintln!("openat from pid {}", notif.data.pid); -/// NotifAction::Continue -/// }) -/// }); +/// State lives on the implementor — no `Arc::clone` ladders, no +/// `Box::pin(async move {...})` ceremony at registration time. +#[async_trait::async_trait] +pub trait Handler: Send + Sync + 'static { + async fn handle(&self, cx: &HandlerCtx<'_>) -> NotifAction; +} + +/// Borrowed context passed to `Handler::handle`. /// -/// let extras = vec![ExtraHandler::new(libc::SYS_openat, audit)]; -/// ``` -pub struct ExtraHandler { - pub syscall_nr: i64, - pub handler: HandlerFn, +/// `notif` is owned by value (it's a small `repr(C)` kernel struct, +/// cheap to copy); `sup` is borrowed from the supervisor's +/// `Arc` and exposed as `&Arc<...>` so handlers that +/// need to spawn additional tasks holding the supervisor context can +/// `Arc::clone` it without unsafe; `notif_fd` is the supervisor's +/// seccomp listener fd, used by helpers like `read_child_mem` / +/// `write_child_mem` for TOCTOU-safe child memory access. +pub struct HandlerCtx<'a> { + pub notif: SeccompNotif, + pub sup: &'a std::sync::Arc, + pub notif_fd: RawFd, } -impl ExtraHandler { - pub fn new(syscall_nr: i64, handler: HandlerFn) -> Self { - Self { syscall_nr, handler } +// Blanket impl: any Fn(&HandlerCtx) -> Future is a Handler. +// +// Lets lightweight closure-style handlers work without ceremony at the +// call site. Handlers that need state should use `struct + explicit +// impl Handler` instead. +#[async_trait::async_trait] +impl Handler for F +where + F: Fn(&HandlerCtx<'_>) -> Fut + Send + Sync + 'static, + Fut: std::future::Future + Send + 'static, +{ + async fn handle(&self, cx: &HandlerCtx<'_>) -> NotifAction { + (self)(cx).await + } +} + +// Allow callers to pre-erase concrete handler types via `Arc` +// when they need a uniform type in a collection — e.g. mixing several +// closures of different opaque types in one IntoIterator passed to +// run_with_extra_handlers. +#[async_trait::async_trait] +impl Handler for std::sync::Arc { + async fn handle(&self, cx: &HandlerCtx<'_>) -> NotifAction { + (**self).handle(cx).await } } -/// Reject extras that would weaken sandlock's confinement guarantees. +/// Errors raised when registering user handlers via +/// [`crate::Sandbox::run_with_extra_handlers`]. +#[derive(Debug, Error, PartialEq, Eq)] +pub enum HandlerError { + #[error("invalid syscall in handler registration: {0}")] + InvalidSyscall(#[from] SyscallError), + + #[error( + "handler on syscall {syscall_nr} conflicts with the deny list \ + (DEFAULT_DENY_SYSCALLS or policy.deny_syscalls) and would let \ + user code bypass it via SECCOMP_USER_NOTIF_FLAG_CONTINUE" + )] + OnDenySyscall { syscall_nr: i64 }, +} + +/// Reject handler registrations that would weaken sandlock's confinement +/// guarantees. /// /// The cBPF program emits notif JEQs *before* deny JEQs, so a syscall -/// present in both lists hits `SECCOMP_RET_USER_NOTIF` first. An extra +/// present in both lists hits `SECCOMP_RET_USER_NOTIF` first. A handler /// registered on a syscall that is on the deny list would therefore /// convert a kernel-deny into a user-supervised path: a handler returning /// `NotifAction::Continue` becomes `SECCOMP_USER_NOTIF_FLAG_CONTINUE` and @@ -98,38 +126,36 @@ impl ExtraHandler { /// `allow_syscalls` is set; both branches are guarded by this function. /// /// **Allowlist mode** (`policy.allow_syscalls = Some(_)`): the resolved -/// deny list is empty, so this function returns `Ok(())` for any extra. +/// deny list is empty, so this function returns `Ok(())` for any syscall. /// That is sound because the BPF deny block is empty in this mode too — /// confinement comes from the allowlist enforced at the kernel level, -/// and there is no notif/deny overlap for an extra to bypass. +/// and there is no notif/deny overlap to bypass. +/// +/// Takes only the syscall numbers because that's all it needs to check. +/// Called from the `run_with_extra_handlers` entry points before any +/// handler is registered against the dispatch table. /// /// Returns the offending syscall number on rejection so the caller can /// surface it to the end user. -/// -/// Visibility: kept `pub(crate)` because the only safe consumption path -/// is via [`crate::Sandbox::run_with_extra_handlers`], which calls this -/// function before fork. Downstream crates that pre-build their own -/// `Vec` get the same enforcement transparently through -/// that entry point — there is no `ExtraHandler::register_into` API -/// that would let a user bypass it. -pub(crate) fn validate_extras_against_policy( - extras: &[ExtraHandler], +pub(crate) fn validate_handler_syscalls_against_policy( + syscall_nrs: &[i64], policy: &crate::policy::Policy, ) -> Result<(), u32> { let deny: std::collections::HashSet = crate::context::deny_syscall_numbers(policy).into_iter().collect(); - for extra in extras { - let nr = extra.syscall_nr as u32; - if deny.contains(&nr) { - return Err(nr); + for &nr in syscall_nrs { + let nr_u = nr as u32; + if deny.contains(&nr_u) { + return Err(nr_u); } } Ok(()) } + /// Ordered chain of handlers for a single syscall number. struct HandlerChain { - handlers: Vec, + handlers: Vec>, } /// Maps syscall numbers to handler chains. @@ -145,14 +171,28 @@ impl DispatchTable { } } - /// Register a handler for the given syscall number. Handlers are called in - /// registration order; the first non-Continue result wins. - pub fn register(&mut self, syscall_nr: i64, handler: HandlerFn) { + /// Register a handler for the given syscall number. Handlers are + /// called in registration order; the first non-Continue result wins. + /// + /// Generic over `H: Handler` — accepts either a struct with explicit + /// `impl Handler for ...` or a closure (via blanket impl). + pub fn register(&mut self, syscall_nr: i64, handler: H) { + self.register_arc(syscall_nr, std::sync::Arc::new(handler)); + } + + /// Register a pre-`Arc`'d handler. Used both by builtin chunks + /// that share state via `Arc::clone` (one `ForkHandler` instance + /// registers against `SYS_clone`/`SYS_clone3`/`SYS_vfork`) and by + /// `run_with_extra_handlers` when each item already arrives as + /// `Arc`. + pub(crate) fn register_arc( + &mut self, + syscall_nr: i64, + handler: std::sync::Arc, + ) { self.chains .entry(syscall_nr) - .or_insert_with(|| HandlerChain { - handlers: Vec::new(), - }) + .or_insert_with(|| HandlerChain { handlers: Vec::new() }) .handlers .push(handler); } @@ -161,13 +201,14 @@ impl DispatchTable { pub async fn dispatch( &self, notif: SeccompNotif, - ctx: &Arc, + ctx: &std::sync::Arc, notif_fd: RawFd, ) -> NotifAction { let nr = notif.data.nr as i64; if let Some(chain) = self.chains.get(&nr) { + let handler_ctx = HandlerCtx { notif, sup: ctx, notif_fd }; for handler in &chain.handlers { - let action = handler(notif, Arc::clone(ctx), notif_fd).await; + let action = handler.handle(&handler_ctx).await; if !matches!(action, NotifAction::Continue) { return action; } @@ -185,14 +226,14 @@ impl DispatchTable { /// monolithic `dispatch()` function is translated into a `table.register()` call. /// Priority is preserved by registration order. /// -/// `extra_handlers` are appended **after** all builtin handlers, so they +/// `pending_handlers` are appended **after** all builtin handlers, so they /// observe the post-builtin view (e.g. `chroot`-normalized paths on /// `openat`). Builtins cannot be overridden or removed — this is the /// security boundary for downstream crates. pub fn build_dispatch_table( policy: &Arc, resource: &Arc>, - extra_handlers: Vec, + pending_handlers: Vec<(i64, std::sync::Arc)>, ) -> DispatchTable { let mut table = DispatchTable::new(); @@ -200,28 +241,31 @@ pub fn build_dispatch_table( // Fork/clone family (always on) // ------------------------------------------------------------------ for &nr in arch::FORK_LIKE_SYSCALLS { - let policy = Arc::clone(policy); - let resource = Arc::clone(resource); - table.register(nr, Box::new(move |notif, _ctx, notif_fd| { - let policy = Arc::clone(&policy); - let resource = Arc::clone(&resource); - Box::pin(async move { + let policy_for_fork = Arc::clone(policy); + let resource_for_fork = Arc::clone(resource); + table.register(nr, move |cx: &HandlerCtx<'_>| { + let notif = cx.notif; + let notif_fd = cx.notif_fd; + let policy = Arc::clone(&policy_for_fork); + let resource = Arc::clone(&resource_for_fork); + async move { crate::resource::handle_fork(¬if, notif_fd, &resource, &policy).await - }) - })); + } + }); } // ------------------------------------------------------------------ // Wait family (always on) // ------------------------------------------------------------------ for &nr in &[libc::SYS_wait4, libc::SYS_waitid] { - let resource = Arc::clone(resource); - table.register(nr, Box::new(move |notif, _ctx, _notif_fd| { - let resource = Arc::clone(&resource); - Box::pin(async move { + let resource_for_wait = Arc::clone(resource); + table.register(nr, move |cx: &HandlerCtx<'_>| { + let notif = cx.notif; + let resource = Arc::clone(&resource_for_wait); + async move { crate::resource::handle_wait(¬if, &resource).await - }) - })); + } + }); } // ------------------------------------------------------------------ @@ -232,13 +276,15 @@ pub fn build_dispatch_table( libc::SYS_mmap, libc::SYS_munmap, libc::SYS_brk, libc::SYS_mremap, libc::SYS_shmget, ] { - let policy = Arc::clone(policy); - table.register(nr, Box::new(move |notif, ctx, _notif_fd| { - let policy = Arc::clone(&policy); - Box::pin(async move { - crate::resource::handle_memory(¬if, &ctx, &policy).await - }) - })); + let policy_for_mem = Arc::clone(policy); + table.register(nr, move |cx: &HandlerCtx<'_>| { + let notif = cx.notif; + let sup = std::sync::Arc::clone(cx.sup); + let policy = Arc::clone(&policy_for_mem); + async move { + crate::resource::handle_memory(¬if, &sup, &policy).await + } + }); } } @@ -247,11 +293,14 @@ pub fn build_dispatch_table( // ------------------------------------------------------------------ if policy.has_net_allowlist || policy.has_http_acl { for &nr in &[libc::SYS_connect, libc::SYS_sendto, libc::SYS_sendmsg] { - table.register(nr, Box::new(|notif, ctx, notif_fd| { - Box::pin(async move { - crate::network::handle_net(¬if, &ctx, notif_fd).await - }) - })); + table.register(nr, |cx: &HandlerCtx<'_>| { + let notif = cx.notif; + let sup = std::sync::Arc::clone(cx.sup); + let notif_fd = cx.notif_fd; + async move { + crate::network::handle_net(¬if, &sup, notif_fd).await + } + }); } } @@ -259,33 +308,39 @@ pub fn build_dispatch_table( // Deterministic random — getrandom() // ------------------------------------------------------------------ if policy.has_random_seed { - table.register(libc::SYS_getrandom, Box::new(|notif, ctx, notif_fd| { - Box::pin(async move { - let mut tr = ctx.time_random.lock().await; + table.register(libc::SYS_getrandom, |cx: &HandlerCtx<'_>| { + let notif = cx.notif; + let sup = std::sync::Arc::clone(cx.sup); + let notif_fd = cx.notif_fd; + async move { + let mut tr = sup.time_random.lock().await; if let Some(ref mut rng) = tr.random_state { crate::random::handle_getrandom(¬if, rng, notif_fd) } else { NotifAction::Continue } - }) - })); + } + }); } // ------------------------------------------------------------------ // Deterministic random — /dev/urandom opens (openat) // ------------------------------------------------------------------ if policy.has_random_seed { - table.register(libc::SYS_openat, Box::new(|notif, ctx, notif_fd| { - Box::pin(async move { - let mut tr = ctx.time_random.lock().await; + table.register(libc::SYS_openat, |cx: &HandlerCtx<'_>| { + let notif = cx.notif; + let sup = std::sync::Arc::clone(cx.sup); + let notif_fd = cx.notif_fd; + async move { + let mut tr = sup.time_random.lock().await; if let Some(ref mut rng) = tr.random_state { if let Some(action) = crate::random::handle_random_open(¬if, rng, notif_fd) { return action; } } NotifAction::Continue - }) - })); + } + }); } // ------------------------------------------------------------------ @@ -298,11 +353,13 @@ pub fn build_dispatch_table( libc::SYS_timerfd_settime as i64, libc::SYS_timer_settime as i64, ] { - table.register(nr, Box::new(move |notif, _ctx, notif_fd| { - Box::pin(async move { + table.register(nr, move |cx: &HandlerCtx<'_>| { + let notif = cx.notif; + let notif_fd = cx.notif_fd; + async move { crate::time::handle_timer(¬if, time_offset, notif_fd) - }) - })); + } + }); } } @@ -324,83 +381,97 @@ pub fn build_dispatch_table( // /proc virtualization (always on) // ------------------------------------------------------------------ { - let policy = Arc::clone(policy); - let resource = Arc::clone(resource); - table.register(libc::SYS_openat, Box::new(move |notif, ctx, notif_fd| { - let policy = Arc::clone(&policy); - let resource = Arc::clone(&resource); - let processes = Arc::clone(&ctx.processes); - let network = Arc::clone(&ctx.network); - Box::pin(async move { + let policy_for_proc_open = Arc::clone(policy); + let resource_for_proc_open = Arc::clone(resource); + table.register(libc::SYS_openat, move |cx: &HandlerCtx<'_>| { + let notif = cx.notif; + let sup = std::sync::Arc::clone(cx.sup); + let notif_fd = cx.notif_fd; + let policy = Arc::clone(&policy_for_proc_open); + let resource = Arc::clone(&resource_for_proc_open); + async move { + let processes = Arc::clone(&sup.processes); + let network = Arc::clone(&sup.network); crate::procfs::handle_proc_open(¬if, &processes, &resource, &network, &policy, notif_fd).await - }) - })); + } + }); } let mut getdents_nrs = vec![libc::SYS_getdents64]; if let Some(getdents) = arch::SYS_GETDENTS { getdents_nrs.push(getdents); } for nr in getdents_nrs { - let policy = Arc::clone(policy); - table.register(nr, Box::new(move |notif, ctx, notif_fd| { - let policy = Arc::clone(&policy); - let processes = Arc::clone(&ctx.processes); - Box::pin(async move { + let policy_for_getdents = Arc::clone(policy); + table.register(nr, move |cx: &HandlerCtx<'_>| { + let notif = cx.notif; + let sup = std::sync::Arc::clone(cx.sup); + let notif_fd = cx.notif_fd; + let policy = Arc::clone(&policy_for_getdents); + async move { + let processes = Arc::clone(&sup.processes); crate::procfs::handle_getdents(¬if, &processes, &policy, notif_fd).await - }) - })); + } + }); } // ------------------------------------------------------------------ // Virtual CPU count // ------------------------------------------------------------------ if let Some(n) = policy.num_cpus { - table.register(libc::SYS_sched_getaffinity, Box::new(move |notif, _ctx, notif_fd| { - Box::pin(async move { + table.register(libc::SYS_sched_getaffinity, move |cx: &HandlerCtx<'_>| { + let notif = cx.notif; + let notif_fd = cx.notif_fd; + async move { crate::procfs::handle_sched_getaffinity(¬if, n, notif_fd) - }) - })); + } + }); } // ------------------------------------------------------------------ // Hostname virtualization // ------------------------------------------------------------------ if let Some(ref hostname) = policy.virtual_hostname { - let hostname = hostname.clone(); - let hostname2 = hostname.clone(); - table.register(libc::SYS_uname, Box::new(move |notif, _ctx, notif_fd| { - let hostname = hostname.clone(); - Box::pin(async move { + let hostname_for_uname = hostname.clone(); + let hostname_for_open = hostname.clone(); + table.register(libc::SYS_uname, move |cx: &HandlerCtx<'_>| { + let notif = cx.notif; + let notif_fd = cx.notif_fd; + let hostname = hostname_for_uname.clone(); + async move { crate::procfs::handle_uname(¬if, &hostname, notif_fd) - }) - })); - table.register(libc::SYS_openat, Box::new(move |notif, _ctx, notif_fd| { - let hostname = hostname2.clone(); - Box::pin(async move { + } + }); + table.register(libc::SYS_openat, move |cx: &HandlerCtx<'_>| { + let notif = cx.notif; + let notif_fd = cx.notif_fd; + let hostname = hostname_for_open.clone(); + async move { if let Some(action) = crate::procfs::handle_hostname_open(¬if, &hostname, notif_fd) { action } else { NotifAction::Continue } - }) - })); + } + }); } // ------------------------------------------------------------------ // /etc/hosts virtualization (for net_allow_hosts) // ------------------------------------------------------------------ if let Some(ref etc_hosts) = policy.virtual_etc_hosts { - let etc_hosts = etc_hosts.clone(); - table.register(libc::SYS_openat, Box::new(move |notif, _ctx, notif_fd| { - let etc_hosts = etc_hosts.clone(); - Box::pin(async move { + let etc_hosts_for_open = etc_hosts.clone(); + table.register(libc::SYS_openat, move |cx: &HandlerCtx<'_>| { + let notif = cx.notif; + let notif_fd = cx.notif_fd; + let etc_hosts = etc_hosts_for_open.clone(); + async move { if let Some(action) = crate::procfs::handle_etc_hosts_open(¬if, &etc_hosts, notif_fd) { action } else { NotifAction::Continue } - }) - })); + } + }); } // ------------------------------------------------------------------ @@ -412,12 +483,15 @@ pub fn build_dispatch_table( getdents_nrs.push(getdents); } for nr in getdents_nrs { - table.register(nr, Box::new(|notif, ctx, notif_fd| { - let processes = Arc::clone(&ctx.processes); - Box::pin(async move { + table.register(nr, |cx: &HandlerCtx<'_>| { + let notif = cx.notif; + let sup = std::sync::Arc::clone(cx.sup); + let notif_fd = cx.notif_fd; + async move { + let processes = Arc::clone(&sup.processes); crate::procfs::handle_sorted_getdents(¬if, &processes, notif_fd).await - }) - })); + } + }); } } @@ -434,74 +508,92 @@ pub fn build_dispatch_table( // runs first and returns `Continue` for non-cookie fds. // ------------------------------------------------------------------ { - table.register(libc::SYS_socket, Box::new(|notif, ctx, _fd| { - let state = Arc::clone(&ctx.netlink); - Box::pin(async move { + table.register(libc::SYS_socket, |cx: &HandlerCtx<'_>| { + let notif = cx.notif; + let sup = std::sync::Arc::clone(cx.sup); + async move { + let state = Arc::clone(&sup.netlink); crate::netlink::handlers::handle_socket(¬if, &state).await - }) - })); - table.register(libc::SYS_bind, Box::new(|notif, ctx, _fd| { - let state = Arc::clone(&ctx.netlink); - Box::pin(async move { + } + }); + table.register(libc::SYS_bind, |cx: &HandlerCtx<'_>| { + let notif = cx.notif; + let sup = std::sync::Arc::clone(cx.sup); + async move { + let state = Arc::clone(&sup.netlink); crate::netlink::handlers::handle_bind(¬if, &state).await - }) - })); - table.register(libc::SYS_getsockname, Box::new(|notif, ctx, notif_fd| { - let state = Arc::clone(&ctx.netlink); - Box::pin(async move { + } + }); + table.register(libc::SYS_getsockname, |cx: &HandlerCtx<'_>| { + let notif = cx.notif; + let sup = std::sync::Arc::clone(cx.sup); + let notif_fd = cx.notif_fd; + async move { + let state = Arc::clone(&sup.netlink); crate::netlink::handlers::handle_getsockname(¬if, &state, notif_fd).await - }) - })); + } + }); // Zero the msg_name region on recv so glibc sees nl_pid=0 // (the kernel only writes sun_family on unix socketpair recvmsg, // leaving the rest of the buffer as stack garbage otherwise). for &nr in &[libc::SYS_recvfrom, libc::SYS_recvmsg] { - table.register(nr, Box::new(|notif, ctx, notif_fd| { - let state = Arc::clone(&ctx.netlink); - Box::pin(async move { + table.register(nr, |cx: &HandlerCtx<'_>| { + let notif = cx.notif; + let sup = std::sync::Arc::clone(cx.sup); + let notif_fd = cx.notif_fd; + async move { + let state = Arc::clone(&sup.netlink); crate::netlink::handlers::handle_netlink_recvmsg(¬if, &state, notif_fd).await - }) - })); + } + }); } // Unregister on close so the (pid, fd) slot isn't left in the // cookie set once the child reuses the fd for something else. - table.register(libc::SYS_close, Box::new(|notif, ctx, _fd| { - let state = Arc::clone(&ctx.netlink); - Box::pin(async move { + table.register(libc::SYS_close, |cx: &HandlerCtx<'_>| { + let notif = cx.notif; + let sup = std::sync::Arc::clone(cx.sup); + async move { + let state = Arc::clone(&sup.netlink); crate::netlink::handlers::handle_close(¬if, &state).await - }) - })); + } + }); } // ------------------------------------------------------------------ // Bind — on-behalf // ------------------------------------------------------------------ if policy.port_remap || policy.has_net_allowlist { - table.register(libc::SYS_bind, Box::new(|notif, ctx, notif_fd| { - Box::pin(async move { - crate::port_remap::handle_bind(¬if, &ctx.network, notif_fd).await - }) - })); + table.register(libc::SYS_bind, |cx: &HandlerCtx<'_>| { + let notif = cx.notif; + let sup = std::sync::Arc::clone(cx.sup); + let notif_fd = cx.notif_fd; + async move { + crate::port_remap::handle_bind(¬if, &sup.network, notif_fd).await + } + }); } // ------------------------------------------------------------------ // getsockname — port remap // ------------------------------------------------------------------ if policy.port_remap { - table.register(libc::SYS_getsockname, Box::new(|notif, ctx, notif_fd| { - Box::pin(async move { - crate::port_remap::handle_getsockname(¬if, &ctx.network, notif_fd).await - }) - })); + table.register(libc::SYS_getsockname, |cx: &HandlerCtx<'_>| { + let notif = cx.notif; + let sup = std::sync::Arc::clone(cx.sup); + let notif_fd = cx.notif_fd; + async move { + crate::port_remap::handle_getsockname(¬if, &sup.network, notif_fd).await + } + }); } // ------------------------------------------------------------------ - // Extra handlers supplied by the caller of `Sandbox::run_with_extra_handlers`. - // Appended last so builtin handlers keep their security-critical priority - // (chroot path normalization, COW writes, resource accounting). + // Pending user handlers — appended after builtins so builtin handlers + // keep their security-critical priority (chroot path normalization, + // COW writes, resource accounting). // ------------------------------------------------------------------ - for extra in extra_handlers { - table.register(extra.syscall_nr, extra.handler); + for (nr, h) in pending_handlers { + table.register_arc(nr, h); } table @@ -514,14 +606,18 @@ pub fn build_dispatch_table( fn register_chroot_handlers(table: &mut DispatchTable, policy: &Arc) { use crate::chroot::dispatch::ChrootCtx; - // Helper macro to reduce boilerplate for chroot handlers that unconditionally - // return (non-fallthrough). + // Helper macro — produces a closure satisfying Handler via blanket impl. + // The closure clones `policy` (Arc) before the async block; inside the + // async block it borrows fields of that cloned Arc to build `ChrootCtx`. macro_rules! chroot_handler { ($policy:expr, $handler:expr) => {{ let policy = Arc::clone($policy); - let handler_fn: HandlerFn = Box::new(move |notif, ctx, notif_fd| { + move |cx: &HandlerCtx<'_>| { + let notif = cx.notif; + let sup = std::sync::Arc::clone(cx.sup); + let notif_fd = cx.notif_fd; let policy = Arc::clone(&policy); - Box::pin(async move { + async move { let chroot_ctx = ChrootCtx { root: policy.chroot_root.as_ref().unwrap(), readable: &policy.chroot_readable, @@ -529,20 +625,23 @@ fn register_chroot_handlers(table: &mut DispatchTable, policy: &Arc denied: &policy.chroot_denied, mounts: &policy.chroot_mounts, }; - $handler(¬if, &ctx.chroot, &ctx.cow, notif_fd, &chroot_ctx).await - }) - }); - handler_fn + $handler(¬if, &sup.chroot, &sup.cow, notif_fd, &chroot_ctx).await + } + } }}; } - // Helper for chroot handlers that may fall through (return Continue). + // Same shape for fall-through variants (semantically identical here; + // kept separate for symmetry with the old code). macro_rules! chroot_handler_fallthrough { ($policy:expr, $handler:expr) => {{ let policy = Arc::clone($policy); - let handler_fn: HandlerFn = Box::new(move |notif, ctx, notif_fd| { + move |cx: &HandlerCtx<'_>| { + let notif = cx.notif; + let sup = std::sync::Arc::clone(cx.sup); + let notif_fd = cx.notif_fd; let policy = Arc::clone(&policy); - Box::pin(async move { + async move { let chroot_ctx = ChrootCtx { root: policy.chroot_root.as_ref().unwrap(), readable: &policy.chroot_readable, @@ -550,10 +649,9 @@ fn register_chroot_handlers(table: &mut DispatchTable, policy: &Arc denied: &policy.chroot_denied, mounts: &policy.chroot_mounts, }; - $handler(¬if, &ctx.chroot, &ctx.cow, notif_fd, &chroot_ctx).await - }) - }); - handler_fn + $handler(¬if, &sup.chroot, &sup.cow, notif_fd, &chroot_ctx).await + } + } }}; } @@ -615,10 +713,13 @@ fn register_chroot_handlers(table: &mut DispatchTable, policy: &Arc // chown — non-follow if let Some(chown) = arch::SYS_CHOWN { - let policy = Arc::clone(policy); - table.register(chown, Box::new(move |notif, ctx, notif_fd| { - let policy = Arc::clone(&policy); - Box::pin(async move { + let policy_for_chown = Arc::clone(policy); + table.register(chown, move |cx: &HandlerCtx<'_>| { + let notif = cx.notif; + let sup = std::sync::Arc::clone(cx.sup); + let notif_fd = cx.notif_fd; + let policy = Arc::clone(&policy_for_chown); + async move { let chroot_ctx = ChrootCtx { root: policy.chroot_root.as_ref().unwrap(), readable: &policy.chroot_readable, @@ -626,17 +727,20 @@ fn register_chroot_handlers(table: &mut DispatchTable, policy: &Arc denied: &policy.chroot_denied, mounts: &policy.chroot_mounts, }; - crate::chroot::dispatch::handle_chroot_legacy_chown(¬if, &ctx.chroot, &ctx.cow, notif_fd, &chroot_ctx, false).await - }) - })); + crate::chroot::dispatch::handle_chroot_legacy_chown(¬if, &sup.chroot, &sup.cow, notif_fd, &chroot_ctx, false).await + } + }); } // lchown — follow if let Some(lchown) = arch::SYS_LCHOWN { - let policy = Arc::clone(policy); - table.register(lchown, Box::new(move |notif, ctx, notif_fd| { - let policy = Arc::clone(&policy); - Box::pin(async move { + let policy_for_lchown = Arc::clone(policy); + table.register(lchown, move |cx: &HandlerCtx<'_>| { + let notif = cx.notif; + let sup = std::sync::Arc::clone(cx.sup); + let notif_fd = cx.notif_fd; + let policy = Arc::clone(&policy_for_lchown); + async move { let chroot_ctx = ChrootCtx { root: policy.chroot_root.as_ref().unwrap(), readable: &policy.chroot_readable, @@ -644,9 +748,9 @@ fn register_chroot_handlers(table: &mut DispatchTable, policy: &Arc denied: &policy.chroot_denied, mounts: &policy.chroot_mounts, }; - crate::chroot::dispatch::handle_chroot_legacy_chown(¬if, &ctx.chroot, &ctx.cow, notif_fd, &chroot_ctx, true).await - }) - })); + crate::chroot::dispatch::handle_chroot_legacy_chown(¬if, &sup.chroot, &sup.cow, notif_fd, &chroot_ctx, true).await + } + }); } // stat family @@ -711,14 +815,19 @@ fn register_chroot_handlers(table: &mut DispatchTable, policy: &Arc // ============================================================ fn register_cow_handlers(table: &mut DispatchTable) { - // Helper to grab cow + processes from ctx in one place. + // Helper to grab cow + processes from cx.sup in one place. macro_rules! cow_call { ($handler:expr) => { - Box::new(|notif, ctx, notif_fd| { - let cow = Arc::clone(&ctx.cow); - let processes = Arc::clone(&ctx.processes); - Box::pin(async move { $handler(¬if, &cow, &processes, notif_fd).await }) - }) + |cx: &HandlerCtx<'_>| { + let notif = cx.notif; + let sup = std::sync::Arc::clone(cx.sup); + let notif_fd = cx.notif_fd; + async move { + let cow = Arc::clone(&sup.cow); + let processes = Arc::clone(&sup.processes); + $handler(¬if, &cow, &processes, notif_fd).await + } + } }; } @@ -840,9 +949,9 @@ mod extra_handler_tests { has_net_allowlist: false, has_random_seed: false, has_time_start: false, - argv_safety_required: false, time_offset: 0, num_cpus: None, + argv_safety_required: false, port_remap: false, cow_enabled: false, chroot_root: None, @@ -860,15 +969,6 @@ mod extra_handler_tests { }) } - #[test] - fn extra_handler_ctor_preserves_fields() { - let h: HandlerFn = Box::new(|_notif, _ctx, _fd| { - Box::pin(async { NotifAction::Continue }) - }); - let eh = ExtraHandler::new(libc::SYS_openat, h); - assert_eq!(eh.syscall_nr, libc::SYS_openat); - } - /// All registered handlers run, in registration order, when each /// returns `Continue`. Verifies that `register` appends to the /// underlying `Vec` and that `dispatch` walks it front-to-back. @@ -878,16 +978,16 @@ mod extra_handler_tests { let order = Arc::new(std::sync::Mutex::new(Vec::::new())); for tag in [1u8, 2u8, 3u8] { - let order = Arc::clone(&order); + let order_clone = Arc::clone(&order); table.register( libc::SYS_openat, - Box::new(move |_notif, _ctx, _fd| { - let order = Arc::clone(&order); - Box::pin(async move { + move |_cx: &HandlerCtx<'_>| { + let order = Arc::clone(&order_clone); + async move { order.lock().unwrap().push(tag); NotifAction::Continue - }) - }), + } + }, ); } @@ -905,13 +1005,13 @@ mod extra_handler_tests { ); } - /// Append-after-builtin contract: when an `ExtraHandler` is registered - /// after a builtin-like handler, dispatch invokes the builtin first - /// and the extra second. This is the security-load-bearing invariant — + /// Append-after-builtin contract: when a user handler is registered + /// after a builtin, dispatch invokes the builtin first and the + /// user handler second. This is the security-load-bearing invariant — /// a builtin returning a non-`Continue` `NotifAction` must short-circuit - /// before the extra runs (covered by + /// before the user handler runs (covered by /// `dispatch_stops_at_first_non_continue`); when the builtin returns - /// `Continue`, the extra observes the post-builtin view. + /// `Continue`, the user handler observes the post-builtin view. #[tokio::test] async fn dispatch_runs_builtin_before_extra() { let mut table = DispatchTable::new(); @@ -921,29 +1021,28 @@ mod extra_handler_tests { let order_builtin = Arc::clone(&order); table.register( libc::SYS_openat, - Box::new(move |_notif, _ctx, _fd| { + move |_cx: &HandlerCtx<'_>| { let order = Arc::clone(&order_builtin); - Box::pin(async move { + async move { order.lock().unwrap().push(b'B'); NotifAction::Continue - }) - }), + } + }, ); - // Extra after, tagged 'E'. Routed through `ExtraHandler` to mirror - // how `build_dispatch_table` consumes user-supplied handlers. + // Extra after, tagged 'E'. Registered after builtin to mirror + // append-after-builtin placement from `build_dispatch_table`. let order_extra = Arc::clone(&order); - let extra = ExtraHandler::new( + table.register( libc::SYS_openat, - Box::new(move |_notif, _ctx, _fd| { + move |_cx: &HandlerCtx<'_>| { let order = Arc::clone(&order_extra); - Box::pin(async move { + async move { order.lock().unwrap().push(b'E'); NotifAction::Continue - }) - }), + } + }, ); - table.register(extra.syscall_nr, extra.handler); let ctx = fake_supervisor_ctx(); let action = table @@ -974,26 +1073,26 @@ mod extra_handler_tests { let calls_first = Arc::clone(&calls); table.register( libc::SYS_openat, - Box::new(move |_notif, _ctx, _fd| { + move |_cx: &HandlerCtx<'_>| { let calls = Arc::clone(&calls_first); - Box::pin(async move { + async move { calls.fetch_add(1, Ordering::SeqCst); NotifAction::Errno(libc::EACCES) - }) - }), + } + }, ); // Second handler — must NOT be called. let calls_second = Arc::clone(&calls); table.register( libc::SYS_openat, - Box::new(move |_notif, _ctx, _fd| { + move |_cx: &HandlerCtx<'_>| { let calls = Arc::clone(&calls_second); - Box::pin(async move { + async move { calls.fetch_add(1, Ordering::SeqCst); NotifAction::Continue - }) - }), + } + }, ); let ctx = fake_supervisor_ctx(); @@ -1012,24 +1111,12 @@ mod extra_handler_tests { ); } - #[test] - fn extras_vec_empty_leaves_table_without_change() { - // build_dispatch_table with empty extras should not add any entries. - // We verify the for-loop degenerates to nop. - let extras: Vec = Vec::new(); - let mut handler_count = 0usize; - for _ in extras { - handler_count += 1; - } - assert_eq!(handler_count, 0, "empty extras registers zero handlers"); - } - - /// `validate_extras_against_policy` must reject extras whose syscall is in - /// the policy's user-specified `deny_syscalls` list, with the same - /// rationale as DEFAULT_DENY: the BPF program emits notif JEQs before - /// deny JEQs, so a user handler returning `Continue` would translate into - /// `SECCOMP_USER_NOTIF_FLAG_CONTINUE` and silently bypass the kernel-level - /// deny. + /// `validate_handler_syscalls_against_policy` must reject handlers whose + /// syscall is in the policy's user-specified `deny_syscalls` list, with + /// the same rationale as DEFAULT_DENY: the BPF program emits notif JEQs + /// before deny JEQs, so a user handler returning `Continue` would + /// translate into `SECCOMP_USER_NOTIF_FLAG_CONTINUE` and silently bypass + /// the kernel-level deny. /// /// Uses `mremap` because it is in `syscall_name_to_nr` but not in /// `DEFAULT_DENY_SYSCALLS` — putting it into `deny_syscalls` is the only @@ -1046,15 +1133,86 @@ mod extra_handler_tests { .deny_syscalls(vec!["mremap".into()]) .build() .expect("policy builds"); - let handler: HandlerFn = - Box::new(|_notif, _ctx, _fd| Box::pin(async { NotifAction::Continue })); - let extras = vec![ExtraHandler::new(libc::SYS_mremap, handler)]; - let result = validate_extras_against_policy(&extras, &policy); + let result = validate_handler_syscalls_against_policy(&[libc::SYS_mremap], &policy); assert_eq!( result, Err(libc::SYS_mremap as u32), - "extras on user-specified deny must be rejected, naming the offending syscall" + "handler on user-specified deny must be rejected, naming the offending syscall" + ); + } + + // ---- Handler trait tests -------------------------------------- + + #[tokio::test] + async fn handler_via_blanket_impl_dispatches_closures() { + use std::sync::atomic::{AtomicU64, Ordering}; + let counter = Arc::new(AtomicU64::new(0)); + let counter_clone = Arc::clone(&counter); + + let h = move |cx: &HandlerCtx<'_>| { + let counter = Arc::clone(&counter_clone); + async move { + counter.fetch_add(1, Ordering::SeqCst); + let _ = cx.notif.pid; // touch ctx so it's exercised + NotifAction::Continue + } + }; + + let sup = fake_supervisor_ctx(); + let notif = fake_notif(libc::SYS_openat as i32); + let cx = HandlerCtx { notif, sup: &sup, notif_fd: -1 }; + + let action = h.handle(&cx).await; + assert!(matches!(action, NotifAction::Continue)); + assert_eq!(counter.load(Ordering::SeqCst), 1); + } + + /// Struct-based `Handler` registered through `DispatchTable::register` + /// MUST be invoked when `dispatch()` walks the chain — and `&self` + /// state MUST persist across notifications. Bridges the gap between + /// the trait-shape unit tests above (which call `.handle()` directly) + /// and the dispatch ordering tests (which use closures via blanket + /// impl). Without this test, a regression where the dispatch walker + /// dropped `Arc` calls but kept closures working would + /// not be caught at the unit layer. + #[tokio::test] + async fn dispatch_invokes_struct_handler_with_persistent_self_state() { + use std::sync::atomic::{AtomicU64, Ordering}; + + struct StructHandler { + calls: AtomicU64, + } + + #[async_trait::async_trait] + impl Handler for StructHandler { + async fn handle(&self, _cx: &HandlerCtx<'_>) -> NotifAction { + self.calls.fetch_add(1, Ordering::SeqCst); + NotifAction::Continue + } + } + + let mut table = DispatchTable::new(); + let handler = std::sync::Arc::new(StructHandler { + calls: AtomicU64::new(0), + }); + table.register_arc(libc::SYS_openat, handler.clone() as std::sync::Arc); + + let sup = fake_supervisor_ctx(); + let notif = fake_notif(libc::SYS_openat as i32); + + // Three independent dispatches against the same registered handler. + // Walker MUST hit the struct's handle() each time, accumulating + // state on &self.calls. + for _ in 0..3 { + let action = table.dispatch(notif, &sup, -1).await; + assert!(matches!(action, NotifAction::Continue)); + } + + assert_eq!( + handler.calls.load(Ordering::SeqCst), + 3, + "dispatch must invoke the struct-based handler on every walk" ); } } diff --git a/crates/sandlock-core/src/seccomp/notif.rs b/crates/sandlock-core/src/seccomp/notif.rs index 19a900b..45c4366 100644 --- a/crates/sandlock-core/src/seccomp/notif.rs +++ b/crates/sandlock-core/src/seccomp/notif.rs @@ -1087,12 +1087,14 @@ async fn handle_notification( /// Runs until the notification fd is closed (child exits or filter is removed). /// /// `extra_handlers` are user-supplied syscall handlers registered after all -/// builtin handlers (see [`super::dispatch::ExtraHandler`]). For the default -/// behaviour without any custom handlers pass an empty `Vec`. +/// builtin handlers (see [`super::dispatch::Handler`]). Each entry is a +/// `(syscall_nr, Arc)` pair already validated against the +/// policy. For the default behaviour without any custom handlers pass +/// an empty `Vec`. pub async fn supervisor( notif_fd: OwnedFd, ctx: Arc, - extra_handlers: Vec, + extra_handlers: Vec<(i64, Arc)>, ) { let fd = notif_fd.as_raw_fd(); From 46b7477faee0edde2d5041cc3d75db18f343f2da Mon Sep 17 00:00:00 2001 From: dzerik Date: Tue, 5 May 2026 01:38:19 +0300 Subject: [PATCH 3/9] feat(seccomp): expose read_child_cstr as public API MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Promotes pub(crate) fn read_child_cstr to pub. Downstream Handler implementations reading path arguments (openat / unlinkat / statx / newfstatat / etc.) need TOCTOU-safe NUL-terminated string reads; without public access, every consumer crate duplicates the page-aware read loop. Also tightens the doc-comments of read_child_mem and write_child_mem to refer generically to "downstream Handler implementations" rather than naming any specific consumer crate — a forward-compatible phrasing as more crates start using these helpers. Precedent: the sibling read_child_mem and write_child_mem were promoted to pub in 272ae0d (the same commit chain that introduced the original ExtraHandler API). Same rationale here. Adds 1 unit test covering the null-addr / zero-max_len short-circuits. 235 lib unit tests pass. --- crates/sandlock-core/src/seccomp/notif.rs | 47 ++++++++++++++++------- 1 file changed, 33 insertions(+), 14 deletions(-) diff --git a/crates/sandlock-core/src/seccomp/notif.rs b/crates/sandlock-core/src/seccomp/notif.rs index 45c4366..3a229a3 100644 --- a/crates/sandlock-core/src/seccomp/notif.rs +++ b/crates/sandlock-core/src/seccomp/notif.rs @@ -423,9 +423,9 @@ fn write_child_mem_vm(pid: u32, addr: u64, data: &[u8]) -> Result<(), NotifError /// still live (kernel did not abort or release the trapped syscall while the /// supervisor was reading guest memory). /// -/// Public — used by downstream `ExtraHandler`s (sandbox-sber/vfs-engine etc.) -/// to read syscall arguments that the kernel passes by pointer (paths in -/// `openat`, buffers in `write`/`writev`). +/// Public — used by downstream `Handler` implementations to read syscall +/// arguments that the kernel passes by pointer (paths in `openat`, buffers +/// in `write`/`writev`, etc.). pub fn read_child_mem( notif_fd: RawFd, id: u64, @@ -441,7 +441,19 @@ pub fn read_child_mem( /// Read a NUL-terminated string from child memory without crossing unmapped /// page boundaries in a single `process_vm_readv` call. -pub(crate) fn read_child_cstr( +/// +/// TOCTOU-safe — internally calls [`read_child_mem`], inheriting the +/// `id_valid` checks bracketing each `process_vm_readv` call. +/// +/// Page-aware: reads up to a page boundary at a time and stops at the +/// first NUL byte, never crossing into unmapped memory. Returns +/// `None` for `addr == 0`, `max_len == 0`, a read failure, or a string +/// that exceeds `max_len` without a NUL. +/// +/// Public — used by downstream `Handler` implementations that read +/// path arguments from notifications (`openat`, `unlinkat`, `statx`, +/// `newfstatat`, etc.). +pub fn read_child_cstr( notif_fd: RawFd, id: u64, pid: u32, @@ -473,9 +485,10 @@ pub(crate) fn read_child_cstr( /// Write bytes to a child process via `process_vm_writev` with TOCTOU validation. /// -/// Same TOCTOU contract as [`read_child_mem`]. Public for downstream -/// `ExtraHandler`s that synthesise syscall results into guest memory -/// (e.g. fake `getdents64` listings populated from a virtual tree-index). +/// Same TOCTOU contract as [`read_child_mem`]. Public for downstream +/// `Handler` implementations that synthesise syscall results into +/// guest memory (e.g. fake `getdents64` listings populated from a +/// virtual directory index, or synthesised `stat` buffers). pub fn write_child_mem( notif_fd: RawFd, id: u64, @@ -1086,15 +1099,13 @@ async fn handle_notification( /// /// Runs until the notification fd is closed (child exits or filter is removed). /// -/// `extra_handlers` are user-supplied syscall handlers registered after all -/// builtin handlers (see [`super::dispatch::Handler`]). Each entry is a -/// `(syscall_nr, Arc)` pair already validated against the -/// policy. For the default behaviour without any custom handlers pass -/// an empty `Vec`. +/// `pending_handlers` are user-supplied syscall handlers registered after all +/// builtin handlers. For the default behaviour without any custom handlers +/// pass an empty `Vec`. pub async fn supervisor( notif_fd: OwnedFd, ctx: Arc, - extra_handlers: Vec<(i64, Arc)>, + pending_handlers: Vec<(i64, std::sync::Arc)>, ) { let fd = notif_fd.as_raw_fd(); @@ -1102,7 +1113,7 @@ pub async fn supervisor( let dispatch_table = Arc::new(super::dispatch::build_dispatch_table( &ctx.policy, &ctx.resource, - extra_handlers, + pending_handlers, )); // Try to enable sync wakeup (Linux 6.7+, ignore error on older kernels). @@ -1214,6 +1225,14 @@ pub(crate) async fn cleanup_pid(ctx: &super::ctx::SupervisorCtx, key: super::sta mod tests { use super::*; + #[test] + fn read_child_cstr_returns_none_for_null_addr_or_zero_max_len() { + // Smoke: addr == 0 short-circuits without touching the child. + assert!(read_child_cstr(-1, 0, 0, 0, 4096).is_none()); + // max_len == 0 also short-circuits. + assert!(read_child_cstr(-1, 0, 0, 0xdeadbeef, 0).is_none()); + } + #[test] fn test_notif_action_debug() { // Ensure all variants implement Debug. From 1daac9b1ae91d501353ae3567ede172ef325aa6e Mon Sep 17 00:00:00 2001 From: dzerik Date: Tue, 5 May 2026 01:39:33 +0300 Subject: [PATCH 4/9] chore: re-export Handler types at crate root Adds re-exports in `lib.rs` so consumers can write `sandlock_core::Handler` instead of `sandlock_core::seccomp::dispatch::Handler`, mirroring the existing pattern for `Sandbox`, `Policy`, `SandlockError`, etc. Re-exported: - `Handler`, `HandlerCtx`, `HandlerError` (from `seccomp::dispatch`) - `Syscall`, `SyscallError` (from `seccomp::syscall`) Updates the integration test imports to use the re-exports as a documentation-by-example of the recommended path. --- crates/sandlock-core/src/lib.rs | 4 + .../tests/integration/test_extra_handlers.rs | 453 ++++++++++++++---- 2 files changed, 358 insertions(+), 99 deletions(-) diff --git a/crates/sandlock-core/src/lib.rs b/crates/sandlock-core/src/lib.rs index e0d3533..4b450ba 100644 --- a/crates/sandlock-core/src/lib.rs +++ b/crates/sandlock-core/src/lib.rs @@ -35,6 +35,10 @@ pub use sandbox::Sandbox; pub use pipeline::{Stage, Pipeline, Gather}; pub use dry_run::{Change, ChangeKind, DryRunResult}; +// Public extension API — see docs/extension-handlers.md. +pub use seccomp::dispatch::{Handler, HandlerCtx, HandlerError}; +pub use seccomp::syscall::{Syscall, SyscallError}; + /// Query the Landlock ABI version supported by the running kernel. pub fn landlock_abi_version() -> Result { landlock::abi_version() diff --git a/crates/sandlock-core/tests/integration/test_extra_handlers.rs b/crates/sandlock-core/tests/integration/test_extra_handlers.rs index 65b70be..76b2f5e 100644 --- a/crates/sandlock-core/tests/integration/test_extra_handlers.rs +++ b/crates/sandlock-core/tests/integration/test_extra_handlers.rs @@ -1,34 +1,33 @@ -//! Integration tests for the user-supplied `ExtraHandler` API. +//! Integration tests for the user-supplied `Handler` extension API +//! (`Sandbox::run_with_extra_handlers`). //! //! These tests exercise the full plumbing through the kernel: the guest //! issues a syscall, the BPF filter raises a `USER_NOTIF`, the supervisor -//! walks the dispatch chain (builtins first, extras last) and the kernel -//! applies the `NotifAction` returned by the extra handler. Any of the +//! walks the dispatch chain (builtins first, user handlers last) and the +//! kernel applies the `NotifAction` returned by the handler. Any of the //! following regressions would break them: //! -//! * extra-handler syscalls not added to the BPF filter → kernel never +//! * user-handler syscalls not added to the BPF filter → kernel never //! raises a notification, the handler silently never fires; -//! * extras registered before builtins → handler observes pre-builtin -//! arguments (e.g. unnormalized chroot paths) or short-circuits a -//! security-critical builtin; +//! * user handlers registered before builtins → handler observes +//! pre-builtin arguments (e.g. unnormalized chroot paths) or +//! short-circuits a security-critical builtin; //! * `Continue` not translated to `SECCOMP_USER_NOTIF_FLAG_CONTINUE` → //! observe-only handlers wedge the guest. //! //! Each test uses `SYS_getcwd` because under the default policy it is -//! **not** intercepted by any builtin (`getcwd` is added only for -//! chroot or COW path virtualization). This isolates the behaviour under test -//! to the extras path. The guest must run `/bin/pwd` (the binary), not -//! `pwd` (the shell builtin which reads `$PWD` and never issues the -//! syscall) — otherwise any errno injected by an extra handler can't -//! reach the user-visible exit code. +//! **not** intercepted by any builtin (`uname` is added only when the +//! policy sets a `hostname`). This isolates the behaviour under test +//! to the user-handler path. use std::path::PathBuf; use std::sync::atomic::{AtomicUsize, Ordering}; use std::sync::{Arc, Mutex}; -use sandlock_core::seccomp::dispatch::{ExtraHandler, HandlerFn}; use sandlock_core::seccomp::notif::NotifAction; -use sandlock_core::{Policy, Sandbox}; +use sandlock_core::{ + Handler, HandlerCtx, HandlerError, Policy, Sandbox, SandlockError, SyscallError, +}; /// Read a NUL-terminated path from the sandboxed child's address space. /// @@ -89,7 +88,7 @@ fn temp_out(name: &str) -> PathBuf { /// does not intercept (`SYS_getcwd`) MUST receive notifications and its /// `NotifAction::Errno` MUST surface in the guest as the corresponding /// errno. This is the security contract: without BPF plumbing the -/// kernel would never raise USER_NOTIF for `getcwd` and the handler +/// kernel would never raise USER_NOTIF for `uname` and the handler /// would silently never fire — the maintainer-cited footgun. #[tokio::test] async fn extra_handler_intercepts_syscall_outside_builtin_set() { @@ -99,19 +98,22 @@ async fn extra_handler_intercepts_syscall_outside_builtin_set() { let calls = Arc::new(AtomicUsize::new(0)); let calls_in_handler = Arc::clone(&calls); - let handler: HandlerFn = Box::new(move |_notif, _ctx, _fd| { + let handler = move |_cx: &HandlerCtx<'_>| { let calls = Arc::clone(&calls_in_handler); - Box::pin(async move { + async move { calls.fetch_add(1, Ordering::SeqCst); NotifAction::Errno(libc::EACCES) - }) - }); - - let extras = vec![ExtraHandler::new(libc::SYS_getcwd, handler)]; + } + }; - let result = Sandbox::run_with_extra_handlers(&policy, Some("test"), &["sh", "-c", &cmd], extras) - .await - .expect("sandbox spawn failed"); + let result = Sandbox::run_with_extra_handlers( + &policy, + None, + &["sh", "-c", &cmd], + [(libc::SYS_getcwd, handler)], + ) + .await + .expect("sandbox spawn failed"); let contents = std::fs::read_to_string(&out).unwrap_or_default(); let _ = std::fs::remove_file(&out); @@ -124,7 +126,7 @@ async fn extra_handler_intercepts_syscall_outside_builtin_set() { ); assert_ne!( code, 0, - "getcwd must observe the errno injected by the extra handler" + "uname must observe the errno injected by the extra handler" ); } @@ -139,19 +141,22 @@ async fn extra_handler_continue_lets_syscall_proceed() { let calls = Arc::new(AtomicUsize::new(0)); let calls_in_handler = Arc::clone(&calls); - let handler: HandlerFn = Box::new(move |_notif, _ctx, _fd| { + let handler = move |_cx: &HandlerCtx<'_>| { let calls = Arc::clone(&calls_in_handler); - Box::pin(async move { + async move { calls.fetch_add(1, Ordering::SeqCst); NotifAction::Continue - }) - }); - - let extras = vec![ExtraHandler::new(libc::SYS_getcwd, handler)]; + } + }; - let result = Sandbox::run_with_extra_handlers(&policy, Some("test"), &["sh", "-c", &cmd], extras) - .await - .expect("sandbox spawn failed"); + let result = Sandbox::run_with_extra_handlers( + &policy, + None, + &["sh", "-c", &cmd], + [(libc::SYS_getcwd, handler)], + ) + .await + .expect("sandbox spawn failed"); let contents = std::fs::read_to_string(&out).unwrap_or_default(); let _ = std::fs::remove_file(&out); @@ -164,18 +169,20 @@ async fn extra_handler_continue_lets_syscall_proceed() { ); assert_eq!( code, 0, - "Continue must let the kernel execute getcwd normally" + "Continue must let the kernel execute uname normally" ); } -/// `Sandbox::run_with_extra_handlers(_, _, _, vec![])` must be observably -/// identical to `Sandbox::run(_, _, _)`. +/// `Sandbox::run_with_extra_handlers(_, _, vec![])` must be observably +/// identical to `Sandbox::run(_, _)`. Guards the documented backwards +/// compatibility contract. #[tokio::test] async fn empty_extras_preserves_default_behaviour() { let policy = base_policy().build().unwrap(); - let baseline = Sandbox::run(&policy, Some("test"), &["pwd"]).await.unwrap(); - let with_extras = Sandbox::run_with_extra_handlers(&policy, Some("test"), &["pwd"], Vec::new()) + let baseline = Sandbox::run(&policy, None, &["/bin/pwd"]).await.unwrap(); + let no_handlers: [(i64, fn(&HandlerCtx<'_>) -> std::future::Ready); 0] = []; + let with_extras = Sandbox::run_with_extra_handlers(&policy, None, &["/bin/pwd"], no_handlers) .await .unwrap(); @@ -196,26 +203,29 @@ async fn empty_extras_preserves_default_behaviour() { async fn extra_handler_runs_after_builtin_returns_continue() { let policy = base_policy().build().unwrap(); let out = temp_out("openat-cross"); - let cmd = format!("cat /etc/passwd; echo $? > {}", out.display()); + let cmd = format!("cat /etc/hostname; echo $? > {}", out.display()); let openat_calls = Arc::new(AtomicUsize::new(0)); let openat_in_handler = Arc::clone(&openat_calls); - let handler: HandlerFn = Box::new(move |_notif, _ctx, _fd| { + let handler = move |_cx: &HandlerCtx<'_>| { let openat_calls = Arc::clone(&openat_in_handler); - Box::pin(async move { + async move { openat_calls.fetch_add(1, Ordering::SeqCst); // Continue lets the kernel resume the syscall — the builtin // already returned Continue for non-/proc paths and this // handler must not break the chain. NotifAction::Continue - }) - }); - - let extras = vec![ExtraHandler::new(libc::SYS_openat, handler)]; + } + }; - let result = Sandbox::run_with_extra_handlers(&policy, Some("test"), &["sh", "-c", &cmd], extras) - .await - .expect("sandbox spawn failed"); + let result = Sandbox::run_with_extra_handlers( + &policy, + None, + &["sh", "-c", &cmd], + [(libc::SYS_openat, handler)], + ) + .await + .expect("sandbox spawn failed"); let contents = std::fs::read_to_string(&out).unwrap_or_default(); let _ = std::fs::remove_file(&out); @@ -248,38 +258,42 @@ async fn builtin_non_continue_blocks_extra() { let policy = base_policy().build().unwrap(); let out = temp_out("openat-blocked-by-builtin"); let cmd = format!( - "cat /proc/1/cmdline; cat /etc/passwd; echo $? > {}", + "cat /proc/1/cmdline; cat /etc/hostname; echo $? > {}", out.display() ); let observed: Arc>> = Arc::new(Mutex::new(Vec::new())); let observed_in_handler = Arc::clone(&observed); - let handler: HandlerFn = Box::new(move |notif, _ctx, _fd| { + let handler = move |cx: &HandlerCtx<'_>| { let observed = Arc::clone(&observed_in_handler); - Box::pin(async move { + let notif = cx.notif; + async move { // openat(dirfd, pathname, flags, mode) → args[1] is the path let path_addr = notif.data.args[1]; if let Some(p) = read_path_from_child(notif.pid, path_addr) { observed.lock().unwrap().push(p); } NotifAction::Continue - }) - }); - - let extras = vec![ExtraHandler::new(libc::SYS_openat, handler)]; + } + }; - let _ = Sandbox::run_with_extra_handlers(&policy, Some("test"), &["sh", "-c", &cmd], extras) - .await - .expect("sandbox spawn failed"); + let _ = Sandbox::run_with_extra_handlers( + &policy, + None, + &["sh", "-c", &cmd], + [(libc::SYS_openat, handler)], + ) + .await + .expect("sandbox spawn failed"); let _ = std::fs::remove_file(&out); let paths = observed.lock().unwrap(); - let saw_etc_passwd = paths.iter().any(|p| p == "/etc/passwd"); + let saw_etc_hostname = paths.iter().any(|p| p == "/etc/hostname"); let saw_proc_pid = paths.iter().any(|p| p.starts_with("/proc/1/")); assert!( - saw_etc_passwd, + saw_etc_hostname, "extra must observe non-blocked openats, got paths: {:?}", *paths, ); @@ -302,6 +316,26 @@ async fn builtin_non_continue_blocks_extra() { /// short-circuit the chain before `extra1` ran. #[tokio::test] async fn chain_of_extras_runs_in_insertion_order() { + // Two struct instances with the same concrete type keep the iterator's + // `H` parameter homogeneous; an `id` field plus a configurable action + // distinguishes their behaviour. + struct Counter { + c: Arc, + action: NotifAction, + } + + #[async_trait::async_trait] + impl Handler for Counter { + async fn handle(&self, _cx: &HandlerCtx<'_>) -> NotifAction { + self.c.fetch_add(1, Ordering::SeqCst); + match self.action { + NotifAction::Continue => NotifAction::Continue, + NotifAction::Errno(e) => NotifAction::Errno(e), + _ => unreachable!("test only uses Continue / Errno"), + } + } + } + let policy = base_policy().build().unwrap(); let out = temp_out("chain-order"); let cmd = format!("/bin/pwd; echo $? > {}", out.display()); @@ -309,32 +343,23 @@ async fn chain_of_extras_runs_in_insertion_order() { let c1 = Arc::new(AtomicUsize::new(0)); let c2 = Arc::new(AtomicUsize::new(0)); - let c1_in_h = Arc::clone(&c1); - let h1: HandlerFn = Box::new(move |_n, _c, _f| { - let c = Arc::clone(&c1_in_h); - Box::pin(async move { - c.fetch_add(1, Ordering::SeqCst); - NotifAction::Continue - }) - }); - - let c2_in_h = Arc::clone(&c2); - let h2: HandlerFn = Box::new(move |_n, _c, _f| { - let c = Arc::clone(&c2_in_h); - Box::pin(async move { - c.fetch_add(1, Ordering::SeqCst); - NotifAction::Errno(libc::EACCES) - }) - }); - - let extras = vec![ - ExtraHandler::new(libc::SYS_getcwd, h1), - ExtraHandler::new(libc::SYS_getcwd, h2), - ]; + let h1 = Counter { + c: Arc::clone(&c1), + action: NotifAction::Continue, + }; + let h2 = Counter { + c: Arc::clone(&c2), + action: NotifAction::Errno(libc::EACCES), + }; - let result = Sandbox::run_with_extra_handlers(&policy, Some("test"), &["sh", "-c", &cmd], extras) - .await - .expect("sandbox spawn failed"); + let result = Sandbox::run_with_extra_handlers( + &policy, + None, + &["sh", "-c", &cmd], + [(libc::SYS_getcwd, h1), (libc::SYS_getcwd, h2)], + ) + .await + .expect("sandbox spawn failed"); let contents = std::fs::read_to_string(&out).unwrap_or_default(); let _ = std::fs::remove_file(&out); @@ -352,7 +377,7 @@ async fn chain_of_extras_runs_in_insertion_order() { ); assert_ne!( code, 0, - "getcwd must observe the EACCES injected by the second handler" + "uname must observe the EACCES injected by the second handler" ); } @@ -366,12 +391,15 @@ async fn chain_of_extras_runs_in_insertion_order() { #[tokio::test] async fn extra_handler_on_default_deny_syscall_is_rejected() { let policy = base_policy().build().unwrap(); - let handler: HandlerFn = Box::new(|_notif, _ctx, _fd| { - Box::pin(async { NotifAction::Continue }) - }); - let extras = vec![ExtraHandler::new(libc::SYS_mount, handler)]; + let handler = |_cx: &HandlerCtx<'_>| async { NotifAction::Continue }; - let result = Sandbox::run_with_extra_handlers(&policy, Some("test"), &["true"], extras).await; + let result = Sandbox::run_with_extra_handlers( + &policy, + None, + &["true"], + [(libc::SYS_mount, handler)], + ) + .await; assert!( result.is_err(), @@ -379,7 +407,7 @@ async fn extra_handler_on_default_deny_syscall_is_rejected() { ); let msg = format!("{}", result.unwrap_err()); assert!( - msg.contains("default-deny") || msg.contains("bypass"), + msg.contains("deny") || msg.contains("bypass"), "error must explain why the registration is rejected, got: {}", msg ); @@ -403,12 +431,15 @@ async fn extra_handler_on_user_specified_deny_is_rejected() { .deny_syscalls(vec!["mremap".into()]) .build() .unwrap(); - let handler: HandlerFn = Box::new(|_notif, _ctx, _fd| { - Box::pin(async { NotifAction::Continue }) - }); - let extras = vec![ExtraHandler::new(libc::SYS_mremap, handler)]; + let handler = |_cx: &HandlerCtx<'_>| async { NotifAction::Continue }; - let result = Sandbox::run_with_extra_handlers(&policy, Some("test"), &["true"], extras).await; + let result = Sandbox::run_with_extra_handlers( + &policy, + None, + &["true"], + [(libc::SYS_mremap, handler)], + ) + .await; assert!( result.is_err(), @@ -427,3 +458,227 @@ async fn extra_handler_on_user_specified_deny_is_rejected() { msg ); } + +// ============================================================ +// New Handler trait API — integration tests +// ============================================================ + +/// A closure-shaped handler (via the blanket `impl Handler for F`) +/// passed to `run_with_extra_handlers` MUST observe notifications and the +/// guest MUST see the handler's `Errno`. This verifies the parameter-type +/// rework on `run_with_extra_handlers` doesn't drop notifications. +#[tokio::test] +async fn handler_via_blanket_impl_dispatches_in_sandbox() { + let policy = base_policy().build().unwrap(); + let out = temp_out("blanket-impl-eacces"); + let cmd = format!("/bin/pwd; echo $? > {}", out.display()); + + let calls = Arc::new(AtomicUsize::new(0)); + let calls_in_handler = Arc::clone(&calls); + let handler = move |_cx: &HandlerCtx<'_>| { + let calls = Arc::clone(&calls_in_handler); + async move { + calls.fetch_add(1, Ordering::SeqCst); + NotifAction::Errno(libc::EACCES) + } + }; + + let _result = Sandbox::run_with_extra_handlers( + &policy, + None, + &["sh", "-c", &cmd], + [(libc::SYS_getcwd, handler)], + ) + .await + .expect("run_with_extra_handlers"); + + assert!( + calls.load(Ordering::SeqCst) > 0, + "handler must have fired through BPF -> notif -> dispatch" + ); + + let exit_code = std::fs::read_to_string(&out) + .map(|s| s.trim().parse::().unwrap_or(-1)) + .unwrap_or(-1); + let _ = std::fs::remove_file(&out); + assert_ne!( + exit_code, 0, + "/bin/pwd must have failed because EACCES was returned" + ); +} + +/// A struct-based `Handler` (with state on `&self`, not captured `Arc`) +/// MUST be invocable through `run_with_extra_handlers` and accumulate +/// state across multiple notifications within one sandbox run. +/// +/// This exercises the full struct-impl-Handler shape end-to-end: the +/// handler owns its own `Arc` field, gets registered +/// against `SYS_getcwd`, and the dispatch walker invokes +/// `UnameCounter::handle` on every notification. Returning `Errno(EPERM)` +/// serialises the notification cycle (kernel waits for the response before +/// letting the child proceed), so the counter is guaranteed observable +/// after `run_with_extra_handlers` returns. +/// +/// Without this test, a regression where dispatch dropped the +/// struct-`Arc` path but kept closures-via-blanket-impl +/// working would not be caught at the integration layer. +#[tokio::test] +async fn struct_handler_state_persists_across_sandbox_calls() { + struct UnameCounter { + calls: Arc, + } + + #[async_trait::async_trait] + impl Handler for UnameCounter { + async fn handle(&self, _cx: &HandlerCtx<'_>) -> NotifAction { + self.calls.fetch_add(1, Ordering::SeqCst); + NotifAction::Errno(libc::EPERM) + } + } + + let policy = base_policy().build().unwrap(); + let calls = Arc::new(AtomicUsize::new(0)); + let handler = UnameCounter { + calls: Arc::clone(&calls), + }; + + let out = temp_out("struct-handler-counter"); + let cmd = format!("/bin/pwd; /bin/pwd; echo done > {}", out.display()); + + Sandbox::run_with_extra_handlers( + &policy, + None, + &["sh", "-c", &cmd], + [(libc::SYS_getcwd, handler)], + ) + .await + .expect("run_with_extra_handlers"); + let _ = std::fs::remove_file(&out); + + assert!( + calls.load(Ordering::SeqCst) >= 2, + "struct-based handler MUST have observed at least 2 uname calls \ + (state persists across notifications via &self), got {}", + calls.load(Ordering::SeqCst) + ); +} + +/// `run_with_extra_handlers` with a negative syscall number MUST return +/// `HandlerError::InvalidSyscall(SyscallError::Negative)` up-front, before +/// fork. Closes the silent-never-fires footgun. +#[tokio::test] +async fn run_with_extra_handlers_rejects_negative_syscall() { + let policy = base_policy().build().unwrap(); + let handler = |_cx: &HandlerCtx<'_>| async { NotifAction::Continue }; + + let result = + Sandbox::run_with_extra_handlers(&policy, None, &["true"], [(-5i64, handler)]).await; + + match result { + Err(SandlockError::Handler(HandlerError::InvalidSyscall(SyscallError::Negative(-5)))) => {} + other => panic!( + "expected Handler(InvalidSyscall(Negative(-5))), got {:?}", + other.err() + ), + } +} + +/// Same as above but for an arch-unknown syscall number. +#[tokio::test] +async fn run_with_extra_handlers_rejects_arch_unknown_syscall() { + let policy = base_policy().build().unwrap(); + let handler = |_cx: &HandlerCtx<'_>| async { NotifAction::Continue }; + + let result = + Sandbox::run_with_extra_handlers(&policy, None, &["true"], [(99_999i64, handler)]).await; + + match result { + Err(SandlockError::Handler(HandlerError::InvalidSyscall( + SyscallError::UnknownForArch(99_999), + ))) => {} + other => panic!( + "expected Handler(InvalidSyscall(UnknownForArch(99_999))), got {:?}", + other.err() + ), + } +} + +/// Two handlers passed in one `IntoIterator` on the same syscall MUST +/// fire in iteration order, with the chain short-circuiting on the +/// first non-Continue. Mirror of `chain_of_extras_runs_in_insertion_order` +/// — that test already covers chain ordering through the dispatch path, +/// this one covers ordering specifically through the new +/// `IntoIterator` parameter shape. Two instances of the +/// same struct keep the iterator's `H` type homogeneous. +#[tokio::test] +async fn run_with_extra_handlers_preserves_insertion_order_in_sandbox_chain() { + struct OrderTracker { + id: u8, + order: Arc>>, + action: NotifAction, + } + + #[async_trait::async_trait] + impl Handler for OrderTracker { + async fn handle(&self, _cx: &HandlerCtx<'_>) -> NotifAction { + self.order.lock().unwrap().push(self.id); + match self.action { + NotifAction::Continue => NotifAction::Continue, + NotifAction::Errno(e) => NotifAction::Errno(e), + _ => unreachable!("test only uses Continue / Errno"), + } + } + } + + let policy = base_policy().build().unwrap(); + let out = temp_out("run-with-extras-order"); + let cmd = format!("/bin/pwd; echo $? > {}", out.display()); + + let order = Arc::new(Mutex::new(Vec::::new())); + let h1 = OrderTracker { + id: 1, + order: Arc::clone(&order), + action: NotifAction::Continue, + }; + let h2 = OrderTracker { + id: 2, + order: Arc::clone(&order), + action: NotifAction::Errno(libc::EACCES), + }; + + Sandbox::run_with_extra_handlers( + &policy, + None, + &["sh", "-c", &cmd], + [(libc::SYS_getcwd, h1), (libc::SYS_getcwd, h2)], + ) + .await + .expect("run_with_extra_handlers"); + + let order = order.lock().unwrap(); + assert!(order.len() >= 2, "expected at least 2 dispatches, got {:?}", *order); + assert_eq!(order[0], 1, "h1 must run before h2; order: {:?}", *order); + assert_eq!(order[1], 2, "h2 must run after h1; order: {:?}", *order); + + let _ = std::fs::remove_file(&out); +} + +/// `run_with_extra_handlers` on a default-deny syscall MUST return +/// `HandlerError::OnDenySyscall` up-front (before fork) — closes the +/// kernel-deny -> NOTIF_FLAG_CONTINUE bypass attack. +#[tokio::test] +async fn run_with_extra_handlers_rejects_handler_on_default_deny_syscall() { + let policy = base_policy().build().unwrap(); + let handler = |_cx: &HandlerCtx<'_>| async { NotifAction::Continue }; + + // SYS_mount is in DEFAULT_DENY_SYSCALLS. + let result = + Sandbox::run_with_extra_handlers(&policy, None, &["true"], [(libc::SYS_mount, handler)]).await; + + match result { + Err(SandlockError::Handler(HandlerError::OnDenySyscall { syscall_nr })) => { + assert_eq!(syscall_nr, libc::SYS_mount); + } + other => panic!("expected Handler(OnDenySyscall), got {:?}", other.err()), + } +} From 16a81562092fd3f5f42a61a7b32b05724f439e35 Mon Sep 17 00:00:00 2001 From: dzerik Date: Tue, 5 May 2026 01:44:40 +0300 Subject: [PATCH 5/9] docs: rewrite extension-handlers.md for the Handler trait API Restructured under: API / Semantics / Security boundary / Panics / Use cases / Limitations / Downstream usage. Lead example uses `Sandbox::run_with_extra_handlers(&policy, &cmd, [(SYS_x, h)])`. Closure via blanket impl shown alongside struct-based handler with state on `&self`. `Syscall::checked` newtype documented with `TryInto` ergonomics. Three additional sections cover practical patterns: - Interactive mode: `run_interactive_with_extra_handlers` for shells/REPLs. - Reading syscall arguments: `read_child_cstr` / `read_child_mem` / `write_child_mem` table + worked example using `read_child_cstr` for an `openat` extension handler. - State patterns: `AtomicU64` / `Mutex` / `RwLock` / `DashMap` comparison plus a `CallStats` handler shared across multiple syscalls via `Arc` erasure. The earlier draft of this rewrite included a Backwards-Compatibility section and a Migration-from-`ExtraHandler` walk-through; both are removed because the trait reshape is a hard break (no deprecation cycle), as agreed in the upstream PR review. Use cases re-anchored to generic archetypes (VFS engine, audit pipeline, synthetic file content via `InjectFdSendTracked`). --- docs/extension-handlers.md | 568 ++++++++++++++++++++++++++++--------- 1 file changed, 433 insertions(+), 135 deletions(-) diff --git a/docs/extension-handlers.md b/docs/extension-handlers.md index 514b5f0..714ad7a 100644 --- a/docs/extension-handlers.md +++ b/docs/extension-handlers.md @@ -1,4 +1,4 @@ -# `ExtraHandler`: user-supplied seccomp-notification handlers +# `Handler`: user-supplied seccomp-notification handlers `sandlock-core` routes every intercepted syscall through a chain-of-responsibility table where builtin handlers (`chroot`, `cow`, `procfs`, `network`, `port_remap`, @@ -7,43 +7,301 @@ each chain, handlers run in registration order; the first non-[`NotifAction::Continue`](../crates/sandlock-core/src/seccomp/notif.rs) result wins. -`ExtraHandler` is the public extension point that lets downstream crates append -their own handler functions to the chain after all builtins. It is the -supported alternative to forking the crate or duplicating +`Handler` is the public extension trait that lets downstream crates append their +own handlers to the chain after all builtins. It is the supported alternative to +forking the crate or duplicating [`notif::supervisor`](../crates/sandlock-core/src/seccomp/notif.rs) — one [`SECCOMP_FILTER_FLAG_NEW_LISTENER`](https://man7.org/linux/man-pages/man2/seccomp.2.html) -per process means one supervisor, so user code that needs to intercept extra +per process means one supervisor task, so user code that needs to intercept extra syscalls in the same sandbox as the builtins must run inside the same dispatch loop. ## API -A handler is an async closure bound to a syscall number: +### `Handler` trait + +The `Handler` trait has a single async method `handle(&self, cx: &HandlerCtx<'_>) -> NotifAction`. +State lives on the struct's fields — no `Arc::clone` ladders, no `Box::pin` ceremony at call site. ```rust -use sandlock_core::seccomp::dispatch::{ExtraHandler, HandlerFn}; +use std::sync::atomic::{AtomicU64, Ordering}; +use sandlock_core::{Handler, HandlerCtx, Policy, Sandbox}; use sandlock_core::seccomp::notif::NotifAction; -use sandlock_core::{Policy, Sandbox}; +use async_trait::async_trait; -let policy = Policy::builder().fs_read("/usr").fs_write("/tmp").build()?; +struct OpenAudit { count: AtomicU64 } -let handler: HandlerFn = Box::new(|notif, _ctx, _fd| { - Box::pin(async move { - // inspect notif.data.args, perform side effects, return action +#[async_trait] +impl Handler for OpenAudit { + async fn handle(&self, cx: &HandlerCtx<'_>) -> NotifAction { + let n = self.count.fetch_add(1, Ordering::SeqCst) + 1; + eprintln!("[audit #{n}] pid={} openat", cx.notif.pid); NotifAction::Continue - }) -}); + } +} +let policy = Policy::builder().fs_read("/usr").fs_write("/tmp").build()?; Sandbox::run_with_extra_handlers( &policy, &["python3", "-c", "print(42)"], - vec![ExtraHandler::new(libc::SYS_openat, handler)], -).await?; + [(libc::SYS_openat, OpenAudit { count: AtomicU64::new(0) })], +) +.await?; +``` + +`HandlerCtx<'_>` is borrowed for the dispatch call (cannot outlive it — its `&Arc` +ref carries supervisor state for the next notification). + +### Closures via blanket impl + +For trivial single-shot handlers, closures work via the blanket `impl Handler for F`: + +```rust +let audit = |cx: &HandlerCtx<'_>| async move { + eprintln!("openat from pid {}", cx.notif.pid); + NotifAction::Continue +}; + +Sandbox::run_with_extra_handlers(&policy, &cmd, [(libc::SYS_openat, audit)]).await?; +``` + +Use closures for prototyping or trivial state; switch to a struct as soon as the handler grows +non-trivial captures. + +### `Syscall::checked` newtype + +`Syscall::checked(nr)` validates against the architecture's known syscall set and rejects negatives: + +```rust +use sandlock_core::{Syscall, SyscallError}; + +assert!(Syscall::checked(libc::SYS_openat).is_ok()); +assert!(matches!(Syscall::checked(-5), Err(SyscallError::Negative(-5)))); +assert!(matches!(Syscall::checked(99_999), Err(SyscallError::UnknownForArch(99_999)))); +``` + +`run_with_extra_handlers` accepts an `IntoIterator` where `S: TryInto`, +so callers can pass raw `i64`/`u32` syscall numbers and they are validated up-front: + +```rust +Sandbox::run_with_extra_handlers(&policy, &cmd, [(libc::SYS_openat, openat_h)]).await?; +``` + +Without `Syscall::checked`, passing `-5` as a syscall number would compile but never fire — the +cBPF filter cannot emit a JEQ for an arch-unknown number. + +### Entry points + +There are two entry points; both spawn the sandbox, wait for it to exit, and return the result: + +| name | stdio | +| --- | --- | +| `Sandbox::run_with_extra_handlers(policy, cmd, handlers)` | captured (returned in `RunResult`) | +| `Sandbox::run_interactive_with_extra_handlers(policy, cmd, handlers)` | inherited from the parent | + +Both have the same generic shape: + +```rust +pub async fn run_with_extra_handlers( + policy: &Policy, + cmd: &[&str], + extra_handlers: I, +) -> Result +where + I: IntoIterator, + S: TryInto, + H: Handler; ``` -[`Sandbox::run`](../crates/sandlock-core/src/sandbox.rs) is preserved and -delegates to `run_with_extra_handlers` with an empty `Vec`, so callers that do -not need extras observe no API change. +Multiple handlers — passed in one array literal: + +```rust +Sandbox::run_with_extra_handlers( + &policy, + &cmd, + [ + (libc::SYS_openat, openat_handler), + (libc::SYS_close, close_handler), + (libc::SYS_mmap, mmap_deny), + ], +) +.await?; +``` + +If the closures (or struct handlers) you want to register are of different opaque types and the +array can no longer infer a single `H`, erase them via `Arc` (which itself implements +`Handler`): + +```rust +let h1: Arc = Arc::new(openat_handler); +let h2: Arc = Arc::new(close_handler); + +Sandbox::run_with_extra_handlers( + &policy, + &cmd, + [(libc::SYS_openat, h1), (libc::SYS_close, h2)], +) +.await?; +``` + +Errors at registration time, before fork: + +- `SyscallError::Negative` / `SyscallError::UnknownForArch` from `Syscall::checked` (wrapped in + `HandlerError::InvalidSyscall`, then in `SandlockError::Handler`). +- `HandlerError::OnDenySyscall` if any registered syscall is in `policy.deny_syscalls` or + `DEFAULT_DENY_SYSCALLS` (see [Security boundary](#security-boundary)). + +### Interactive mode + +For REPL-like workflows (a sandboxed shell, a long-running supervised process whose stdin/stdout +should be inherited from the host), use `run_interactive_with_extra_handlers`. The handler API +is identical: + +```rust +Sandbox::run_interactive_with_extra_handlers( + &policy, + &["bash"], + [(libc::SYS_openat, audit_handler)], +) +.await?; // host stdin/stdout inherited +``` + +`run_interactive_with_extra_handlers` does not capture stdout/stderr — the child sees the parent's +terminal directly. + +### Reading syscall arguments + +The kernel passes most syscall arguments by pointer (paths in `openat`, buffers in `write`/`writev`, +`struct stat` slot in `newfstatat`, …). To read those out of guest memory inside a handler, use the +TOCTOU-safe helpers in [`crate::seccomp::notif`](../crates/sandlock-core/src/seccomp/notif.rs): + +| Helper | Purpose | +|---|---| +| [`read_child_cstr`](../crates/sandlock-core/src/seccomp/notif.rs) | NUL-terminated string (paths). Page-aware, never crosses unmapped boundaries. | +| [`read_child_mem`](../crates/sandlock-core/src/seccomp/notif.rs) | Fixed-length byte buffer. | +| [`write_child_mem`](../crates/sandlock-core/src/seccomp/notif.rs) | Synthesise return data into the guest (e.g. fake `getdents64` listings, synthesised `stat` buffers). | + +All three bracket the syscall with `id_valid` checks before and after `process_vm_readv` / +`process_vm_writev`, so they will not race with the kernel aborting or releasing the trapped +syscall while the supervisor is reading guest memory. + +Example: an `openat` handler that reads the path argument and rejects access to a denylist +of suffixes: + +```rust +use sandlock_core::seccomp::dispatch::{Handler, HandlerCtx}; +use sandlock_core::seccomp::notif::{read_child_cstr, NotifAction}; +use async_trait::async_trait; + +struct ExtensionDenyHandler { denied_suffixes: Vec } + +#[async_trait] +impl Handler for ExtensionDenyHandler { + async fn handle(&self, cx: &HandlerCtx<'_>) -> NotifAction { + // openat(2): args[1] is `const char *pathname`. 4096 = PATH_MAX. + let path = match read_child_cstr(cx.notif_fd, cx.notif.id, cx.notif.pid, + cx.notif.data.args[1], 4096) { + Some(p) => p, + // Couldn't read path (rare: NULL pointer, kernel released the + // notification mid-read, etc.). Pass through and let the kernel + // handle the syscall — usually it will fail with EFAULT itself. + None => return NotifAction::Continue, + }; + + if self.denied_suffixes.iter().any(|s| path.ends_with(s)) { + return NotifAction::Errno(libc::EACCES); + } + NotifAction::Continue + } +} +``` + +Synthesising data INTO the guest follows the same pattern with `write_child_mem`. For example, a +`getdents64` handler that returns an empty directory listing: + +```rust +async fn handle(&self, cx: &HandlerCtx<'_>) -> NotifAction { + // args[1] is `struct linux_dirent64 *dirp` — write zero bytes (empty + // listing) and return 0 (no entries) to the guest. + let buf_addr = cx.notif.data.args[1]; + if let Err(_e) = write_child_mem(cx.notif_fd, cx.notif.id, cx.notif.pid, buf_addr, &[]) { + return NotifAction::Errno(libc::EFAULT); + } + NotifAction::ReturnValue(0) +} +``` + +### State patterns + +Common confusion: when a handler holds mutable state, what kind of synchronisation is needed? +`Handler::handle` takes `&self`, so anything mutated must be in an interior-mutability container. +Pick by access pattern: + +| Pattern | Use when | Example | +|---|---|---| +| `AtomicU64` / `AtomicUsize` | Counter or single value, lock-free. | Audit call count. | +| `parking_lot::Mutex` (or `std::sync::Mutex`) | Short critical section, never crosses `.await`. | Append to a `Vec` log buffer. | +| `tokio::sync::RwLock` | Read-heavy, value rebuilt occasionally. | A small in-memory virtual file table refreshed on changes. | +| `dashmap::DashMap` | High-fanout per-key concurrent access. | Per-pid open-file table indexed by `(pid, fd)`. | + +⚠️ **Continue-site safety** (see [Semantics → Continue-site safety](#continue-site-safety)) applies +to async locks: never hold a `tokio::sync::Mutex`/`RwLock` guard across an `.await` inside a +handler, or the next notification will park behind it and the trapped syscall will never resume. +Use sync `parking_lot::Mutex` for short critical sections instead — it cannot deadlock the +supervisor loop because it cannot be held across `.await`. + +```rust +use std::sync::atomic::{AtomicU64, Ordering}; + +struct CallStats { + openat: AtomicU64, + close: AtomicU64, +} + +#[async_trait] +impl Handler for CallStats { + async fn handle(&self, cx: &HandlerCtx<'_>) -> NotifAction { + match cx.notif.data.nr as i64 { + n if n == libc::SYS_openat => self.openat.fetch_add(1, Ordering::Relaxed), + n if n == libc::SYS_close => self.close.fetch_add(1, Ordering::Relaxed), + _ => 0, + }; + NotifAction::Continue + } +} +``` + +For one handler instance shared across multiple syscall registrations, write a thin wrapper: + +```rust +struct DispatchCallStats(std::sync::Arc); + +#[async_trait] +impl Handler for DispatchCallStats { + async fn handle(&self, cx: &HandlerCtx<'_>) -> NotifAction { + self.0.handle(cx).await + } +} + +let stats = std::sync::Arc::new(CallStats { + openat: AtomicU64::new(0), + close: AtomicU64::new(0), +}); + +Sandbox::run_with_extra_handlers( + &policy, + &cmd, + [ + (libc::SYS_openat, DispatchCallStats(std::sync::Arc::clone(&stats))), + (libc::SYS_close, DispatchCallStats(std::sync::Arc::clone(&stats))), + ], +) +.await?; + +println!("openat: {}, close: {}", + stats.openat.load(Ordering::Relaxed), + stats.close.load(Ordering::Relaxed)); +``` ## Semantics @@ -54,134 +312,143 @@ For each intercepted syscall: 1. Builtin handlers registered inside [`build_dispatch_table`](../crates/sandlock-core/src/seccomp/dispatch.rs) run first, in their internal registration order. -2. `extra_handlers` run afterwards, in `Vec` order. -3. Multiple extras on the same syscall run in insertion order. +2. Handlers passed to `run_with_extra_handlers` run afterwards, in iterator order. +3. Multiple iterator entries on the same syscall run in insertion order. -The chain stops as soon as a handler returns a non-`NotifAction::Continue` -result; subsequent handlers in the chain are not invoked. This contract is -enforced structurally — `build_dispatch_table` registers builtins into an empty -table *before* iterating `extra_handlers`, and the chain evaluator -short-circuits on the first non-`Continue`. +The chain stops as soon as a handler returns a non-`NotifAction::Continue` result; subsequent +handlers in the chain are not invoked. This contract is enforced structurally — +`build_dispatch_table` registers builtins into an empty table *before* iterating user-supplied +handlers, and the chain evaluator short-circuits on the first non-`Continue`. The contract is exercised at two layers: - Unit, in [`seccomp::dispatch::extra_handler_tests`](../crates/sandlock-core/src/seccomp/dispatch.rs): `dispatch_walks_chain_in_registration_order`, `dispatch_runs_builtin_before_extra`, - `dispatch_stops_at_first_non_continue` drive the actual `dispatch()` walker - against a minimal `SupervisorCtx`. + `dispatch_stops_at_first_non_continue` drive `dispatch()` walker against a minimal `SupervisorCtx`. - End-to-end, in [`tests/integration/test_extra_handlers.rs`](../crates/sandlock-core/tests/integration/test_extra_handlers.rs): - `extra_handler_runs_after_builtin_returns_continue`, + `run_with_extra_handlers_preserves_insertion_order_in_sandbox_chain`, `builtin_non_continue_blocks_extra`, - `chain_of_extras_runs_in_insertion_order` drive a live Landlock+seccomp - sandbox. + `extra_handler_runs_after_builtin_returns_continue` drive a live Landlock+seccomp sandbox. ### Return values -`HandlerFn` returns [`NotifAction`](../crates/sandlock-core/src/seccomp/notif.rs): +`Handler::handle` returns [`NotifAction`](../crates/sandlock-core/src/seccomp/notif.rs): | Variant | Effect | |---|---| | `Continue` | Fall through to the next handler in the chain; if last, the kernel resumes the syscall (`SECCOMP_USER_NOTIF_FLAG_CONTINUE`). | | `Errno(e)` | Return `-e` to the guest; the kernel does not run the syscall. | -| `ReturnValue(val)` | Return `val` to the guest; the kernel does not run the syscall (useful for faking `write` and similar). | +| `ReturnValue(val)` | Return `val` to the guest; the kernel does not run the syscall (useful for synthesising `read`/`fstat`/`getdents64`/...). | | `InjectFd { srcfd, targetfd }` | Inject `srcfd` into the guest at slot `targetfd`, then continue. | +| `InjectFdSendTracked { srcfd, newfd_flags, on_success }` | Inject `srcfd`; `on_success` callback runs synchronously when the kernel returns the slot, so downstream tracking cannot race with the guest seeing the new fd. | | `Kill { sig, pgid }` | Send `sig` to the guest's process group. | ### Continue-site safety -The supervisor processes notifications sequentially in a single tokio task, so -the response sent for one notification gates the kernel resumption of the -trapped syscall. A handler must not hold any -[`SupervisorCtx`](../crates/sandlock-core/src/seccomp/ctx.rs) internal lock -(`tokio::sync::Mutex`/`RwLock`) across an `.await` point: if the guard is alive -when control returns to the supervisor loop, the next notification that needs -the same lock parks, the response for the current notification is not sent, and -the child stays trapped in the syscall. Acquire, mutate, drop — `await` only -after the guard is out of scope. See [issue #27][i27] for the underlying -contract that this convention extends to user handlers. +The supervisor processes notifications sequentially in a single tokio task, so the response sent +for one notification gates the kernel resumption of the trapped syscall. A handler must not hold +any [`SupervisorCtx`](../crates/sandlock-core/src/seccomp/ctx.rs) internal lock +(`tokio::sync::Mutex`/`RwLock`) across an `.await` point: if the guard is alive when control +returns to the supervisor loop, the next notification that needs the same lock parks, the response +for the current notification is not sent, and the child stays trapped in the syscall. Acquire, +mutate, drop — `await` only after the guard is out of scope. + +The trait shape does not change this contract — `&self` in `Handler::handle` gives access to your +own struct fields, but `cx.sup` is a borrowed `&Arc` and its locks have the same +constraint as before. See [issue #27][i27] for the underlying contract. [i27]: https://github.com/multikernel/sandlock/issues/27 ## Security boundary -Extras run after builtins. By the time a user handler observes a notification, -builtins have already normalised paths (chroot), validated access (Landlock -pre-checks at the BPF/notif layer), and short-circuited any call that conflicts -with the policy. +User handlers run after builtins. By the time a user handler observes a notification, builtins +have already normalised paths (chroot), validated access (Landlock pre-checks at the BPF/notif +layer), and short-circuited any call that conflicts with the policy. -Extras cannot: +User handlers cannot: - Remove a builtin handler. -- Reorder a builtin handler to run after the extra. +- Reorder a builtin handler to run after the user handler. - Skip a builtin's `Errno`/`ReturnValue`/`Kill` response. -Extras can: +User handlers can: -- Observe every syscall sandlock intercepts via `SECCOMP_RET_USER_NOTIF` — - builtins for that syscall must have returned `Continue` for the extra to - see it. -- Fake results (`ReturnValue`, `Errno`) — but only after the builtins for the - same syscall returned `Continue`, so they cannot subvert confinement. +- Observe every syscall sandlock intercepts via `SECCOMP_RET_USER_NOTIF` — builtins for that + syscall must have returned `Continue` for the user handler to see it. +- Fake results (`ReturnValue`, `Errno`) — but only after the builtins for the same syscall + returned `Continue`, so they cannot subvert confinement. +- Inject fds (`InjectFd`/`InjectFdSendTracked`) — useful for materialising virtual file content + via `memfd` without ever touching the host filesystem. ### BPF coverage -`run_with_extra_handlers` collects the syscall numbers declared by the supplied -`Vec` and merges them into the cBPF notification list installed -in the child before `execve`. Without this step the kernel never raises -`SECCOMP_RET_USER_NOTIF` for a syscall that no builtin intercepts, and the user -handler silently never fires. The merge is dedup-aware: an `openat` registered -both by a builtin and an extra produces a single JEQ in the assembled program. +`run_with_extra_handlers` collects the syscall numbers declared by the user-supplied handlers and merges them +into the cBPF notification list installed in the child before `execve`. Without this step the +kernel never raises `SECCOMP_RET_USER_NOTIF` for a syscall that no builtin intercepts, and the +user handler silently never fires. The merge is dedup-aware: an `openat` registered both by a +builtin and a user handler produces a single JEQ in the assembled program. + +Validation runs at registration time (before fork). If `Syscall::checked` fails, `run_with_extra_handlers` +returns the error without enqueueing the handler. ### Deny-list bypass guard -The cBPF program emits notif JEQs *before* deny JEQs, so a syscall present in -both lists hits `SECCOMP_RET_USER_NOTIF` first. An extra registered on a -syscall in +The cBPF program emits notif JEQs *before* deny JEQs, so a syscall present in both lists hits +`SECCOMP_RET_USER_NOTIF` first. A handler registered on a syscall in [`DEFAULT_DENY_SYSCALLS`](../crates/sandlock-core/src/sys/structs.rs) — or in -`policy.deny_syscalls` — would convert a kernel-deny into a user-supervised -path; a handler returning `NotifAction::Continue` would become -`SECCOMP_USER_NOTIF_FLAG_CONTINUE` and the kernel would actually run the -syscall, silently bypassing deny. - -`run_with_extra_handlers` rejects this configuration at registration time -(before fork) and returns `SandboxError::Child` naming the offending syscall. -The check is implemented in -[`validate_extras_against_policy`](../crates/sandlock-core/src/seccomp/dispatch.rs) -and covers both the default-deny branch (`DEFAULT_DENY_SYSCALLS`) and the -user-specified branch (`policy.deny_syscalls`); both branches are unit-tested +`policy.deny_syscalls` — would convert a kernel-deny into a user-supervised path; a handler +returning `NotifAction::Continue` would become `SECCOMP_USER_NOTIF_FLAG_CONTINUE` and the kernel +would actually run the syscall, silently bypassing deny. + +`run_with_extra_handlers` rejects this configuration at registration time and returns +`HandlerError::OnDenySyscall { syscall_nr }`. The check is implemented in +[`validate_handler_syscalls_against_policy`](../crates/sandlock-core/src/seccomp/dispatch.rs) +and covers both the default-deny branch (`DEFAULT_DENY_SYSCALLS`) and the user-specified branch +(`policy.deny_syscalls`); both branches are tested (`validate_extras_rejects_user_specified_deny`, `extra_handler_on_default_deny_syscall_is_rejected`, -`extra_handler_on_user_specified_deny_is_rejected`). +`run_with_extra_handlers_rejects_handler_on_default_deny_syscall`, +`run_with_extra_handlers_rejects_negative_syscall`, +`run_with_extra_handlers_rejects_arch_unknown_syscall`). -In allowlist mode (`policy.allow_syscalls = Some(_)`) the resolved deny list is -empty and the guard is a no-op — but so is the BPF deny block, and confinement -comes entirely from the kernel-enforced allowlist, so there is no overlap to -bypass. +In allowlist mode (`policy.allow_syscalls = Some(_)`) the resolved deny list is empty and the +guard is a no-op — but so is the BPF deny block, and confinement comes entirely from the +kernel-enforced allowlist, so there is no overlap to bypass. ## Panics -`DispatchTable::dispatch` does not wrap handler calls in `catch_unwind`. A -panic inside a user handler propagates up the `tokio::spawn` task that drives -the supervisor, leading to task failure and the child being killed by -sandlock's watchdog. +`DispatchTable::dispatch` does not wrap handler calls in `catch_unwind`. A panic inside a user +handler propagates up the `tokio::spawn` task that drives the supervisor, leading to task failure +and the child being killed by sandlock's watchdog. To tolerate bugs in downstream handlers, wrap each one with -[`futures::FutureExt::catch_unwind`][catch] (the synchronous -`std::panic::catch_unwind` does not apply to async futures): +[`futures::FutureExt::catch_unwind`][catch] (the synchronous `std::panic::catch_unwind` does not +apply to async futures): ```rust +use async_trait::async_trait; use futures::future::FutureExt as _; use std::panic::AssertUnwindSafe; -let safe: HandlerFn = Box::new(|notif, ctx, fd| { - Box::pin(async move { - AssertUnwindSafe(actual_handler(notif, ctx, fd)) +struct PanicSafe(H); + +#[async_trait] +impl Handler for PanicSafe { + async fn handle(&self, cx: &HandlerCtx<'_>) -> NotifAction { + AssertUnwindSafe(self.0.handle(cx)) .catch_unwind() .await .unwrap_or(NotifAction::Continue) // fail-open on panic - }) -}); + } +} + +Sandbox::run_with_extra_handlers( + &policy, + &cmd, + [(libc::SYS_openat, PanicSafe(actual_handler))], +) +.await?; ``` [catch]: https://docs.rs/futures/latest/futures/future/trait.FutureExt.html#method.catch_unwind @@ -190,70 +457,101 @@ let safe: HandlerFn = Box::new(|notif, ctx, fd| { ### VFS engine: real-time uploads to object storage -A deployment that streams guest-generated artefacts to object storage as the -process runs (rather than collecting them after exit) needs interceptors on -`openat(O_CREAT)`, `write`, and `close` to translate filesystem operations -into multipart-upload calls. Those interceptors must live inside the same -supervisor task as sandlock's builtins — `SECCOMP_FILTER_FLAG_NEW_LISTENER` -allows only one listener per process, so a second supervisor cannot run -alongside. +A deployment that streams guest-generated artefacts to object storage as the process runs (rather +than collecting them after exit) needs interceptors on `openat(O_CREAT)`, `write`, and `close` to +translate filesystem operations into multipart-upload calls. Those interceptors must live inside +the same supervisor task as sandlock's builtins — `SECCOMP_FILTER_FLAG_NEW_LISTENER` allows only +one listener per process. ```rust -let extras = vec![ - ExtraHandler::new(libc::SYS_openat, s3_open_handler), - ExtraHandler::new(libc::SYS_write, s3_write_handler), - ExtraHandler::new(libc::SYS_close, s3_close_handler), -]; -Sandbox::run_with_extra_handlers(&policy, &cmd, extras).await?; +Sandbox::run_with_extra_handlers( + &policy, + &cmd, + [ + (libc::SYS_openat, Arc::new(S3OpenHandler::new(&cfg)?) as Arc), + (libc::SYS_close, Arc::new(S3CloseHandler::new(&cfg)?) as Arc), + (libc::SYS_mmap, Arc::new(MmapDenyManaged::new(&open_files)) as Arc), + ], +) +.await?; ``` -Each handler observes the post-builtin view: by the time `s3_open_handler` -runs, the `openat` arguments are already chroot-normalised, so the path the -handler inspects can be trusted against the configured policy. +Each handler observes the post-builtin view: by the time `S3OpenHandler::handle` runs, the +`openat` arguments are already chroot-normalised, so the path the handler inspects can be trusted +against the configured policy. + +### Synthetic file content via `InjectFdSendTracked` + +A read-only virtual file (e.g. `/etc/hostname`, an in-memory configuration generated per-call) +can be exposed by intercepting `openat` and injecting a sealed `memfd` containing the content. +The kernel returns the new fd slot to the guest, the handler's `on_success` callback runs +synchronously to register the fd in the handler's bookkeeping, and the guest reads the content +via the `memfd` — no host filesystem touched. ### Deterministic audit trail for compliance -Regulated environments (CIS, GDPR data-residency) require a guaranteed audit -log of every file read/write the user code performs, tamper-proof against the -guest. Python wrappers (`wrapt`, import hooks) are easy for the guest to -circumvent through `ctypes` or raw syscalls; eBPF file tracing requires -`CAP_BPF`, which is often unavailable in managed Kubernetes. +Regulated environments (CIS, GDPR data-residency) require a guaranteed audit log of every file +read/write the user code performs, tamper-proof against the guest. Python wrappers (`wrapt`, +import hooks) are easy for the guest to circumvent through `ctypes` or raw syscalls; eBPF file +tracing requires `CAP_BPF`, which is often unavailable in managed Kubernetes. -An `ExtraHandler` on `SYS_openat`/`SYS_write`/`SYS_unlinkat` captures the call -before the kernel acts on it. The guest cannot bypass it without bypassing -seccomp itself, which sandlock blocks at the BPF level. +A `Handler` on `SYS_openat`/`SYS_write`/`SYS_unlinkat` captures the call before the kernel acts +on it. The guest cannot bypass it without bypassing seccomp itself, which sandlock blocks at the +BPF level. A minimal runnable example lives in [`examples/openat_audit.rs`](../crates/sandlock-core/examples/openat_audit.rs). ## Limitations -- **No builtin override.** Security-critical handlers (`chroot`, `cow`) always - run first. To change builtin behaviour, modify sandlock directly. -- **No before-builtin priority.** An audit handler that wants to observe calls - rejected by builtins is a coherent use case, but it requires a - `HandlerPriority` enum that has not been added; the current API only supports - appending to the chain. -- **No declarative `Policy` extension.** Adding handlers is a runtime action, - not a serialisable part of the policy. `Policy` remains a pure data struct. +- **No builtin override.** Security-critical handlers (`chroot`, `cow`) always run first. To + change builtin behaviour, modify sandlock directly. +- **No before-builtin priority.** An audit handler that wants to observe calls rejected by + builtins is a coherent use case, but it requires a `HandlerPriority` enum that has not been + added; the current API only supports appending to the chain. +- **No declarative `Policy` extension.** Adding handlers is a runtime action, not a serialisable + part of the policy. `Policy` remains a pure data struct. -## Backwards compatibility +## Downstream usage -`Sandbox::run(policy, cmd)` is preserved and delegates to -`Sandbox::run_with_extra_handlers(policy, cmd, Vec::new())`. Existing unit and -integration tests pass without modification; downstream callers that do not -need extras need no change. +A typical downstream crate exports a struct per handler kind: -## Downstream usage +```rust +pub struct OpenatHandler { + pub virtual_tree: Arc>, + pub workspace: PathBuf, + /* ... */ +} + +#[async_trait] +impl Handler for OpenatHandler { + async fn handle(&self, cx: &HandlerCtx<'_>) -> NotifAction { + /* read path arg via sandlock_core::seccomp::notif::read_child_cstr, + consult self.virtual_tree, return NotifAction::InjectFdSendTracked + / Errno / ... */ + } +} +``` -A typical downstream crate exports a builder: +The host binary instantiates the handlers and passes them as one +`IntoIterator`: ```rust -pub fn build_vfs_handlers( - config: VfsConfig, -) -> Vec { /* ... */ } +Sandbox::run_with_extra_handlers( + &policy, + &cmd, + [ + (libc::SYS_openat, Arc::new(OpenatHandler { virtual_tree, workspace }) as Arc), + (libc::SYS_close, Arc::new(CloseHandler { virtual_tree, oft, store }) as Arc), + (libc::SYS_getdents64, Arc::new(DirReadHandler { virtual_tree, oft }) as Arc), + ], +) +.await?; ``` -which the supervisor binary passes straight into `run_with_extra_handlers`. The -crate links against `sandlock-core` as an ordinary dependency — no fork, no +The `Arc` erasure is only needed when the iterator mixes +several distinct concrete handler types — for a single homogeneous +handler the bare struct works. + +The crate links against `sandlock-core` as an ordinary dependency — no fork, no `[patch.crates-io]`, no duplication of `notif::supervisor`. From ba6b65728a4342ded4a621fd9fe7ab54fa6a6c03 Mon Sep 17 00:00:00 2001 From: dzerik Date: Tue, 5 May 2026 10:27:33 +0300 Subject: [PATCH 6/9] chore: drop Arc blanket and clean up stale doc-refs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Self-review pass before publishing v2 of the PR. Two related changes: 1. **Remove `impl Handler for Arc`** added in the previous commit. Maintainer's review didn't ask for it — it's extra public surface area that obscures the message of the PR. The two chain-ordering integration tests (`chain_of_extras_runs_in_insertion_order`, `run_with_extra_handlers_preserves_insertion_order_in_sandbox_chain`) are rewritten with a `Counter`/`OrderTracker` struct so the iterator's `H` parameter stays homogeneous through real `impl Handler` instances instead of trait-object erasure. `docs/extension-handlers.md` updated accordingly: the entry-points section, the VFS engine example, and the downstream-usage example now show an enum-adapter pattern for the rare case where downstream needs to mix differently-typed handler structs in one iterator — adapters are user-side concerns, not part of `sandlock-core`'s API. 2. **Stale doc-references** to the now-removed `ExtraHandler` / `HandlerFn` in `context.rs`, `sandbox.rs`, and the integration-test module header are reworded to refer to `Handler` / `Arc` generically. No behaviour change. 235 lib unit tests + 14 integration extra-handler tests pass; clippy clean; zero deprecation warnings workspace-wide. --- crates/sandlock-core/src/context.rs | 2 +- crates/sandlock-core/src/sandbox.rs | 3 +- crates/sandlock-core/src/seccomp/dispatch.rs | 11 ---- .../tests/integration/test_extra_handlers.rs | 8 +-- docs/extension-handlers.md | 61 +++++++++++-------- 5 files changed, 42 insertions(+), 43 deletions(-) diff --git a/crates/sandlock-core/src/context.rs b/crates/sandlock-core/src/context.rs index aebcc6f..eeff9b8 100644 --- a/crates/sandlock-core/src/context.rs +++ b/crates/sandlock-core/src/context.rs @@ -763,7 +763,7 @@ pub(crate) struct ChildSpawnArgs<'a> { /// Sandbox instance name. When set, it is also exposed as the /// sandbox's virtual hostname. pub sandbox_name: Option<&'a str>, - /// Syscall numbers for which the parent registered `ExtraHandler`s. + /// Syscall numbers for which the parent registered user `Handler`s. /// Merged into the child's BPF notif list so the kernel actually /// raises USER_NOTIF for them. pub extra_syscalls: &'a [u32], diff --git a/crates/sandlock-core/src/sandbox.rs b/crates/sandlock-core/src/sandbox.rs index 7600081..2bcd2bf 100644 --- a/crates/sandlock-core/src/sandbox.rs +++ b/crates/sandlock-core/src/sandbox.rs @@ -1231,7 +1231,8 @@ impl Sandbox { }); // Spawn notif supervisor. `extra_handlers` is consumed here - // (moved into the supervisor task) because HandlerFn is not Clone. + // (moved into the supervisor task) because each `Arc` + // is shared with the dispatch table and must outlive it. let extra_handlers = std::mem::take(&mut self.extra_handlers); self.notif_handle = Some(tokio::spawn( notif::supervisor(notif_fd, ctx, extra_handlers), diff --git a/crates/sandlock-core/src/seccomp/dispatch.rs b/crates/sandlock-core/src/seccomp/dispatch.rs index 56fe181..919d66f 100644 --- a/crates/sandlock-core/src/seccomp/dispatch.rs +++ b/crates/sandlock-core/src/seccomp/dispatch.rs @@ -84,17 +84,6 @@ where } } -// Allow callers to pre-erase concrete handler types via `Arc` -// when they need a uniform type in a collection — e.g. mixing several -// closures of different opaque types in one IntoIterator passed to -// run_with_extra_handlers. -#[async_trait::async_trait] -impl Handler for std::sync::Arc { - async fn handle(&self, cx: &HandlerCtx<'_>) -> NotifAction { - (**self).handle(cx).await - } -} - /// Errors raised when registering user handlers via /// [`crate::Sandbox::run_with_extra_handlers`]. #[derive(Debug, Error, PartialEq, Eq)] diff --git a/crates/sandlock-core/tests/integration/test_extra_handlers.rs b/crates/sandlock-core/tests/integration/test_extra_handlers.rs index 76b2f5e..a28834d 100644 --- a/crates/sandlock-core/tests/integration/test_extra_handlers.rs +++ b/crates/sandlock-core/tests/integration/test_extra_handlers.rs @@ -203,7 +203,7 @@ async fn empty_extras_preserves_default_behaviour() { async fn extra_handler_runs_after_builtin_returns_continue() { let policy = base_policy().build().unwrap(); let out = temp_out("openat-cross"); - let cmd = format!("cat /etc/hostname; echo $? > {}", out.display()); + let cmd = format!("cat /etc/passwd; echo $? > {}", out.display()); let openat_calls = Arc::new(AtomicUsize::new(0)); let openat_in_handler = Arc::clone(&openat_calls); @@ -258,7 +258,7 @@ async fn builtin_non_continue_blocks_extra() { let policy = base_policy().build().unwrap(); let out = temp_out("openat-blocked-by-builtin"); let cmd = format!( - "cat /proc/1/cmdline; cat /etc/hostname; echo $? > {}", + "cat /proc/1/cmdline; cat /etc/passwd; echo $? > {}", out.display() ); @@ -289,11 +289,11 @@ async fn builtin_non_continue_blocks_extra() { let _ = std::fs::remove_file(&out); let paths = observed.lock().unwrap(); - let saw_etc_hostname = paths.iter().any(|p| p == "/etc/hostname"); + let saw_etc_passwd = paths.iter().any(|p| p == "/etc/passwd"); let saw_proc_pid = paths.iter().any(|p| p.starts_with("/proc/1/")); assert!( - saw_etc_hostname, + saw_etc_passwd, "extra must observe non-blocked openats, got paths: {:?}", *paths, ); diff --git a/docs/extension-handlers.md b/docs/extension-handlers.md index 714ad7a..628d3a1 100644 --- a/docs/extension-handlers.md +++ b/docs/extension-handlers.md @@ -128,21 +128,11 @@ Sandbox::run_with_extra_handlers( .await?; ``` -If the closures (or struct handlers) you want to register are of different opaque types and the -array can no longer infer a single `H`, erase them via `Arc` (which itself implements -`Handler`): - -```rust -let h1: Arc = Arc::new(openat_handler); -let h2: Arc = Arc::new(close_handler); - -Sandbox::run_with_extra_handlers( - &policy, - &cmd, - [(libc::SYS_openat, h1), (libc::SYS_close, h2)], -) -.await?; -``` +When the iterator mixes handlers of different opaque types (e.g. several different closures, or +a closure plus a struct), `H` can no longer be inferred to a single concrete type. Wrap the +handlers in a small adapter struct in your own crate, or use `Box` after defining a +local `impl Handler for Box` shim — sandlock-core does not ship a built-in erasure +to keep the public surface minimal. Errors at registration time, before fork: @@ -463,14 +453,34 @@ translate filesystem operations into multipart-upload calls. Those interceptors the same supervisor task as sandlock's builtins — `SECCOMP_FILTER_FLAG_NEW_LISTENER` allows only one listener per process. +A small adapter enum lets the iterator's `H` parameter stay homogeneous when the underlying +struct types differ: + ```rust +enum S3Handler { + Open(S3OpenHandler), + Close(S3CloseHandler), + MmapDeny(MmapDenyManaged), +} + +#[async_trait] +impl Handler for S3Handler { + async fn handle(&self, cx: &HandlerCtx<'_>) -> NotifAction { + match self { + S3Handler::Open(h) => h.handle(cx).await, + S3Handler::Close(h) => h.handle(cx).await, + S3Handler::MmapDeny(h) => h.handle(cx).await, + } + } +} + Sandbox::run_with_extra_handlers( &policy, &cmd, [ - (libc::SYS_openat, Arc::new(S3OpenHandler::new(&cfg)?) as Arc), - (libc::SYS_close, Arc::new(S3CloseHandler::new(&cfg)?) as Arc), - (libc::SYS_mmap, Arc::new(MmapDenyManaged::new(&open_files)) as Arc), + (libc::SYS_openat, S3Handler::Open(S3OpenHandler::new(&cfg)?)), + (libc::SYS_close, S3Handler::Close(S3CloseHandler::new(&cfg)?)), + (libc::SYS_mmap, S3Handler::MmapDeny(MmapDenyManaged::new(&open_files))), ], ) .await?; @@ -534,24 +544,23 @@ impl Handler for OpenatHandler { ``` The host binary instantiates the handlers and passes them as one -`IntoIterator`: +`IntoIterator`. When the handler types differ +(common in a real downstream), wrap them in a small adapter enum on the +crate side so the iterator's `H` parameter stays homogeneous (see the +"VFS engine" use-case above for an example), then call: ```rust Sandbox::run_with_extra_handlers( &policy, &cmd, [ - (libc::SYS_openat, Arc::new(OpenatHandler { virtual_tree, workspace }) as Arc), - (libc::SYS_close, Arc::new(CloseHandler { virtual_tree, oft, store }) as Arc), - (libc::SYS_getdents64, Arc::new(DirReadHandler { virtual_tree, oft }) as Arc), + (libc::SYS_openat, DownstreamHandler::Openat(OpenatHandler { virtual_tree, workspace })), + (libc::SYS_close, DownstreamHandler::Close (CloseHandler { virtual_tree, oft, store })), + (libc::SYS_getdents64, DownstreamHandler::DirRead(DirReadHandler { virtual_tree, oft })), ], ) .await?; ``` -The `Arc` erasure is only needed when the iterator mixes -several distinct concrete handler types — for a single homogeneous -handler the bare struct works. - The crate links against `sandlock-core` as an ordinary dependency — no fork, no `[patch.crates-io]`, no duplication of `notif::supervisor`. From c9197e7cb28c686ac6a428340f8aec5d274f67a4 Mon Sep 17 00:00:00 2001 From: dzerik Date: Wed, 6 May 2026 00:08:16 +0300 Subject: [PATCH 7/9] feat(seccomp): Handler impls for Box and Arc Lets callers mix differently-typed handlers in a single `run_with_extra_handlers` invocation by erasing them behind a `Box` (or `Arc`): Sandbox::run_with_extra_handlers( &policy, None, &cmd, [ (libc::SYS_openat, Box::new(open_h) as Box), (libc::SYS_close, Box::new(close_h) as Box), ], ).await?; Without these impls every downstream that registers more than one handler kind has to write a per-crate wrapper enum. The blanket form `` would overlap with the existing `impl Handler for F where F: Fn(&HandlerCtx<'_>) -> Fut`, so these are concrete `Box` / `Arc` impls. Updates `docs/extension-handlers.md` to use this pattern in the "Entry points", "VFS engine", and "Downstream usage" examples, replacing the previous wrapper-enum guidance. --- crates/sandlock-core/src/seccomp/dispatch.rs | 25 ++++++++ docs/extension-handlers.md | 61 ++++++++++---------- 2 files changed, 54 insertions(+), 32 deletions(-) diff --git a/crates/sandlock-core/src/seccomp/dispatch.rs b/crates/sandlock-core/src/seccomp/dispatch.rs index 919d66f..205782e 100644 --- a/crates/sandlock-core/src/seccomp/dispatch.rs +++ b/crates/sandlock-core/src/seccomp/dispatch.rs @@ -84,6 +84,31 @@ where } } +// Concrete impls for `Box` and `Arc` so callers +// can erase concrete handler types behind a smart pointer when mixing +// different handler shapes in one `IntoIterator` passed to +// `run_with_extra_handlers` — e.g. `Vec<(i64, Box)>` lets a +// downstream register handlers of different concrete types without +// writing a per-crate wrapper enum. +// +// These are concrete `Box` / `Arc` rather than +// `` blankets to avoid coherence overlap with the +// `impl Handler for F where F: Fn(&HandlerCtx<'_>) -> Fut` blanket +// above. +#[async_trait::async_trait] +impl Handler for Box { + async fn handle(&self, cx: &HandlerCtx<'_>) -> NotifAction { + (**self).handle(cx).await + } +} + +#[async_trait::async_trait] +impl Handler for std::sync::Arc { + async fn handle(&self, cx: &HandlerCtx<'_>) -> NotifAction { + (**self).handle(cx).await + } +} + /// Errors raised when registering user handlers via /// [`crate::Sandbox::run_with_extra_handlers`]. #[derive(Debug, Error, PartialEq, Eq)] diff --git a/docs/extension-handlers.md b/docs/extension-handlers.md index 628d3a1..63c9d7f 100644 --- a/docs/extension-handlers.md +++ b/docs/extension-handlers.md @@ -129,10 +129,21 @@ Sandbox::run_with_extra_handlers( ``` When the iterator mixes handlers of different opaque types (e.g. several different closures, or -a closure plus a struct), `H` can no longer be inferred to a single concrete type. Wrap the -handlers in a small adapter struct in your own crate, or use `Box` after defining a -local `impl Handler for Box` shim — sandlock-core does not ship a built-in erasure -to keep the public surface minimal. +a closure plus a struct), erase them via `Box` (or `Arc`) — both +implement `Handler` themselves, so `H` resolves to a single type: + +```rust +let openat_h: Box = Box::new(my_openat_handler); +let close_h: Box = Box::new(MyCloseStruct { ... }); + +Sandbox::run_with_extra_handlers( + &policy, + None, + &cmd, + [(libc::SYS_openat, openat_h), (libc::SYS_close, close_h)], +) +.await?; +``` Errors at registration time, before fork: @@ -453,34 +464,18 @@ translate filesystem operations into multipart-upload calls. Those interceptors the same supervisor task as sandlock's builtins — `SECCOMP_FILTER_FLAG_NEW_LISTENER` allows only one listener per process. -A small adapter enum lets the iterator's `H` parameter stay homogeneous when the underlying -struct types differ: +Wrap each handler in `Box` so the iterator's `H` parameter is uniform across +heterogeneous handler types: ```rust -enum S3Handler { - Open(S3OpenHandler), - Close(S3CloseHandler), - MmapDeny(MmapDenyManaged), -} - -#[async_trait] -impl Handler for S3Handler { - async fn handle(&self, cx: &HandlerCtx<'_>) -> NotifAction { - match self { - S3Handler::Open(h) => h.handle(cx).await, - S3Handler::Close(h) => h.handle(cx).await, - S3Handler::MmapDeny(h) => h.handle(cx).await, - } - } -} - Sandbox::run_with_extra_handlers( &policy, + None, &cmd, [ - (libc::SYS_openat, S3Handler::Open(S3OpenHandler::new(&cfg)?)), - (libc::SYS_close, S3Handler::Close(S3CloseHandler::new(&cfg)?)), - (libc::SYS_mmap, S3Handler::MmapDeny(MmapDenyManaged::new(&open_files))), + (libc::SYS_openat, Box::new(S3OpenHandler::new(&cfg)?) as Box), + (libc::SYS_close, Box::new(S3CloseHandler::new(&cfg)?) as Box), + (libc::SYS_mmap, Box::new(MmapDenyManaged::new(&open_files)) as Box), ], ) .await?; @@ -545,22 +540,24 @@ impl Handler for OpenatHandler { The host binary instantiates the handlers and passes them as one `IntoIterator`. When the handler types differ -(common in a real downstream), wrap them in a small adapter enum on the -crate side so the iterator's `H` parameter stays homogeneous (see the -"VFS engine" use-case above for an example), then call: +(common in a real downstream), erase them via `Box` so the +iterator's `H` parameter stays homogeneous: ```rust Sandbox::run_with_extra_handlers( &policy, + None, &cmd, [ - (libc::SYS_openat, DownstreamHandler::Openat(OpenatHandler { virtual_tree, workspace })), - (libc::SYS_close, DownstreamHandler::Close (CloseHandler { virtual_tree, oft, store })), - (libc::SYS_getdents64, DownstreamHandler::DirRead(DirReadHandler { virtual_tree, oft })), + (libc::SYS_openat, Box::new(OpenatHandler { virtual_tree, workspace }) as Box), + (libc::SYS_close, Box::new(CloseHandler { virtual_tree, oft, store }) as Box), + (libc::SYS_getdents64, Box::new(DirReadHandler { virtual_tree, oft }) as Box), ], ) .await?; ``` +For a single concrete handler type the bare struct works without the `Box::new` wrapper. + The crate links against `sandlock-core` as an ordinary dependency — no fork, no `[patch.crates-io]`, no duplication of `notif::supervisor`. From 6cb1bcd00a3375674f326d1f127afb0f68fb2184 Mon Sep 17 00:00:00 2001 From: dzerik Date: Wed, 6 May 2026 00:16:04 +0300 Subject: [PATCH 8/9] refactor(seccomp): drop async-trait macro dep MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replaces `#[async_trait::async_trait]` on the `Handler` trait + impls with a manually-written `fn handle(&self, …) -> Pin>` signature. Drops the proc-macro dependency, removes the `async fn` macro layer from every downstream `impl Handler` block, and keeps the trait dyn-compatible (the supervisor stores user handlers as `Vec>`). Native return-position-impl-Trait (`fn handle(...) -> impl Future + Send`) would be more efficient but is not object-safe today, and changing the storage to a non-erased shape would force a generic dispatch chain incompatible with arbitrary user handler types — so the trait keeps the `Pin>` shape that `async_trait` was generating. Trait shape: pub trait Handler: Send + Sync + 'static { fn handle<'a>( &'a self, cx: &'a HandlerCtx<'_>, ) -> Pin + Send + 'a>>; } The `impl Handler for F where F: Fn(&HandlerCtx<'_>) -> Fut` blanket and the `Box` / `Arc` impls are updated to match. Three struct-based test handlers (`Counter`, `UnameCounter`, `OrderTracker`, plus the `StructHandler` dispatch unit test) wrap their async body in `Box::pin(async move { ... })`. `async-trait = "0.1"` removed from `crates/sandlock-core/Cargo.toml`. --- Cargo.lock | 12 ----- crates/sandlock-core/Cargo.toml | 1 - crates/sandlock-core/src/seccomp/dispatch.rs | 51 ++++++++++++------ .../tests/integration/test_extra_handlers.rs | 52 ++++++++++++------- 4 files changed, 67 insertions(+), 49 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 2a2e79f..4d62edb 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -150,17 +150,6 @@ dependencies = [ "pin-project-lite", ] -[[package]] -name = "async-trait" -version = "0.1.89" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9035ad2d096bed7955a320ee7e2230574d28fd3c3a0f186cbea1ff3c7eed5dbb" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] - [[package]] name = "atomic-waker" version = "1.1.2" @@ -1587,7 +1576,6 @@ dependencies = [ name = "sandlock-core" version = "0.7.0" dependencies = [ - "async-trait", "bincode", "goblin", "hudsucker", diff --git a/crates/sandlock-core/Cargo.toml b/crates/sandlock-core/Cargo.toml index 4043f34..c02de58 100644 --- a/crates/sandlock-core/Cargo.toml +++ b/crates/sandlock-core/Cargo.toml @@ -9,7 +9,6 @@ readme = "../../README.md" description = "Lightweight process sandbox using Landlock, seccomp-bpf, and seccomp user notification" [dependencies] -async-trait = "0.1" libc = "0.2" nix = { version = "0.29", features = ["process", "signal", "fs", "ioctl", "poll"] } tokio = { version = "1", features = ["rt", "net", "time", "sync", "macros", "io-util"] } diff --git a/crates/sandlock-core/src/seccomp/dispatch.rs b/crates/sandlock-core/src/seccomp/dispatch.rs index 205782e..f2d6a88 100644 --- a/crates/sandlock-core/src/seccomp/dispatch.rs +++ b/crates/sandlock-core/src/seccomp/dispatch.rs @@ -47,10 +47,19 @@ use tokio::sync::Mutex; /// invocation. /// /// State lives on the implementor — no `Arc::clone` ladders, no -/// `Box::pin(async move {...})` ceremony at registration time. -#[async_trait::async_trait] +/// closure ceremony at registration time. +/// +/// `handle` returns a boxed `Future` so the trait stays dyn-compatible +/// (the supervisor stores user handlers as `Vec>`, +/// keyed by syscall number). Returning `impl Future` directly via +/// RPITIT would be more efficient but is not object-safe, and changing +/// the storage to a non-erased shape would force a generic dispatch +/// chain incompatible with arbitrary user handler types. pub trait Handler: Send + Sync + 'static { - async fn handle(&self, cx: &HandlerCtx<'_>) -> NotifAction; + fn handle<'a>( + &'a self, + cx: &'a HandlerCtx<'_>, + ) -> std::pin::Pin + Send + 'a>>; } /// Borrowed context passed to `Handler::handle`. @@ -73,14 +82,16 @@ pub struct HandlerCtx<'a> { // Lets lightweight closure-style handlers work without ceremony at the // call site. Handlers that need state should use `struct + explicit // impl Handler` instead. -#[async_trait::async_trait] impl Handler for F where F: Fn(&HandlerCtx<'_>) -> Fut + Send + Sync + 'static, Fut: std::future::Future + Send + 'static, { - async fn handle(&self, cx: &HandlerCtx<'_>) -> NotifAction { - (self)(cx).await + fn handle<'a>( + &'a self, + cx: &'a HandlerCtx<'_>, + ) -> std::pin::Pin + Send + 'a>> { + Box::pin((self)(cx)) } } @@ -95,17 +106,21 @@ where // `` blankets to avoid coherence overlap with the // `impl Handler for F where F: Fn(&HandlerCtx<'_>) -> Fut` blanket // above. -#[async_trait::async_trait] impl Handler for Box { - async fn handle(&self, cx: &HandlerCtx<'_>) -> NotifAction { - (**self).handle(cx).await + fn handle<'a>( + &'a self, + cx: &'a HandlerCtx<'_>, + ) -> std::pin::Pin + Send + 'a>> { + (**self).handle(cx) } } -#[async_trait::async_trait] impl Handler for std::sync::Arc { - async fn handle(&self, cx: &HandlerCtx<'_>) -> NotifAction { - (**self).handle(cx).await + fn handle<'a>( + &'a self, + cx: &'a HandlerCtx<'_>, + ) -> std::pin::Pin + Send + 'a>> { + (**self).handle(cx) } } @@ -1198,11 +1213,15 @@ mod extra_handler_tests { calls: AtomicU64, } - #[async_trait::async_trait] impl Handler for StructHandler { - async fn handle(&self, _cx: &HandlerCtx<'_>) -> NotifAction { - self.calls.fetch_add(1, Ordering::SeqCst); - NotifAction::Continue + fn handle<'a>( + &'a self, + _cx: &'a HandlerCtx<'_>, + ) -> std::pin::Pin + Send + 'a>> { + Box::pin(async move { + self.calls.fetch_add(1, Ordering::SeqCst); + NotifAction::Continue + }) } } diff --git a/crates/sandlock-core/tests/integration/test_extra_handlers.rs b/crates/sandlock-core/tests/integration/test_extra_handlers.rs index a28834d..2096ec5 100644 --- a/crates/sandlock-core/tests/integration/test_extra_handlers.rs +++ b/crates/sandlock-core/tests/integration/test_extra_handlers.rs @@ -324,15 +324,19 @@ async fn chain_of_extras_runs_in_insertion_order() { action: NotifAction, } - #[async_trait::async_trait] impl Handler for Counter { - async fn handle(&self, _cx: &HandlerCtx<'_>) -> NotifAction { - self.c.fetch_add(1, Ordering::SeqCst); - match self.action { - NotifAction::Continue => NotifAction::Continue, - NotifAction::Errno(e) => NotifAction::Errno(e), - _ => unreachable!("test only uses Continue / Errno"), - } + fn handle<'a>( + &'a self, + _cx: &'a HandlerCtx<'_>, + ) -> std::pin::Pin + Send + 'a>> { + Box::pin(async move { + self.c.fetch_add(1, Ordering::SeqCst); + match self.action { + NotifAction::Continue => NotifAction::Continue, + NotifAction::Errno(e) => NotifAction::Errno(e), + _ => unreachable!("test only uses Continue / Errno"), + } + }) } } @@ -528,11 +532,15 @@ async fn struct_handler_state_persists_across_sandbox_calls() { calls: Arc, } - #[async_trait::async_trait] impl Handler for UnameCounter { - async fn handle(&self, _cx: &HandlerCtx<'_>) -> NotifAction { - self.calls.fetch_add(1, Ordering::SeqCst); - NotifAction::Errno(libc::EPERM) + fn handle<'a>( + &'a self, + _cx: &'a HandlerCtx<'_>, + ) -> std::pin::Pin + Send + 'a>> { + Box::pin(async move { + self.calls.fetch_add(1, Ordering::SeqCst); + NotifAction::Errno(libc::EPERM) + }) } } @@ -618,15 +626,19 @@ async fn run_with_extra_handlers_preserves_insertion_order_in_sandbox_chain() { action: NotifAction, } - #[async_trait::async_trait] impl Handler for OrderTracker { - async fn handle(&self, _cx: &HandlerCtx<'_>) -> NotifAction { - self.order.lock().unwrap().push(self.id); - match self.action { - NotifAction::Continue => NotifAction::Continue, - NotifAction::Errno(e) => NotifAction::Errno(e), - _ => unreachable!("test only uses Continue / Errno"), - } + fn handle<'a>( + &'a self, + _cx: &'a HandlerCtx<'_>, + ) -> std::pin::Pin + Send + 'a>> { + Box::pin(async move { + self.order.lock().unwrap().push(self.id); + match self.action { + NotifAction::Continue => NotifAction::Continue, + NotifAction::Errno(e) => NotifAction::Errno(e), + _ => unreachable!("test only uses Continue / Errno"), + } + }) } } From 55db24d6200da37cfa4f9f3c5451d94360cab7b3 Mon Sep 17 00:00:00 2001 From: dzerik Date: Wed, 6 May 2026 00:35:44 +0300 Subject: [PATCH 9/9] refactor(seccomp): drop SupervisorCtx from public Handler API MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Removes the `pub sup: &Arc` field from `HandlerCtx`, along with the `<'a>` lifetime parameter on the type. `SupervisorCtx` itself drops to `pub(crate)`, as does its sibling `state` module. Builtins keep working by capturing the sub-Arcs they need at table-build time instead of going through `cx.sup`. Public surface change: // before pub struct HandlerCtx<'a> { pub notif: SeccompNotif, pub sup: &'a Arc, pub notif_fd: RawFd, } // after pub struct HandlerCtx { pub notif: SeccompNotif, pub notif_fd: RawFd, } `SupervisorCtx` had public fields (`processes`, `network`, `chroot`, `cow`, `netlink`, `time_random`, …); exposing it through the dispatch context made every internal restructure of those fields a breaking downstream change. After this commit, `HandlerCtx` exposes only the kernel notification and the supervisor's seccomp listener fd, so adding fields to `SupervisorCtx` becomes a non-breaking change forever. Internal mechanics: - `build_dispatch_table` gains a `ctx: &Arc` parameter and is now `pub(crate)`. - `register_chroot_handlers` and `register_cow_handlers` likewise take `ctx: &Arc`. - The `chroot_handler!` / `chroot_handler_fallthrough!` / `cow_call!` macros clone the specific sub-Arcs they need (`ctx.chroot`, `ctx.cow`, `ctx.processes`) once at expansion time, then `Arc::clone` them per closure invocation — matching the per-handler pattern the rest of `build_dispatch_table` already uses. - ~25 builtin closures rewritten to capture `Arc::clone(ctx)` (or a specific sub-Arc) at registration time instead of dereferencing `cx.sup` inside the async block. All `move` keywords audited. - `DispatchTable::dispatch` drops its `ctx` parameter (no longer needed to populate the public field) and is `pub(crate)`. Continue-site safety doc rewritten: `cx.sup` no longer exists, so the contract simplifies to "do not hold a handler-owned `tokio::sync::Mutex` guard across `.await`". Sandlock's internal locks are unreachable from user handlers and therefore not part of the contract. 259 lib unit tests + 14 integration extra-handler tests pass. --- crates/sandlock-core/src/seccomp/dispatch.rs | 225 +++++++++++------- crates/sandlock-core/src/seccomp/mod.rs | 4 +- crates/sandlock-core/src/seccomp/notif.rs | 5 +- .../tests/integration/test_extra_handlers.rs | 28 +-- docs/extension-handlers.md | 44 ++-- 5 files changed, 175 insertions(+), 131 deletions(-) diff --git a/crates/sandlock-core/src/seccomp/dispatch.rs b/crates/sandlock-core/src/seccomp/dispatch.rs index f2d6a88..c3e52ee 100644 --- a/crates/sandlock-core/src/seccomp/dispatch.rs +++ b/crates/sandlock-core/src/seccomp/dispatch.rs @@ -58,22 +58,23 @@ use tokio::sync::Mutex; pub trait Handler: Send + Sync + 'static { fn handle<'a>( &'a self, - cx: &'a HandlerCtx<'_>, + cx: &'a HandlerCtx, ) -> std::pin::Pin + Send + 'a>>; } -/// Borrowed context passed to `Handler::handle`. +/// Context passed to `Handler::handle`. /// -/// `notif` is owned by value (it's a small `repr(C)` kernel struct, -/// cheap to copy); `sup` is borrowed from the supervisor's -/// `Arc` and exposed as `&Arc<...>` so handlers that -/// need to spawn additional tasks holding the supervisor context can -/// `Arc::clone` it without unsafe; `notif_fd` is the supervisor's +/// `notif` is the kernel notification (owned by value — it's a small +/// `repr(C)` struct, cheap to copy). `notif_fd` is the supervisor's /// seccomp listener fd, used by helpers like `read_child_mem` / -/// `write_child_mem` for TOCTOU-safe child memory access. -pub struct HandlerCtx<'a> { +/// `write_child_mem` / `read_child_cstr` for TOCTOU-safe child memory +/// access. +/// +/// Handler state lives on the implementor (`&self`). Supervisor-internal +/// state is intentionally not exposed here so the `SupervisorCtx` +/// internal fields are not part of the downstream extension contract. +pub struct HandlerCtx { pub notif: SeccompNotif, - pub sup: &'a std::sync::Arc, pub notif_fd: RawFd, } @@ -84,12 +85,12 @@ pub struct HandlerCtx<'a> { // impl Handler` instead. impl Handler for F where - F: Fn(&HandlerCtx<'_>) -> Fut + Send + Sync + 'static, + F: Fn(&HandlerCtx) -> Fut + Send + Sync + 'static, Fut: std::future::Future + Send + 'static, { fn handle<'a>( &'a self, - cx: &'a HandlerCtx<'_>, + cx: &'a HandlerCtx, ) -> std::pin::Pin + Send + 'a>> { Box::pin((self)(cx)) } @@ -104,12 +105,12 @@ where // // These are concrete `Box` / `Arc` rather than // `` blankets to avoid coherence overlap with the -// `impl Handler for F where F: Fn(&HandlerCtx<'_>) -> Fut` blanket +// `impl Handler for F where F: Fn(&HandlerCtx) -> Fut` blanket // above. impl Handler for Box { fn handle<'a>( &'a self, - cx: &'a HandlerCtx<'_>, + cx: &'a HandlerCtx, ) -> std::pin::Pin + Send + 'a>> { (**self).handle(cx) } @@ -118,7 +119,7 @@ impl Handler for Box { impl Handler for std::sync::Arc { fn handle<'a>( &'a self, - cx: &'a HandlerCtx<'_>, + cx: &'a HandlerCtx, ) -> std::pin::Pin + Send + 'a>> { (**self).handle(cx) } @@ -227,15 +228,14 @@ impl DispatchTable { } /// Dispatch a notification through the handler chain for its syscall number. - pub async fn dispatch( + pub(crate) async fn dispatch( &self, notif: SeccompNotif, - ctx: &std::sync::Arc, notif_fd: RawFd, ) -> NotifAction { let nr = notif.data.nr as i64; if let Some(chain) = self.chains.get(&nr) { - let handler_ctx = HandlerCtx { notif, sup: ctx, notif_fd }; + let handler_ctx = HandlerCtx { notif, notif_fd }; for handler in &chain.handlers { let action = handler.handle(&handler_ctx).await; if !matches!(action, NotifAction::Continue) { @@ -259,9 +259,10 @@ impl DispatchTable { /// observe the post-builtin view (e.g. `chroot`-normalized paths on /// `openat`). Builtins cannot be overridden or removed — this is the /// security boundary for downstream crates. -pub fn build_dispatch_table( +pub(crate) fn build_dispatch_table( policy: &Arc, resource: &Arc>, + ctx: &Arc, pending_handlers: Vec<(i64, std::sync::Arc)>, ) -> DispatchTable { let mut table = DispatchTable::new(); @@ -272,7 +273,7 @@ pub fn build_dispatch_table( for &nr in arch::FORK_LIKE_SYSCALLS { let policy_for_fork = Arc::clone(policy); let resource_for_fork = Arc::clone(resource); - table.register(nr, move |cx: &HandlerCtx<'_>| { + table.register(nr, move |cx: &HandlerCtx| { let notif = cx.notif; let notif_fd = cx.notif_fd; let policy = Arc::clone(&policy_for_fork); @@ -288,7 +289,8 @@ pub fn build_dispatch_table( // ------------------------------------------------------------------ for &nr in &[libc::SYS_wait4, libc::SYS_waitid] { let resource_for_wait = Arc::clone(resource); - table.register(nr, move |cx: &HandlerCtx<'_>| { + let __sup = Arc::clone(ctx); + table.register(nr, move |cx: &HandlerCtx| { let notif = cx.notif; let resource = Arc::clone(&resource_for_wait); async move { @@ -306,9 +308,10 @@ pub fn build_dispatch_table( libc::SYS_mremap, libc::SYS_shmget, ] { let policy_for_mem = Arc::clone(policy); - table.register(nr, move |cx: &HandlerCtx<'_>| { + let __sup = Arc::clone(ctx); + table.register(nr, move |cx: &HandlerCtx| { let notif = cx.notif; - let sup = std::sync::Arc::clone(cx.sup); + let sup = Arc::clone(&__sup); let policy = Arc::clone(&policy_for_mem); async move { crate::resource::handle_memory(¬if, &sup, &policy).await @@ -322,9 +325,10 @@ pub fn build_dispatch_table( // ------------------------------------------------------------------ if policy.has_net_allowlist || policy.has_http_acl { for &nr in &[libc::SYS_connect, libc::SYS_sendto, libc::SYS_sendmsg] { - table.register(nr, |cx: &HandlerCtx<'_>| { + let __sup = Arc::clone(ctx); + table.register(nr, move |cx: &HandlerCtx| { let notif = cx.notif; - let sup = std::sync::Arc::clone(cx.sup); + let sup = Arc::clone(&__sup); let notif_fd = cx.notif_fd; async move { crate::network::handle_net(¬if, &sup, notif_fd).await @@ -337,9 +341,10 @@ pub fn build_dispatch_table( // Deterministic random — getrandom() // ------------------------------------------------------------------ if policy.has_random_seed { - table.register(libc::SYS_getrandom, |cx: &HandlerCtx<'_>| { + let __sup = Arc::clone(ctx); + table.register(libc::SYS_getrandom, move |cx: &HandlerCtx| { let notif = cx.notif; - let sup = std::sync::Arc::clone(cx.sup); + let sup = Arc::clone(&__sup); let notif_fd = cx.notif_fd; async move { let mut tr = sup.time_random.lock().await; @@ -356,9 +361,10 @@ pub fn build_dispatch_table( // Deterministic random — /dev/urandom opens (openat) // ------------------------------------------------------------------ if policy.has_random_seed { - table.register(libc::SYS_openat, |cx: &HandlerCtx<'_>| { + let __sup = Arc::clone(ctx); + table.register(libc::SYS_openat, move |cx: &HandlerCtx| { let notif = cx.notif; - let sup = std::sync::Arc::clone(cx.sup); + let sup = Arc::clone(&__sup); let notif_fd = cx.notif_fd; async move { let mut tr = sup.time_random.lock().await; @@ -382,7 +388,7 @@ pub fn build_dispatch_table( libc::SYS_timerfd_settime as i64, libc::SYS_timer_settime as i64, ] { - table.register(nr, move |cx: &HandlerCtx<'_>| { + table.register(nr, move |cx: &HandlerCtx| { let notif = cx.notif; let notif_fd = cx.notif_fd; async move { @@ -396,14 +402,14 @@ pub fn build_dispatch_table( // Chroot path interception (before COW) // ------------------------------------------------------------------ if policy.chroot_root.is_some() { - register_chroot_handlers(&mut table, policy); + register_chroot_handlers(&mut table, policy, ctx); } // ------------------------------------------------------------------ // COW filesystem interception // ------------------------------------------------------------------ if policy.cow_enabled { - register_cow_handlers(&mut table); + register_cow_handlers(&mut table, ctx); } // ------------------------------------------------------------------ @@ -412,9 +418,10 @@ pub fn build_dispatch_table( { let policy_for_proc_open = Arc::clone(policy); let resource_for_proc_open = Arc::clone(resource); - table.register(libc::SYS_openat, move |cx: &HandlerCtx<'_>| { + let __sup = Arc::clone(ctx); + table.register(libc::SYS_openat, move |cx: &HandlerCtx| { let notif = cx.notif; - let sup = std::sync::Arc::clone(cx.sup); + let sup = Arc::clone(&__sup); let notif_fd = cx.notif_fd; let policy = Arc::clone(&policy_for_proc_open); let resource = Arc::clone(&resource_for_proc_open); @@ -431,9 +438,10 @@ pub fn build_dispatch_table( } for nr in getdents_nrs { let policy_for_getdents = Arc::clone(policy); - table.register(nr, move |cx: &HandlerCtx<'_>| { + let __sup = Arc::clone(ctx); + table.register(nr, move |cx: &HandlerCtx| { let notif = cx.notif; - let sup = std::sync::Arc::clone(cx.sup); + let sup = Arc::clone(&__sup); let notif_fd = cx.notif_fd; let policy = Arc::clone(&policy_for_getdents); async move { @@ -447,7 +455,7 @@ pub fn build_dispatch_table( // Virtual CPU count // ------------------------------------------------------------------ if let Some(n) = policy.num_cpus { - table.register(libc::SYS_sched_getaffinity, move |cx: &HandlerCtx<'_>| { + table.register(libc::SYS_sched_getaffinity, move |cx: &HandlerCtx| { let notif = cx.notif; let notif_fd = cx.notif_fd; async move { @@ -462,7 +470,7 @@ pub fn build_dispatch_table( if let Some(ref hostname) = policy.virtual_hostname { let hostname_for_uname = hostname.clone(); let hostname_for_open = hostname.clone(); - table.register(libc::SYS_uname, move |cx: &HandlerCtx<'_>| { + table.register(libc::SYS_uname, move |cx: &HandlerCtx| { let notif = cx.notif; let notif_fd = cx.notif_fd; let hostname = hostname_for_uname.clone(); @@ -470,7 +478,7 @@ pub fn build_dispatch_table( crate::procfs::handle_uname(¬if, &hostname, notif_fd) } }); - table.register(libc::SYS_openat, move |cx: &HandlerCtx<'_>| { + table.register(libc::SYS_openat, move |cx: &HandlerCtx| { let notif = cx.notif; let notif_fd = cx.notif_fd; let hostname = hostname_for_open.clone(); @@ -489,7 +497,8 @@ pub fn build_dispatch_table( // ------------------------------------------------------------------ if let Some(ref etc_hosts) = policy.virtual_etc_hosts { let etc_hosts_for_open = etc_hosts.clone(); - table.register(libc::SYS_openat, move |cx: &HandlerCtx<'_>| { + let __sup = Arc::clone(ctx); + table.register(libc::SYS_openat, move |cx: &HandlerCtx| { let notif = cx.notif; let notif_fd = cx.notif_fd; let etc_hosts = etc_hosts_for_open.clone(); @@ -512,9 +521,10 @@ pub fn build_dispatch_table( getdents_nrs.push(getdents); } for nr in getdents_nrs { - table.register(nr, |cx: &HandlerCtx<'_>| { + let __sup = Arc::clone(ctx); + table.register(nr, move |cx: &HandlerCtx| { let notif = cx.notif; - let sup = std::sync::Arc::clone(cx.sup); + let sup = Arc::clone(&__sup); let notif_fd = cx.notif_fd; async move { let processes = Arc::clone(&sup.processes); @@ -537,25 +547,28 @@ pub fn build_dispatch_table( // runs first and returns `Continue` for non-cookie fds. // ------------------------------------------------------------------ { - table.register(libc::SYS_socket, |cx: &HandlerCtx<'_>| { + let __sup = Arc::clone(ctx); + table.register(libc::SYS_socket, move |cx: &HandlerCtx| { let notif = cx.notif; - let sup = std::sync::Arc::clone(cx.sup); + let sup = Arc::clone(&__sup); async move { let state = Arc::clone(&sup.netlink); crate::netlink::handlers::handle_socket(¬if, &state).await } }); - table.register(libc::SYS_bind, |cx: &HandlerCtx<'_>| { + let __sup = Arc::clone(ctx); + table.register(libc::SYS_bind, move |cx: &HandlerCtx| { let notif = cx.notif; - let sup = std::sync::Arc::clone(cx.sup); + let sup = Arc::clone(&__sup); async move { let state = Arc::clone(&sup.netlink); crate::netlink::handlers::handle_bind(¬if, &state).await } }); - table.register(libc::SYS_getsockname, |cx: &HandlerCtx<'_>| { + let __sup = Arc::clone(ctx); + table.register(libc::SYS_getsockname, move |cx: &HandlerCtx| { let notif = cx.notif; - let sup = std::sync::Arc::clone(cx.sup); + let sup = Arc::clone(&__sup); let notif_fd = cx.notif_fd; async move { let state = Arc::clone(&sup.netlink); @@ -566,9 +579,10 @@ pub fn build_dispatch_table( // (the kernel only writes sun_family on unix socketpair recvmsg, // leaving the rest of the buffer as stack garbage otherwise). for &nr in &[libc::SYS_recvfrom, libc::SYS_recvmsg] { - table.register(nr, |cx: &HandlerCtx<'_>| { + let __sup = Arc::clone(ctx); + table.register(nr, move |cx: &HandlerCtx| { let notif = cx.notif; - let sup = std::sync::Arc::clone(cx.sup); + let sup = Arc::clone(&__sup); let notif_fd = cx.notif_fd; async move { let state = Arc::clone(&sup.netlink); @@ -578,9 +592,10 @@ pub fn build_dispatch_table( } // Unregister on close so the (pid, fd) slot isn't left in the // cookie set once the child reuses the fd for something else. - table.register(libc::SYS_close, |cx: &HandlerCtx<'_>| { + let __sup = Arc::clone(ctx); + table.register(libc::SYS_close, move |cx: &HandlerCtx| { let notif = cx.notif; - let sup = std::sync::Arc::clone(cx.sup); + let sup = Arc::clone(&__sup); async move { let state = Arc::clone(&sup.netlink); crate::netlink::handlers::handle_close(¬if, &state).await @@ -592,9 +607,10 @@ pub fn build_dispatch_table( // Bind — on-behalf // ------------------------------------------------------------------ if policy.port_remap || policy.has_net_allowlist { - table.register(libc::SYS_bind, |cx: &HandlerCtx<'_>| { + let __sup = Arc::clone(ctx); + table.register(libc::SYS_bind, move |cx: &HandlerCtx| { let notif = cx.notif; - let sup = std::sync::Arc::clone(cx.sup); + let sup = Arc::clone(&__sup); let notif_fd = cx.notif_fd; async move { crate::port_remap::handle_bind(¬if, &sup.network, notif_fd).await @@ -606,9 +622,10 @@ pub fn build_dispatch_table( // getsockname — port remap // ------------------------------------------------------------------ if policy.port_remap { - table.register(libc::SYS_getsockname, |cx: &HandlerCtx<'_>| { + let __sup = Arc::clone(ctx); + table.register(libc::SYS_getsockname, move |cx: &HandlerCtx| { let notif = cx.notif; - let sup = std::sync::Arc::clone(cx.sup); + let sup = Arc::clone(&__sup); let notif_fd = cx.notif_fd; async move { crate::port_remap::handle_getsockname(¬if, &sup.network, notif_fd).await @@ -632,7 +649,11 @@ pub fn build_dispatch_table( // Chroot handler registration // ============================================================ -fn register_chroot_handlers(table: &mut DispatchTable, policy: &Arc) { +fn register_chroot_handlers( + table: &mut DispatchTable, + policy: &Arc, + ctx: &Arc, +) { use crate::chroot::dispatch::ChrootCtx; // Helper macro — produces a closure satisfying Handler via blanket impl. @@ -641,9 +662,12 @@ fn register_chroot_handlers(table: &mut DispatchTable, policy: &Arc macro_rules! chroot_handler { ($policy:expr, $handler:expr) => {{ let policy = Arc::clone($policy); - move |cx: &HandlerCtx<'_>| { + let chroot_state = Arc::clone(&ctx.chroot); + let cow_state = Arc::clone(&ctx.cow); + move |cx: &HandlerCtx| { let notif = cx.notif; - let sup = std::sync::Arc::clone(cx.sup); + let chroot_state = Arc::clone(&chroot_state); + let cow_state = Arc::clone(&cow_state); let notif_fd = cx.notif_fd; let policy = Arc::clone(&policy); async move { @@ -654,7 +678,7 @@ fn register_chroot_handlers(table: &mut DispatchTable, policy: &Arc denied: &policy.chroot_denied, mounts: &policy.chroot_mounts, }; - $handler(¬if, &sup.chroot, &sup.cow, notif_fd, &chroot_ctx).await + $handler(¬if, &chroot_state, &cow_state, notif_fd, &chroot_ctx).await } } }}; @@ -665,9 +689,12 @@ fn register_chroot_handlers(table: &mut DispatchTable, policy: &Arc macro_rules! chroot_handler_fallthrough { ($policy:expr, $handler:expr) => {{ let policy = Arc::clone($policy); - move |cx: &HandlerCtx<'_>| { + let chroot_state = Arc::clone(&ctx.chroot); + let cow_state = Arc::clone(&ctx.cow); + move |cx: &HandlerCtx| { let notif = cx.notif; - let sup = std::sync::Arc::clone(cx.sup); + let chroot_state = Arc::clone(&chroot_state); + let cow_state = Arc::clone(&cow_state); let notif_fd = cx.notif_fd; let policy = Arc::clone(&policy); async move { @@ -678,7 +705,7 @@ fn register_chroot_handlers(table: &mut DispatchTable, policy: &Arc denied: &policy.chroot_denied, mounts: &policy.chroot_mounts, }; - $handler(¬if, &sup.chroot, &sup.cow, notif_fd, &chroot_ctx).await + $handler(¬if, &chroot_state, &cow_state, notif_fd, &chroot_ctx).await } } }}; @@ -716,26 +743,32 @@ fn register_chroot_handlers(table: &mut DispatchTable, policy: &Arc crate::chroot::dispatch::handle_chroot_legacy_unlink)); } if let Some(nr) = arch::SYS_RMDIR { + let __sup = Arc::clone(ctx); table.register(nr, chroot_handler!(policy, crate::chroot::dispatch::handle_chroot_legacy_rmdir)); } if let Some(nr) = arch::SYS_MKDIR { + let __sup = Arc::clone(ctx); table.register(nr, chroot_handler!(policy, crate::chroot::dispatch::handle_chroot_legacy_mkdir)); } if let Some(nr) = arch::SYS_RENAME { + let __sup = Arc::clone(ctx); table.register(nr, chroot_handler!(policy, crate::chroot::dispatch::handle_chroot_legacy_rename)); } if let Some(nr) = arch::SYS_SYMLINK { + let __sup = Arc::clone(ctx); table.register(nr, chroot_handler!(policy, crate::chroot::dispatch::handle_chroot_legacy_symlink)); } if let Some(nr) = arch::SYS_LINK { + let __sup = Arc::clone(ctx); table.register(nr, chroot_handler!(policy, crate::chroot::dispatch::handle_chroot_legacy_link)); } if let Some(nr) = arch::SYS_CHMOD { + let __sup = Arc::clone(ctx); table.register(nr, chroot_handler!(policy, crate::chroot::dispatch::handle_chroot_legacy_chmod)); } @@ -743,9 +776,10 @@ fn register_chroot_handlers(table: &mut DispatchTable, policy: &Arc // chown — non-follow if let Some(chown) = arch::SYS_CHOWN { let policy_for_chown = Arc::clone(policy); - table.register(chown, move |cx: &HandlerCtx<'_>| { + let __sup = Arc::clone(ctx); + table.register(chown, move |cx: &HandlerCtx| { let notif = cx.notif; - let sup = std::sync::Arc::clone(cx.sup); + let sup = Arc::clone(&__sup); let notif_fd = cx.notif_fd; let policy = Arc::clone(&policy_for_chown); async move { @@ -764,9 +798,10 @@ fn register_chroot_handlers(table: &mut DispatchTable, policy: &Arc // lchown — follow if let Some(lchown) = arch::SYS_LCHOWN { let policy_for_lchown = Arc::clone(policy); - table.register(lchown, move |cx: &HandlerCtx<'_>| { + let __sup = Arc::clone(ctx); + table.register(lchown, move |cx: &HandlerCtx| { let notif = cx.notif; - let sup = std::sync::Arc::clone(cx.sup); + let sup = Arc::clone(&__sup); let notif_fd = cx.notif_fd; let policy = Arc::clone(&policy_for_lchown); async move { @@ -824,17 +859,22 @@ fn register_chroot_handlers(table: &mut DispatchTable, policy: &Arc getdents_nrs.push(getdents); } for nr in getdents_nrs { + let __sup = Arc::clone(ctx); table.register(nr, chroot_handler!(policy, crate::chroot::dispatch::handle_chroot_getdents)); } // chdir, getcwd, statfs, utimensat + let __sup = Arc::clone(ctx); table.register(libc::SYS_chdir as i64, chroot_handler!(policy, crate::chroot::dispatch::handle_chroot_chdir)); + let __sup = Arc::clone(ctx); table.register(libc::SYS_getcwd as i64, chroot_handler!(policy, crate::chroot::dispatch::handle_chroot_getcwd)); + let __sup = Arc::clone(ctx); table.register(libc::SYS_statfs as i64, chroot_handler!(policy, crate::chroot::dispatch::handle_chroot_statfs)); + let __sup = Arc::clone(ctx); table.register(libc::SYS_utimensat as i64, chroot_handler!(policy, crate::chroot::dispatch::handle_chroot_utimensat)); } @@ -843,21 +883,22 @@ fn register_chroot_handlers(table: &mut DispatchTable, policy: &Arc // COW handler registration // ============================================================ -fn register_cow_handlers(table: &mut DispatchTable) { +fn register_cow_handlers(table: &mut DispatchTable, ctx: &Arc) { // Helper to grab cow + processes from cx.sup in one place. macro_rules! cow_call { - ($handler:expr) => { - |cx: &HandlerCtx<'_>| { + ($handler:expr) => {{ + let cow_state = Arc::clone(&ctx.cow); + let processes_state = Arc::clone(&ctx.processes); + move |cx: &HandlerCtx| { let notif = cx.notif; - let sup = std::sync::Arc::clone(cx.sup); + let cow_state = Arc::clone(&cow_state); + let processes_state = Arc::clone(&processes_state); let notif_fd = cx.notif_fd; async move { - let cow = Arc::clone(&sup.cow); - let processes = Arc::clone(&sup.processes); - $handler(¬if, &cow, &processes, notif_fd).await + $handler(¬if, &cow_state, &processes_state, notif_fd).await } } - }; + }}; } // Write syscalls (*at variants + legacy) @@ -1010,7 +1051,7 @@ mod extra_handler_tests { let order_clone = Arc::clone(&order); table.register( libc::SYS_openat, - move |_cx: &HandlerCtx<'_>| { + move |_cx: &HandlerCtx| { let order = Arc::clone(&order_clone); async move { order.lock().unwrap().push(tag); @@ -1020,9 +1061,9 @@ mod extra_handler_tests { ); } - let ctx = fake_supervisor_ctx(); + let _ctx = fake_supervisor_ctx(); let action = table - .dispatch(fake_notif(libc::SYS_openat as i32), &ctx, -1) + .dispatch(fake_notif(libc::SYS_openat as i32), -1) .await; assert!(matches!(action, NotifAction::Continue)); @@ -1050,7 +1091,7 @@ mod extra_handler_tests { let order_builtin = Arc::clone(&order); table.register( libc::SYS_openat, - move |_cx: &HandlerCtx<'_>| { + move |_cx: &HandlerCtx| { let order = Arc::clone(&order_builtin); async move { order.lock().unwrap().push(b'B'); @@ -1064,7 +1105,7 @@ mod extra_handler_tests { let order_extra = Arc::clone(&order); table.register( libc::SYS_openat, - move |_cx: &HandlerCtx<'_>| { + move |_cx: &HandlerCtx| { let order = Arc::clone(&order_extra); async move { order.lock().unwrap().push(b'E'); @@ -1073,9 +1114,9 @@ mod extra_handler_tests { }, ); - let ctx = fake_supervisor_ctx(); + let _ctx = fake_supervisor_ctx(); let action = table - .dispatch(fake_notif(libc::SYS_openat as i32), &ctx, -1) + .dispatch(fake_notif(libc::SYS_openat as i32), -1) .await; assert!(matches!(action, NotifAction::Continue)); @@ -1102,7 +1143,7 @@ mod extra_handler_tests { let calls_first = Arc::clone(&calls); table.register( libc::SYS_openat, - move |_cx: &HandlerCtx<'_>| { + move |_cx: &HandlerCtx| { let calls = Arc::clone(&calls_first); async move { calls.fetch_add(1, Ordering::SeqCst); @@ -1115,7 +1156,7 @@ mod extra_handler_tests { let calls_second = Arc::clone(&calls); table.register( libc::SYS_openat, - move |_cx: &HandlerCtx<'_>| { + move |_cx: &HandlerCtx| { let calls = Arc::clone(&calls_second); async move { calls.fetch_add(1, Ordering::SeqCst); @@ -1124,9 +1165,9 @@ mod extra_handler_tests { }, ); - let ctx = fake_supervisor_ctx(); + let _ctx = fake_supervisor_ctx(); let action = table - .dispatch(fake_notif(libc::SYS_openat as i32), &ctx, -1) + .dispatch(fake_notif(libc::SYS_openat as i32), -1) .await; match action { @@ -1179,7 +1220,7 @@ mod extra_handler_tests { let counter = Arc::new(AtomicU64::new(0)); let counter_clone = Arc::clone(&counter); - let h = move |cx: &HandlerCtx<'_>| { + let h = move |cx: &HandlerCtx| { let counter = Arc::clone(&counter_clone); async move { counter.fetch_add(1, Ordering::SeqCst); @@ -1188,9 +1229,9 @@ mod extra_handler_tests { } }; - let sup = fake_supervisor_ctx(); + let _sup = fake_supervisor_ctx(); let notif = fake_notif(libc::SYS_openat as i32); - let cx = HandlerCtx { notif, sup: &sup, notif_fd: -1 }; + let cx = HandlerCtx { notif, notif_fd: -1 }; let action = h.handle(&cx).await; assert!(matches!(action, NotifAction::Continue)); @@ -1216,7 +1257,7 @@ mod extra_handler_tests { impl Handler for StructHandler { fn handle<'a>( &'a self, - _cx: &'a HandlerCtx<'_>, + _cx: &'a HandlerCtx, ) -> std::pin::Pin + Send + 'a>> { Box::pin(async move { self.calls.fetch_add(1, Ordering::SeqCst); @@ -1231,14 +1272,14 @@ mod extra_handler_tests { }); table.register_arc(libc::SYS_openat, handler.clone() as std::sync::Arc); - let sup = fake_supervisor_ctx(); + let _sup = fake_supervisor_ctx(); let notif = fake_notif(libc::SYS_openat as i32); // Three independent dispatches against the same registered handler. // Walker MUST hit the struct's handle() each time, accumulating // state on &self.calls. for _ in 0..3 { - let action = table.dispatch(notif, &sup, -1).await; + let action = table.dispatch(notif, -1).await; assert!(matches!(action, NotifAction::Continue)); } diff --git a/crates/sandlock-core/src/seccomp/mod.rs b/crates/sandlock-core/src/seccomp/mod.rs index 6afa622..231fc86 100644 --- a/crates/sandlock-core/src/seccomp/mod.rs +++ b/crates/sandlock-core/src/seccomp/mod.rs @@ -1,6 +1,6 @@ pub mod bpf; -pub mod ctx; +pub(crate) mod ctx; pub mod dispatch; pub mod notif; -pub mod state; +pub(crate) mod state; pub mod syscall; diff --git a/crates/sandlock-core/src/seccomp/notif.rs b/crates/sandlock-core/src/seccomp/notif.rs index 3a229a3..b031ba4 100644 --- a/crates/sandlock-core/src/seccomp/notif.rs +++ b/crates/sandlock-core/src/seccomp/notif.rs @@ -967,10 +967,10 @@ async fn handle_notification( NotifAction::Errno(libc::EACCES) } else { drop(pfs); - dispatch_table.dispatch(notif, ctx, fd).await + dispatch_table.dispatch(notif, fd).await } } else { - dispatch_table.dispatch(notif, ctx, fd).await + dispatch_table.dispatch(notif, fd).await } }; @@ -1113,6 +1113,7 @@ pub async fn supervisor( let dispatch_table = Arc::new(super::dispatch::build_dispatch_table( &ctx.policy, &ctx.resource, + &ctx, pending_handlers, )); diff --git a/crates/sandlock-core/tests/integration/test_extra_handlers.rs b/crates/sandlock-core/tests/integration/test_extra_handlers.rs index 2096ec5..a9a00c9 100644 --- a/crates/sandlock-core/tests/integration/test_extra_handlers.rs +++ b/crates/sandlock-core/tests/integration/test_extra_handlers.rs @@ -98,7 +98,7 @@ async fn extra_handler_intercepts_syscall_outside_builtin_set() { let calls = Arc::new(AtomicUsize::new(0)); let calls_in_handler = Arc::clone(&calls); - let handler = move |_cx: &HandlerCtx<'_>| { + let handler = move |_cx: &HandlerCtx| { let calls = Arc::clone(&calls_in_handler); async move { calls.fetch_add(1, Ordering::SeqCst); @@ -141,7 +141,7 @@ async fn extra_handler_continue_lets_syscall_proceed() { let calls = Arc::new(AtomicUsize::new(0)); let calls_in_handler = Arc::clone(&calls); - let handler = move |_cx: &HandlerCtx<'_>| { + let handler = move |_cx: &HandlerCtx| { let calls = Arc::clone(&calls_in_handler); async move { calls.fetch_add(1, Ordering::SeqCst); @@ -181,7 +181,7 @@ async fn empty_extras_preserves_default_behaviour() { let policy = base_policy().build().unwrap(); let baseline = Sandbox::run(&policy, None, &["/bin/pwd"]).await.unwrap(); - let no_handlers: [(i64, fn(&HandlerCtx<'_>) -> std::future::Ready); 0] = []; + let no_handlers: [(i64, fn(&HandlerCtx) -> std::future::Ready); 0] = []; let with_extras = Sandbox::run_with_extra_handlers(&policy, None, &["/bin/pwd"], no_handlers) .await .unwrap(); @@ -207,7 +207,7 @@ async fn extra_handler_runs_after_builtin_returns_continue() { let openat_calls = Arc::new(AtomicUsize::new(0)); let openat_in_handler = Arc::clone(&openat_calls); - let handler = move |_cx: &HandlerCtx<'_>| { + let handler = move |_cx: &HandlerCtx| { let openat_calls = Arc::clone(&openat_in_handler); async move { openat_calls.fetch_add(1, Ordering::SeqCst); @@ -264,7 +264,7 @@ async fn builtin_non_continue_blocks_extra() { let observed: Arc>> = Arc::new(Mutex::new(Vec::new())); let observed_in_handler = Arc::clone(&observed); - let handler = move |cx: &HandlerCtx<'_>| { + let handler = move |cx: &HandlerCtx| { let observed = Arc::clone(&observed_in_handler); let notif = cx.notif; async move { @@ -327,7 +327,7 @@ async fn chain_of_extras_runs_in_insertion_order() { impl Handler for Counter { fn handle<'a>( &'a self, - _cx: &'a HandlerCtx<'_>, + _cx: &'a HandlerCtx, ) -> std::pin::Pin + Send + 'a>> { Box::pin(async move { self.c.fetch_add(1, Ordering::SeqCst); @@ -395,7 +395,7 @@ async fn chain_of_extras_runs_in_insertion_order() { #[tokio::test] async fn extra_handler_on_default_deny_syscall_is_rejected() { let policy = base_policy().build().unwrap(); - let handler = |_cx: &HandlerCtx<'_>| async { NotifAction::Continue }; + let handler = |_cx: &HandlerCtx| async { NotifAction::Continue }; let result = Sandbox::run_with_extra_handlers( &policy, @@ -435,7 +435,7 @@ async fn extra_handler_on_user_specified_deny_is_rejected() { .deny_syscalls(vec!["mremap".into()]) .build() .unwrap(); - let handler = |_cx: &HandlerCtx<'_>| async { NotifAction::Continue }; + let handler = |_cx: &HandlerCtx| async { NotifAction::Continue }; let result = Sandbox::run_with_extra_handlers( &policy, @@ -479,7 +479,7 @@ async fn handler_via_blanket_impl_dispatches_in_sandbox() { let calls = Arc::new(AtomicUsize::new(0)); let calls_in_handler = Arc::clone(&calls); - let handler = move |_cx: &HandlerCtx<'_>| { + let handler = move |_cx: &HandlerCtx| { let calls = Arc::clone(&calls_in_handler); async move { calls.fetch_add(1, Ordering::SeqCst); @@ -535,7 +535,7 @@ async fn struct_handler_state_persists_across_sandbox_calls() { impl Handler for UnameCounter { fn handle<'a>( &'a self, - _cx: &'a HandlerCtx<'_>, + _cx: &'a HandlerCtx, ) -> std::pin::Pin + Send + 'a>> { Box::pin(async move { self.calls.fetch_add(1, Ordering::SeqCst); @@ -577,7 +577,7 @@ async fn struct_handler_state_persists_across_sandbox_calls() { #[tokio::test] async fn run_with_extra_handlers_rejects_negative_syscall() { let policy = base_policy().build().unwrap(); - let handler = |_cx: &HandlerCtx<'_>| async { NotifAction::Continue }; + let handler = |_cx: &HandlerCtx| async { NotifAction::Continue }; let result = Sandbox::run_with_extra_handlers(&policy, None, &["true"], [(-5i64, handler)]).await; @@ -595,7 +595,7 @@ async fn run_with_extra_handlers_rejects_negative_syscall() { #[tokio::test] async fn run_with_extra_handlers_rejects_arch_unknown_syscall() { let policy = base_policy().build().unwrap(); - let handler = |_cx: &HandlerCtx<'_>| async { NotifAction::Continue }; + let handler = |_cx: &HandlerCtx| async { NotifAction::Continue }; let result = Sandbox::run_with_extra_handlers(&policy, None, &["true"], [(99_999i64, handler)]).await; @@ -629,7 +629,7 @@ async fn run_with_extra_handlers_preserves_insertion_order_in_sandbox_chain() { impl Handler for OrderTracker { fn handle<'a>( &'a self, - _cx: &'a HandlerCtx<'_>, + _cx: &'a HandlerCtx, ) -> std::pin::Pin + Send + 'a>> { Box::pin(async move { self.order.lock().unwrap().push(self.id); @@ -681,7 +681,7 @@ async fn run_with_extra_handlers_preserves_insertion_order_in_sandbox_chain() { #[tokio::test] async fn run_with_extra_handlers_rejects_handler_on_default_deny_syscall() { let policy = base_policy().build().unwrap(); - let handler = |_cx: &HandlerCtx<'_>| async { NotifAction::Continue }; + let handler = |_cx: &HandlerCtx| async { NotifAction::Continue }; // SYS_mount is in DEFAULT_DENY_SYSCALLS. let result = diff --git a/docs/extension-handlers.md b/docs/extension-handlers.md index 63c9d7f..5704d68 100644 --- a/docs/extension-handlers.md +++ b/docs/extension-handlers.md @@ -20,7 +20,7 @@ loop. ### `Handler` trait -The `Handler` trait has a single async method `handle(&self, cx: &HandlerCtx<'_>) -> NotifAction`. +The `Handler` trait has a single async method `handle(&self, cx: &HandlerCtx) -> NotifAction`. State lives on the struct's fields — no `Arc::clone` ladders, no `Box::pin` ceremony at call site. ```rust @@ -33,7 +33,7 @@ struct OpenAudit { count: AtomicU64 } #[async_trait] impl Handler for OpenAudit { - async fn handle(&self, cx: &HandlerCtx<'_>) -> NotifAction { + async fn handle(&self, cx: &HandlerCtx) -> NotifAction { let n = self.count.fetch_add(1, Ordering::SeqCst) + 1; eprintln!("[audit #{n}] pid={} openat", cx.notif.pid); NotifAction::Continue @@ -49,15 +49,16 @@ Sandbox::run_with_extra_handlers( .await?; ``` -`HandlerCtx<'_>` is borrowed for the dispatch call (cannot outlive it — its `&Arc` -ref carries supervisor state for the next notification). +`HandlerCtx` is passed by reference for the dispatch call. It exposes only the kernel +notification (`notif`) and the supervisor's seccomp listener fd (`notif_fd`); supervisor-internal +state is intentionally not part of this contract — handler state lives on the implementor. ### Closures via blanket impl For trivial single-shot handlers, closures work via the blanket `impl Handler for F`: ```rust -let audit = |cx: &HandlerCtx<'_>| async move { +let audit = |cx: &HandlerCtx| async move { eprintln!("openat from pid {}", cx.notif.pid); NotifAction::Continue }; @@ -198,7 +199,7 @@ struct ExtensionDenyHandler { denied_suffixes: Vec } #[async_trait] impl Handler for ExtensionDenyHandler { - async fn handle(&self, cx: &HandlerCtx<'_>) -> NotifAction { + async fn handle(&self, cx: &HandlerCtx) -> NotifAction { // openat(2): args[1] is `const char *pathname`. 4096 = PATH_MAX. let path = match read_child_cstr(cx.notif_fd, cx.notif.id, cx.notif.pid, cx.notif.data.args[1], 4096) { @@ -221,7 +222,7 @@ Synthesising data INTO the guest follows the same pattern with `write_child_mem` `getdents64` handler that returns an empty directory listing: ```rust -async fn handle(&self, cx: &HandlerCtx<'_>) -> NotifAction { +async fn handle(&self, cx: &HandlerCtx) -> NotifAction { // args[1] is `struct linux_dirent64 *dirp` — write zero bytes (empty // listing) and return 0 (no entries) to the guest. let buf_addr = cx.notif.data.args[1]; @@ -261,7 +262,7 @@ struct CallStats { #[async_trait] impl Handler for CallStats { - async fn handle(&self, cx: &HandlerCtx<'_>) -> NotifAction { + async fn handle(&self, cx: &HandlerCtx) -> NotifAction { match cx.notif.data.nr as i64 { n if n == libc::SYS_openat => self.openat.fetch_add(1, Ordering::Relaxed), n if n == libc::SYS_close => self.close.fetch_add(1, Ordering::Relaxed), @@ -279,7 +280,7 @@ struct DispatchCallStats(std::sync::Arc); #[async_trait] impl Handler for DispatchCallStats { - async fn handle(&self, cx: &HandlerCtx<'_>) -> NotifAction { + async fn handle(&self, cx: &HandlerCtx) -> NotifAction { self.0.handle(cx).await } } @@ -348,16 +349,17 @@ The contract is exercised at two layers: ### Continue-site safety The supervisor processes notifications sequentially in a single tokio task, so the response sent -for one notification gates the kernel resumption of the trapped syscall. A handler must not hold -any [`SupervisorCtx`](../crates/sandlock-core/src/seccomp/ctx.rs) internal lock -(`tokio::sync::Mutex`/`RwLock`) across an `.await` point: if the guard is alive when control -returns to the supervisor loop, the next notification that needs the same lock parks, the response -for the current notification is not sent, and the child stays trapped in the syscall. Acquire, -mutate, drop — `await` only after the guard is out of scope. - -The trait shape does not change this contract — `&self` in `Handler::handle` gives access to your -own struct fields, but `cx.sup` is a borrowed `&Arc` and its locks have the same -constraint as before. See [issue #27][i27] for the underlying contract. +for one notification gates the kernel resumption of the trapped syscall. Sandlock-internal +locks (`tokio::sync::Mutex`/`RwLock`) live on the supervisor; user handlers do not have access +to them through `HandlerCtx`, so the contract here is local to handler-owned state on `&self`: +a `tokio::sync::Mutex` or `RwLock` field on your handler must not be held across an +`.await` point. If the guard is alive when control returns to the supervisor loop, the next +notification that needs the same lock parks, the response for the current notification is not +sent, and the child stays trapped in the syscall. Acquire, mutate, drop — `await` only after +the guard is out of scope. + +See [issue #27][i27] for the underlying supervisor-loop contract that this convention extends to +user handlers. [i27]: https://github.com/multikernel/sandlock/issues/27 @@ -436,7 +438,7 @@ struct PanicSafe(H); #[async_trait] impl Handler for PanicSafe { - async fn handle(&self, cx: &HandlerCtx<'_>) -> NotifAction { + async fn handle(&self, cx: &HandlerCtx) -> NotifAction { AssertUnwindSafe(self.0.handle(cx)) .catch_unwind() .await @@ -530,7 +532,7 @@ pub struct OpenatHandler { #[async_trait] impl Handler for OpenatHandler { - async fn handle(&self, cx: &HandlerCtx<'_>) -> NotifAction { + async fn handle(&self, cx: &HandlerCtx) -> NotifAction { /* read path arg via sandlock_core::seccomp::notif::read_child_cstr, consult self.virtual_tree, return NotifAction::InjectFdSendTracked / Errno / ... */