Skip to content
Merged
12 changes: 12 additions & 0 deletions crates/sandlock-core/src/arch.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
#[cfg(target_arch = "x86_64")]
mod imp {
pub const AUDIT_ARCH: u32 = 0xC000_003E;
pub const MAX_SYSCALL_NR: i64 = 462;
pub const SYS_SECCOMP: i64 = 317;
pub const SYS_MEMFD_CREATE: i64 = 319;
pub const SYS_PIDFD_OPEN: i64 = 434;
Expand Down Expand Up @@ -45,6 +46,7 @@ mod imp {
#[cfg(target_arch = "aarch64")]
mod imp {
pub const AUDIT_ARCH: u32 = 0xC000_00B7;
pub const MAX_SYSCALL_NR: i64 = 463;
pub const SYS_SECCOMP: i64 = 277;
pub const SYS_MEMFD_CREATE: i64 = 279;
pub const SYS_PIDFD_OPEN: i64 = 434;
Expand Down Expand Up @@ -82,6 +84,16 @@ mod imp {

pub use imp::*;

/// Reports whether `nr` falls inside the syscall-number range of the
/// architecture this crate was compiled for.
///
/// This is a pure range test: it returns `true` exactly when
/// `0 <= nr <= MAX_SYSCALL_NR`, so negative values and values above the
/// per-architecture ceiling are rejected. It does not check that a given
/// in-range number is actually assigned to a syscall; the check is
/// deliberately conservative rather than an enumeration of every number.
///
/// Used by [`crate::seccomp::syscall::Syscall::checked`] as an up-front
/// sanity filter for caller-supplied numbers.
pub fn is_known_syscall(nr: i64) -> bool {
    (0..=imp::MAX_SYSCALL_NR).contains(&nr)
}

pub fn push_optional_syscall(v: &mut Vec<u32>, nr: Option<i64>) {
if let Some(nr) = nr {
v.push(nr as u32);
Expand Down
2 changes: 1 addition & 1 deletion crates/sandlock-core/src/context.rs
Original file line number Diff line number Diff line change
Expand Up @@ -763,7 +763,7 @@ pub(crate) struct ChildSpawnArgs<'a> {
/// Sandbox instance name. When set, it is also exposed as the
/// sandbox's virtual hostname.
pub sandbox_name: Option<&'a str>,
/// Syscall numbers for which the parent registered `ExtraHandler`s.
/// Syscall numbers for which the parent registered user `Handler`s.
/// Merged into the child's BPF notif list so the kernel actually
/// raises USER_NOTIF for them.
pub extra_syscalls: &'a [u32],
Expand Down
3 changes: 3 additions & 0 deletions crates/sandlock-core/src/error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,9 @@ pub enum SandlockError {

#[error("memory protection error: {0}")]
MemoryProtect(String),

#[error("handler error: {0}")]
Handler(#[from] crate::seccomp::dispatch::HandlerError),
}

#[derive(Debug, Error)]
Expand Down
4 changes: 4 additions & 0 deletions crates/sandlock-core/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,10 @@ pub use sandbox::Sandbox;
pub use pipeline::{Stage, Pipeline, Gather};
pub use dry_run::{Change, ChangeKind, DryRunResult};

// Public extension API — see docs/extension-handlers.md.
pub use seccomp::dispatch::{Handler, HandlerCtx, HandlerError};
pub use seccomp::syscall::{Syscall, SyscallError};

/// Query the Landlock ABI version supported by the running kernel.
pub fn landlock_abi_version() -> Result<u32, error::ConfinementError> {
landlock::abi_version()
Expand Down
145 changes: 102 additions & 43 deletions crates/sandlock-core/src/sandbox.rs
Original file line number Diff line number Diff line change
Expand Up @@ -109,9 +109,10 @@ pub struct Sandbox {
/// Optional callback invoked when a port bind is recorded.
#[allow(clippy::type_complexity)]
on_bind: Option<Box<dyn Fn(&std::collections::HashMap<u16, u16>) + Send + Sync>>,
/// User-supplied extra syscall handlers. Taken on spawn and
/// appended to the dispatch table after all builtin handlers.
extra_handlers: Vec<crate::seccomp::dispatch::ExtraHandler>,
/// User-supplied extra syscall handlers as `(syscall_nr, Arc<dyn Handler>)`
/// pairs. Taken on spawn and appended to the dispatch table after
/// all builtin handlers.
extra_handlers: Vec<(i64, Arc<dyn crate::seccomp::dispatch::Handler>)>,
}

impl Sandbox {
Expand Down Expand Up @@ -182,7 +183,9 @@ impl Sandbox {
name: Option<&str>,
cmd: &[&str],
) -> Result<RunResult, SandlockError> {
Self::run_with_extra_handlers(policy, name, cmd, Vec::new()).await
let mut sb = Self::new(policy, name)?;
sb.do_spawn(cmd, true).await?;
sb.wait().await
}

/// Run a sandboxed process with inherited stdio (interactive mode).
Expand All @@ -198,69 +201,90 @@ impl Sandbox {

/// One-shot run with user-supplied syscall handlers.
///
/// `extra_handlers` are registered in the dispatch table **after** all
/// builtin handlers for the same syscall. They observe the post-builtin
/// view (e.g. [`chroot`]-normalized paths on `openat`) and cannot be used
/// to bypass builtin confinement. See
/// [`crate::seccomp::dispatch::ExtraHandler`] for the ordering contract.
/// `extra_handlers` is any `IntoIterator` over `(syscall, handler)` pairs
/// where:
///
/// * `syscall: S` is anything implementing `TryInto<Syscall>` — `i64`/`u32`
/// raw numbers (validated through
/// [`crate::seccomp::syscall::Syscall::checked`]), or a pre-validated
/// [`crate::seccomp::syscall::Syscall`].
/// * `handler: H` is anything implementing
/// [`crate::seccomp::dispatch::Handler`] — a struct with explicit
/// `impl Handler` for stateful handlers, or a closure of shape
/// `Fn(&HandlerCtx) -> impl Future<Output = NotifAction>` via the
/// blanket impl.
///
/// When called with an empty vector, this function is identical to
/// [`Self::run`].
/// Handlers are registered in the dispatch table **after** all builtin
/// handlers for the same syscall, so they observe the post-builtin view
/// (e.g. `chroot`-normalized paths on `openat`) and cannot bypass builtin
/// confinement.
///
/// Validation happens up-front (before fork): each `syscall` is checked
/// through `Syscall::checked`, and the deny-list contract is enforced via
/// [`crate::seccomp::dispatch::validate_handler_syscalls_against_policy`].
///
/// # Example
///
/// ```ignore
/// use sandlock_core::{Policy, Sandbox};
/// use sandlock_core::seccomp::dispatch::{ExtraHandler, HandlerFn};
/// use sandlock_core::seccomp::notif::NotifAction;
///
/// # tokio_test::block_on(async {
/// let policy = Policy::builder().fs_read("/usr").build().unwrap();
///
/// let audit: HandlerFn = Box::new(|notif, _ctx, _fd| {
/// Box::pin(async move {
/// eprintln!("openat from pid {}", notif.data.pid);
/// let audit = |cx: &sandlock_core::HandlerCtx<'_>| {
/// let pid = cx.notif.data.pid;
/// async move {
/// eprintln!("openat from pid {}", pid);
/// NotifAction::Continue
/// })
/// });
/// }
/// };
///
/// let result = Sandbox::run_with_extra_handlers(
/// &policy,
/// Some("audit"),
/// &["/usr/bin/true"],
/// vec![ExtraHandler::new(libc::SYS_openat, audit)],
/// [(libc::SYS_openat, audit)],
/// ).await.unwrap();
/// # });
/// ```
pub async fn run_with_extra_handlers(
pub async fn run_with_extra_handlers<I, S, H>(
policy: &Policy,
name: Option<&str>,
cmd: &[&str],
extra_handlers: Vec<crate::seccomp::dispatch::ExtraHandler>,
) -> Result<RunResult, SandlockError> {
// Reject extras that would weaken confinement (e.g. one registered
// on a default-deny syscall). See
// [`crate::seccomp::dispatch::validate_extras_against_policy`] for the
// rationale. Done before fork so the caller gets a clear error
// instead of a silently-broken sandbox.
if let Err(nr) =
crate::seccomp::dispatch::validate_extras_against_policy(&extra_handlers, policy)
{
return Err(SandboxError::Child(format!(
"ExtraHandler on syscall {} conflicts with the deny list \
(DEFAULT_DENY_SYSCALLS or policy.deny_syscalls) and would let \
user code bypass it via SECCOMP_USER_NOTIF_FLAG_CONTINUE",
nr
))
.into());
}

extra_handlers: I,
) -> Result<RunResult, SandlockError>
where
I: IntoIterator<Item = (S, H)>,
S: TryInto<crate::seccomp::syscall::Syscall, Error = crate::seccomp::syscall::SyscallError>,
H: crate::seccomp::dispatch::Handler,
{
let pending = collect_extra_handlers(extra_handlers, policy)?;
let mut sb = Self::new(policy, name)?;
sb.extra_handlers = extra_handlers;
sb.extra_handlers = pending;
sb.do_spawn(cmd, true).await?;
sb.wait().await
}

/// Interactive-stdio counterpart of [`Self::run_with_extra_handlers`].
///
/// Accepts the same `(syscall, handler)` pairs and differs only in the
/// second argument passed to `do_spawn` (`false` here, `true` in the
/// non-interactive variant).
///
/// The pairs are validated by `collect_extra_handlers` *before* the sandbox
/// is constructed, so an invalid syscall or a deny-list conflict is reported
/// without creating a `Sandbox` or spawning anything.
///
/// # Errors
///
/// Propagates, in order, any error from handler validation, sandbox
/// construction, spawning, and waiting on the child.
pub async fn run_interactive_with_extra_handlers<I, S, H>(
    policy: &Policy,
    name: Option<&str>,
    cmd: &[&str],
    extra_handlers: I,
) -> Result<RunResult, SandlockError>
where
    I: IntoIterator<Item = (S, H)>,
    S: TryInto<crate::seccomp::syscall::Syscall, Error = crate::seccomp::syscall::SyscallError>,
    H: crate::seccomp::dispatch::Handler,
{
    // Validate first: a bad handler list must fail before any sandbox state exists.
    let validated = collect_extra_handlers(extra_handlers, policy)?;

    let mut sandbox = Self::new(policy, name)?;
    sandbox.extra_handlers = validated;
    sandbox.do_spawn(cmd, false).await?;
    sandbox.wait().await
}

/// Dry-run: spawn, wait, collect filesystem changes, then abort.
/// Returns the run result plus a list of changes that would have been
/// committed. The workdir is left unchanged.
Expand Down Expand Up @@ -939,7 +963,7 @@ impl Sandbox {
let extra_syscalls: Vec<u32> = self
.extra_handlers
.iter()
.map(|h| h.syscall_nr as u32)
.map(|h| h.0 as u32)
.collect();

// This never returns.
Expand Down Expand Up @@ -1036,8 +1060,8 @@ impl Sandbox {
// argv reads TOCTOU-safe.
argv_safety_required: self.policy.policy_fn.is_some()
|| self.extra_handlers.iter().any(|h| {
h.syscall_nr == libc::SYS_execve
|| h.syscall_nr == libc::SYS_execveat
h.0 == libc::SYS_execve
|| h.0 == libc::SYS_execveat
}),
time_offset: time_offset_val,
num_cpus: self.policy.num_cpus,
Expand Down Expand Up @@ -1207,7 +1231,8 @@ impl Sandbox {
});

// Spawn notif supervisor. `extra_handlers` is consumed here
// (moved into the supervisor task) because HandlerFn is not Clone.
// (moved into the supervisor task) because each `Arc<dyn Handler>`
// is shared with the dispatch table and must outlive it.
let extra_handlers = std::mem::take(&mut self.extra_handlers);
self.notif_handle = Some(tokio::spawn(
notif::supervisor(notif_fd, ctx, extra_handlers),
Expand Down Expand Up @@ -1246,6 +1271,40 @@ impl Sandbox {
}
}

// ============================================================
// Helpers
// ============================================================

/// Builds the supervisor-facing handler list from caller-supplied
/// `(syscall, handler)` pairs.
///
/// Each `syscall` is converted via `TryInto<Syscall>` and reduced to its raw
/// number; each `handler` is wrapped in an `Arc<dyn Handler>`. Conversion
/// stops at the first pair that fails. Once every pair has converted, the
/// collected numbers are checked together against `policy` through
/// [`crate::seccomp::dispatch::validate_handler_syscalls_against_policy`].
///
/// # Errors
///
/// * A failed syscall conversion is mapped through `HandlerError::from`.
/// * A rejection by the policy validation becomes
///   `HandlerError::OnDenySyscall` carrying the offending syscall number.
///
/// Both reach the caller as a `SandlockError`.
fn collect_extra_handlers<I, S, H>(
    extra_handlers: I,
    policy: &Policy,
) -> Result<Vec<(i64, Arc<dyn crate::seccomp::dispatch::Handler>)>, SandlockError>
where
    I: IntoIterator<Item = (S, H)>,
    S: TryInto<crate::seccomp::syscall::Syscall, Error = crate::seccomp::syscall::SyscallError>,
    H: crate::seccomp::dispatch::Handler,
{
    use crate::seccomp::dispatch::{Handler, HandlerError};

    // Convert pair by pair; the first invalid syscall aborts the whole call.
    let mut registered: Vec<(i64, Arc<dyn Handler>)> = Vec::new();
    for (syscall, handler) in extra_handlers {
        let nr = syscall.try_into().map_err(HandlerError::from)?.raw();
        let shared: Arc<dyn Handler> = Arc::new(handler);
        registered.push((nr, shared));
    }

    // Deny-list check runs once over the full set of requested numbers.
    let nrs: Vec<i64> = registered.iter().map(|entry| entry.0).collect();
    if let Err(denied) =
        crate::seccomp::dispatch::validate_handler_syscalls_against_policy(&nrs, policy)
    {
        return Err(HandlerError::OnDenySyscall { syscall_nr: denied as i64 }.into());
    }

    Ok(registered)
}

// ============================================================
// Drop — kill and reap child if still running
// ============================================================
Expand Down
Loading
Loading