From 4cd07259dcbdb41eeb561607c393f141e429c425 Mon Sep 17 00:00:00 2001 From: Yair Etziony Date: Sat, 25 Apr 2026 02:34:48 +0200 Subject: [PATCH 1/9] feat(syva-cp-client): zone CRUD methods (create, update, delete, get, list) --- syva-cp-client/src/client.rs | 228 ++++++++++++++++++++++++++++++++++- syva-cp-client/src/error.rs | 23 +++- syva-cp-client/src/lib.rs | 6 +- 3 files changed, 250 insertions(+), 7 deletions(-) diff --git a/syva-cp-client/src/client.rs b/syva-cp-client/src/client.rs index 8d160bb..c87b2c3 100644 --- a/syva-cp-client/src/client.rs +++ b/syva-cp-client/src/client.rs @@ -10,9 +10,12 @@ use uuid::Uuid; use syva_proto::syva_control::v1::assignment_service_client::AssignmentServiceClient; use syva_proto::syva_control::v1::node_service_client::NodeServiceClient; +use syva_proto::syva_control::v1::zone_service_client::ZoneServiceClient; use syva_proto::syva_control::v1::{ - AppliedAssignment, FailedAssignment, HeartbeatRequest, NodeAssignmentUpdate, - RegisterNodeRequest, ReportAssignmentStateRequest, SubscribeAssignmentsRequest, + get_zone_request::Identifier as GetZoneIdentifier, AppliedAssignment, CreateZoneRequest, + FailedAssignment, GetZoneRequest, HeartbeatRequest, ListZonesRequest, + NodeAssignmentUpdate, RegisterNodeRequest, ReportAssignmentStateRequest, + SubscribeAssignmentsRequest, UpdateZoneRequest, DeleteZoneRequest, }; #[derive(Debug, Clone)] @@ -55,6 +58,52 @@ pub struct CpClient { registration: Arc>>, } +pub struct CreateZoneArgs { + pub team_id: Uuid, + pub name: String, + pub display_name: Option, + pub policy_json: serde_json::Value, + pub summary_json: Option, + pub selector_json: Option, + pub metadata_json: Option, +} + +pub struct CreatedZone { + pub zone_id: Uuid, + pub policy_id: Uuid, + pub version: i64, +} + +pub struct UpdateZoneArgs { + pub zone_id: Uuid, + pub if_version: i64, + pub policy_json: Option, + pub selector_json: Option, + pub metadata_json: Option, +} + +pub struct UpdatedZone { + pub zone_id: Uuid, + pub 
version: i64, + pub new_policy_id: Option, + pub new_policy_version: Option, +} + +pub struct DeleteZoneArgs { + pub zone_id: Uuid, + pub if_version: i64, + pub drain: bool, +} + +pub struct ZoneSnapshot { + pub zone_id: Uuid, + pub team_id: Uuid, + pub name: String, + pub status: String, + pub version: i64, + pub current_policy_id: Option, +} + impl CpClient { pub async fn connect(config: CpClientConfig) -> Result { let endpoint = Endpoint::from_shared(config.endpoint.clone()) @@ -180,6 +229,177 @@ impl CpClient { )) } + pub async fn create_zone(&self, args: CreateZoneArgs) -> Result { + let mut client = ZoneServiceClient::new(self.channel.clone()); + let response = client + .create_zone(CreateZoneRequest { + team_id: args.team_id.to_string(), + name: args.name, + display_name: args.display_name.unwrap_or_default(), + policy_json: args.policy_json.to_string(), + summary_json: args + .summary_json + .map(|value| value.to_string()) + .unwrap_or_default(), + selector_json: args + .selector_json + .map(|value| value.to_string()) + .unwrap_or_default(), + metadata_json: args + .metadata_json + .map(|value| value.to_string()) + .unwrap_or_default(), + }) + .await? 
+ .into_inner(); + + let zone = response + .zone + .ok_or_else(|| CpClientError::Internal("CreateZoneResponse missing zone".into()))?; + let policy = response + .policy + .ok_or_else(|| CpClientError::Internal("CreateZoneResponse missing policy".into()))?; + + Ok(CreatedZone { + zone_id: parse_uuid(&zone.id, "zone.id")?, + policy_id: parse_uuid(&policy.id, "policy.id")?, + version: zone.version, + }) + } + + pub async fn update_zone(&self, args: UpdateZoneArgs) -> Result { + let mut client = ZoneServiceClient::new(self.channel.clone()); + let response = client + .update_zone(UpdateZoneRequest { + zone_id: args.zone_id.to_string(), + if_version: args.if_version, + policy_json: args + .policy_json + .map(|value| value.to_string()) + .unwrap_or_default(), + selector_json: args + .selector_json + .map(|value| value.to_string()) + .unwrap_or_default(), + metadata_json: args + .metadata_json + .map(|value| value.to_string()) + .unwrap_or_default(), + }) + .await? + .into_inner(); + + let zone = response + .zone + .ok_or_else(|| CpClientError::Internal("UpdateZoneResponse missing zone".into()))?; + let new_policy = response.new_policy; + + Ok(UpdatedZone { + zone_id: parse_uuid(&zone.id, "zone.id")?, + version: zone.version, + new_policy_id: new_policy + .as_ref() + .map(|policy| parse_uuid(&policy.id, "policy.id")) + .transpose()?, + new_policy_version: new_policy.map(|policy| policy.version), + }) + } + + pub async fn delete_zone(&self, args: DeleteZoneArgs) -> Result<(), CpClientError> { + let mut client = ZoneServiceClient::new(self.channel.clone()); + client + .delete_zone(DeleteZoneRequest { + zone_id: args.zone_id.to_string(), + if_version: args.if_version, + drain: args.drain, + }) + .await?; + Ok(()) + } + + pub async fn get_zone_by_name( + &self, + team_id: Uuid, + name: &str, + ) -> Result, CpClientError> { + let mut client = ZoneServiceClient::new(self.channel.clone()); + let response = match client + .get_zone(GetZoneRequest { + identifier: 
Some(GetZoneIdentifier::NameRef( + syva_proto::syva_control::v1::ZoneNameRef { + team_id: team_id.to_string(), + name: name.to_string(), + }, + )), + }) + .await + { + Ok(response) => response.into_inner(), + Err(error) if error.code() == tonic::Code::NotFound => return Ok(None), + Err(error) => return Err(error.into()), + }; + + let zone = match response.zone { + Some(zone) => zone, + None => return Ok(None), + }; + + Ok(Some(ZoneSnapshot { + zone_id: parse_uuid(&zone.id, "zone.id")?, + team_id: parse_uuid(&zone.team_id, "zone.team_id")?, + name: zone.name, + status: zone.status, + version: zone.version, + current_policy_id: if zone.current_policy_id.is_empty() { + None + } else { + Some(parse_uuid( + &zone.current_policy_id, + "zone.current_policy_id", + )?) + }, + })) + } + + pub async fn list_zones( + &self, + team_id: Uuid, + status_filter: Option<&str>, + limit: i64, + ) -> Result, CpClientError> { + let mut client = ZoneServiceClient::new(self.channel.clone()); + let response = client + .list_zones(ListZonesRequest { + team_id: team_id.to_string(), + status: status_filter.unwrap_or_default().to_string(), + limit, + }) + .await? + .into_inner(); + + response + .zones + .into_iter() + .map(|zone| { + Ok(ZoneSnapshot { + zone_id: parse_uuid(&zone.id, "zone.id")?, + team_id: parse_uuid(&zone.team_id, "zone.team_id")?, + name: zone.name, + status: zone.status, + version: zone.version, + current_policy_id: if zone.current_policy_id.is_empty() { + None + } else { + Some(parse_uuid( + &zone.current_policy_id, + "zone.current_policy_id", + )?) 
+ }, + }) + }) + .collect() + } + async fn require_registered(&self) -> Result { self.registration .read() @@ -244,3 +464,7 @@ pub struct FailedReport { pub error_json: serde_json::Value, } +fn parse_uuid(value: &str, field: &str) -> Result { + Uuid::parse_str(value) + .map_err(|error| CpClientError::Internal(format!("could not parse {field} as UUID: {error}"))) +} diff --git a/syva-cp-client/src/error.rs b/syva-cp-client/src/error.rs index 9644b57..a30b912 100644 --- a/syva-cp-client/src/error.rs +++ b/syva-cp-client/src/error.rs @@ -3,10 +3,10 @@ use thiserror::Error; #[derive(Debug, Error)] pub enum CpClientError { #[error("connection failed: {0}")] - Connection(#[from] tonic::transport::Error), + Connection(#[from] Box), #[error("grpc error: {0}")] - Grpc(#[from] tonic::Status), + Grpc(#[from] Box), #[error("invalid endpoint: {0}")] InvalidEndpoint(String), @@ -15,9 +15,26 @@ pub enum CpClientError { NotRegistered, #[error("serialization error: {0}")] - Serde(#[from] serde_json::Error), + Serde(#[from] Box), #[error("internal: {0}")] Internal(String), } +impl From for CpClientError { + fn from(value: tonic::transport::Error) -> Self { + Self::Connection(Box::new(value)) + } +} + +impl From for CpClientError { + fn from(value: tonic::Status) -> Self { + Self::Grpc(Box::new(value)) + } +} + +impl From for CpClientError { + fn from(value: serde_json::Error) -> Self { + Self::Serde(Box::new(value)) + } +} diff --git a/syva-cp-client/src/lib.rs b/syva-cp-client/src/lib.rs index 8dc50eb..9001098 100644 --- a/syva-cp-client/src/lib.rs +++ b/syva-cp-client/src/lib.rs @@ -16,7 +16,9 @@ pub mod client; pub mod error; -pub use client::{AppliedReport, CpClient, CpClientConfig, FailedReport, NodeRegistration}; +pub use client::{ + AppliedReport, CpClient, CpClientConfig, CreateZoneArgs, CreatedZone, DeleteZoneArgs, + FailedReport, NodeRegistration, UpdateZoneArgs, UpdatedZone, ZoneSnapshot, +}; pub use error::CpClientError; pub use 
syva_proto::syva_control::v1::{NodeAssignmentUpdate, ZoneAssignment}; - From ec5ef3960dcae9537776a0a8e26d1dabb18dfbad Mon Sep 17 00:00:00 2001 From: Yair Etziony Date: Sat, 25 Apr 2026 02:38:37 +0200 Subject: [PATCH 2/9] feat(syva-adapter-file): rewrite to push zones to syva-cp via syva-cp-client - Removes dependency on local syva-core gRPC surface - New CLI: --cp-endpoint, --team-id required - Reconciles TOML directory against syva-cp every --reconcile-secs - Container watcher deferred to a later session - verify subcommand preserved --- Cargo.lock | 10 +- syva-adapter-file/Cargo.toml | 14 +- syva-adapter-file/src/connect.rs | 38 -- syva-adapter-file/src/lib.rs | 5 + syva-adapter-file/src/main.rs | 605 ++--------------------------- syva-adapter-file/src/mapper.rs | 15 - syva-adapter-file/src/policy.rs | 85 ++-- syva-adapter-file/src/reload.rs | 198 ---------- syva-adapter-file/src/run.rs | 155 ++++++++ syva-adapter-file/src/translate.rs | 58 ++- syva-adapter-file/src/types.rs | 16 +- syva-adapter-file/src/verify.rs | 16 + syva-adapter-file/src/watcher.rs | 503 ------------------------ syva-cp-client/src/client.rs | 35 ++ 14 files changed, 331 insertions(+), 1422 deletions(-) delete mode 100644 syva-adapter-file/src/connect.rs create mode 100644 syva-adapter-file/src/lib.rs delete mode 100644 syva-adapter-file/src/mapper.rs delete mode 100644 syva-adapter-file/src/reload.rs create mode 100644 syva-adapter-file/src/run.rs create mode 100644 syva-adapter-file/src/verify.rs delete mode 100644 syva-adapter-file/src/watcher.rs diff --git a/Cargo.lock b/Cargo.lock index 095d0d7..524805d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3043,21 +3043,15 @@ version = "0.2.0" dependencies = [ "anyhow", "clap", - "containerd-client", - "hyper-util", - "prost", - "prost-types", "serde", "serde_json", + "syva-cp-client", "syva-ebpf-common", - "syva-proto", "tokio", - "tokio-stream", "toml", - "tonic", - "tower 0.5.3", "tracing", "tracing-subscriber", + "uuid", ] [[package]] diff 
--git a/syva-adapter-file/Cargo.toml b/syva-adapter-file/Cargo.toml index 57a0076..7431d53 100644 --- a/syva-adapter-file/Cargo.toml +++ b/syva-adapter-file/Cargo.toml @@ -3,15 +3,17 @@ name = "syva-adapter-file" version.workspace = true edition.workspace = true +[lib] +name = "syva_file" +path = "src/lib.rs" + [[bin]] name = "syva-file" path = "src/main.rs" [dependencies] -syva-proto = { path = "../syva-proto" } -tonic = { workspace = true } +syva-cp-client = { path = "../syva-cp-client" } tokio = { workspace = true } -tokio-stream = { workspace = true } tracing = { workspace = true } tracing-subscriber = { workspace = true } anyhow = { workspace = true } @@ -19,9 +21,5 @@ serde = { workspace = true } serde_json = { workspace = true } toml = { workspace = true } clap = { workspace = true } -containerd-client = { workspace = true } -prost = { workspace = true } -prost-types = { workspace = true } -tower = "0.5" -hyper-util = { version = "0.1", features = ["tokio"] } +uuid = { version = "1", features = ["v4", "serde"] } syva-ebpf-common = { path = "../syva-ebpf-common", features = ["userspace"] } diff --git a/syva-adapter-file/src/connect.rs b/syva-adapter-file/src/connect.rs deleted file mode 100644 index b303142..0000000 --- a/syva-adapter-file/src/connect.rs +++ /dev/null @@ -1,38 +0,0 @@ -//! gRPC connection to syva-core over Unix domain socket. - -use syva_proto::syva_core::syva_core_client::SyvaCoreClient; -use tonic::transport::Channel; - -/// Connect to syva-core over Unix socket. -pub async fn connect_to_core(socket_path: &str) -> anyhow::Result> { - let path = socket_path.to_string(); - let channel = tonic::transport::Endpoint::try_from("http://[::]:50051")? 
- .connect_with_connector(tower::service_fn(move |_: tonic::transport::Uri| { - let path = path.clone(); - async move { - let stream = tokio::net::UnixStream::connect(&path).await?; - Ok::<_, std::io::Error>(hyper_util::rt::TokioIo::new(stream)) - } - })) - .await?; - Ok(SyvaCoreClient::new(channel)) -} - -/// Connect with exponential backoff retry. -pub async fn connect_with_retry(socket_path: &str, max_attempts: usize) -> anyhow::Result> { - let mut backoff = std::time::Duration::from_millis(100); - for attempt in 1..=max_attempts { - match connect_to_core(socket_path).await { - Ok(client) => return Ok(client), - Err(e) => { - if attempt == max_attempts { - return Err(anyhow::anyhow!("failed to connect to syva-core after {max_attempts} attempts: {e}")); - } - tracing::warn!(attempt, %e, "failed to connect to syva-core — retrying"); - tokio::time::sleep(backoff).await; - backoff = (backoff * 2).min(std::time::Duration::from_secs(5)); - } - } - } - unreachable!() -} diff --git a/syva-adapter-file/src/lib.rs b/syva-adapter-file/src/lib.rs new file mode 100644 index 0000000..f345e10 --- /dev/null +++ b/syva-adapter-file/src/lib.rs @@ -0,0 +1,5 @@ +pub mod policy; +pub mod run; +pub mod translate; +pub mod types; +pub mod verify; diff --git a/syva-adapter-file/src/main.rs b/syva-adapter-file/src/main.rs index 62ba7bf..442a91c 100644 --- a/syva-adapter-file/src/main.rs +++ b/syva-adapter-file/src/main.rs @@ -1,597 +1,58 @@ -//! syva-file — file/ConfigMap policy adapter for syva-core. -//! -//! Reads TOML policy files from a directory, translates them to gRPC calls, -//! and watches containerd for container start/stop events to manage zone -//! membership via syva-core. 
- -mod connect; -mod mapper; -mod policy; -mod reload; -mod translate; -mod types; -mod watcher; - -use std::collections::{HashMap, HashSet}; -use std::path::PathBuf; -use std::sync::Arc; - +use anyhow::Result; use clap::{Parser, Subcommand}; -use syva_proto::syva_core::syva_core_client::SyvaCoreClient; -use syva_proto::syva_core::{ - AllowCommRequest, AttachContainerRequest, DenyCommRequest, DetachContainerRequest, - RegisterHostPathRequest, RegisterZoneRequest, RemoveZoneRequest, -}; -use tonic::transport::Channel; - -use crate::reload::{PolicyChange, PolicyDirWatcher, diff_policies}; -use crate::translate::to_proto_policy; -use crate::types::ZonePolicy; -use crate::watcher::{WatcherEvent, ZoneAssignment}; +use std::path::PathBuf; +use uuid::Uuid; -#[derive(Parser)] -#[command(name = "syva-file", about = "File/ConfigMap policy adapter for syva-core")] +#[derive(Parser, Debug)] +#[command(name = "syva-file", version)] struct Cli { - /// Path to the policy directory containing .toml zone policy files. - #[arg(long, default_value = "./policies")] + /// Directory containing TOML zone policies. One file per zone. + /// Filename (without .toml) becomes the zone name. + #[arg(long, env = "SYVA_FILE_POLICY_DIR", default_value = "/etc/syva/policies")] policy_dir: PathBuf, - /// Unix socket path for connecting to syva-core. - #[arg(long, default_value = "/run/syva/syva-core.sock")] - socket_path: String, + /// syva-cp gRPC endpoint. + #[arg(long, env = "SYVA_CP_ENDPOINT")] + cp_endpoint: String, - /// Containerd socket path. - #[arg(long, default_value = "/run/containerd/containerd.sock")] - containerd_sock: String, + /// Team UUID this adapter manages zones for. + #[arg(long, env = "SYVA_TEAM_ID")] + team_id: Uuid, + + /// Reconcile interval in seconds. 
+ #[arg(long, env = "SYVA_RECONCILE_SECS", default_value = "5")] + reconcile_secs: u64, #[command(subcommand)] - command: Option, + command: Option, } -#[derive(Subcommand)] -enum Commands { - /// Dry-run: load and validate policies without connecting to syva-core. +#[derive(Subcommand, Debug)] +enum Cmd { + /// Validate the TOML files in --policy-dir without connecting to syva-cp. Verify, } #[tokio::main] -async fn main() -> anyhow::Result<()> { +async fn main() -> Result<()> { tracing_subscriber::fmt() .with_env_filter( tracing_subscriber::EnvFilter::try_from_default_env() - .unwrap_or_else(|_| tracing_subscriber::EnvFilter::new("syva_adapter_file=info")), + .unwrap_or_else(|_| "syva_file=info".into()), ) .init(); let cli = Cli::parse(); - match cli.command { - Some(Commands::Verify) => run_verify(&cli.policy_dir), - None => run_adapter(cli).await, - } -} - -// --------------------------------------------------------------------------- -// Verify subcommand -// --------------------------------------------------------------------------- - -fn run_verify(policy_dir: &PathBuf) -> anyhow::Result<()> { - let policies = policy::load_policies(policy_dir)?; - - if policies.is_empty() { - println!("No policies found in {}", policy_dir.display()); - return Ok(()); - } - - println!("Loaded {} zone policies:", policies.len()); - - let mut has_errors = false; - - for (name, pol) in &policies { - println!(" - {name}"); - - // Check allowed_zones symmetry. - for peer in &pol.network.allowed_zones { - match policies.get(peer) { - None => { - println!(" WARN: allowed_zones references '{peer}' which has no policy file"); - has_errors = true; - } - Some(peer_pol) => { - if !peer_pol.network.allowed_zones.contains(&name.to_string()) { - println!(" WARN: allowed_zones lists '{peer}' but '{peer}' does not list '{name}' — comm will NOT be established"); - has_errors = true; - } - } - } - } - - // Check host_paths exist. 
- for path in &pol.filesystem.host_paths { - if !std::path::Path::new(path).exists() { - println!(" WARN: host_path '{path}' does not exist on this host"); - } - } - } - - if has_errors { - println!("\nVerification completed with warnings."); - std::process::exit(1); - } else { - println!("\nAll policies valid."); - Ok(()) - } -} - -// --------------------------------------------------------------------------- -// Main adapter loop -// --------------------------------------------------------------------------- - -async fn run_adapter(cli: Cli) -> anyhow::Result<()> { - tracing::info!( - policy_dir = %cli.policy_dir.display(), - socket = %cli.socket_path, - containerd = %cli.containerd_sock, - "starting syva-file adapter" - ); - - // 1. Connect to syva-core with retry. - let mut client = connect::connect_with_retry(&cli.socket_path, 10).await?; - tracing::info!("connected to syva-core"); - - // 2. Load all policies from disk. - let mut current_policies = policy::load_policies(&cli.policy_dir)?; - tracing::info!(zones = current_policies.len(), "loaded policies"); - - // 3. Register zones with syva-core. - register_all_zones(&mut client, ¤t_policies).await?; - - // 4. Enumerate existing containers and attach them. - let zone_names: HashSet = current_policies.keys().cloned().collect(); - let assignments = watcher::enumerate_cgroups(&zone_names)?; - for assignment in &assignments { - attach_container(&mut client, assignment).await; - } - tracing::info!(containers = assignments.len(), "enumerated existing containers"); - - // 5. Set up zone names watch channel for the containerd watcher. - let (zone_names_tx, zone_names_rx) = tokio::sync::watch::channel(Arc::new(zone_names)); - - // 6. Start containerd event watcher. - let (event_tx, mut event_rx) = tokio::sync::mpsc::channel::(256); - let containerd_sock = cli.containerd_sock.clone(); - tokio::spawn(async move { - watcher::watch_containerd_events(containerd_sock, zone_names_rx, event_tx).await; - }); - - // 7. 
Start hot-reload watcher. - let mut dir_watcher = PolicyDirWatcher::new(cli.policy_dir.clone()); - let mut reload_interval = tokio::time::interval(std::time::Duration::from_secs(5)); - reload_interval.set_missed_tick_behavior(tokio::time::MissedTickBehavior::Skip); - - // 8. Set up SIGTERM handler. - let mut sigterm = tokio::signal::unix::signal(tokio::signal::unix::SignalKind::terminate())?; - let mut sigint = tokio::signal::unix::signal(tokio::signal::unix::SignalKind::interrupt())?; - - tracing::info!("adapter running — watching for events and policy changes"); - - loop { - tokio::select! { - // Handle containerd events. - Some(event) = event_rx.recv() => { - match event { - WatcherEvent::Add(assignment) => { - tracing::info!( - container = assignment.container_id, - zone = assignment.zone_name, - cgroup_id = assignment.cgroup_id, - "attaching container" - ); - attach_container(&mut client, &assignment).await; - } - WatcherEvent::Remove { container_id, .. } => { - tracing::info!(container = container_id, "detaching container"); - detach_container(&mut client, &container_id).await; - } - } - } - - // Hot-reload tick. - _ = reload_interval.tick() => { - if dir_watcher.check_changed() { - if let Err(e) = handle_reload( - &mut client, - dir_watcher.dir(), - &mut current_policies, - &zone_names_tx, - ).await { - tracing::error!(%e, "policy reload failed"); - } - } - } - - // Graceful shutdown. - _ = sigterm.recv() => { - tracing::info!("received SIGTERM — shutting down"); - break; - } - _ = sigint.recv() => { - tracing::info!("received SIGINT — shutting down"); - break; - } - } - } - - Ok(()) -} - -// --------------------------------------------------------------------------- -// gRPC helpers -// --------------------------------------------------------------------------- - -/// Register all zones and set up comms + host paths. 
-async fn register_all_zones( - client: &mut SyvaCoreClient, - policies: &HashMap, -) -> anyhow::Result<()> { - // Phase 1: Register each zone. - for (name, pol) in policies { - let resp = client - .register_zone(RegisterZoneRequest { - zone_name: name.clone(), - policy: Some(to_proto_policy(pol)), - }) - .await?; - tracing::info!(zone = name, zone_id = resp.into_inner().zone_id, "registered zone"); - } - - // Phase 2: Register host paths. - for (name, pol) in policies { - for path in &pol.filesystem.host_paths { - match client - .register_host_path(RegisterHostPathRequest { - zone_name: name.clone(), - path: path.clone(), - recursive: true, - }) - .await - { - Ok(resp) => { - let inodes = resp.into_inner().inodes_registered; - if inodes > 0 { - tracing::info!(zone = name, path, inodes, "registered host path"); - } - } - Err(e) => { - tracing::warn!(zone = name, path, %e, "failed to register host path"); - } - } - } - } - - // Phase 3: Set up bilateral comms. - let mut established: HashSet<(String, String)> = HashSet::new(); - for (name, pol) in policies { - for peer in &pol.network.allowed_zones { - // Check bilateral: peer must also list this zone. - let bilateral = policies - .get(peer) - .map(|p| p.network.allowed_zones.contains(&name.to_string())) - .unwrap_or(false); - - if !bilateral { - tracing::warn!( - zone = name, - peer = peer, - "one-sided allowed_zones — '{name}' lists '{peer}' but not vice versa; comm NOT established" - ); - continue; - } - - // Avoid duplicate calls (A,B) and (B,A). 
- let pair = if name < peer { - (name.clone(), peer.clone()) - } else { - (peer.clone(), name.clone()) - }; - if established.contains(&pair) { - continue; - } - - match client - .allow_comm(AllowCommRequest { - zone_a: name.clone(), - zone_b: peer.clone(), - }) - .await - { - Ok(_) => { - tracing::info!(zone_a = name, zone_b = peer, "established bilateral comm"); - established.insert(pair); - } - Err(e) => { - tracing::warn!(zone_a = name, zone_b = peer, %e, "failed to establish comm"); - } - } - } - } - - Ok(()) -} - -/// Attach a single container to its zone via gRPC. -async fn attach_container( - client: &mut SyvaCoreClient, - assignment: &ZoneAssignment, -) { - match client - .attach_container(AttachContainerRequest { - container_id: assignment.container_id.clone(), - zone_name: assignment.zone_name.clone(), - cgroup_id: assignment.cgroup_id, - }) - .await - { - Ok(_) => { - tracing::debug!( - container = assignment.container_id, - zone = assignment.zone_name, - "container attached" - ); - } - Err(e) => { - tracing::error!( - container = assignment.container_id, - zone = assignment.zone_name, - %e, - "failed to attach container" - ); - } - } -} - -/// Detach a container via gRPC. -async fn detach_container( - client: &mut SyvaCoreClient, - container_id: &str, -) { - match client - .detach_container(DetachContainerRequest { - container_id: container_id.to_string(), - }) - .await - { - Ok(_) => { - tracing::debug!(container = container_id, "container detached"); - } - Err(e) => { - tracing::error!(container = container_id, %e, "failed to detach container"); - } - } -} - -/// Handle a policy reload: diff, apply changes via gRPC, update zone names channel. -async fn handle_reload( - client: &mut SyvaCoreClient, - policy_dir: &std::path::Path, - current_policies: &mut HashMap, - zone_names_tx: &tokio::sync::watch::Sender>>, -) -> anyhow::Result<()> { - let new_policies = policy::load_policies(policy_dir)?; - - // Guard: empty set during ConfigMap rotation. 
- if new_policies.is_empty() && !current_policies.is_empty() { - tracing::debug!("policy reload returned empty set — skipping (possible ConfigMap rotation)"); - return Ok(()); - } - - let changes = diff_policies(current_policies, &new_policies); - if changes.is_empty() { - return Ok(()); - } - - tracing::info!(changes = changes.len(), "policy changes detected — applying"); - - let mut applied = 0; - - // Apply additions first. - for change in &changes { - if let PolicyChange::Added(name, pol) = change { - match apply_zone_addition(client, name, pol, &new_policies).await { - Ok(()) => { - current_policies.insert(name.clone(), pol.clone()); - applied += 1; - tracing::info!(zone = name.as_str(), "reload: zone added"); - } - Err(e) => { - tracing::error!(zone = name.as_str(), %e, "reload: failed to add zone"); - } - } - } - } - - // Apply modifications. - for change in &changes { - if let PolicyChange::Modified(name, new_pol) = change { - // Capture the old policy before the overwrite below so the - // modifier can diff allowed_zones and emit DenyComm for peers - // that dropped out of the bilateral allowed set. - let old_pol = current_policies.get(name).cloned(); - match apply_zone_modification(client, name, new_pol, old_pol.as_ref(), &new_policies).await { - Ok(()) => { - current_policies.insert(name.clone(), new_pol.clone()); - applied += 1; - tracing::info!(zone = name.as_str(), "reload: zone policy updated"); - } - Err(e) => { - tracing::error!(zone = name.as_str(), %e, "reload: failed to modify zone"); - } - } - } - } - - // Apply removals last. 
- for change in &changes { - if let PolicyChange::Removed(name) = change { - match client - .remove_zone(RemoveZoneRequest { - zone_name: name.clone(), - drain: true, - }) - .await - { - Ok(resp) => { - let resp = resp.into_inner(); - tracing::info!(zone = name.as_str(), msg = resp.message, "reload: zone removed"); - current_policies.remove(name); - applied += 1; - } - Err(e) => { - tracing::error!(zone = name.as_str(), %e, "reload: failed to remove zone"); - } - } - } - } - - // Update zone names channel for the watcher. - if applied > 0 { - let zone_names: HashSet = current_policies.keys().cloned().collect(); - let _ = zone_names_tx.send(Arc::new(zone_names)); - } - - tracing::info!(applied, "reload complete"); - Ok(()) -} - -/// Register a new zone and set up its host paths and comms. -async fn apply_zone_addition( - client: &mut SyvaCoreClient, - zone_name: &str, - policy: &ZonePolicy, - all_policies: &HashMap, -) -> anyhow::Result<()> { - // Register the zone. - client - .register_zone(RegisterZoneRequest { - zone_name: zone_name.to_string(), - policy: Some(to_proto_policy(policy)), - }) - .await?; - - // Register host paths. - for path in &policy.filesystem.host_paths { - if let Err(e) = client - .register_host_path(RegisterHostPathRequest { - zone_name: zone_name.to_string(), - path: path.clone(), - recursive: true, - }) - .await - { - tracing::warn!(zone = zone_name, path, %e, "failed to register host path during addition"); - } - } - - // Set up bilateral comms. - for peer in &policy.network.allowed_zones { - let bilateral = all_policies - .get(peer) - .map(|p| p.network.allowed_zones.contains(&zone_name.to_string())) - .unwrap_or(false); - if bilateral { - let _ = client - .allow_comm(AllowCommRequest { - zone_a: zone_name.to_string(), - zone_b: peer.clone(), - }) - .await; - } - } - - Ok(()) -} - -/// Update an existing zone's policy, host paths, and comms. -/// -/// `old_policy` is the previously-applied policy for this zone (if any). 
-/// When a peer was in the old bilateral-allowed set but is no longer -/// bilaterally allowed in the new state, DenyComm is issued so the BPF map -/// stops permitting the pair. Without this, hot-reload could only *add* -/// allowed comms, never retract them. -async fn apply_zone_modification( - client: &mut SyvaCoreClient, - zone_name: &str, - new_policy: &ZonePolicy, - old_policy: Option<&ZonePolicy>, - all_policies: &HashMap, -) -> anyhow::Result<()> { - // Re-register zone with updated policy (idempotent). - client - .register_zone(RegisterZoneRequest { - zone_name: zone_name.to_string(), - policy: Some(to_proto_policy(new_policy)), - }) - .await?; - - // Re-register host paths (syva-core handles dedup). - for path in &new_policy.filesystem.host_paths { - if let Err(e) = client - .register_host_path(RegisterHostPathRequest { - zone_name: zone_name.to_string(), - path: path.clone(), - recursive: true, - }) - .await - { - tracing::warn!(zone = zone_name, path, %e, "failed to register host path during modification"); - } - } - - let bilateral_now: HashSet<&String> = new_policy.network.allowed_zones.iter() - .filter(|peer| { - all_policies.get(*peer) - .map(|p| p.network.allowed_zones.contains(&zone_name.to_string())) - .unwrap_or(false) - }) - .collect(); - - // Retract comms that were bilateral under the old policy but no longer are - // (peer removed from allowed_zones, or the counterparty's side was removed). - if let Some(old) = old_policy { - let bilateral_before: HashSet<&String> = old.network.allowed_zones.iter() - .filter(|peer| { - // Use current all_policies for the counter-side check — the peer - // zone's latest policy is the source of truth at reload time. 
- all_policies.get(*peer) - .map(|p| p.network.allowed_zones.contains(&zone_name.to_string())) - .unwrap_or(false) - }) - .collect(); - - for peer in bilateral_before.difference(&bilateral_now) { - if let Err(e) = client - .deny_comm(DenyCommRequest { - zone_a: zone_name.to_string(), - zone_b: (*peer).clone(), - }) - .await - { - tracing::warn!(zone = zone_name, peer = peer.as_str(), %e, "failed to deny comm during modification"); - } else { - tracing::info!(zone = zone_name, peer = peer.as_str(), "reload: comm retracted"); - } - } - } - - // Rebuild the currently-bilateral set (idempotent on the core side). - for peer in &bilateral_now { - let _ = client - .allow_comm(AllowCommRequest { - zone_a: zone_name.to_string(), - zone_b: (*peer).clone(), - }) - .await; + if let Some(Cmd::Verify) = &cli.command { + return syva_file::verify::run(&cli.policy_dir); } - Ok(()) + syva_file::run::run(syva_file::run::Config { + policy_dir: cli.policy_dir, + cp_endpoint: cli.cp_endpoint, + team_id: cli.team_id, + reconcile_interval: std::time::Duration::from_secs(cli.reconcile_secs), + }) + .await } diff --git a/syva-adapter-file/src/mapper.rs b/syva-adapter-file/src/mapper.rs deleted file mode 100644 index 015a305..0000000 --- a/syva-adapter-file/src/mapper.rs +++ /dev/null @@ -1,15 +0,0 @@ -//! Zone mapper — maps container labels to zone assignments. -//! -//! Containers with a `syva.dev/zone` label are assigned to the named zone. -//! Containers without this label are treated as global (no enforcement). - -/// Label keys for zone assignment. -pub const LABEL_ZONE: &str = "syva.dev/zone"; -pub const LABEL_POLICY: &str = "syva.dev/policy"; - -/// Determine the zone name from container labels. -/// -/// Returns None if the container has no `syva.dev/zone` label (global/unzoned). 
-pub fn zone_from_labels(labels: &std::collections::HashMap) -> Option { - labels.get(LABEL_ZONE).cloned() -} diff --git a/syva-adapter-file/src/policy.rs b/syva-adapter-file/src/policy.rs index 027c71a..1de72f1 100644 --- a/syva-adapter-file/src/policy.rs +++ b/syva-adapter-file/src/policy.rs @@ -1,73 +1,56 @@ -//! Zone policy loader — reads TOML policy files from a directory. -//! -//! Each file in the policy directory becomes a zone. The filename (without -//! extension) is the zone name. E.g., `agent-sandbox.toml` defines zone -//! `agent-sandbox`. - +use crate::types::ZonePolicy; +use anyhow::{Context, Result}; +use serde::{Deserialize, Serialize}; +use serde_json::Value as JsonValue; use std::collections::HashMap; use std::path::Path; -use crate::types::ZonePolicy; +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] +pub struct FilePolicy { + #[serde(default)] + pub display_name: Option, + + #[serde(default)] + pub selector: Option, -/// Load all `.toml` policy files from a directory. -/// -/// Returns a map of zone_name -> ZonePolicy. Files that fail to parse are -/// logged as warnings and skipped — never silently enforce with wrong policy. 
-pub fn load_policies(dir: &Path) -> anyhow::Result> { + #[serde(flatten)] + pub policy: ZonePolicy, +} + +pub fn load_policies_from_dir(dir: &Path) -> Result> { let mut policies = HashMap::new(); if !dir.exists() { - tracing::warn!(dir = %dir.display(), "policy directory does not exist — no zones will be enforced"); + tracing::warn!( + dir = %dir.display(), + "policy directory does not exist; no zones will be reconciled" + ); return Ok(policies); } let entries = std::fs::read_dir(dir) - .map_err(|e| anyhow::anyhow!("failed to read policy dir {}: {e}", dir.display()))?; + .with_context(|| format!("read_dir {}", dir.display()))?; for entry in entries { - let entry = match entry { - Ok(e) => e, - Err(e) => { - tracing::warn!(%e, "skipping unreadable directory entry"); - continue; - } - }; - + let entry = entry?; let path = entry.path(); - if path.extension().map(|e| e != "toml").unwrap_or(true) { + if path.extension().and_then(|ext| ext.to_str()) != Some("toml") { continue; } - let zone_name = match path.file_stem().and_then(|s| s.to_str()) { - Some(name) => name.to_string(), - None => continue, - }; - - let content = match std::fs::read_to_string(&path) { - Ok(c) => c, - Err(e) => { - tracing::warn!(file = %path.display(), %e, "skipping unreadable policy file"); - continue; - } + let Some(stem) = path.file_stem().and_then(|stem| stem.to_str()) else { + continue; }; - - match toml::from_str::(&content) { - Ok(policy) => match policy.validate(&zone_name) { - Ok(()) => { - tracing::info!(zone = zone_name, file = %path.display(), "loaded zone policy"); - policies.insert(zone_name, policy); - } - Err(e) => { - tracing::warn!(file = %path.display(), %e, "invalid policy — zone will not be enforced"); - } - } - Err(e) => { - tracing::warn!( - file = %path.display(), %e, - "failed to parse policy file — zone will not be enforced" - ); - } + if stem.is_empty() { + continue; } + + let text = std::fs::read_to_string(&path) + .with_context(|| format!("read {}", 
path.display()))?; + let policy: FilePolicy = toml::from_str(&text) + .with_context(|| format!("parse {}", path.display()))?; + policy.policy.validate(stem)?; + policies.insert(stem.to_string(), policy); } Ok(policies) diff --git a/syva-adapter-file/src/reload.rs b/syva-adapter-file/src/reload.rs deleted file mode 100644 index a6423ea..0000000 --- a/syva-adapter-file/src/reload.rs +++ /dev/null @@ -1,198 +0,0 @@ -//! Policy hot-reload — detects policy file changes and produces diffs. -//! -//! Polls the policy directory every 5 seconds. Detects changes via: -//! 1. ConfigMap symlink rotation (`..data` target changes) -//! 2. File fingerprint (sorted filenames + mtimes + sizes) -//! -//! On change: full reload -> diff -> emit PolicyChange events. -//! The caller (main.rs) translates these into gRPC calls to syva-core. - -use std::collections::HashMap; -use std::hash::{Hash, Hasher}; -use std::path::{Path, PathBuf}; - -use crate::types::ZonePolicy; - -/// Watches a policy directory for changes via polling. -pub struct PolicyDirWatcher { - dir: PathBuf, - last_symlink_target: Option, - last_fingerprint: u64, -} - -impl PolicyDirWatcher { - pub fn new(dir: PathBuf) -> Self { - let symlink_target = std::fs::read_link(dir.join("..data")).ok(); - let fingerprint = compute_fingerprint(&dir); - Self { - dir, - last_symlink_target: symlink_target, - last_fingerprint: fingerprint, - } - } - - /// Returns true if the policy directory has changed since the last check. - pub fn check_changed(&mut self) -> bool { - // Check ConfigMap symlink rotation first (atomic, reliable). - if let Ok(target) = std::fs::read_link(self.dir.join("..data")) { - if self.last_symlink_target.as_ref() != Some(&target) { - self.last_symlink_target = Some(target); - self.last_fingerprint = compute_fingerprint(&self.dir); - return true; - } - // Symlink unchanged — still check fingerprint in case files were - // edited in place (e.g. `..data` exists but isn't a true ConfigMap). 
- } else { - self.last_symlink_target = None; - } - - // Compare file fingerprint to detect direct edits. - let fingerprint = compute_fingerprint(&self.dir); - if fingerprint != self.last_fingerprint { - self.last_fingerprint = fingerprint; - return true; - } - false - } - - /// Returns the watched directory path. - pub fn dir(&self) -> &Path { - &self.dir - } -} - -/// Compute a fingerprint from the policy directory's file metadata. -fn compute_fingerprint(dir: &Path) -> u64 { - let mut hasher = std::collections::hash_map::DefaultHasher::new(); - let mut entries: Vec<(String, u64, u64)> = Vec::new(); - - if let Ok(read_dir) = std::fs::read_dir(dir) { - for entry in read_dir.flatten() { - let name = entry.file_name().to_string_lossy().to_string(); - if !name.ends_with(".toml") { - continue; - } - if let Ok(meta) = entry.metadata() { - use std::time::UNIX_EPOCH; - let mtime = meta.modified() - .ok() - .and_then(|t| t.duration_since(UNIX_EPOCH).ok()) - .map(|d| d.as_nanos() as u64) - .unwrap_or(0); - entries.push((name, mtime, meta.len())); - } - } - } - entries.sort(); - entries.hash(&mut hasher); - hasher.finish() -} - -/// A single policy change detected by diff. -#[derive(Debug)] -pub enum PolicyChange { - Added(String, ZonePolicy), - Modified(String, ZonePolicy), - Removed(String), -} - -/// Diff two policy sets. Returns the changes needed to go from old -> new. -pub fn diff_policies( - old: &HashMap, - new: &HashMap, -) -> Vec { - let mut changes = Vec::new(); - - for (name, new_policy) in new { - match old.get(name) { - None => changes.push(PolicyChange::Added(name.clone(), new_policy.clone())), - Some(old_policy) if old_policy != new_policy => { - changes.push(PolicyChange::Modified(name.clone(), new_policy.clone())); - } - Some(_) => {} // Unchanged. 
- } - } - - for name in old.keys() { - if !new.contains_key(name) { - changes.push(PolicyChange::Removed(name.clone())); - } - } - - changes -} - -#[cfg(test)] -mod tests { - use super::*; - use crate::types::ZonePolicy; - - fn make_policy() -> ZonePolicy { - ZonePolicy::default() - } - - fn make_policy_with_zones(zones: Vec<&str>) -> ZonePolicy { - let mut p = ZonePolicy::default(); - p.network.allowed_zones = zones.into_iter().map(String::from).collect(); - p - } - - #[test] - fn diff_empty_to_one_zone_is_added() { - let old = HashMap::new(); - let mut new = HashMap::new(); - new.insert("frontend".to_string(), make_policy()); - - let changes = diff_policies(&old, &new); - assert_eq!(changes.len(), 1); - assert!(matches!(&changes[0], PolicyChange::Added(name, _) if name == "frontend")); - } - - #[test] - fn diff_removed_zone_detected() { - let mut old = HashMap::new(); - old.insert("frontend".to_string(), make_policy()); - let new = HashMap::new(); - - let changes = diff_policies(&old, &new); - assert_eq!(changes.len(), 1); - assert!(matches!(&changes[0], PolicyChange::Removed(name) if name == "frontend")); - } - - #[test] - fn diff_modified_zone_detected() { - let mut old = HashMap::new(); - old.insert("frontend".to_string(), make_policy()); - - let mut new = HashMap::new(); - new.insert("frontend".to_string(), make_policy_with_zones(vec!["database"])); - - let changes = diff_policies(&old, &new); - assert_eq!(changes.len(), 1); - assert!(matches!(&changes[0], PolicyChange::Modified(name, _) if name == "frontend")); - } - - #[test] - fn diff_no_changes_returns_empty() { - let mut policies = HashMap::new(); - policies.insert("frontend".to_string(), make_policy()); - - let changes = diff_policies(&policies, &policies); - assert!(changes.is_empty()); - } - - #[test] - fn diff_mixed_changes() { - let mut old = HashMap::new(); - old.insert("frontend".to_string(), make_policy()); - old.insert("database".to_string(), make_policy()); - - let mut new = HashMap::new(); - 
new.insert("frontend".to_string(), make_policy_with_zones(vec!["api"])); // modified - new.insert("api".to_string(), make_policy()); // added - // database removed - - let changes = diff_policies(&old, &new); - assert_eq!(changes.len(), 3); - } -} diff --git a/syva-adapter-file/src/run.rs b/syva-adapter-file/src/run.rs new file mode 100644 index 0000000..7fe3f2b --- /dev/null +++ b/syva-adapter-file/src/run.rs @@ -0,0 +1,155 @@ +use crate::policy::load_policies_from_dir; +use crate::translate::{policy_to_create_args, policy_to_update_args}; +use anyhow::{Context, Result}; +use std::collections::HashMap; +use std::path::PathBuf; +use std::time::Duration; +use syva_cp_client::{CpClient, CpClientConfig, DeleteZoneArgs, ZoneSnapshot}; +use tracing::{debug, info, warn}; +use uuid::Uuid; + +pub struct Config { + pub policy_dir: PathBuf, + pub cp_endpoint: String, + pub team_id: Uuid, + pub reconcile_interval: Duration, +} + +pub async fn run(config: Config) -> Result<()> { + let cp = connect_with_retry(&config.cp_endpoint).await; + + info!( + policy_dir = %config.policy_dir.display(), + team_id = %config.team_id, + "syva-file starting" + ); + info!( + "container watcher and container membership reconciliation are deferred until ContainerService is implemented" + ); + + let mut ticker = tokio::time::interval(config.reconcile_interval); + ticker.set_missed_tick_behavior(tokio::time::MissedTickBehavior::Skip); + + loop { + tokio::select! 
{ + _ = ticker.tick() => { + match reconcile_once(&cp, &config).await { + Ok(stats) if stats.changed > 0 => { + info!( + created = stats.created, + updated = stats.updated, + deleted = stats.deleted, + "reconcile done" + ); + } + Ok(_) => debug!("reconcile done, no changes"), + Err(error) => warn!("reconcile failed: {error:#}"), + } + } + result = tokio::signal::ctrl_c() => { + result.context("wait for ctrl-c")?; + info!("received shutdown signal"); + return Ok(()); + } + } + } +} + +#[derive(Default)] +struct ReconcileStats { + created: usize, + updated: usize, + deleted: usize, + changed: usize, +} + +async fn connect_with_retry(endpoint: &str) -> CpClient { + let mut backoff = Duration::from_millis(250); + let max_backoff = Duration::from_secs(30); + + loop { + match CpClient::connect(CpClientConfig { + endpoint: endpoint.to_string(), + ..Default::default() + }) + .await + { + Ok(client) => return client, + Err(error) => { + warn!( + endpoint, + error = %error, + backoff_ms = backoff.as_millis(), + "could not connect to syva-cp; retrying" + ); + tokio::time::sleep(backoff).await; + backoff = (backoff * 2).min(max_backoff); + } + } + } +} + +async fn reconcile_once(cp: &CpClient, config: &Config) -> Result { + let on_disk = load_policies_from_dir(&config.policy_dir) + .with_context(|| format!("load policies from {}", config.policy_dir.display()))?; + + let in_cp = cp.list_zones(config.team_id, None, 500).await?; + let in_cp_by_name: HashMap = + in_cp.into_iter().map(|zone| (zone.name.clone(), zone)).collect(); + + let mut stats = ReconcileStats::default(); + + for (name, policy) in &on_disk { + match cp.get_zone_by_name(config.team_id, name).await? 
{ + None => match cp.create_zone(policy_to_create_args(config.team_id, name, policy)?).await + { + Ok(output) => { + stats.created += 1; + stats.changed += 1; + info!(zone = %name, zone_id = %output.zone_id, "zone created"); + } + Err(error) => warn!(zone = %name, error = %error, "create_zone failed"), + }, + Some(snapshot) => match policy_to_update_args(&snapshot, policy)? { + Some(args) => match cp.update_zone(args).await { + Ok(output) => { + stats.updated += 1; + stats.changed += 1; + info!( + zone = %name, + zone_id = %output.zone_id, + version = output.version, + "zone updated" + ); + } + Err(error) => warn!(zone = %name, error = %error, "update_zone failed"), + }, + None => debug!(zone = %name, "zone unchanged"), + }, + } + } + + for (name, snapshot) in &in_cp_by_name { + if on_disk.contains_key(name) || snapshot.status == "deleted" { + continue; + } + + match cp + .delete_zone(DeleteZoneArgs { + zone_id: snapshot.zone_id, + if_version: snapshot.version, + drain: true, + }) + .await + { + Ok(()) => { + stats.deleted += 1; + stats.changed += 1; + info!(zone = %name, "zone deletion requested (drain)"); + } + Err(error) => warn!(zone = %name, error = %error, "delete_zone failed"), + } + } + + Ok(stats) +} diff --git a/syva-adapter-file/src/translate.rs b/syva-adapter-file/src/translate.rs index 213f382..e39f51a 100644 --- a/syva-adapter-file/src/translate.rs +++ b/syva-adapter-file/src/translate.rs @@ -1,18 +1,48 @@ -//! Translation between adapter-local TOML policy types and proto types. -//! -//! The adapter owns the TOML deserialization format. The proto types are -//! what syva-core understands. This module bridges the two. 
+use crate::policy::FilePolicy; +use anyhow::Result; +use syva_cp_client::{CreateZoneArgs, UpdateZoneArgs, ZoneSnapshot}; +use uuid::Uuid; -use crate::types::ZonePolicy; -use syva_proto::syva_core; +pub fn policy_to_create_args( + team_id: Uuid, + name: &str, + policy: &FilePolicy, +) -> Result { + Ok(CreateZoneArgs { + team_id, + name: name.to_string(), + display_name: policy.display_name.clone(), + policy_json: serde_json::to_value(&policy.policy)?, + summary_json: None, + selector_json: policy.selector.clone(), + metadata_json: None, + }) +} + +pub fn policy_to_update_args( + snapshot: &ZoneSnapshot, + policy: &FilePolicy, +) -> Result> { + let desired_policy_json = serde_json::to_value(&policy.policy)?; + let desired_selector_json = policy.selector.clone(); -/// Convert a local ZonePolicy (deserialized from TOML) to a proto ZonePolicy. -pub fn to_proto_policy(policy: &ZonePolicy) -> syva_core::ZonePolicy { - syva_core::ZonePolicy { - host_paths: policy.filesystem.host_paths.clone(), - allowed_zones: policy.network.allowed_zones.clone(), - allow_ptrace: policy.capabilities.allowed.iter() - .any(|c| c == "CAP_SYS_PTRACE"), - zone_type: syva_core::ZoneType::Standard.into(), + let policy_matches = snapshot + .current_policy_json + .as_ref() + .map(|current| current == &desired_policy_json) + .unwrap_or(false); + let selector_matches = snapshot.selector_json == desired_selector_json; + let display_name_matches = snapshot.display_name == policy.display_name; + + if policy_matches && selector_matches && display_name_matches { + return Ok(None); } + + Ok(Some(UpdateZoneArgs { + zone_id: snapshot.zone_id, + if_version: snapshot.version, + policy_json: Some(desired_policy_json), + selector_json: desired_selector_json, + metadata_json: None, + })) } diff --git a/syva-adapter-file/src/types.rs b/syva-adapter-file/src/types.rs index 81da073..c9b2080 100644 --- a/syva-adapter-file/src/types.rs +++ b/syva-adapter-file/src/types.rs @@ -104,7 +104,7 @@ pub struct ZoneMetadata { 
} /// Declarative policy defining what a zone can do. -#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize, Default)] #[serde(deny_unknown_fields)] pub struct ZonePolicy { /// Optional metadata section — parsed but not used for enforcement. @@ -118,20 +118,6 @@ pub struct ZonePolicy { pub syscalls: SyscallPolicy, } -impl Default for ZonePolicy { - fn default() -> Self { - Self { - zone: ZoneMetadata::default(), - capabilities: CapabilityPolicy::default(), - resources: ResourcePolicy::default(), - network: NetworkPolicy::default(), - filesystem: FilesystemPolicy::default(), - devices: DevicePolicy::default(), - syscalls: SyscallPolicy::default(), - } - } -} - impl ZonePolicy { /// Validate policy values against kernel constraints. pub fn validate(&self, zone_name: &str) -> anyhow::Result<()> { diff --git a/syva-adapter-file/src/verify.rs b/syva-adapter-file/src/verify.rs new file mode 100644 index 0000000..2e9805b --- /dev/null +++ b/syva-adapter-file/src/verify.rs @@ -0,0 +1,16 @@ +use crate::policy::load_policies_from_dir; +use anyhow::Result; +use std::path::Path; + +pub fn run(policy_dir: &Path) -> Result<()> { + let policies = load_policies_from_dir(policy_dir)?; + println!( + "Validated {} policies in {}", + policies.len(), + policy_dir.display() + ); + for name in policies.keys() { + println!(" OK {name}"); + } + Ok(()) +} diff --git a/syva-adapter-file/src/watcher.rs b/syva-adapter-file/src/watcher.rs deleted file mode 100644 index 6a26d07..0000000 --- a/syva-adapter-file/src/watcher.rs +++ /dev/null @@ -1,503 +0,0 @@ -//! Container watcher — enumerates existing container cgroups and watches -//! for new containers via containerd's event stream. -//! -//! Zone assignment is label-driven: containers with a `syva.dev/zone` -//! annotation in their OCI spec are assigned to the named zone. Containers -//! without the label are treated as global (no enforcement). 
- -use std::collections::HashSet; -use std::os::unix::fs::MetadataExt; -use std::path::Path; -use std::sync::Arc; -use std::time::Duration; - -use tokio::sync::mpsc; - -use crate::mapper::LABEL_ZONE; - -/// Validate that a container ID is safe for use in filesystem paths. -/// Containerd uses 64-char hex strings (SHA256 digests). Reject anything -/// that could cause path traversal or injection. -fn is_valid_container_id(id: &str) -> bool { - !id.is_empty() - && id.len() <= 128 - && id.bytes().all(|b| b.is_ascii_hexdigit() || b == b'-' || b == b'_') -} - -/// A container's zone assignment. -pub struct ZoneAssignment { - pub container_id: String, - pub zone_name: String, - pub cgroup_id: u64, -} - -/// Enumerate existing container cgroups and assign zones by label. -pub fn enumerate_cgroups( - zone_names: &HashSet, -) -> anyhow::Result> { - let mut assignments = Vec::new(); - - let cgroup_roots = [ - "/sys/fs/cgroup/system.slice", - "/sys/fs/cgroup/kubepods.slice", - "/sys/fs/cgroup/kubepods", - ]; - - for root in &cgroup_roots { - let root_path = Path::new(root); - if !root_path.exists() { - continue; - } - - scan_cgroup_dir(root_path, zone_names, &mut assignments, 0)?; - } - - if assignments.is_empty() { - tracing::info!("no labelled containers found — running in monitor-only mode"); - } - - Ok(assignments) -} - -/// Maximum recursion depth for cgroup directory scanning. -/// Prevents unbounded recursion from malformed hierarchies or bind mount loops. 
-const MAX_CGROUP_SCAN_DEPTH: usize = 8; - -fn scan_cgroup_dir( - dir: &Path, - zone_names: &HashSet, - assignments: &mut Vec, - depth: usize, -) -> anyhow::Result<()> { - if depth > MAX_CGROUP_SCAN_DEPTH { - return Ok(()); - } - let entries = match std::fs::read_dir(dir) { - Ok(e) => e, - Err(_) => return Ok(()), - }; - - for entry in entries.flatten() { - let path = entry.path(); - if !path.is_dir() { - continue; - } - - let name = match path.file_name().and_then(|n| n.to_str()) { - Some(n) => n.to_string(), - None => continue, - }; - - if name.starts_with("containerd-") && name.ends_with(".scope") { - let container_id = name - .strip_prefix("containerd-") - .and_then(|s| s.strip_suffix(".scope")) - .unwrap_or(&name) - .to_string(); - - if !is_valid_container_id(&container_id) { - continue; - } - - let cgroup_id = resolve_cgroup_id(&path); - if cgroup_id == 0 { - continue; - } - - if let Some(zone_name) = read_container_zone_label(&container_id) { - if zone_names.contains(&zone_name) { - assignments.push(ZoneAssignment { - container_id, - zone_name, - cgroup_id, - }); - } else { - tracing::warn!( - container = container_id, - zone = zone_name, - "container has syva.dev/zone label but no matching policy — skipping" - ); - } - } - } - - scan_cgroup_dir(&path, zone_names, assignments, depth + 1)?; - } - - Ok(()) -} - -/// Read the `syva.dev/zone` annotation from a container's OCI config.json. 
-fn read_container_zone_label(container_id: &str) -> Option { - let state_paths = [ - format!( - "/run/containerd/io.containerd.runtime.v2.task/default/{container_id}/config.json" - ), - format!( - "/run/containerd/io.containerd.runtime.v2.task/k8s.io/{container_id}/config.json" - ), - format!( - "/run/containerd/io.containerd.runtime.v2.task/moby/{container_id}/config.json" - ), - ]; - - for path in &state_paths { - if let Ok(data) = std::fs::read_to_string(path) { - if let Ok(spec) = serde_json::from_str::(&data) { - if let Some(zone) = spec - .get("annotations") - .and_then(|a| a.get(LABEL_ZONE)) - .and_then(|v| v.as_str()) - { - return Some(zone.to_string()); - } - } - } - } - None -} - -/// Resolve a cgroup directory path to its cgroup_id. -pub fn resolve_cgroup_id(cgroup_path: &Path) -> u64 { - match std::fs::metadata(cgroup_path) { - Ok(meta) => meta.ino(), - Err(e) => { - tracing::debug!(path = %cgroup_path.display(), %e, "failed to stat cgroup dir"); - 0 - } - } -} - -// --- Live container event watching --- - -/// Events emitted by the live container watcher. -pub enum WatcherEvent { - Add(ZoneAssignment), - Remove { container_id: String, cgroup_id: Option }, -} - -/// Watch containerd for container start/stop events. 
-pub async fn watch_containerd_events( - socket_path: String, - zone_names_rx: tokio::sync::watch::Receiver>>, - tx: mpsc::Sender, -) { - let mut backoff = Duration::from_secs(1); - let max_backoff = Duration::from_secs(30); - - loop { - match try_watch(&socket_path, &zone_names_rx, &tx).await { - Ok(()) => { - tracing::warn!("containerd event stream ended — reconnecting"); - backoff = Duration::from_secs(1); - } - Err(e) => { - tracing::warn!( - %e, - backoff_secs = backoff.as_secs(), - "containerd event watch failed — reconnecting" - ); - backoff = (backoff * 2).min(max_backoff); - } - } - tokio::time::sleep(backoff).await; - } -} - -async fn try_watch( - socket_path: &str, - zone_names_rx: &tokio::sync::watch::Receiver>>, - tx: &mpsc::Sender, -) -> anyhow::Result<()> { - use containerd_client::{connect, services::v1::events_client::EventsClient}; - use containerd_client::services::v1::SubscribeRequest; - - let channel = connect(socket_path).await - .map_err(|e| anyhow::anyhow!("failed to connect to containerd at {socket_path}: {e}"))?; - - let mut client = EventsClient::new(channel); - - let request = SubscribeRequest { - filters: vec![ - "topic==/tasks/start".to_string(), - "topic==/tasks/delete".to_string(), - ], - }; - - let mut stream = client.subscribe(request).await?.into_inner(); - - tracing::info!(socket = socket_path, "subscribed to containerd events"); - - while let Some(envelope) = stream.message().await? 
{ - let topic = &envelope.topic; - let event = match &envelope.event { - Some(e) => e, - None => continue, - }; - - if topic == "/tasks/start" { - let zone_names = zone_names_rx.borrow().clone(); - handle_task_start(event, &zone_names, tx).await; - } else if topic == "/tasks/delete" { - handle_task_delete(event, tx).await; - } - } - - Ok(()) -} - -async fn handle_task_start( - event: &prost_types::Any, - zone_names: &HashSet, - tx: &mpsc::Sender, -) { - use prost::Message; - - let start = match TaskStartEvent::decode(event.value.as_slice()) { - Ok(s) if !s.container_id.is_empty() => s, - _ => return, - }; - let container_id = start.container_id; - - if !is_valid_container_id(&container_id) { - tracing::warn!( - container = container_id, - "rejecting container with invalid ID (possible path traversal)" - ); - return; - } - - // Retry loop: OCI config.json may not exist immediately after task start. - let mut zone_name = None; - for attempt in 0..10 { - if attempt > 0 { - tokio::time::sleep(Duration::from_millis(50 * attempt)).await; - } - if let Some(z) = read_container_zone_label(&container_id) { - zone_name = Some(z); - break; - } - } - let zone_name = match zone_name { - Some(z) => z, - None => return, // No label after retries -> global, skip. 
- }; - - if !zone_names.contains(&zone_name) { - tracing::warn!( - container = container_id, - zone = zone_name, - "live: container has zone label but no matching policy — skipping" - ); - return; - } - - let cgroup_id = if start.pid > 0 { - let id = resolve_cgroup_id_from_pid(start.pid); - if id != 0 { id } else { resolve_container_cgroup_id(&container_id) } - } else { - resolve_container_cgroup_id(&container_id) - }; - if cgroup_id == 0 { - tracing::warn!(container = container_id, "live: could not resolve cgroup_id"); - return; - } - - if let Err(e) = tx - .send(WatcherEvent::Add(ZoneAssignment { - container_id, - zone_name, - cgroup_id, - })) - .await - { - if let WatcherEvent::Add(a) = e.0 { - tracing::error!(container = a.container_id, "zone assignment channel closed — enforcement loop is dead"); - } - } -} - -async fn handle_task_delete( - event: &prost_types::Any, - tx: &mpsc::Sender, -) { - let container_id = match extract_container_id(event) { - Some(id) if is_valid_container_id(&id) => id, - _ => return, - }; - - if let Err(e) = tx - .send(WatcherEvent::Remove { - container_id, - cgroup_id: None, - }) - .await - { - if let WatcherEvent::Remove { container_id, .. 
} = e.0 { - tracing::error!(container = container_id, "zone assignment channel closed — enforcement loop is dead"); - } - } -} - -#[derive(Clone, PartialEq, prost::Message)] -struct TaskStartEvent { - #[prost(string, tag = "1")] - container_id: String, - #[prost(uint32, tag = "2")] - pid: u32, -} - -#[derive(Clone, PartialEq, prost::Message)] -struct TaskDeleteEvent { - #[prost(string, tag = "1")] - container_id: String, -} - -fn extract_container_id(event: &prost_types::Any) -> Option { - use prost::Message; - - let from_start = || { - let msg = TaskStartEvent::decode(event.value.as_slice()).ok()?; - (!msg.container_id.is_empty()).then_some(msg.container_id) - }; - - let from_delete = || { - let msg = TaskDeleteEvent::decode(event.value.as_slice()).ok()?; - (!msg.container_id.is_empty()).then_some(msg.container_id) - }; - - match event.type_url.as_str() { - s if s.contains("TaskStart") => from_start(), - s if s.contains("TaskDelete") => from_delete(), - _ => from_start().or_else(from_delete), - } -} - -fn resolve_container_cgroup_id(container_id: &str) -> u64 { - let candidates = [ - format!("/sys/fs/cgroup/system.slice/containerd-{container_id}.scope"), - format!("/sys/fs/cgroup/kubepods.slice/kubepods-besteffort.slice/kubepods-besteffort-pod{container_id}.slice"), - ]; - - for path in &candidates { - let p = Path::new(path); - if p.exists() { - return resolve_cgroup_id(p); - } - } - - 0 -} - -fn resolve_cgroup_id_from_pid(pid: u32) -> u64 { - let cgroup_file = format!("/proc/{pid}/cgroup"); - let content = match std::fs::read_to_string(&cgroup_file) { - Ok(c) => c, - Err(_) => return 0, - }; - - for line in content.lines() { - if let Some(path) = line.strip_prefix("0::") { - let cgroup_path = format!("/sys/fs/cgroup{path}"); - let p = std::path::Path::new(&cgroup_path); - if p.exists() { - return resolve_cgroup_id(p); - } - } - } - - 0 -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn valid_container_id_hex() { - 
assert!(is_valid_container_id("abc123def456")); - } - - #[test] - fn valid_container_id_with_dash_underscore() { - assert!(is_valid_container_id("abc-123_def")); - } - - #[test] - fn valid_container_id_64_hex() { - assert!(is_valid_container_id(&"a".repeat(64))); - } - - #[test] - fn reject_empty_container_id() { - assert!(!is_valid_container_id("")); - } - - #[test] - fn reject_path_traversal() { - assert!(!is_valid_container_id("../../../etc/passwd")); - } - - #[test] - fn reject_slash() { - assert!(!is_valid_container_id("abc/def")); - } - - #[test] - fn reject_too_long() { - assert!(!is_valid_container_id(&"a".repeat(129))); - } - - #[test] - fn max_length_accepted() { - assert!(is_valid_container_id(&"a".repeat(128))); - } - - #[test] - fn extract_container_id_from_start_event() { - use prost::Message; - let start = TaskStartEvent { container_id: "abc123".into(), pid: 42 }; - let mut buf = Vec::new(); - start.encode(&mut buf).unwrap(); - let any = prost_types::Any { - type_url: "containerd.events.TaskStart".into(), - value: buf, - }; - assert_eq!(extract_container_id(&any), Some("abc123".to_string())); - } - - #[test] - fn extract_container_id_from_delete_event() { - use prost::Message; - let del = TaskDeleteEvent { container_id: "def456".into() }; - let mut buf = Vec::new(); - del.encode(&mut buf).unwrap(); - let any = prost_types::Any { - type_url: "containerd.events.TaskDelete".into(), - value: buf, - }; - assert_eq!(extract_container_id(&any), Some("def456".to_string())); - } - - #[test] - fn extract_container_id_empty_is_none() { - use prost::Message; - let start = TaskStartEvent { container_id: String::new(), pid: 0 }; - let mut buf = Vec::new(); - start.encode(&mut buf).unwrap(); - let any = prost_types::Any { - type_url: "containerd.events.TaskStart".into(), - value: buf, - }; - assert_eq!(extract_container_id(&any), None); - } - - #[test] - fn extract_container_id_garbage_does_not_panic() { - let any = prost_types::Any { - type_url: "unknown".into(), 
- value: vec![0xff, 0xff], - }; - let _ = extract_container_id(&any); - } -} diff --git a/syva-cp-client/src/client.rs b/syva-cp-client/src/client.rs index c87b2c3..ae28dc7 100644 --- a/syva-cp-client/src/client.rs +++ b/syva-cp-client/src/client.rs @@ -99,9 +99,13 @@ pub struct ZoneSnapshot { pub zone_id: Uuid, pub team_id: Uuid, pub name: String, + pub display_name: Option, pub status: String, pub version: i64, pub current_policy_id: Option, + pub current_policy_json: Option, + pub selector_json: Option, + pub metadata_json: Option, } impl CpClient { @@ -348,6 +352,11 @@ impl CpClient { zone_id: parse_uuid(&zone.id, "zone.id")?, team_id: parse_uuid(&zone.team_id, "zone.team_id")?, name: zone.name, + display_name: if zone.display_name.is_empty() { + None + } else { + Some(zone.display_name) + }, status: zone.status, version: zone.version, current_policy_id: if zone.current_policy_id.is_empty() { @@ -358,6 +367,14 @@ impl CpClient { "zone.current_policy_id", )?) }, + current_policy_json: response + .current_policy + .as_ref() + .filter(|policy| !policy.policy_json.is_empty()) + .map(|policy| serde_json::from_str(&policy.policy_json)) + .transpose()?, + selector_json: parse_optional_json(&zone.selector_json)?, + metadata_json: parse_optional_json(&zone.metadata_json)?, })) } @@ -385,6 +402,11 @@ impl CpClient { zone_id: parse_uuid(&zone.id, "zone.id")?, team_id: parse_uuid(&zone.team_id, "zone.team_id")?, name: zone.name, + display_name: if zone.display_name.is_empty() { + None + } else { + Some(zone.display_name) + }, status: zone.status, version: zone.version, current_policy_id: if zone.current_policy_id.is_empty() { @@ -395,6 +417,9 @@ impl CpClient { "zone.current_policy_id", )?) 
}, + current_policy_json: None, + selector_json: parse_optional_json(&zone.selector_json)?, + metadata_json: parse_optional_json(&zone.metadata_json)?, }) }) .collect() @@ -468,3 +493,13 @@ fn parse_uuid(value: &str, field: &str) -> Result { Uuid::parse_str(value) .map_err(|error| CpClientError::Internal(format!("could not parse {field} as UUID: {error}"))) } + +fn parse_optional_json( + value: &str, +) -> Result, CpClientError> { + if value.is_empty() { + return Ok(None); + } + + serde_json::from_str(value).map(Some).map_err(Into::into) +} From 84c81913ad49b9f0b905a9ac00d03603b306e595 Mon Sep 17 00:00:00 2001 From: Yair Etziony Date: Sat, 25 Apr 2026 02:41:14 +0200 Subject: [PATCH 3/9] feat(syva-adapter-k8s): rewrite to push zones to syva-cp via syva-cp-client - CRD remains source of truth (kubectl apply wins over API edits) - Removes dependency on local syva-core gRPC surface - New CLI: --cp-endpoint, --team-id required, --namespace optional - Pod annotation/container watcher deferred to a later session --- Cargo.lock | 7 +- syva-adapter-k8s/Cargo.toml | 7 +- syva-adapter-k8s/src/connect.rs | 38 --- syva-adapter-k8s/src/crd.rs | 30 ++- syva-adapter-k8s/src/main.rs | 84 ++----- syva-adapter-k8s/src/mapper.rs | 179 +++++++------- syva-adapter-k8s/src/watcher.rs | 402 ++++++++++++-------------------- 7 files changed, 294 insertions(+), 453 deletions(-) delete mode 100644 syva-adapter-k8s/src/connect.rs diff --git a/Cargo.lock b/Cargo.lock index 524805d..99c4a1a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3061,19 +3061,16 @@ dependencies = [ "anyhow", "clap", "futures", - "hyper-util", "k8s-openapi", "kube", "schemars", "serde", "serde_json", - "syva-proto", + "syva-cp-client", "tokio", - "tokio-stream", - "tonic", - "tower 0.5.3", "tracing", "tracing-subscriber", + "uuid", ] [[package]] diff --git a/syva-adapter-k8s/Cargo.toml b/syva-adapter-k8s/Cargo.toml index 157ec86..b96a588 100644 --- a/syva-adapter-k8s/Cargo.toml +++ b/syva-adapter-k8s/Cargo.toml @@ -8,10 
+8,8 @@ name = "syva-k8s" path = "src/main.rs" [dependencies] -syva-proto = { path = "../syva-proto" } -tonic = { workspace = true } +syva-cp-client = { path = "../syva-cp-client" } tokio = { workspace = true } -tokio-stream = { workspace = true } tracing = { workspace = true } tracing-subscriber = { workspace = true } anyhow = { workspace = true } @@ -22,5 +20,4 @@ futures = { workspace = true } kube = { version = "0.95", features = ["runtime", "derive"] } k8s-openapi = { version = "0.23", features = ["v1_29"] } schemars = "0.8" -tower = "0.5" -hyper-util = { version = "0.1", features = ["tokio"] } +uuid = { version = "1", features = ["v4", "serde"] } diff --git a/syva-adapter-k8s/src/connect.rs b/syva-adapter-k8s/src/connect.rs deleted file mode 100644 index b303142..0000000 --- a/syva-adapter-k8s/src/connect.rs +++ /dev/null @@ -1,38 +0,0 @@ -//! gRPC connection to syva-core over Unix domain socket. - -use syva_proto::syva_core::syva_core_client::SyvaCoreClient; -use tonic::transport::Channel; - -/// Connect to syva-core over Unix socket. -pub async fn connect_to_core(socket_path: &str) -> anyhow::Result> { - let path = socket_path.to_string(); - let channel = tonic::transport::Endpoint::try_from("http://[::]:50051")? - .connect_with_connector(tower::service_fn(move |_: tonic::transport::Uri| { - let path = path.clone(); - async move { - let stream = tokio::net::UnixStream::connect(&path).await?; - Ok::<_, std::io::Error>(hyper_util::rt::TokioIo::new(stream)) - } - })) - .await?; - Ok(SyvaCoreClient::new(channel)) -} - -/// Connect with exponential backoff retry. 
-pub async fn connect_with_retry(socket_path: &str, max_attempts: usize) -> anyhow::Result> { - let mut backoff = std::time::Duration::from_millis(100); - for attempt in 1..=max_attempts { - match connect_to_core(socket_path).await { - Ok(client) => return Ok(client), - Err(e) => { - if attempt == max_attempts { - return Err(anyhow::anyhow!("failed to connect to syva-core after {max_attempts} attempts: {e}")); - } - tracing::warn!(attempt, %e, "failed to connect to syva-core — retrying"); - tokio::time::sleep(backoff).await; - backoff = (backoff * 2).min(std::time::Duration::from_secs(5)); - } - } - } - unreachable!() -} diff --git a/syva-adapter-k8s/src/crd.rs b/syva-adapter-k8s/src/crd.rs index d5044b2..e5e23f2 100644 --- a/syva-adapter-k8s/src/crd.rs +++ b/syva-adapter-k8s/src/crd.rs @@ -1,6 +1,7 @@ use kube::CustomResource; use schemars::JsonSchema; use serde::{Deserialize, Serialize}; +use std::collections::BTreeMap; #[derive(CustomResource, Debug, Clone, Serialize, Deserialize, JsonSchema)] #[kube( @@ -20,6 +21,8 @@ pub struct SyvaZonePolicySpec { #[serde(default)] pub process: Option, #[serde(default)] + pub selector: Option, + #[serde(default)] pub zone_type: Option, } @@ -44,20 +47,26 @@ pub struct ProcessSpec { pub allow_ptrace: bool, } -#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)] +#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema, Default, PartialEq, Eq)] +#[serde(rename_all = "camelCase")] +pub struct SelectorSpec { + #[serde(default)] + pub all_nodes: bool, + #[serde(default)] + pub node_names: Vec, + #[serde(default)] + pub match_labels: BTreeMap, +} + +#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema, Default)] #[serde(rename_all = "lowercase")] pub enum ZoneTypeSpec { + #[default] Standard, Privileged, Isolated, } -impl Default for ZoneTypeSpec { - fn default() -> Self { - Self::Standard - } -} - #[derive(Debug, Clone, Serialize, Deserialize, JsonSchema, Default)] #[serde(rename_all = "camelCase")] pub struct 
SyvaZonePolicyStatus { @@ -88,7 +97,7 @@ mod tests { #[test] fn spec_deserializes_minimal() { - let json = r#"{"filesystem":null,"network":null,"process":null,"zoneType":null}"#; + let json = r#"{"filesystem":null,"network":null,"process":null,"selector":null,"zoneType":null}"#; let spec: SyvaZonePolicySpec = serde_json::from_str(json).unwrap(); assert!(spec.filesystem.is_none()); } @@ -99,10 +108,15 @@ mod tests { "filesystem": {"hostPaths": ["/data"]}, "network": {"allowedZones": ["db"]}, "process": {"allowPtrace": true}, + "selector": {"matchLabels": {"tier": "prod"}}, "zoneType": "privileged" }"#; let spec: SyvaZonePolicySpec = serde_json::from_str(json).unwrap(); assert_eq!(spec.filesystem.unwrap().host_paths, vec!["/data"]); assert!(spec.process.unwrap().allow_ptrace); + assert_eq!( + spec.selector.unwrap().match_labels.get("tier").map(String::as_str), + Some("prod") + ); } } diff --git a/syva-adapter-k8s/src/main.rs b/syva-adapter-k8s/src/main.rs index 72a443c..8d59d37 100644 --- a/syva-adapter-k8s/src/main.rs +++ b/syva-adapter-k8s/src/main.rs @@ -1,79 +1,41 @@ -mod connect; mod crd; mod mapper; mod watcher; -use std::sync::Arc; - +use anyhow::Result; use clap::Parser; -use tokio::sync::Mutex; -use tracing_subscriber::EnvFilter; +use uuid::Uuid; -#[derive(Parser)] -#[command(name = "syva-k8s", about = "Kubernetes CRD adapter for syva-core")] +#[derive(Parser, Debug)] +#[command(name = "syva-k8s", version)] struct Cli { - /// Path to the syva-core Unix socket. - #[arg(long, default_value = "/run/syva/syva-core.sock")] - socket_path: String, + /// Kubernetes namespace to watch SyvaZonePolicy CRDs in. + #[arg(long, env = "SYVA_K8S_NAMESPACE", default_value = "default")] + namespace: String, + + /// syva-cp gRPC endpoint. + #[arg(long, env = "SYVA_CP_ENDPOINT")] + cp_endpoint: String, - /// Namespace to watch. If omitted, watches all namespaces. - #[arg(long)] - namespace: Option, + /// Team UUID this adapter manages zones for. 
+ #[arg(long, env = "SYVA_TEAM_ID")] + team_id: Uuid, } #[tokio::main] -async fn main() -> anyhow::Result<()> { +async fn main() -> Result<()> { tracing_subscriber::fmt() .with_env_filter( - EnvFilter::from_default_env().add_directive("syva_k8s=info".parse()?), + tracing_subscriber::EnvFilter::try_from_default_env() + .unwrap_or_else(|_| "syva_k8s=info".into()), ) .init(); let cli = Cli::parse(); - - tracing::info!("syva-k8s starting"); - - // Connect to syva-core - let core_client = connect::connect_with_retry(&cli.socket_path, 30).await?; - tracing::info!(socket = cli.socket_path, "connected to syva-core"); - let client = Arc::new(Mutex::new(core_client)); - - // Connect to Kubernetes - let kube_client = kube::Client::try_default().await?; - tracing::info!("connected to Kubernetes API"); - - // Spawn CRD watcher and pod watcher concurrently - let crd_client = client.clone(); - let crd_kube = kube_client.clone(); - let crd_ns = cli.namespace.clone(); - let crd_task = tokio::spawn(async move { - if let Err(e) = - watcher::watch_zone_policies(crd_client, crd_kube, crd_ns.as_deref()).await - { - tracing::error!(%e, "CRD watcher failed"); - } - }); - - let pod_client = client.clone(); - let pod_kube = kube_client.clone(); - let pod_ns = cli.namespace.clone(); - let pod_task = tokio::spawn(async move { - if let Err(e) = watcher::watch_pods(pod_client, pod_kube, pod_ns.as_deref()).await { - tracing::error!(%e, "pod watcher failed"); - } - }); - - // Shutdown on SIGTERM/SIGINT - let mut sigterm = tokio::signal::unix::signal(tokio::signal::unix::SignalKind::terminate()) - .expect("failed to register SIGTERM"); - - tokio::select! 
{ - _ = tokio::signal::ctrl_c() => tracing::info!("received SIGINT"), - _ = sigterm.recv() => tracing::info!("received SIGTERM"), - _ = crd_task => tracing::warn!("CRD watcher exited"), - _ = pod_task => tracing::warn!("pod watcher exited"), - } - - tracing::info!("syva-k8s stopped"); - Ok(()) + watcher::run(watcher::Config { + namespace: cli.namespace, + cp_endpoint: cli.cp_endpoint, + team_id: cli.team_id, + }) + .await } diff --git a/syva-adapter-k8s/src/mapper.rs b/syva-adapter-k8s/src/mapper.rs index 1e4b48e..c5b00ee 100644 --- a/syva-adapter-k8s/src/mapper.rs +++ b/syva-adapter-k8s/src/mapper.rs @@ -1,108 +1,129 @@ -pub const ANNOTATION_ZONE: &str = "syva.dev/zone"; +use crate::crd::{SyvaZonePolicy, ZoneTypeSpec}; +use anyhow::Result; +use syva_cp_client::{CreateZoneArgs, UpdateZoneArgs, ZoneSnapshot}; +use uuid::Uuid; -use k8s_openapi::api::core::v1::Pod; -use syva_proto::syva_core; +pub fn spec_to_create_args( + team_id: Uuid, + name: &str, + crd: &SyvaZonePolicy, +) -> Result { + Ok(CreateZoneArgs { + team_id, + name: name.to_string(), + display_name: None, + policy_json: spec_to_policy_json(crd)?, + summary_json: None, + selector_json: spec_to_selector_json(crd)?, + metadata_json: None, + }) +} + +pub fn spec_to_update_args( + snapshot: &ZoneSnapshot, + crd: &SyvaZonePolicy, +) -> Result> { + let desired_policy_json = spec_to_policy_json(crd)?; + let desired_selector_json = spec_to_selector_json(crd)?; + + let policy_matches = snapshot + .current_policy_json + .as_ref() + .map(|current| current == &desired_policy_json) + .unwrap_or(false); + let selector_matches = snapshot.selector_json == desired_selector_json; -use crate::crd::SyvaZonePolicySpec; + if policy_matches && selector_matches { + return Ok(None); + } -pub fn zone_name_from_pod(pod: &Pod) -> Option { - pod.metadata - .annotations - .as_ref()? 
- .get(ANNOTATION_ZONE) - .cloned() + Ok(Some(UpdateZoneArgs { + zone_id: snapshot.zone_id, + if_version: snapshot.version, + policy_json: Some(desired_policy_json), + selector_json: desired_selector_json, + metadata_json: None, + })) } -pub fn spec_to_proto_policy(spec: &SyvaZonePolicySpec) -> syva_core::ZonePolicy { - syva_core::ZonePolicy { - host_paths: spec - .filesystem - .as_ref() - .map(|f| f.host_paths.clone()) +fn spec_to_policy_json(crd: &SyvaZonePolicy) -> Result { + let spec = &crd.spec; + Ok(serde_json::json!({ + "host_paths": spec.filesystem.as_ref() + .map(|filesystem| filesystem.host_paths.clone()) .unwrap_or_default(), - allowed_zones: spec - .network - .as_ref() - .map(|n| n.allowed_zones.clone()) + "allowed_zones": spec.network.as_ref() + .map(|network| network.allowed_zones.clone()) .unwrap_or_default(), - allow_ptrace: spec - .process - .as_ref() - .map(|p| p.allow_ptrace) + "allow_ptrace": spec.process.as_ref() + .map(|process| process.allow_ptrace) .unwrap_or(false), - zone_type: match spec - .zone_type - .as_ref() - .unwrap_or(&crate::crd::ZoneTypeSpec::Standard) - { - crate::crd::ZoneTypeSpec::Standard => syva_core::ZoneType::Standard.into(), - crate::crd::ZoneTypeSpec::Privileged => syva_core::ZoneType::Privileged.into(), - crate::crd::ZoneTypeSpec::Isolated => syva_core::ZoneType::Isolated.into(), + "zone_type": match spec.zone_type.as_ref().unwrap_or(&ZoneTypeSpec::Standard) { + ZoneTypeSpec::Standard => "standard", + ZoneTypeSpec::Privileged => "privileged", + ZoneTypeSpec::Isolated => "isolated", }, - } + })) +} + +fn spec_to_selector_json(crd: &SyvaZonePolicy) -> Result> { + crd.spec + .selector + .as_ref() + .map(serde_json::to_value) + .transpose() + .map_err(Into::into) } #[cfg(test)] mod tests { use super::*; - use k8s_openapi::apimachinery::pkg::apis::meta::v1::ObjectMeta; + use crate::crd::{FilesystemSpec, NetworkSpec, ProcessSpec, SelectorSpec, SyvaZonePolicySpec}; use std::collections::BTreeMap; - #[test] - fn 
zone_name_from_pod_with_annotation() { - let mut annotations = BTreeMap::new(); - annotations.insert(ANNOTATION_ZONE.to_string(), "web".to_string()); - let pod = Pod { - metadata: ObjectMeta { - annotations: Some(annotations), - ..Default::default() - }, - ..Default::default() - }; - assert_eq!(zone_name_from_pod(&pod), Some("web".to_string())); + fn crd(spec: SyvaZonePolicySpec) -> SyvaZonePolicy { + SyvaZonePolicy::new("web", spec) } #[test] - fn zone_name_from_pod_without_annotation() { - let pod = Pod::default(); - assert_eq!(zone_name_from_pod(&pod), None); + fn maps_compact_policy_json() { + let resource = crd(SyvaZonePolicySpec { + filesystem: Some(FilesystemSpec { + host_paths: vec!["/data".into()], + }), + network: Some(NetworkSpec { + allowed_zones: vec!["db".into()], + }), + process: Some(ProcessSpec { allow_ptrace: true }), + selector: None, + zone_type: Some(ZoneTypeSpec::Privileged), + }); + + let value = spec_to_policy_json(&resource).unwrap(); + assert_eq!(value["host_paths"], serde_json::json!(["/data"])); + assert_eq!(value["allowed_zones"], serde_json::json!(["db"])); + assert_eq!(value["allow_ptrace"], serde_json::json!(true)); + assert_eq!(value["zone_type"], serde_json::json!("privileged")); } #[test] - fn spec_to_proto_defaults() { - let spec = SyvaZonePolicySpec { + fn maps_selector_json() { + let mut labels = BTreeMap::new(); + labels.insert("tier".to_string(), "prod".to_string()); + let resource = crd(SyvaZonePolicySpec { filesystem: None, network: None, process: None, + selector: Some(SelectorSpec { + all_nodes: false, + node_names: vec!["n1".into()], + match_labels: labels, + }), zone_type: None, - }; - let policy = spec_to_proto_policy(&spec); - assert!(policy.host_paths.is_empty()); - assert!(policy.allowed_zones.is_empty()); - assert!(!policy.allow_ptrace); - assert_eq!(policy.zone_type, i32::from(syva_core::ZoneType::Standard)); - } - - #[test] - fn spec_to_proto_full() { - use crate::crd::{FilesystemSpec, NetworkSpec, ProcessSpec, 
ZoneTypeSpec}; + }); - let spec = SyvaZonePolicySpec { - filesystem: Some(FilesystemSpec { - host_paths: vec!["/data".to_string()], - }), - network: Some(NetworkSpec { - allowed_zones: vec!["db".to_string()], - }), - process: Some(ProcessSpec { - allow_ptrace: true, - }), - zone_type: Some(ZoneTypeSpec::Privileged), - }; - let policy = spec_to_proto_policy(&spec); - assert_eq!(policy.host_paths, vec!["/data"]); - assert_eq!(policy.allowed_zones, vec!["db"]); - assert!(policy.allow_ptrace); - assert_eq!(policy.zone_type, i32::from(syva_core::ZoneType::Privileged)); + let value = spec_to_selector_json(&resource).unwrap().unwrap(); + assert_eq!(value["nodeNames"], serde_json::json!(["n1"])); + assert_eq!(value["matchLabels"]["tier"], serde_json::json!("prod")); } } diff --git a/syva-adapter-k8s/src/watcher.rs b/syva-adapter-k8s/src/watcher.rs index 821b9df..09fbcf5 100644 --- a/syva-adapter-k8s/src/watcher.rs +++ b/syva-adapter-k8s/src/watcher.rs @@ -1,294 +1,182 @@ -use std::collections::{HashMap, HashSet}; -use std::sync::Arc; - +use crate::crd::SyvaZonePolicy; +use crate::mapper::{spec_to_create_args, spec_to_update_args}; +use anyhow::{Context, Result}; use futures::StreamExt; -use k8s_openapi::api::core::v1::Pod; -use kube::runtime::watcher::Event; -use kube::{runtime::watcher, Api, Client}; -use syva_proto::syva_core::syva_core_client::SyvaCoreClient; -use syva_proto::syva_core::*; -use tokio::sync::Mutex; -use tonic::transport::Channel; +use kube::runtime::watcher::{watcher, Config as WatcherConfig, Event}; +use kube::{Api, Client as KubeClient}; +use std::collections::{HashMap, HashSet}; +use std::time::Duration; +use syva_cp_client::{CpClient, CpClientConfig, DeleteZoneArgs, ZoneSnapshot}; +use tracing::{info, warn}; +use uuid::Uuid; + +pub struct Config { + pub namespace: String, + pub cp_endpoint: String, + pub team_id: Uuid, +} -use crate::crd::SyvaZonePolicy; -use crate::mapper; +pub async fn run(config: Config) -> Result<()> { + let cp = 
connect_with_retry(&config.cp_endpoint).await; -/// Watch SyvaZonePolicy CRDs and sync to core. -pub async fn watch_zone_policies( - client: Arc>>, - kube: Client, - namespace: Option<&str>, -) -> anyhow::Result<()> { - let policies: Api = match namespace { - Some(ns) => Api::namespaced(kube, ns), - None => Api::all(kube), - }; + let kube = KubeClient::try_default().await?; + let crds: Api = Api::namespaced(kube.clone(), &config.namespace); - let mut stream = watcher::watcher(policies, watcher::Config::default()).boxed(); + info!( + namespace = %config.namespace, + team_id = %config.team_id, + "syva-k8s starting" + ); + info!( + "pod annotation and container membership reconciliation are deferred until ContainerService is implemented" + ); - // Per-zone snapshot of the allowed_zones set most recently applied to core. - // Used on the next Apply to detect peers that were retracted from the CRD - // so we can emit DenyComm — without this, CRD edits could only *widen* the - // cross-zone allow set, never shrink it. 
- let mut last_allowed: HashMap> = HashMap::new(); + initial_reconcile(&cp, &crds, config.team_id).await?; + let mut stream = watcher(crds, WatcherConfig::default()).boxed(); while let Some(event) = stream.next().await { match event { - Ok(Event::Apply(policy)) => { - let name = policy.metadata.name.clone().unwrap_or_default(); - let proto_policy = mapper::spec_to_proto_policy(&policy.spec); - - let new_allowed: HashSet = policy - .spec - .network - .as_ref() - .map(|n| n.allowed_zones.iter().cloned().collect()) - .unwrap_or_default(); - let prev_allowed = last_allowed.get(&name).cloned().unwrap_or_default(); - - let mut client = client.lock().await; - - let registered = match client - .register_zone(RegisterZoneRequest { - zone_name: name.clone(), - policy: Some(proto_policy), - }) - .await - { - Ok(resp) => { - let zone_id = resp.into_inner().zone_id; - tracing::info!(zone = name, zone_id, "registered zone from CRD"); - true - } - Err(e) => { - tracing::error!(zone = name, %e, "failed to register zone"); - false - } - }; - - if !registered { - // Skip the rest of the apply pipeline — the zone isn't - // on core, so registering host paths / comms against it - // will just fail. Crucially, leave `last_allowed` - // untouched so the next Apply can retry any pending - // retractions/grants against the correct prior snapshot. 
- drop(client); - continue; + Ok(Event::Apply(crd)) => { + if let Err(error) = handle_apply(&cp, config.team_id, &crd).await { + warn!(name = ?crd.metadata.name, error = %error, "apply failed"); } - - // Register host paths - if let Some(fs) = &policy.spec.filesystem { - for path in &fs.host_paths { - if let Err(e) = client - .register_host_path(RegisterHostPathRequest { - zone_name: name.clone(), - path: path.clone(), - recursive: true, - }) - .await - { - tracing::warn!(zone = name, path = path.as_str(), %e, "failed to register host path"); - } - } - } - - // Track which retractions/grants were actually applied so - // `last_allowed` stays in sync with core state. A failed - // DenyComm stays in `prev_allowed` for the next Apply to - // retry; a failed AllowComm means the peer isn't in the - // applied set yet. - let mut applied_allowed: HashSet = prev_allowed.clone(); - - // Retract peers that were in the previous allowed set but are - // no longer listed. DenyComm clears both directions on core, - // so the counterparty's CRD doesn't need to have already - // dropped us for this to be safe. - for peer in prev_allowed.difference(&new_allowed) { - match client - .deny_comm(DenyCommRequest { - zone_a: name.clone(), - zone_b: peer.clone(), - }) - .await - { - Ok(_) => { - applied_allowed.remove(peer); - tracing::info!(zone = name, peer = peer.as_str(), "CRD retracted allowed peer — comm denied"); - } - Err(e) => { - tracing::warn!(zone = name, peer = peer.as_str(), %e, "failed to deny comm after CRD retraction"); - } - } - } - - // (Re-)grant currently listed peers. Idempotent on the core side. - for peer in &new_allowed { - match client - .allow_comm(AllowCommRequest { - zone_a: name.clone(), - zone_b: peer.clone(), - }) - .await - { - Ok(_) => { - applied_allowed.insert(peer.clone()); - } - Err(e) => { - tracing::warn!(zone = name, peer = peer.as_str(), %e, "failed to allow comm"); - } - } - } - - drop(client); - // Only store what actually took effect. 
Next Apply will see - // any failed grants/retractions as still-pending diffs. - last_allowed.insert(name, applied_allowed); } - Ok(Event::Delete(policy)) => { - let name = policy.metadata.name.unwrap_or_default(); - let mut client = client.lock().await; - match client - .remove_zone(RemoveZoneRequest { - zone_name: name.clone(), - drain: true, - }) - .await - { - Ok(_) => tracing::info!(zone = name, "removed zone (CRD deleted)"), - Err(e) => tracing::error!(zone = name, %e, "failed to remove zone"), + Ok(Event::Delete(crd)) => { + if let Err(error) = handle_delete(&cp, config.team_id, &crd).await { + warn!(name = ?crd.metadata.name, error = %error, "delete failed"); } - drop(client); - last_allowed.remove(&name); - } - Ok(_) => {} // InitApply, InitDone - Err(e) => { - tracing::error!(%e, "CRD watcher error"); } + Ok(Event::Init) | Ok(Event::InitDone) | Ok(Event::InitApply(_)) => {} + Err(error) => warn!("watcher error: {error}"), } } Ok(()) } -/// Watch Pods for syva.dev/zone annotation. -pub async fn watch_pods( - client: Arc>>, - kube: Client, - namespace: Option<&str>, -) -> anyhow::Result<()> { - let pods: Api = match namespace { - Some(ns) => Api::namespaced(kube, ns), - None => Api::all(kube), - }; - - let mut stream = watcher::watcher(pods, watcher::Config::default()).boxed(); - - while let Some(event) = stream.next().await { - match event { - Ok(Event::Apply(pod)) => { - if let Some(zone_name) = mapper::zone_name_from_pod(&pod) { - let ns = pod.metadata.namespace.clone().unwrap_or_default(); - let pod_name = pod.metadata.name.clone().unwrap_or_default(); - let container_id = pod.metadata.uid.clone().unwrap_or_default(); - if container_id.is_empty() { - tracing::warn!( - namespace = ns, - pod = pod_name, - zone = zone_name, - "skipping zoned pod: missing metadata.uid" - ); - continue; - } - - // Resolve cgroup_id from pod's container statuses. 
- // Returns 0 if containerStatuses is missing (pod not yet scheduled/started) - // or if the scope path isn't one we know how to locate. Kubelet will - // re-emit an Apply once status is populated; we rely on that rather - // than retrying here. - let cgroup_id = cgroup_id_from_pod(&pod); - if cgroup_id == 0 { - tracing::warn!( - namespace = ns, - pod = pod_name, - zone = zone_name, - "skipping zoned pod: could not resolve cgroup_id (containerStatuses missing or unknown scope path)" - ); - continue; - } +async fn connect_with_retry(endpoint: &str) -> CpClient { + let mut backoff = Duration::from_millis(250); + let max_backoff = Duration::from_secs(30); + + loop { + match CpClient::connect(CpClientConfig { + endpoint: endpoint.to_string(), + ..Default::default() + }) + .await + { + Ok(client) => return client, + Err(error) => { + warn!( + endpoint, + error = %error, + backoff_ms = backoff.as_millis(), + "could not connect to syva-cp; retrying" + ); + tokio::time::sleep(backoff).await; + backoff = (backoff * 2).min(max_backoff); + } + } + } +} - let mut client = client.lock().await; - match client - .attach_container(AttachContainerRequest { - container_id: container_id.clone(), - zone_name: zone_name.clone(), - cgroup_id, - }) - .await - { - Ok(_) => { - tracing::info!(container = container_id, zone = zone_name, "attached container") - } - Err(e) => { - tracing::warn!(container = container_id, %e, "failed to attach container") - } - } +async fn initial_reconcile( + cp: &CpClient, + crds: &Api, + team_id: Uuid, +) -> Result<()> { + let crd_list = crds.list(&Default::default()).await?; + let in_cp = cp.list_zones(team_id, None, 500).await?; + let in_cp_by_name: HashMap = + in_cp.into_iter().map(|zone| (zone.name.clone(), zone)).collect(); + + let mut crd_names = HashSet::new(); + for crd in &crd_list { + let Some(name) = crd.metadata.name.clone() else { + continue; + }; + crd_names.insert(name.clone()); + + match cp.get_zone_by_name(team_id, &name).await? 
{ + None => { + let args = spec_to_create_args(team_id, &name, crd)?; + match cp.create_zone(args).await { + Ok(_) => info!(zone = %name, "zone created from CRD (initial)"), + Err(error) => warn!(zone = %name, error = %error, "initial create failed"), } } - Ok(Event::Delete(pod)) => { - let container_id = pod.metadata.uid.unwrap_or_default(); - if container_id.is_empty() { - continue; + Some(snapshot) => { + if let Some(args) = spec_to_update_args(&snapshot, crd)? { + match cp.update_zone(args).await { + Ok(_) => info!(zone = %name, "zone updated from CRD (initial)"), + Err(error) => warn!(zone = %name, error = %error, "initial update failed"), + } } - - let mut client = client.lock().await; - let _ = client - .detach_container(DetachContainerRequest { container_id }) - .await; - } - Ok(_) => {} - Err(e) => { - tracing::error!(%e, "pod watcher error"); } } } + for (name, snapshot) in &in_cp_by_name { + if crd_names.contains(name) || snapshot.status == "deleted" { + continue; + } + match cp + .delete_zone(DeleteZoneArgs { + zone_id: snapshot.zone_id, + if_version: snapshot.version, + drain: true, + }) + .await + { + Ok(()) => info!(zone = %name, "zone deleted (no matching CRD)"), + Err(error) => warn!(zone = %name, error = %error, "initial delete failed"), + } + } + Ok(()) } -/// Resolve cgroup_id from a Pod's containerStatuses. -fn cgroup_id_from_pod(pod: &Pod) -> u64 { - // Get first container's PID from container status - let statuses = pod - .status - .as_ref() - .and_then(|s| s.container_statuses.as_ref()); - - if let Some(statuses) = statuses { - for status in statuses { - if let Some(container_id) = &status.container_id { - // Container ID format: containerd://abc123... 
- let id = container_id.split("://").last().unwrap_or(""); - if !id.is_empty() { - return resolve_cgroup_id_from_containerd(id); - } +async fn handle_apply(cp: &CpClient, team_id: Uuid, crd: &SyvaZonePolicy) -> Result<()> { + let name = crd + .metadata + .name + .clone() + .context("CRD missing metadata.name")?; + + match cp.get_zone_by_name(team_id, &name).await? { + None => { + cp.create_zone(spec_to_create_args(team_id, &name, crd)?) + .await?; + info!(zone = %name, "zone created from CRD"); + } + Some(snapshot) => { + if let Some(args) = spec_to_update_args(&snapshot, crd)? { + cp.update_zone(args).await?; + info!(zone = %name, "zone updated from CRD"); } } } - 0 + + Ok(()) } -fn resolve_cgroup_id_from_containerd(container_id: &str) -> u64 { - use std::os::unix::fs::MetadataExt; +async fn handle_delete(cp: &CpClient, team_id: Uuid, crd: &SyvaZonePolicy) -> Result<()> { + let name = crd + .metadata + .name + .clone() + .context("CRD missing metadata.name")?; - let candidates = [ - format!("/sys/fs/cgroup/system.slice/containerd-{container_id}.scope"), - format!("/sys/fs/cgroup/kubepods.slice/kubepods-besteffort.slice/kubepods-besteffort-pod{container_id}.scope"), - ]; + let Some(snapshot) = cp.get_zone_by_name(team_id, &name).await? 
else { + return Ok(()); + }; - for path in &candidates { - if let Ok(meta) = std::fs::metadata(path) { - return meta.ino(); - } - } - 0 + cp.delete_zone(DeleteZoneArgs { + zone_id: snapshot.zone_id, + if_version: snapshot.version, + drain: true, + }) + .await?; + info!(zone = %name, "zone deleted (CRD removed)"); + Ok(()) } From f06f4fc566dc39cad3308f3848a13e6479394114 Mon Sep 17 00:00:00 2001 From: Yair Etziony Date: Sat, 25 Apr 2026 02:43:19 +0200 Subject: [PATCH 4/9] feat(syva-adapter-api): rewrite as thin REST proxy in front of syva-cp ZoneService - Removes dependency on local syva-core gRPC surface - New CLI: --cp-endpoint, --team-id required - Endpoints: POST/GET/PUT/DELETE /v1/zones[, /{name}] --- Cargo.lock | 7 +- syva-adapter-api/Cargo.toml | 7 +- syva-adapter-api/src/connect.rs | 43 -- syva-adapter-api/src/main.rs | 65 ++- syva-adapter-api/src/routes.rs | 733 +++++++++++--------------------- 5 files changed, 275 insertions(+), 580 deletions(-) delete mode 100644 syva-adapter-api/src/connect.rs diff --git a/Cargo.lock b/Cargo.lock index 99c4a1a..c4d276b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3025,16 +3025,13 @@ dependencies = [ "anyhow", "axum", "clap", - "hyper-util", "serde", "serde_json", - "syva-proto", + "syva-cp-client", "tokio", - "tokio-stream", - "tonic", - "tower 0.5.3", "tracing", "tracing-subscriber", + "uuid", ] [[package]] diff --git a/syva-adapter-api/Cargo.toml b/syva-adapter-api/Cargo.toml index df6ad89..eba10da 100644 --- a/syva-adapter-api/Cargo.toml +++ b/syva-adapter-api/Cargo.toml @@ -8,10 +8,8 @@ name = "syva-api" path = "src/main.rs" [dependencies] -syva-proto = { path = "../syva-proto" } -tonic = { workspace = true } +syva-cp-client = { path = "../syva-cp-client" } tokio = { workspace = true } -tokio-stream = { workspace = true } tracing = { workspace = true } tracing-subscriber = { workspace = true } anyhow = { workspace = true } @@ -19,5 +17,4 @@ serde = { workspace = true } serde_json = { workspace = true } clap = { 
workspace = true } axum = { workspace = true } -tower = "0.5" -hyper-util = { version = "0.1", features = ["tokio"] } +uuid = { version = "1", features = ["v4", "serde"] } diff --git a/syva-adapter-api/src/connect.rs b/syva-adapter-api/src/connect.rs deleted file mode 100644 index ca3c39b..0000000 --- a/syva-adapter-api/src/connect.rs +++ /dev/null @@ -1,43 +0,0 @@ -//! gRPC connection to syva-core over Unix domain socket. - -use syva_proto::syva_core::syva_core_client::SyvaCoreClient; -use tonic::transport::Channel; - -/// Connect to syva-core over Unix socket. -pub async fn connect_to_core(socket_path: &str) -> anyhow::Result> { - let path = socket_path.to_string(); - let channel = tonic::transport::Endpoint::try_from("http://[::]:50051")? - .connect_with_connector(tower::service_fn(move |_: tonic::transport::Uri| { - let path = path.clone(); - async move { - let stream = tokio::net::UnixStream::connect(&path).await?; - Ok::<_, std::io::Error>(hyper_util::rt::TokioIo::new(stream)) - } - })) - .await?; - Ok(SyvaCoreClient::new(channel)) -} - -/// Connect with exponential backoff retry. 
-pub async fn connect_with_retry( - socket_path: &str, - max_attempts: usize, -) -> anyhow::Result> { - let mut backoff = std::time::Duration::from_millis(100); - for attempt in 1..=max_attempts { - match connect_to_core(socket_path).await { - Ok(client) => return Ok(client), - Err(e) => { - if attempt == max_attempts { - return Err(anyhow::anyhow!( - "failed to connect to syva-core after {max_attempts} attempts: {e}" - )); - } - tracing::warn!(attempt, %e, "failed to connect to syva-core — retrying"); - tokio::time::sleep(backoff).await; - backoff = (backoff * 2).min(std::time::Duration::from_secs(5)); - } - } - } - unreachable!() -} diff --git a/syva-adapter-api/src/main.rs b/syva-adapter-api/src/main.rs index 079666b..108852b 100644 --- a/syva-adapter-api/src/main.rs +++ b/syva-adapter-api/src/main.rs @@ -1,57 +1,40 @@ -mod connect; mod routes; +use anyhow::Result; use clap::Parser; -use tracing_subscriber::EnvFilter; +use std::net::SocketAddr; +use uuid::Uuid; -#[derive(Parser)] -#[command(name = "syva-api", about = "REST API adapter for syva-core")] +#[derive(Parser, Debug)] +#[command(name = "syva-api", version)] struct Cli { - /// Path to the syva-core Unix socket. - #[arg(long, default_value = "/run/syva/syva-core.sock")] - socket_path: String, + /// Address to listen on for the REST API. + #[arg(long, env = "SYVA_API_LISTEN", default_value = "0.0.0.0:8080")] + listen: SocketAddr, - /// Port for the REST API server. - #[arg(long, default_value = "8080")] - port: u16, + /// syva-cp gRPC endpoint. + #[arg(long, env = "SYVA_CP_ENDPOINT")] + cp_endpoint: String, + + /// Team UUID this proxy creates and updates zones in. 
+ #[arg(long, env = "SYVA_TEAM_ID")] + team_id: Uuid, } #[tokio::main] -async fn main() -> anyhow::Result<()> { +async fn main() -> Result<()> { tracing_subscriber::fmt() .with_env_filter( - EnvFilter::from_default_env().add_directive("syva_api=info".parse()?), + tracing_subscriber::EnvFilter::try_from_default_env() + .unwrap_or_else(|_| "syva_api=info".into()), ) .init(); let cli = Cli::parse(); - tracing::info!("syva-api starting"); - - let client = connect::connect_with_retry(&cli.socket_path, 30).await?; - tracing::info!(socket = cli.socket_path, "connected to syva-core"); - - let shared = std::sync::Arc::new(tokio::sync::Mutex::new(client)); - let app = routes::router(shared); - - let addr = std::net::SocketAddr::from(([0, 0, 0, 0], cli.port)); - let listener = tokio::net::TcpListener::bind(addr).await?; - tracing::info!(%addr, "REST API listening"); - - // Shutdown on SIGTERM/SIGINT - let shutdown = async { - let mut sigterm = - tokio::signal::unix::signal(tokio::signal::unix::SignalKind::terminate()) - .expect("failed to register SIGTERM"); - tokio::select! { - _ = tokio::signal::ctrl_c() => tracing::info!("received SIGINT"), - _ = sigterm.recv() => tracing::info!("received SIGTERM"), - } - }; - - axum::serve(listener, app) - .with_graceful_shutdown(shutdown) - .await?; - - tracing::info!("syva-api stopped"); - Ok(()) + routes::serve(routes::Config { + listen: cli.listen, + cp_endpoint: cli.cp_endpoint, + team_id: cli.team_id, + }) + .await } diff --git a/syva-adapter-api/src/routes.rs b/syva-adapter-api/src/routes.rs index d9d4228..05261ea 100644 --- a/syva-adapter-api/src/routes.rs +++ b/syva-adapter-api/src/routes.rs @@ -1,560 +1,321 @@ -//! REST API route handlers that proxy to syva-core gRPC. 
- -use std::convert::Infallible; -use std::sync::Arc; - +use anyhow::{Context, Result}; use axum::{ extract::{Path, Query, State}, http::StatusCode, - response::{ - sse::{Event, KeepAlive}, - IntoResponse, Response, Sse, - }, - routing::{delete, get, post}, + response::{IntoResponse, Response}, + routing::{get, post}, Json, Router, }; use serde::{Deserialize, Serialize}; -use syva_proto::syva_core::syva_core_client::SyvaCoreClient; -use syva_proto::syva_core::*; -use tokio::sync::Mutex; -use tokio_stream::StreamExt; -use tonic::transport::Channel; - -pub type SharedClient = Arc>>; - -// --------------------------------------------------------------------------- -// Request / Response types -// --------------------------------------------------------------------------- - -#[derive(Debug, Deserialize, Serialize)] -pub struct RegisterZoneBody { - pub zone_name: String, - pub policy: PolicyBody, +use serde_json::Value as JsonValue; +use std::net::SocketAddr; +use std::time::Duration; +use syva_cp_client::{CpClient, CpClientConfig, CreateZoneArgs, DeleteZoneArgs, UpdateZoneArgs}; +use tracing::warn; +use uuid::Uuid; + +#[derive(Clone)] +pub struct AppState { + cp: CpClient, + team_id: Uuid, } -#[derive(Debug, Deserialize, Serialize)] -pub struct PolicyBody { - #[serde(default)] - pub host_paths: Vec, - #[serde(default)] - pub allowed_zones: Vec, - #[serde(default)] - pub allow_ptrace: bool, +pub struct Config { + pub listen: SocketAddr, + pub cp_endpoint: String, + pub team_id: Uuid, } -#[derive(Debug, Serialize)] -pub struct ZoneIdResponse { - pub zone_id: u32, +#[derive(Debug)] +pub struct ApiError { + status: StatusCode, + message: String, } -#[derive(Debug, Serialize)] -pub struct OkResponse { - pub ok: bool, - #[serde(skip_serializing_if = "Option::is_none")] - pub message: Option, +#[derive(Serialize)] +struct ErrorBody { + error: String, } -#[derive(Debug, Deserialize)] -pub struct RemoveZoneQuery { - #[serde(default)] - pub drain: bool, +#[derive(Deserialize)] 
+pub struct CreateZoneBody { + pub name: String, + pub display_name: Option, + pub policy_json: JsonValue, + pub selector_json: Option, } -#[derive(Debug, Deserialize)] -pub struct AttachContainerBody { - pub container_id: String, - pub cgroup_id: u64, +#[derive(Deserialize)] +pub struct UpdateZoneBody { + pub if_version: i64, + pub policy_json: Option, + pub selector_json: Option, } -#[derive(Debug, Deserialize)] -pub struct AllowCommBody { - pub peer_zone: String, +#[derive(Deserialize)] +pub struct ListZonesQuery { + pub status: Option, + pub limit: Option, } -#[derive(Debug, Serialize)] -pub struct StatusJson { - pub attached: bool, - pub zones_active: u32, - pub containers_active: u32, - pub uptime_secs: u64, - pub hooks: Vec, - pub max_zones: u32, +#[derive(Serialize)] +pub struct CreateZoneOut { + pub zone_id: String, + pub policy_id: String, + pub version: i64, } -#[derive(Debug, Serialize)] -pub struct ZoneSummaryJson { +#[derive(Serialize)] +pub struct ZoneOut { + pub zone_id: String, + pub team_id: String, pub name: String, - pub zone_id: u32, - pub state: String, - pub containers_active: u32, + pub display_name: Option, + pub status: String, + pub version: i64, + pub current_policy_id: Option, + pub current_policy_json: Option, + pub selector_json: Option, + pub metadata_json: Option, } -#[derive(Debug, Serialize)] -pub struct CommPairJson { - pub zone_a: String, - pub zone_b: String, +#[derive(Serialize)] +pub struct HealthOut { + pub ok: bool, } -#[derive(Debug, Serialize)] -pub struct HookStatusJson { - pub hook: String, - pub allow: u64, - pub deny: u64, - pub error: u64, - pub lost: u64, -} +pub async fn serve(config: Config) -> Result<()> { + let cp = connect_with_retry(&config.cp_endpoint).await; + let app = router(AppState { + cp, + team_id: config.team_id, + }); -#[derive(Debug, Deserialize)] -pub struct WatchEventsQuery { - #[serde(default)] - pub follow: bool, -} + let listener = tokio::net::TcpListener::bind(config.listen) + .await + 
.with_context(|| format!("bind {}", config.listen))?; + tracing::info!(listen = %config.listen, "syva-api listening"); -#[derive(Debug, Serialize)] -pub struct DenyEventJson { - pub timestamp_ns: u64, - pub hook: String, - pub zone_id: u32, - pub target_zone_id: u32, - pub pid: u32, - pub comm: String, - pub inode: u64, - pub context: String, + axum::serve(listener, app).await?; + Ok(()) } -// --------------------------------------------------------------------------- -// Router -// --------------------------------------------------------------------------- - -pub fn router(client: SharedClient) -> Router { +pub fn router(state: AppState) -> Router { Router::new() - .route("/zones", get(list_zones).post(register_zone)) - .route("/zones/{name}", delete(remove_zone)) - .route("/zones/{name}/containers", post(attach_container)) - .route("/containers/{id}", delete(detach_container)) - .route("/zones/{name}/comms", get(list_comms).post(allow_comm)) - .route("/zones/{name}/comms/{peer}", delete(deny_comm)) - .route("/status", get(status)) - .route("/events", get(watch_events)) - .with_state(client) + .route("/v1/zones", post(create_zone).get(list_zones)) + .route("/v1/zones/{name}", get(get_zone).put(update_zone).delete(delete_zone)) + .route("/healthz", get(healthz)) + .with_state(state) } -// --------------------------------------------------------------------------- -// Handlers -// --------------------------------------------------------------------------- - -async fn register_zone( - State(client): State, - Json(body): Json, -) -> Response { - let policy = ZonePolicy { - host_paths: body.policy.host_paths, - allowed_zones: body.policy.allowed_zones, - allow_ptrace: body.policy.allow_ptrace, - zone_type: ZoneType::Standard.into(), - }; - - let req = RegisterZoneRequest { - zone_name: body.zone_name, - policy: Some(policy), - }; - - let mut c = client.lock().await; - match c.register_zone(req).await { - Ok(resp) => { - let inner = resp.into_inner(); - ( - 
StatusCode::CREATED, - Json(ZoneIdResponse { - zone_id: inner.zone_id, - }), - ) - .into_response() +async fn connect_with_retry(endpoint: &str) -> CpClient { + let mut backoff = Duration::from_millis(250); + let max_backoff = Duration::from_secs(30); + + loop { + match CpClient::connect(CpClientConfig { + endpoint: endpoint.to_string(), + ..Default::default() + }) + .await + { + Ok(client) => return client, + Err(error) => { + warn!( + endpoint, + error = %error, + backoff_ms = backoff.as_millis(), + "could not connect to syva-cp; retrying" + ); + tokio::time::sleep(backoff).await; + backoff = (backoff * 2).min(max_backoff); + } } - Err(e) => grpc_error_to_response(e), } } -async fn remove_zone( - State(client): State, - Path(name): Path, - Query(q): Query, -) -> Response { - let req = RemoveZoneRequest { - zone_name: name, - drain: q.drain, - }; - - let mut c = client.lock().await; - match c.remove_zone(req).await { - Ok(resp) => { - let inner = resp.into_inner(); - Json(OkResponse { - ok: inner.ok, - message: if inner.message.is_empty() { - None - } else { - Some(inner.message) - }, - }) - .into_response() - } - Err(e) => grpc_error_to_response(e), - } +pub async fn create_zone( + State(state): State, + Json(body): Json, +) -> Result<(StatusCode, Json), ApiError> { + let output = state + .cp + .create_zone(CreateZoneArgs { + team_id: state.team_id, + name: body.name, + display_name: body.display_name, + policy_json: body.policy_json, + summary_json: None, + selector_json: body.selector_json, + metadata_json: None, + }) + .await + .map_err(ApiError::from_cp)?; + + Ok(( + StatusCode::CREATED, + Json(CreateZoneOut { + zone_id: output.zone_id.to_string(), + policy_id: output.policy_id.to_string(), + version: output.version, + }), + )) } -async fn attach_container( - State(client): State, - Path(name): Path, - Json(body): Json, -) -> Response { - let req = AttachContainerRequest { - container_id: body.container_id, - zone_name: name, - cgroup_id: body.cgroup_id, - }; 
- - let mut c = client.lock().await; - match c.attach_container(req).await { - Ok(resp) => { - let inner = resp.into_inner(); - ( - StatusCode::CREATED, - Json(OkResponse { - ok: inner.ok, - message: if inner.message.is_empty() { - None - } else { - Some(inner.message) - }, - }), - ) - .into_response() - } - Err(e) => grpc_error_to_response(e), - } +pub async fn list_zones( + State(state): State, + Query(query): Query, +) -> Result>, ApiError> { + let zones = state + .cp + .list_zones( + state.team_id, + query.status.as_deref(), + query.limit.unwrap_or(100), + ) + .await + .map_err(ApiError::from_cp)?; + + Ok(Json(zones.into_iter().map(zone_to_out).collect())) } -async fn detach_container( - State(client): State, - Path(id): Path, -) -> Response { - let req = DetachContainerRequest { container_id: id }; - - let mut c = client.lock().await; - match c.detach_container(req).await { - Ok(resp) => { - let inner = resp.into_inner(); - Json(OkResponse { - ok: inner.ok, - message: None, - }) - .into_response() - } - Err(e) => grpc_error_to_response(e), - } +pub async fn get_zone( + State(state): State, + Path(name): Path, +) -> Result, ApiError> { + let zone = state + .cp + .get_zone_by_name(state.team_id, &name) + .await + .map_err(ApiError::from_cp)? 
+ .ok_or_else(|| ApiError::not_found(format!("zone '{name}' not found")))?; + + Ok(Json(zone_to_out(zone))) } -async fn allow_comm( - State(client): State, +pub async fn update_zone( + State(state): State, Path(name): Path, - Json(body): Json, -) -> Response { - let req = AllowCommRequest { - zone_a: name, - zone_b: body.peer_zone, - }; - - let mut c = client.lock().await; - match c.allow_comm(req).await { - Ok(resp) => { - let inner = resp.into_inner(); - Json(OkResponse { - ok: inner.ok, - message: None, - }) - .into_response() - } - Err(e) => grpc_error_to_response(e), - } + Json(body): Json, +) -> Result, ApiError> { + let snapshot = state + .cp + .get_zone_by_name(state.team_id, &name) + .await + .map_err(ApiError::from_cp)? + .ok_or_else(|| ApiError::not_found(format!("zone '{name}' not found")))?; + + state + .cp + .update_zone(UpdateZoneArgs { + zone_id: snapshot.zone_id, + if_version: body.if_version, + policy_json: body.policy_json, + selector_json: body.selector_json, + metadata_json: None, + }) + .await + .map_err(ApiError::from_cp)?; + + let refreshed = state + .cp + .get_zone_by_name(state.team_id, &name) + .await + .map_err(ApiError::from_cp)? 
+ .ok_or_else(|| ApiError::not_found(format!("zone '{name}' not found after update")))?; + + Ok(Json(zone_to_out(refreshed))) } -async fn status(State(client): State) -> Response { - let mut c = client.lock().await; - match c.status(StatusRequest {}).await { - Ok(resp) => { - let inner = resp.into_inner(); - let hooks = inner - .hooks - .into_iter() - .map(|h| HookStatusJson { - hook: h.hook, - allow: h.allow, - deny: h.deny, - error: h.error, - lost: h.lost, - }) - .collect(); - - Json(StatusJson { - attached: inner.attached, - zones_active: inner.zones_active, - containers_active: inner.containers_active, - uptime_secs: inner.uptime_secs, - hooks, - max_zones: inner.max_zones, - }) - .into_response() - } - Err(e) => grpc_error_to_response(e), - } +pub async fn delete_zone( + State(state): State, + Path(name): Path, +) -> Result { + let snapshot = state + .cp + .get_zone_by_name(state.team_id, &name) + .await + .map_err(ApiError::from_cp)? + .ok_or_else(|| ApiError::not_found(format!("zone '{name}' not found")))?; + + state + .cp + .delete_zone(DeleteZoneArgs { + zone_id: snapshot.zone_id, + if_version: snapshot.version, + drain: true, + }) + .await + .map_err(ApiError::from_cp)?; + + Ok(StatusCode::NO_CONTENT) } -async fn list_zones(State(client): State) -> Response { - let mut c = client.lock().await; - match c.list_zones(ListZonesRequest {}).await { - Ok(resp) => { - let out: Vec = resp - .into_inner() - .zones - .into_iter() - .map(|z| ZoneSummaryJson { - name: z.name, - zone_id: z.zone_id, - state: z.state, - containers_active: z.containers_active, - }) - .collect(); - Json(out).into_response() - } - Err(e) => grpc_error_to_response(e), - } +pub async fn healthz() -> Json { + Json(HealthOut { ok: true }) } -async fn list_comms( - State(client): State, - Path(name): Path, -) -> Response { - let mut c = client.lock().await; - match c.list_comms(ListCommsRequest { zone_name: name }).await { - Ok(resp) => { - let out: Vec = resp - .into_inner() - .pairs - 
.into_iter() - .map(|p| CommPairJson { zone_a: p.zone_a, zone_b: p.zone_b }) - .collect(); - Json(out).into_response() - } - Err(e) => grpc_error_to_response(e), +fn zone_to_out(zone: syva_cp_client::ZoneSnapshot) -> ZoneOut { + ZoneOut { + zone_id: zone.zone_id.to_string(), + team_id: zone.team_id.to_string(), + name: zone.name, + display_name: zone.display_name, + status: zone.status, + version: zone.version, + current_policy_id: zone.current_policy_id.map(|id| id.to_string()), + current_policy_json: zone.current_policy_json, + selector_json: zone.selector_json, + metadata_json: zone.metadata_json, } } -async fn deny_comm( - State(client): State, - Path((name, peer)): Path<(String, String)>, -) -> Response { - let req = DenyCommRequest { zone_a: name, zone_b: peer }; - let mut c = client.lock().await; - match c.deny_comm(req).await { - Ok(resp) => { - let inner = resp.into_inner(); - Json(OkResponse { ok: inner.ok, message: None }).into_response() +impl ApiError { + fn not_found(message: String) -> Self { + Self { + status: StatusCode::NOT_FOUND, + message, } - Err(e) => grpc_error_to_response(e), } -} -async fn watch_events( - State(client): State, - Query(q): Query, -) -> Response { - let req = WatchEventsRequest { follow: q.follow }; - - let mut c = client.lock().await; - let stream = match c.watch_events(req).await { - Ok(resp) => resp.into_inner(), - Err(e) => return grpc_error_to_response(e), - }; - // Release the lock before streaming. - drop(c); - - let sse_stream = stream.map(|result| -> Result { - match result { - Ok(event) => { - let json = DenyEventJson { - timestamp_ns: event.timestamp_ns, - hook: event.hook, - zone_id: event.zone_id, - target_zone_id: event.target_zone_id, - pid: event.pid, - comm: event.comm, - inode: event.inode, - context: event.context, - }; - // Best-effort JSON serialization; on failure send raw debug. 
- let data = serde_json::to_string(&json) - .unwrap_or_else(|_| format!("{json:?}")); - Ok(Event::default().event("deny").data(data)) - } - Err(e) => Ok(Event::default() - .event("error") - .data(format!("gRPC stream error: {e}"))), + fn from_cp(error: syva_cp_client::CpClientError) -> Self { + Self { + status: StatusCode::BAD_GATEWAY, + message: error.to_string(), } - }); - - Sse::new(sse_stream) - .keep_alive(KeepAlive::default()) - .into_response() + } } -// --------------------------------------------------------------------------- -// Helpers -// --------------------------------------------------------------------------- - -fn grpc_error_to_response(e: tonic::Status) -> Response { - let status = match e.code() { - tonic::Code::NotFound => StatusCode::NOT_FOUND, - tonic::Code::InvalidArgument => StatusCode::BAD_REQUEST, - tonic::Code::AlreadyExists => StatusCode::CONFLICT, - tonic::Code::FailedPrecondition => StatusCode::BAD_REQUEST, - _ => StatusCode::INTERNAL_SERVER_ERROR, - }; - - ( - status, - Json(OkResponse { - ok: false, - message: Some(e.message().to_string()), - }), - ) - .into_response() +impl IntoResponse for ApiError { + fn into_response(self) -> Response { + (self.status, Json(ErrorBody { error: self.message })).into_response() + } } -// --------------------------------------------------------------------------- -// Tests -// --------------------------------------------------------------------------- - #[cfg(test)] mod tests { use super::*; #[test] - fn register_zone_body_deserializes() { + fn create_zone_body_deserializes() { let json = r#"{ - "zone_name": "web", - "policy": { - "host_paths": ["/data"], - "allowed_zones": ["db"], - "allow_ptrace": false - } + "name":"web", + "display_name":"Web", + "policy_json":{"host_paths":["/data"]}, + "selector_json":{"all_nodes":true} }"#; - let body: RegisterZoneBody = serde_json::from_str(json).expect("deserialize"); - assert_eq!(body.zone_name, "web"); - assert_eq!(body.policy.host_paths, vec!["/data"]); 
- assert_eq!(body.policy.allowed_zones, vec!["db"]); - assert!(!body.policy.allow_ptrace); - } - - #[test] - fn register_zone_body_defaults() { - let json = r#"{"zone_name": "minimal", "policy": {}}"#; - let body: RegisterZoneBody = serde_json::from_str(json).expect("deserialize"); - assert_eq!(body.zone_name, "minimal"); - assert!(body.policy.host_paths.is_empty()); - assert!(body.policy.allowed_zones.is_empty()); - assert!(!body.policy.allow_ptrace); - } - - #[test] - fn zone_id_response_serializes() { - let resp = ZoneIdResponse { zone_id: 42 }; - let json = serde_json::to_string(&resp).expect("serialize"); - assert_eq!(json, r#"{"zone_id":42}"#); - } - - #[test] - fn ok_response_skips_none_message() { - let resp = OkResponse { - ok: true, - message: None, - }; - let json = serde_json::to_string(&resp).expect("serialize"); - assert_eq!(json, r#"{"ok":true}"#); - } - - #[test] - fn ok_response_includes_message() { - let resp = OkResponse { - ok: false, - message: Some("zone not found".into()), - }; - let json = serde_json::to_string(&resp).expect("serialize"); - assert!(json.contains("zone not found")); - } - - #[test] - fn attach_container_body_deserializes() { - let json = r#"{"container_id": "abc123", "cgroup_id": 99999}"#; - let body: AttachContainerBody = serde_json::from_str(json).expect("deserialize"); - assert_eq!(body.container_id, "abc123"); - assert_eq!(body.cgroup_id, 99999); - } - - #[test] - fn allow_comm_body_deserializes() { - let json = r#"{"peer_zone": "backend"}"#; - let body: AllowCommBody = serde_json::from_str(json).expect("deserialize"); - assert_eq!(body.peer_zone, "backend"); - } - - #[test] - fn status_json_serializes() { - let status = StatusJson { - attached: true, - zones_active: 3, - containers_active: 7, - uptime_secs: 3600, - hooks: vec![HookStatusJson { - hook: "file_open".into(), - allow: 100, - deny: 5, - error: 0, - lost: 0, - }], - max_zones: 4096, - }; - let json = serde_json::to_string(&status).expect("serialize"); - 
assert!(json.contains("\"attached\":true")); - assert!(json.contains("\"file_open\"")); - } - - #[test] - fn deny_event_json_serializes() { - let event = DenyEventJson { - timestamp_ns: 1234567890, - hook: "exec_guard".into(), - zone_id: 1, - target_zone_id: 2, - pid: 42, - comm: "cat".into(), - inode: 12345, - context: "cross-zone exec".into(), - }; - let json = serde_json::to_string(&event).expect("serialize"); - assert!(json.contains("exec_guard")); - assert!(json.contains("cross-zone exec")); - } - - #[test] - fn remove_zone_query_defaults() { - let q: RemoveZoneQuery = serde_json::from_str("{}").expect("deserialize"); - assert!(!q.drain); + let body: CreateZoneBody = serde_json::from_str(json).unwrap(); + assert_eq!(body.name, "web"); + assert_eq!(body.display_name.as_deref(), Some("Web")); + assert_eq!(body.policy_json["host_paths"], serde_json::json!(["/data"])); + assert_eq!(body.selector_json.unwrap()["all_nodes"], serde_json::json!(true)); } #[test] - fn watch_events_query_defaults() { - let q: WatchEventsQuery = serde_json::from_str("{}").expect("deserialize"); - assert!(!q.follow); + fn update_zone_body_deserializes() { + let json = r#"{"if_version":7,"policy_json":{"allow_ptrace":true}}"#; + let body: UpdateZoneBody = serde_json::from_str(json).unwrap(); + assert_eq!(body.if_version, 7); + assert_eq!(body.policy_json.unwrap()["allow_ptrace"], serde_json::json!(true)); } } From 571b44f03a06deb70fdd32111623c8fd11899fd8 Mon Sep 17 00:00:00 2001 From: Yair Etziony Date: Sat, 25 Apr 2026 02:47:48 +0200 Subject: [PATCH 5/9] refactor(syva-core): remove local gRPC surface, make --cp-endpoint mandatory syva-core now only ingests zones via syva-cp's NodeAssignmentUpdate stream. The local gRPC server is deleted. Adapters connect to syva-cp directly (see session 4b). 
--- Cargo.lock | 3 - syva-core/Cargo.toml | 3 - syva-core/src/cp_reconcile/mod.rs | 22 +- syva-core/src/ebpf.rs | 4 + syva-core/src/ingest.rs | 226 ++++++++++ syva-core/src/main.rs | 176 +++----- syva-core/src/rpc/mod.rs | 680 ------------------------------ syva-core/src/zone.rs | 10 + 8 files changed, 316 insertions(+), 808 deletions(-) create mode 100644 syva-core/src/ingest.rs delete mode 100644 syva-core/src/rpc/mod.rs diff --git a/Cargo.lock b/Cargo.lock index c4d276b..e2caeab 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3079,16 +3079,13 @@ dependencies = [ "aya", "clap", "libc", - "prost", "serde", "serde_json", "syva-cp-client", "syva-ebpf-common", "syva-proto", "tokio", - "tokio-stream", "tokio-util", - "tonic", "tracing", "tracing-subscriber", "uuid", diff --git a/syva-core/Cargo.toml b/syva-core/Cargo.toml index 56218af..0301c27 100644 --- a/syva-core/Cargo.toml +++ b/syva-core/Cargo.toml @@ -12,9 +12,7 @@ syva-proto = { path = "../syva-proto" } syva-cp-client = { path = "../syva-cp-client" } syva-ebpf-common = { path = "../syva-ebpf-common", features = ["userspace"] } aya = { workspace = true } -tonic = { workspace = true } tokio = { workspace = true } -tokio-stream = { workspace = true } tokio-util = { workspace = true } tracing = { workspace = true } tracing-subscriber = { workspace = true } @@ -24,5 +22,4 @@ serde = { workspace = true } serde_json = { workspace = true } clap = { workspace = true } libc = "0.2" -prost = { workspace = true } uuid = { version = "1", features = ["v4", "serde"] } diff --git a/syva-core/src/cp_reconcile/mod.rs b/syva-core/src/cp_reconcile/mod.rs index dcb756f..738f46a 100644 --- a/syva-core/src/cp_reconcile/mod.rs +++ b/syva-core/src/cp_reconcile/mod.rs @@ -5,16 +5,14 @@ //! //! 1. Receive a `NodeAssignmentUpdate` from syva-cp //! 2. Diff against the last applied state -//! 3. Reuse the same registry/BPF mutation helpers as the local gRPC path +//! 3. 
Reuse the same registry/BPF mutation helpers as the former local gRPC path //! 4. Report applied or failed status back to syva-cp -#![allow(dead_code)] - pub mod state; use crate::ebpf::EnforceEbpf; use crate::health::SharedHealth; -use crate::rpc::{ +use crate::ingest::{ allow_comm_local, deny_comm_local, register_zone_local, remove_zone_local, CoreZonePolicyInput, }; use crate::types::ZoneType; @@ -146,8 +144,6 @@ impl Reconciler { ); } - self.sync_allowed_comms().await; - applied_reports.push(AppliedReport { assignment_id, actual_zone_version: assignment.desired_zone_version, @@ -170,6 +166,8 @@ impl Reconciler { } } + self.sync_allowed_comms().await; + if !applied_reports.is_empty() || !failed_reports.is_empty() { if let Err(error) = self .cp @@ -208,13 +206,17 @@ impl Reconciler { ) .await { - Ok(result) if result.ok => { - let mut applied = self.applied.lock().await; - applied.record_removed(&zone_id); - drop(applied); + Ok(result) if result.ok && result.fully_removed => { + { + let mut applied = self.applied.lock().await; + applied.record_removed(&zone_id); + } self.sync_allowed_comms().await; info!(zone_name, "zone removed"); } + Ok(result) if result.ok => { + info!(zone_name, "zone draining"); + } Ok(result) => { warn!(zone_name, message = result.message, "zone remove rejected"); } diff --git a/syva-core/src/ebpf.rs b/syva-core/src/ebpf.rs index 12be7c7..5ecf855 100644 --- a/syva-core/src/ebpf.rs +++ b/syva-core/src/ebpf.rs @@ -126,12 +126,14 @@ impl EnforceEbpf { } /// Take ownership of the ring buffer for event streaming. + #[cfg_attr(not(test), allow(dead_code))] pub fn take_event_ring_buf(&mut self) -> Option> { let map = self.bpf.take_map("ENFORCEMENT_EVENTS")?; RingBuf::try_from(map).ok() } /// Register a cgroup as belonging to a zone. + #[cfg_attr(not(test), allow(dead_code))] pub fn add_zone_member( &mut self, cgroup_id: u64, @@ -158,6 +160,7 @@ impl EnforceEbpf { } /// Remove a cgroup from zone membership. 
+ #[cfg_attr(not(test), allow(dead_code))] pub fn remove_zone_member(&mut self, cgroup_id: u64) -> anyhow::Result<()> { let mut map: AyaHashMap<_, u64, ZoneInfoKernel> = AyaHashMap::try_from( self.bpf.map_mut("ZONE_MEMBERSHIP") @@ -459,6 +462,7 @@ impl EnforceEbpf { } /// Register a single path's inode in INODE_ZONE_MAP (non-recursive). + #[cfg_attr(not(test), allow(dead_code))] pub fn register_single_inode(&mut self, zone_id: u32, path: &str) -> anyhow::Result { let canon = fs::canonicalize(path) .map_err(|e| anyhow::anyhow!("failed to canonicalize '{}': {e}", path))?; diff --git a/syva-core/src/ingest.rs b/syva-core/src/ingest.rs new file mode 100644 index 0000000..f9429ea --- /dev/null +++ b/syva-core/src/ingest.rs @@ -0,0 +1,226 @@ +use std::sync::Arc; + +use tokio::sync::{Mutex, RwLock}; + +use crate::ebpf::EnforceEbpf; +use crate::health::SharedHealth; +use crate::types::ZoneType; +use crate::zone::ZoneRegistry; + +#[derive(Debug, Clone)] +pub(crate) struct CoreZonePolicyInput { + pub host_paths: Vec, + pub allowed_zones: Vec, + pub allow_ptrace: bool, + pub zone_type: ZoneType, +} + +#[derive(Debug, Clone)] +pub(crate) struct RemoveZoneResult { + pub ok: bool, + pub message: String, + pub fully_removed: bool, +} + +pub(crate) async fn register_zone_local( + registry: &Arc>, + ebpf: &Arc>, + health: &SharedHealth, + zone_name: &str, + policy: Option, +) -> anyhow::Result { + let (zone_id, zones_loaded, was_new) = { + let mut registry = registry.write().await; + let was_new = registry.zone_id(zone_name).is_none(); + registry.register_zone(zone_name)?; + let zone_id = registry.revive_draining(zone_name)?; + let zones_loaded = registry.zone_count(); + (zone_id, zones_loaded, was_new) + }; + + if let Some(policy) = policy { + let mut internal_policy = crate::types::ZonePolicy::default(); + if policy.allow_ptrace { + internal_policy + .capabilities + .allowed + .push("CAP_SYS_PTRACE".to_string()); + } + internal_policy.filesystem.host_paths = 
policy.host_paths.clone(); + internal_policy.network.allowed_zones = policy.allowed_zones; + + { + let mut ebpf = ebpf.lock().await; + if let Err(error) = ebpf.set_zone_policy(zone_id, &internal_policy) { + if was_new { + let mut registry = registry.write().await; + let _ = registry.unregister_zone(zone_name); + health.write().await.zones_loaded = registry.zone_count(); + } + return Err(error); + } + + if !policy.host_paths.is_empty() { + match ebpf.populate_inode_zone_map(zone_id, &policy.host_paths) { + Ok(inodes) => { + tracing::info!(zone = zone_name, zone_id, inodes, "inode map populated"); + } + Err(error) => { + tracing::warn!(zone = zone_name, %error, "inode map population failed"); + } + } + } + } + + let _ = policy.zone_type; + } + + health.write().await.zones_loaded = zones_loaded; + Ok(zone_id) +} + +pub(crate) async fn remove_zone_local( + registry: &Arc>, + ebpf: &Arc>, + health: &SharedHealth, + zone_name: &str, + drain: bool, +) -> anyhow::Result { + let (cleanup_zone_id, zones_loaded, outcome) = { + let mut registry = registry.write().await; + + if drain { + registry.mark_draining(zone_name)?; + let refcount = registry.refcount(zone_name); + if refcount > 0 { + let zones_loaded = registry.zone_count(); + ( + None, + zones_loaded, + RemoveZoneResult { + ok: true, + message: String::new(), + fully_removed: false, + }, + ) + } else { + let zone_id = registry.unregister_zone(zone_name)?; + let zones_loaded = registry.zone_count(); + ( + Some(zone_id), + zones_loaded, + RemoveZoneResult { + ok: true, + message: String::new(), + fully_removed: true, + }, + ) + } + } else { + let refcount = registry.refcount(zone_name); + if refcount > 0 { + ( + None, + registry.zone_count(), + RemoveZoneResult { + ok: false, + message: format!( + "zone '{}' has {} active containers — use drain=true or detach them first", + zone_name, refcount + ), + fully_removed: false, + }, + ) + } else { + let zone_id = registry.unregister_zone(zone_name)?; + let zones_loaded = 
registry.zone_count(); + ( + Some(zone_id), + zones_loaded, + RemoveZoneResult { + ok: true, + message: String::new(), + fully_removed: true, + }, + ) + } + } + }; + + if let Some(zone_id) = cleanup_zone_id { + let mut ebpf = ebpf.lock().await; + let _ = ebpf.remove_zone_policy(zone_id); + let _ = ebpf.remove_zone_comms(zone_id); + let _ = ebpf.remove_zone_inodes(zone_id); + tracing::info!(zone = zone_name, zone_id, "zone removed"); + } else if outcome.ok && drain { + tracing::info!(zone = zone_name, "zone marked as draining"); + } + + health.write().await.zones_loaded = zones_loaded; + Ok(outcome) +} + +pub(crate) async fn allow_comm_local( + registry: &Arc>, + ebpf: &Arc>, + zone_a: &str, + zone_b: &str, +) -> anyhow::Result<()> { + let (zone_a_id, zone_b_id) = { + let registry = registry.read().await; + let zone_a_id = registry + .zone_id(zone_a) + .ok_or_else(|| anyhow::anyhow!("zone '{}' not registered", zone_a))?; + let zone_b_id = registry + .zone_id(zone_b) + .ok_or_else(|| anyhow::anyhow!("zone '{}' not registered", zone_b))?; + (zone_a_id, zone_b_id) + }; + + { + let mut ebpf = ebpf.lock().await; + ebpf.set_zone_allowed_comms(zone_a_id, zone_b_id)?; + } + + { + let mut registry = registry.write().await; + if registry.zone_id(zone_a).is_some() && registry.zone_id(zone_b).is_some() { + registry.record_allow_comm(zone_a, zone_b); + } + } + + tracing::info!(zone_a, zone_b, "cross-zone comm allowed"); + Ok(()) +} + +pub(crate) async fn deny_comm_local( + registry: &Arc>, + ebpf: &Arc>, + zone_a: &str, + zone_b: &str, +) -> anyhow::Result<()> { + let (zone_a_id, zone_b_id) = { + let registry = registry.read().await; + let zone_a_id = registry + .zone_id(zone_a) + .ok_or_else(|| anyhow::anyhow!("zone '{}' not registered", zone_a))?; + let zone_b_id = registry + .zone_id(zone_b) + .ok_or_else(|| anyhow::anyhow!("zone '{}' not registered", zone_b))?; + (zone_a_id, zone_b_id) + }; + + { + let mut ebpf = ebpf.lock().await; + ebpf.remove_zone_comm_pair(zone_a_id, 
zone_b_id)?; + } + + { + let mut registry = registry.write().await; + registry.record_deny_comm(zone_a, zone_b); + } + + tracing::info!(zone_a, zone_b, "cross-zone comm denied"); + Ok(()) +} diff --git a/syva-core/src/main.rs b/syva-core/src/main.rs index 1c86a54..0fb1645 100644 --- a/syva-core/src/main.rs +++ b/syva-core/src/main.rs @@ -1,8 +1,7 @@ -//! syva-core — eBPF enforcement engine with gRPC API. +//! syva-core — eBPF enforcement engine. //! -//! The core engine manages BPF programs and maps. Adapters connect via -//! Unix socket gRPC to register zones, attach containers, and manage -//! cross-zone communication policies. +//! The core engine manages BPF programs and maps and consumes desired +//! zone state from syva-cp. //! //! Usage: //! syva-core # Start the enforcement engine @@ -14,24 +13,20 @@ mod btf; mod ebpf; mod events; mod health; -pub mod rpc; +mod ingest; pub mod types; mod zone; use std::path::PathBuf; use std::sync::Arc; -use std::time::Instant; use clap::{Parser, Subcommand}; use syva_cp_client::CpClientConfig; use tokio::sync::{Mutex, RwLock}; -use tonic::transport::Server; use tracing_subscriber::EnvFilter; -use syva_proto::syva_core::syva_core_server::SyvaCoreServer; - #[derive(Parser)] -#[command(name = "syva-core", about = "eBPF enforcement engine with gRPC API")] +#[command(name = "syva-core", about = "eBPF enforcement engine")] struct Cli { #[command(subcommand)] command: Option, @@ -44,15 +39,9 @@ struct Cli { #[arg(long, default_value = "9091")] health_port: u16, - /// Unix socket path for the gRPC server. - #[arg(long, default_value = "/run/syva/syva-core.sock")] - socket_path: String, - - /// Optional syva-cp endpoint. When set, syva-core registers with - /// syva-cp and consumes assignment updates in addition to its local - /// adapter-facing gRPC surface. + /// syva-cp endpoint. Required. #[arg(long, env = "SYVA_CP_ENDPOINT")] - cp_endpoint: Option, + cp_endpoint: String, /// Hostname to report to syva-cp. 
Defaults to the system hostname. #[arg(long, env = "SYVA_NODE_NAME")] @@ -129,8 +118,6 @@ async fn main() -> anyhow::Result<()> { async fn cmd_run(config: Cli) -> anyhow::Result<()> { tracing::info!("syva-core starting"); - let start_time = Instant::now(); - // Health state — shared with the HTTP server. Starts as unhealthy // (not attached, zero zones) and transitions as startup progresses. let health_state = health::SharedHealth::new(RwLock::new( @@ -165,36 +152,7 @@ async fn cmd_run(config: Cli) -> anyhow::Result<()> { let registry = Arc::new(RwLock::new(zone::ZoneRegistry::new())); let ebpf = Arc::new(Mutex::new(mgr)); - tracing::info!("startup complete — enforcement active, awaiting gRPC connections"); - - // Ensure parent directory for socket path exists. - if let Some(parent) = std::path::Path::new(&config.socket_path).parent() { - if !parent.exists() { - std::fs::create_dir_all(parent) - .map_err(|e| anyhow::anyhow!("failed to create socket directory {}: {e}", parent.display()))?; - } - } - - // Remove stale socket file if it exists. - if std::path::Path::new(&config.socket_path).exists() { - std::fs::remove_file(&config.socket_path) - .map_err(|e| anyhow::anyhow!("failed to remove stale socket {}: {e}", config.socket_path))?; - } - - // Build gRPC service. - let service = rpc::SyvaCoreService { - registry: registry.clone(), - ebpf: ebpf.clone(), - health: health_state.clone(), - start_time, - }; - - // Start gRPC server on Unix socket. - let uds = tokio::net::UnixListener::bind(&config.socket_path) - .map_err(|e| anyhow::anyhow!("failed to bind Unix socket {}: {e}", config.socket_path))?; - let uds_stream = tokio_stream::wrappers::UnixListenerStream::new(uds); - - tracing::info!(socket = config.socket_path, "gRPC server listening"); + tracing::info!("startup complete — enforcement active"); // Shutdown on SIGINT (ctrl-c) or SIGTERM (Kubernetes pod termination). 
let mut sigterm = tokio::signal::unix::signal( @@ -252,71 +210,56 @@ async fn cmd_run(config: Cli) -> anyhow::Result<()> { } }); - if let Some(endpoint) = config.cp_endpoint.as_ref() { - let node_name = config - .node_name - .clone() - .or_else(system_hostname) - .unwrap_or_else(|| "unknown".to_string()); - let cp_config = CpClientConfig { - endpoint: endpoint.clone(), - node_name, - cluster_id: config.cluster_id.clone(), - fingerprint: read_fingerprint(&config.fingerprint_path), - labels: parse_labels(&config.node_labels), - node_id_path: config.node_id_path.clone(), - heartbeat_interval: std::time::Duration::from_secs(config.heartbeat_secs), - ..Default::default() - }; - - match syva_cp_client::CpClient::connect(cp_config).await { - Ok(cp) => match cp.register().await { - Ok(registration) => { - tracing::info!(node_id = %registration.node_id, "registered with syva-cp"); - - let heartbeat_handle = cp.spawn_heartbeat_loop(); - let reconciler = cp_reconcile::Reconciler::new( - cp, - registry.clone(), - ebpf.clone(), - health_state.clone(), - ); - tokio::spawn(async move { - let _heartbeat = heartbeat_handle; - reconciler.run().await; - }); - - tracing::info!("syva-core CP mode active"); - } - Err(error) => { - tracing::error!("could not register with syva-cp at startup: {error}"); - tracing::warn!("syva-core running in degraded mode (local adapters only)"); - } - }, - Err(error) => { - tracing::error!("could not connect to syva-cp at startup: {error}"); - tracing::warn!("syva-core running in degraded mode (local adapters only)"); - } - } - } + let node_name = config + .node_name + .clone() + .or_else(system_hostname) + .unwrap_or_else(|| "unknown".to_string()); + let cp_config = CpClientConfig { + endpoint: config.cp_endpoint.clone(), + node_name, + cluster_id: config.cluster_id.clone(), + fingerprint: read_fingerprint(&config.fingerprint_path), + labels: parse_labels(&config.node_labels), + node_id_path: config.node_id_path.clone(), + heartbeat_interval: 
std::time::Duration::from_secs(config.heartbeat_secs), + ..Default::default() + }; - // Run gRPC server with graceful shutdown. - let grpc_server = Server::builder() - .add_service(SyvaCoreServer::new(service)) - .serve_with_incoming_shutdown(uds_stream, async { - tokio::select! { - _ = tokio::signal::ctrl_c() => { - tracing::info!("received SIGINT — shutting down"); - } - _ = sigterm.recv() => { - tracing::info!("received SIGTERM — shutting down"); - } - } - }); + let cp = syva_cp_client::CpClient::connect(cp_config) + .await + .map_err(|error| anyhow::anyhow!("connect to syva-cp at {}: {error}", config.cp_endpoint))?; + let registration = cp + .register() + .await + .map_err(|error| anyhow::anyhow!("register with syva-cp: {error}"))?; + tracing::info!(node_id = %registration.node_id, "registered with syva-cp"); + + let _heartbeat = cp.spawn_heartbeat_loop(); + let reconciler = cp_reconcile::Reconciler::new( + cp, + registry.clone(), + ebpf.clone(), + health_state.clone(), + ); + let mut reconcile_task = tokio::spawn(async move { + reconciler.run().await; + }); - grpc_server.await?; + tokio::select! { + _ = tokio::signal::ctrl_c() => { + tracing::info!("received SIGINT — shutting down"); + } + _ = sigterm.recv() => { + tracing::info!("received SIGTERM — shutting down"); + } + _ = &mut reconcile_task => { + tracing::warn!("reconcile loop exited"); + } + } cancel.cancel(); + reconcile_task.abort(); // Drop ebpf manager (cleans up BPF pins). drop(ebpf); tracing::info!("syva-core stopped"); @@ -327,8 +270,17 @@ pub(crate) fn parse_labels(entries: &[String]) -> std::collections::BTreeMap bool { - !id.is_empty() - && id.len() <= 128 - && id.bytes().all(|b| b.is_ascii_hexdigit() || b == b'-' || b == b'_') -} - -/// The gRPC service implementation. 
-pub struct SyvaCoreService { - pub registry: Arc>, - pub ebpf: Arc>, - pub health: SharedHealth, - pub start_time: Instant, -} - -#[derive(Debug, Clone)] -pub(crate) struct CoreZonePolicyInput { - pub host_paths: Vec, - pub allowed_zones: Vec, - pub allow_ptrace: bool, - pub zone_type: ZoneType, -} - -#[derive(Debug, Clone)] -pub(crate) struct RemoveZoneResult { - pub ok: bool, - pub message: String, -} - -pub(crate) async fn register_zone_local( - registry: &Arc>, - ebpf: &Arc>, - health: &SharedHealth, - zone_name: &str, - policy: Option, -) -> anyhow::Result { - let mut registry = registry.write().await; - let zone_id = registry.register_zone(zone_name)?; - - if let Some(policy) = policy { - let mut ebpf = ebpf.lock().await; - - let mut internal_policy = crate::types::ZonePolicy::default(); - if policy.allow_ptrace { - internal_policy - .capabilities - .allowed - .push("CAP_SYS_PTRACE".to_string()); - } - internal_policy.filesystem.host_paths = policy.host_paths.clone(); - internal_policy.network.allowed_zones = policy.allowed_zones; - - ebpf.set_zone_policy(zone_id, &internal_policy)?; - - if !policy.host_paths.is_empty() { - match ebpf.populate_inode_zone_map(zone_id, &policy.host_paths) { - Ok(inodes) => { - tracing::info!(zone = zone_name, zone_id, inodes, "inode map populated"); - } - Err(error) => { - tracing::warn!(zone = zone_name, %error, "inode map population failed"); - } - } - } - - let _ = policy.zone_type; - } - - let mut health = health.write().await; - health.zones_loaded = registry.zone_count(); - - Ok(zone_id) -} - -pub(crate) async fn remove_zone_local( - registry: &Arc>, - ebpf: &Arc>, - health: &SharedHealth, - zone_name: &str, - drain: bool, -) -> anyhow::Result { - let mut registry = registry.write().await; - - if drain { - registry.mark_draining(zone_name)?; - - let refcount = registry.refcount(zone_name); - if refcount == 0 { - let zone_id = registry.unregister_zone(zone_name)?; - - let mut ebpf = ebpf.lock().await; - let _ = 
ebpf.remove_zone_policy(zone_id); - let _ = ebpf.remove_zone_comms(zone_id); - let _ = ebpf.remove_zone_inodes(zone_id); - - tracing::info!(zone = zone_name, zone_id, "zone drained and removed"); - } else { - tracing::info!(zone = zone_name, refcount, "zone marked as draining"); - } - - let mut health = health.write().await; - health.zones_loaded = registry.zone_count(); - - return Ok(RemoveZoneResult { - ok: true, - message: String::new(), - }); - } - - let refcount = registry.refcount(zone_name); - if refcount > 0 { - return Ok(RemoveZoneResult { - ok: false, - message: format!( - "zone '{}' has {} active containers — use drain=true or detach them first", - zone_name, refcount - ), - }); - } - - let zone_id = registry.unregister_zone(zone_name)?; - - let mut ebpf = ebpf.lock().await; - let _ = ebpf.remove_zone_policy(zone_id); - let _ = ebpf.remove_zone_comms(zone_id); - let _ = ebpf.remove_zone_inodes(zone_id); - - let mut health = health.write().await; - health.zones_loaded = registry.zone_count(); - - tracing::info!(zone = zone_name, zone_id, "zone removed"); - Ok(RemoveZoneResult { - ok: true, - message: String::new(), - }) -} - -pub(crate) async fn allow_comm_local( - registry: &Arc>, - ebpf: &Arc>, - zone_a: &str, - zone_b: &str, -) -> anyhow::Result<()> { - let (zone_a_id, zone_b_id) = { - let registry = registry.read().await; - let zone_a_id = registry - .zone_id(zone_a) - .ok_or_else(|| anyhow::anyhow!("zone '{}' not registered", zone_a))?; - let zone_b_id = registry - .zone_id(zone_b) - .ok_or_else(|| anyhow::anyhow!("zone '{}' not registered", zone_b))?; - (zone_a_id, zone_b_id) - }; - - { - let mut ebpf = ebpf.lock().await; - ebpf.set_zone_allowed_comms(zone_a_id, zone_b_id)?; - } - - { - let mut registry = registry.write().await; - if registry.zone_id(zone_a).is_some() && registry.zone_id(zone_b).is_some() { - registry.record_allow_comm(zone_a, zone_b); - } - } - - tracing::info!(zone_a, zone_b, "cross-zone comm allowed"); - Ok(()) -} - -pub(crate) 
async fn deny_comm_local( - registry: &Arc>, - ebpf: &Arc>, - zone_a: &str, - zone_b: &str, -) -> anyhow::Result<()> { - let (zone_a_id, zone_b_id) = { - let registry = registry.read().await; - let zone_a_id = registry - .zone_id(zone_a) - .ok_or_else(|| anyhow::anyhow!("zone '{}' not registered", zone_a))?; - let zone_b_id = registry - .zone_id(zone_b) - .ok_or_else(|| anyhow::anyhow!("zone '{}' not registered", zone_b))?; - (zone_a_id, zone_b_id) - }; - - { - let mut ebpf = ebpf.lock().await; - ebpf.remove_zone_comm_pair(zone_a_id, zone_b_id)?; - } - - { - let mut registry = registry.write().await; - registry.record_deny_comm(zone_a, zone_b); - } - - tracing::info!(zone_a, zone_b, "cross-zone comm denied"); - Ok(()) -} - -#[tonic::async_trait] -impl SyvaCore for SyvaCoreService { - async fn register_zone( - &self, - request: Request, - ) -> Result, Status> { - let req = request.into_inner(); - let zone_name = req.zone_name; - - if zone_name.is_empty() { - return Err(Status::invalid_argument("zone_name is required")); - } - - let policy = req.policy.map(|proto_policy| CoreZonePolicyInput { - host_paths: proto_policy.host_paths, - allowed_zones: proto_policy.allowed_zones, - allow_ptrace: proto_policy.allow_ptrace, - zone_type: match proto_policy.zone_type { - 1 => ZoneType::Privileged, - _ => ZoneType::NonGlobal, - }, - }); - let zone_id = register_zone_local( - &self.registry, - &self.ebpf, - &self.health, - &zone_name, - policy, - ) - .await - .map_err(|e| Status::internal(format!("failed to register zone: {e}")))?; - - tracing::info!(zone = zone_name, zone_id, "zone registered via gRPC"); - Ok(Response::new(RegisterZoneResponse { zone_id })) - } - - async fn remove_zone( - &self, - request: Request, - ) -> Result, Status> { - let req = request.into_inner(); - let zone_name = req.zone_name; - - if zone_name.is_empty() { - return Err(Status::invalid_argument("zone_name is required")); - } - let result = remove_zone_local( - &self.registry, - &self.ebpf, - 
&self.health, - &zone_name, - req.drain, - ) - .await - .map_err(|e| Status::not_found(format!("{e}")))?; - - Ok(Response::new(RemoveZoneResponse { - ok: result.ok, - message: result.message, - })) - } - - async fn attach_container( - &self, - request: Request, - ) -> Result, Status> { - let req = request.into_inner(); - - if !is_valid_container_id(&req.container_id) { - return Ok(Response::new(AttachContainerResponse { - ok: false, - message: "invalid container_id: must be non-empty, max 128 chars, hex/dash/underscore only".to_string(), - })); - } - - if req.zone_name.is_empty() { - return Err(Status::invalid_argument("zone_name is required")); - } - - if req.cgroup_id == 0 { - return Err(Status::invalid_argument("cgroup_id must be non-zero")); - } - - let mut registry = self.registry.write().await; - let zone_id = match registry.add_container(&req.container_id, &req.zone_name, req.cgroup_id) { - Ok(id) => id, - Err(e) => { - return Ok(Response::new(AttachContainerResponse { - ok: false, - message: format!("{e}"), - })); - } - }; - - let mut ebpf = self.ebpf.lock().await; - if let Err(e) = ebpf.add_zone_member(req.cgroup_id, zone_id, ZoneType::NonGlobal) { - // Rollback registry state. 
- registry.remove_container(&req.container_id, None); - return Err(Status::internal(format!("BPF add_zone_member failed: {e}"))); - } - - let mut h = self.health.write().await; - h.containers_active = registry.container_count(); - - tracing::info!( - container = req.container_id, - zone = req.zone_name, - zone_id, - cgroup_id = req.cgroup_id, - "container attached via gRPC" - ); - - Ok(Response::new(AttachContainerResponse { - ok: true, - message: String::new(), - })) - } - - async fn detach_container( - &self, - request: Request, - ) -> Result, Status> { - let req = request.into_inner(); - - if req.container_id.is_empty() { - return Err(Status::invalid_argument("container_id is required")); - } - - let mut registry = self.registry.write().await; - let result = registry.remove_container(&req.container_id, None); - - if let Some((zone_id, cgroup_id, transition)) = result { - let mut ebpf = self.ebpf.lock().await; - if let Err(e) = ebpf.remove_zone_member(cgroup_id) { - tracing::warn!(cgroup_id, %e, "failed to remove zone member from BPF map"); - } - - match transition { - ZoneTransition::DrainingComplete => { - tracing::info!(zone_id, "draining zone emptied — cleaning up BPF maps"); - let _ = ebpf.remove_zone_policy(zone_id); - let _ = ebpf.remove_zone_comms(zone_id); - let _ = ebpf.remove_zone_inodes(zone_id); - if let Err(e) = registry.unregister_zone_by_id(zone_id) { - tracing::warn!(zone_id, %e, "failed to unregister drained zone"); - } - } - ZoneTransition::WentToPending => { - tracing::info!(zone_id, "zone has no active containers (Pending)"); - } - ZoneTransition::StillActive => {} - } - - let mut h = self.health.write().await; - h.containers_active = registry.container_count(); - h.zones_loaded = registry.zone_count(); - - tracing::info!(container = req.container_id, "container detached via gRPC"); - } - - Ok(Response::new(DetachContainerResponse { ok: true })) - } - - async fn allow_comm( - &self, - request: Request, - ) -> Result, Status> { - let req = 
request.into_inner(); - - if req.zone_a.is_empty() || req.zone_b.is_empty() { - return Err(Status::invalid_argument("both zone_a and zone_b are required")); - } - - // Resolve IDs under a read-lock, then release it before awaiting the - // eBPF update — holding a write-lock across the BPF syscall would - // block unrelated registry readers/writers for no gain. The - // subsequent write-lock is held just long enough to record the - // mirror entry. If a zone is unregistered in the window, the BPF - // entry will be cleared by remove_zone_comms, and the mirror - // re-check below skips the stale record. - allow_comm_local(&self.registry, &self.ebpf, &req.zone_a, &req.zone_b) - .await - .map_err(|e| Status::internal(format!("failed to set allowed comms: {e}")))?; - - tracing::info!(zone_a = req.zone_a, zone_b = req.zone_b, "cross-zone comm allowed via gRPC"); - Ok(Response::new(AllowCommResponse { ok: true })) - } - - async fn deny_comm( - &self, - request: Request, - ) -> Result, Status> { - let req = request.into_inner(); - - if req.zone_a.is_empty() || req.zone_b.is_empty() { - return Err(Status::invalid_argument("both zone_a and zone_b are required")); - } - - // Same locking shape as allow_comm — resolve IDs under a read-lock, - // release before the eBPF await, take a brief write-lock for the - // mirror update afterwards. 
- deny_comm_local(&self.registry, &self.ebpf, &req.zone_a, &req.zone_b) - .await - .map_err(|e| Status::internal(format!( - "failed to remove comms between '{}' and '{}': {e}", - req.zone_a, req.zone_b - )))?; - - tracing::info!(zone_a = req.zone_a, zone_b = req.zone_b, "cross-zone comm denied via gRPC"); - Ok(Response::new(DenyCommResponse { ok: true })) - } - - async fn list_zones( - &self, - _request: Request, - ) -> Result, Status> { - let registry = self.registry.read().await; - let zones = registry.zones_summary() - .map(|(name, zone_id, state, refcount)| ZoneSummary { - name: name.to_string(), - zone_id, - state: match state { - ZoneState::Pending => "pending", - ZoneState::Active => "active", - ZoneState::Draining => "draining", - }.to_string(), - containers_active: refcount as u32, - }) - .collect(); - Ok(Response::new(ListZonesResponse { zones })) - } - - async fn list_comms( - &self, - request: Request, - ) -> Result, Status> { - let req = request.into_inner(); - let filter = if req.zone_name.is_empty() { None } else { Some(req.zone_name.as_str()) }; - - let registry = self.registry.read().await; - - // Reject an explicit filter that points to an unknown zone — returning - // an empty list would hide typos. 
- if let Some(z) = filter { - if registry.zone_id(z).is_none() { - return Err(Status::not_found(format!("zone '{z}' not registered"))); - } - } - - let pairs = registry.list_allowed_comms(filter) - .map(|(a, b)| CommPair { zone_a: a.to_string(), zone_b: b.to_string() }) - .collect(); - Ok(Response::new(ListCommsResponse { pairs })) - } - - async fn register_host_path( - &self, - request: Request, - ) -> Result, Status> { - let req = request.into_inner(); - - if req.zone_name.is_empty() { - return Err(Status::invalid_argument("zone_name is required")); - } - if req.path.is_empty() { - return Err(Status::invalid_argument("path is required")); - } - - let registry = self.registry.read().await; - let zone_id = registry.zone_id(&req.zone_name) - .ok_or_else(|| Status::not_found(format!("zone '{}' not registered", req.zone_name)))?; - drop(registry); - - let mut ebpf = self.ebpf.lock().await; - let count = if req.recursive { - let paths = vec![req.path.clone()]; - ebpf.populate_inode_zone_map(zone_id, &paths) - .map_err(|e| Status::internal(format!("failed to populate inode map: {e}")))? - } else { - // Single inode registration — stat the path and add its inode directly. - ebpf.register_single_inode(zone_id, &req.path) - .map_err(|e| Status::internal(format!("failed to register inode: {e}")))? - }; - - tracing::info!( - zone = req.zone_name, - path = req.path, - inodes = count, - "host path registered via gRPC" - ); - - Ok(Response::new(RegisterHostPathResponse { - inodes_registered: count as u32, - })) - } - - async fn status( - &self, - _request: Request, - ) -> Result, Status> { - let health = self.health.read().await; - let registry = self.registry.read().await; - - let uptime_secs = self.start_time.elapsed().as_secs(); - - let mut hooks = Vec::new(); - - // Use stable HOOK_NAMES (file_open, bprm_check, etc.) rather than - // raw BPF program names (syva_file_open, etc.) for API consistency. 
- let ebpf = self.ebpf.lock().await; - match ebpf.read_counters() { - Ok(counters) => { - for (idx, (_, totals)) in counters.iter().enumerate() { - let hook_name = HOOK_NAMES.get(idx) - .copied() - .unwrap_or("unknown") - .to_string(); - hooks.push(HookStatus { - hook: hook_name, - allow: totals.allow, - deny: totals.deny, - error: totals.error, - lost: totals.lost, - }); - } - } - Err(e) => { - tracing::debug!(%e, "failed to read counters for status RPC"); - } - } - - Ok(Response::new(StatusResponse { - attached: health.attached, - zones_active: registry.zone_count() as u32, - containers_active: registry.container_count() as u32, - uptime_secs, - hooks, - max_zones: syva_ebpf_common::MAX_ZONES, - })) - } - - type WatchEventsStream = ReceiverStream>; - - async fn watch_events( - &self, - request: Request, - ) -> Result, Status> { - let req = request.into_inner(); - let (tx, rx) = tokio::sync::mpsc::channel(256); - - let mut ebpf = self.ebpf.lock().await; - let ring_buf = ebpf.take_event_ring_buf() - .ok_or_else(|| Status::unavailable("event ring buffer already taken"))?; - drop(ebpf); - - let follow = req.follow; - - tokio::spawn(async move { - let mut ring_buf = ring_buf; - let mut interval = tokio::time::interval(std::time::Duration::from_millis(100)); - - loop { - interval.tick().await; - - let events: Vec = tokio::task::block_in_place(|| { - let mut out = Vec::new(); - while let Some(item) = ring_buf.next() { - if item.len() < std::mem::size_of::() { - continue; - } - let event: EnforcementEvent = unsafe { - std::ptr::read_unaligned(item.as_ptr() as *const EnforcementEvent) - }; - out.push(event); - if out.len() >= 1000 { - break; - } - } - out - }); - - let had_events = !events.is_empty(); - - for event in events { - if event.decision != DECISION_DENY { - continue; - } - let hook = HOOK_NAMES.get(event.hook as usize) - .copied() - .unwrap_or("unknown") - .to_string(); - - let deny_event = DenyEvent { - timestamp_ns: event.timestamp_ns, - hook, - zone_id: 
event.caller_zone, - target_zone_id: event.target_zone, - pid: event.pid, - comm: String::new(), - inode: 0, - context: event.context.to_string(), - }; - - if tx.send(Ok(deny_event)).await.is_err() { - return; // Client disconnected. - } - } - - if !follow && !had_events { - return; // One-shot mode: drain and exit. - } - } - }); - - Ok(Response::new(ReceiverStream::new(rx))) - } -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn valid_container_ids() { - assert!(is_valid_container_id("abc123")); - assert!(is_valid_container_id("abc-def_123")); - assert!(is_valid_container_id("a")); - } - - #[test] - fn invalid_container_ids() { - assert!(!is_valid_container_id("")); - assert!(!is_valid_container_id("abc/def")); - assert!(!is_valid_container_id("abc def")); - assert!(!is_valid_container_id(&"a".repeat(129))); - } - - #[test] - fn max_length_container_id() { - assert!(is_valid_container_id(&"a".repeat(128))); - assert!(!is_valid_container_id(&"a".repeat(129))); - } -} diff --git a/syva-core/src/zone.rs b/syva-core/src/zone.rs index 6e64a10..97e7e73 100644 --- a/syva-core/src/zone.rs +++ b/syva-core/src/zone.rs @@ -33,6 +33,7 @@ pub enum ZoneState { /// Result of a container removal — what happened to the zone. #[derive(Debug, Clone, Copy, PartialEq, Eq)] +#[cfg_attr(not(test), allow(dead_code))] pub enum ZoneTransition { /// Zone still has containers. StillActive, @@ -62,8 +63,10 @@ pub struct ZoneRegistry { /// zone_name → ZoneEntry zones: HashMap, /// cgroup_id → (container_id, zone_name) — enables hint-based removal + #[cfg_attr(not(test), allow(dead_code))] cgroup_to_info: HashMap, /// container_id → (zone_name, cgroup_id) + #[cfg_attr(not(test), allow(dead_code))] container_to_info: HashMap, /// Canonicalised allowed cross-zone comm pairs — the two names are /// stored in lexicographic order so the set is symmetric by construction. @@ -116,6 +119,7 @@ impl ZoneRegistry { /// Record that a container has joined a zone. 
/// Transitions zone from Pending → Active. /// Returns Err if zone_name is not registered or container_id is already tracked. + #[cfg_attr(not(test), allow(dead_code))] pub fn add_container( &mut self, container_id: &str, @@ -153,6 +157,7 @@ impl ZoneRegistry { /// Record that a container has left. /// Decrements refcount. Returns the zone transition that occurred. /// Returns None if the container_id is unknown (no-op for Delete-before-Start). + #[cfg_attr(not(test), allow(dead_code))] pub fn remove_container( &mut self, container_id: &str, @@ -232,6 +237,7 @@ impl ZoneRegistry { /// Remove a zone entry by ID. Reverse-lookup by scanning zones. /// Only valid for zones with refcount 0. + #[cfg_attr(not(test), allow(dead_code))] pub fn unregister_zone_by_id(&mut self, zone_id: u32) -> anyhow::Result<()> { let zone_name = self.zones.iter() .find(|(_, e)| e.zone_id == zone_id) @@ -248,11 +254,13 @@ impl ZoneRegistry { } /// All registered zone names and their IDs. + #[cfg_attr(not(test), allow(dead_code))] pub fn all_zones(&self) -> impl Iterator { self.zones.iter().map(|(name, entry)| (name.as_str(), entry.zone_id)) } /// Full snapshot for ListZones — (name, zone_id, state, refcount). + #[cfg_attr(not(test), allow(dead_code))] pub fn zones_summary(&self) -> impl Iterator { self.zones.iter().map(|(name, e)| (name.as_str(), e.zone_id, e.state, e.refcount)) } @@ -270,6 +278,7 @@ impl ZoneRegistry { /// Iterate allowed comm pairs, optionally filtered to those involving /// a specific zone name. Yields canonicalised (a, b) tuples. + #[cfg_attr(not(test), allow(dead_code))] pub fn list_allowed_comms<'a>( &'a self, filter_zone: Option<&'a str>, @@ -293,6 +302,7 @@ impl ZoneRegistry { } /// Total number of tracked containers. 
+ #[cfg_attr(not(test), allow(dead_code))] pub fn container_count(&self) -> usize { self.container_to_info.len() } From cbb8461eba53717c2bf4ebe41865ab5a4f77b21b Mon Sep 17 00:00:00 2001 From: Yair Etziony Date: Sat, 25 Apr 2026 02:50:24 +0200 Subject: [PATCH 6/9] feat(deploy): v0.3 manifests for cp-mode (syva-core DaemonSet, adapter Deployments) --- deploy/v0.2/daemonset-file.yaml | 6 +- deploy/v0.2/daemonset-k8s.yaml | 6 +- deploy/v0.3/daemonset-cp-mode.yaml | 197 +++++++++++++++++++++++++++++ 3 files changed, 203 insertions(+), 6 deletions(-) create mode 100644 deploy/v0.3/daemonset-cp-mode.yaml diff --git a/deploy/v0.2/daemonset-file.yaml b/deploy/v0.2/daemonset-file.yaml index 833fa53..1e5d9d2 100644 --- a/deploy/v0.2/daemonset-file.yaml +++ b/deploy/v0.2/daemonset-file.yaml @@ -1,6 +1,6 @@ -# syva v0.2 — Standalone / ConfigMap deployment -# Two containers: syva-core (enforcement) + syva-file (policy adapter) -# Use this when managing policies via TOML files or Kubernetes ConfigMaps. +# syva v0.2 — Legacy local-socket deployment +# Historical reference only. Session 4b removed syva-core's local gRPC surface. +# For current deployments, use deploy/v0.3/daemonset-cp-mode.yaml instead. apiVersion: apps/v1 kind: DaemonSet metadata: diff --git a/deploy/v0.2/daemonset-k8s.yaml b/deploy/v0.2/daemonset-k8s.yaml index 974ed1d..0c0c506 100644 --- a/deploy/v0.2/daemonset-k8s.yaml +++ b/deploy/v0.2/daemonset-k8s.yaml @@ -1,6 +1,6 @@ -# syva v0.2 — Kubernetes CRD deployment -# Two containers: syva-core (enforcement) + syva-k8s (CRD adapter) -# Use this when managing policies via SyvaZonePolicy CRDs. +# syva v0.2 — Legacy local-socket deployment +# Historical reference only. Session 4b removed syva-core's local gRPC surface. +# For current deployments, use deploy/v0.3/daemonset-cp-mode.yaml instead. 
apiVersion: apps/v1 kind: DaemonSet metadata: diff --git a/deploy/v0.3/daemonset-cp-mode.yaml b/deploy/v0.3/daemonset-cp-mode.yaml new file mode 100644 index 0000000..c14fa57 --- /dev/null +++ b/deploy/v0.3/daemonset-cp-mode.yaml @@ -0,0 +1,197 @@ +apiVersion: v1 +kind: Namespace +metadata: + name: syva-system +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + name: syva + namespace: syva-system +--- +apiVersion: apps/v1 +kind: DaemonSet +metadata: + name: syva-core + namespace: syva-system +spec: + selector: + matchLabels: + app: syva-core + template: + metadata: + labels: + app: syva-core + spec: + hostPID: true + serviceAccountName: syva + tolerations: + - effect: NoSchedule + operator: Exists + containers: + - name: syva-core + image: ghcr.io/false-systems/syva-core:0.3.0 + args: + - --cp-endpoint=http://syva-cp.syva-system.svc:50051 + - --node-labels=$(NODE_LABELS) + env: + - name: SYVA_NODE_NAME + valueFrom: + fieldRef: + fieldPath: spec.nodeName + - name: SYVA_CLUSTER_ID + value: production + - name: NODE_LABELS + value: "tier=prod" + securityContext: + privileged: true + volumeMounts: + - name: machine-id + mountPath: /etc/machine-id + readOnly: true + - name: node-state + mountPath: /var/lib/syva + - name: bpf + mountPath: /sys/fs/bpf + - name: cgroup + mountPath: /sys/fs/cgroup + readOnly: true + - name: btf + mountPath: /sys/kernel/btf + readOnly: true + volumes: + - name: machine-id + hostPath: + path: /etc/machine-id + type: File + - name: node-state + hostPath: + path: /var/lib/syva + type: DirectoryOrCreate + - name: bpf + hostPath: + path: /sys/fs/bpf + type: DirectoryOrCreate + - name: cgroup + hostPath: + path: /sys/fs/cgroup + type: Directory + - name: btf + hostPath: + path: /sys/kernel/btf + type: Directory +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: syva-file-adapter + namespace: syva-system +spec: + replicas: 1 + selector: + matchLabels: + app: syva-file-adapter + template: + metadata: + labels: + app: 
syva-file-adapter + spec: + serviceAccountName: syva + containers: + - name: syva-file + image: ghcr.io/false-systems/syva-adapter-file:0.3.0 + args: + - --cp-endpoint=http://syva-cp.syva-system.svc:50051 + - --team-id=$(TEAM_ID) + - --policy-dir=/etc/syva/policies + env: + - name: TEAM_ID + value: "00000000-0000-0000-0000-000000000000" + volumeMounts: + - name: policies + mountPath: /etc/syva/policies + readOnly: true + volumes: + - name: policies + configMap: + name: syva-policies +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: syva-k8s-adapter + namespace: syva-system +spec: + replicas: 1 + selector: + matchLabels: + app: syva-k8s-adapter + template: + metadata: + labels: + app: syva-k8s-adapter + spec: + serviceAccountName: syva + containers: + - name: syva-k8s + image: ghcr.io/false-systems/syva-adapter-k8s:0.3.0 + args: + - --cp-endpoint=http://syva-cp.syva-system.svc:50051 + - --team-id=$(TEAM_ID) + - --namespace=$(WATCH_NAMESPACE) + env: + - name: TEAM_ID + value: "00000000-0000-0000-0000-000000000000" + - name: WATCH_NAMESPACE + valueFrom: + fieldRef: + fieldPath: metadata.namespace +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: syva-api-adapter + namespace: syva-system +spec: + replicas: 1 + selector: + matchLabels: + app: syva-api-adapter + template: + metadata: + labels: + app: syva-api-adapter + spec: + serviceAccountName: syva + containers: + - name: syva-api + image: ghcr.io/false-systems/syva-adapter-api:0.3.0 + args: + - --cp-endpoint=http://syva-cp.syva-system.svc:50051 + - --team-id=$(TEAM_ID) + - --listen=0.0.0.0:8080 + env: + - name: TEAM_ID + value: "00000000-0000-0000-0000-000000000000" +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: syva-k8s +rules: + - apiGroups: ["syva.dev"] + resources: ["syvazonepolicies"] + verbs: ["get", "list", "watch"] +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: syva-k8s +roleRef: + apiGroup: 
rbac.authorization.k8s.io + kind: ClusterRole + name: syva-k8s +subjects: + - kind: ServiceAccount + name: syva + namespace: syva-system From 81dbaa3186c7999d6f0fc842e7bdc16bbc204db8 Mon Sep 17 00:00:00 2001 From: Yair Etziony Date: Sat, 25 Apr 2026 02:50:31 +0200 Subject: [PATCH 7/9] docs: reflect single-ingestion-path architecture after 4b --- AGENT.md | 27 ++++++++++++--------------- README.md | 30 +++++++++++++++++++++--------- syva-adapter-api/README.md | 20 ++++++++++++++++++++ syva-adapter-file/README.md | 18 ++++++++++++++++++ syva-adapter-k8s/README.md | 17 +++++++++++++++++ syva-core/README.md | 30 +++++++----------------------- 6 files changed, 95 insertions(+), 47 deletions(-) create mode 100644 syva-adapter-api/README.md create mode 100644 syva-adapter-file/README.md create mode 100644 syva-adapter-k8s/README.md diff --git a/AGENT.md b/AGENT.md index 773864c..048ede9 100644 --- a/AGENT.md +++ b/AGENT.md @@ -21,25 +21,22 @@ copied to any other operation. --- -## syva-core CP Mode vs Legacy Mode +## syva-core ingests zones via syva-cp -`syva-core` supports two zone ingestion paths that share the same in-process -`ZoneRegistry` and `EnforceEbpf`: +After session 4b, `syva-core` has one and only one ingestion path: the +`NodeAssignmentUpdate` stream from `syva-cp`. The local gRPC surface that +existed in v0.2 is deleted. `--cp-endpoint` is mandatory. -1. Local gRPC surface. Adapters push zones to `syva-core` directly. This is - the v0.2 architecture and remains the default. -2. CP mode (`--cp-endpoint`). `syva-core` connects to a remote `syva-cp`, - registers as a node, and consumes assignments via server-streaming. The - reconcile loop lives in `syva-core/src/cp_reconcile/`. +Adapters (`syva-file`, `syva-k8s`, `syva-api`) push zones to `syva-cp` +directly via `syva-cp-client`. They do not connect to `syva-core`. -Both paths call the same in-process mutation helpers. CP mode is additive, not -a replacement yet. 
Session 4b will migrate adapters to push to `syva-cp` -instead of `syva-core`. +Single-node operation (laptop, demo, CI) is achieved by running both +`syva-cp` and `syva-core` on the same machine, with `syva-cp` using a local +Postgres. There is no separate "local mode" code path. -The reconciler keeps in-memory state only (`AppliedState`). On restart, a fresh -subscription receives a `FULL_SNAPSHOT` from `syva-cp` and reconstructs desired -state. The only local persistence in CP mode is the `node-id` file used for -re-registration. +If easier single-node operation becomes important, the future plan is to ship a +`syva-cp --embedded` mode that bundles Postgres or SQLite into the control +plane binary. That work is not in scope here. --- diff --git a/README.md b/README.md index d7425b5..774b51d 100644 --- a/README.md +++ b/README.md @@ -96,30 +96,42 @@ Without Syva, these containers can interact through the shared kernel — read e ## How It Works -Syva has two layers: a **core engine** that manages eBPF enforcement, and **adapters** that tell it what to enforce. +Syva now has three layers: adapters write desired state to the **control plane**, the control plane computes per-node assignments, and `syva-core` reconciles those assignments into kernel-enforced BPF map state. 
``` ┌──────────────┐ ┌──────────────┐ ┌──────────────┐ │ syva-file │ │ syva-k8s │ │ syva-api │ - │ TOML/ConfigMap│ │ CRD + Pods │ │ REST API │ + │ TOML watcher │ │ CRD watcher │ │ REST proxy │ └──────┬───────┘ └──────┬───────┘ └──────┬───────┘ └─────────────────┼─────────────────┘ │ - gRPC / Unix socket + gRPC client + │ + ┌──────▼──────┐ + │ syva-cp │ + │ teams/zones │ + │ nodes/audit │ + └──────┬──────┘ + │ + NodeAssignmentUpdate stream │ ┌─────────────────▼─────────────────┐ │ syva-core │ │ BPF maps + 7 LSM hooks │ - │ Health :9091 + Prometheus │ + │ Health :9091 + Prometheus │ └───────────────────────────────────┘ ``` -**The core** loads eBPF programs into the kernel and exposes a gRPC API over a Unix socket. It handles zone registration, container membership, cross-zone communication policy, and inode-level file enforcement. It never knows where commands came from. +**`syva-cp`** is the source of truth. It stores teams, zones, policies, nodes, assignments, and audit history. Adapters write zones to it through `ZoneService`. Nodes register with it through `NodeService` and receive desired state through `AssignmentService`. + +**`syva-core`** is now CP-only. It no longer exposes a local adapter-facing gRPC server. It registers as a node with `syva-cp`, subscribes to assignment updates, and reconciles those assignments into the kernel. 
+ +**Adapters** translate external truth into zone CRUD on `syva-cp`: +- **syva-file** — reconciles a directory of TOML policies into zones in one team +- **syva-k8s** — reconciles `SyvaZonePolicy` CRDs into zones in one team +- **syva-api** — exposes a REST API and proxies zone CRUD to `syva-cp` -**Adapters** connect to the core and translate their domain into enforcement commands: -- **syva-file** — reads TOML policy files, watches containerd for container events, hot-reloads on ConfigMap changes -- **syva-k8s** — watches `SyvaZonePolicy` CRDs and Pod annotations via kube-rs -- **syva-api** — exposes a REST API for programmatic zone management +Container and pod membership ingestion is temporarily deferred while `ContainerService` is being wired end to end. Session 4b moves zone management to `syva-cp`; membership will follow. **Step 1: Label your containers.** diff --git a/syva-adapter-api/README.md b/syva-adapter-api/README.md new file mode 100644 index 0000000..03842cd --- /dev/null +++ b/syva-adapter-api/README.md @@ -0,0 +1,20 @@ +# syva-adapter-api + +`syva-api` is a thin REST proxy in front of `syva-cp` `ZoneService`. + +Start: + +```bash +syva-api \ + --listen 0.0.0.0:8080 \ + --cp-endpoint http://syva-cp.syva-system.svc:50051 \ + --team-id 00000000-0000-0000-0000-000000000000 +``` + +Endpoints: +- `POST /v1/zones` +- `GET /v1/zones` +- `GET /v1/zones/{name}` +- `PUT /v1/zones/{name}` +- `DELETE /v1/zones/{name}` +- `GET /healthz` diff --git a/syva-adapter-file/README.md b/syva-adapter-file/README.md new file mode 100644 index 0000000..26684e7 --- /dev/null +++ b/syva-adapter-file/README.md @@ -0,0 +1,18 @@ +# syva-adapter-file + +`syva-file` reconciles a directory of TOML policies into zones in `syva-cp`. +Each `*.toml` filename becomes the zone name within one configured team. 
+ +Start: + +```bash +syva-file \ + --policy-dir /etc/syva/policies \ + --cp-endpoint http://syva-cp.syva-system.svc:50051 \ + --team-id 00000000-0000-0000-0000-000000000000 +``` + +Notes: +- `verify` still works as a dry-run parser and validator. +- Reconcile is polling-based in session 4b. +- Containerd watcher / container membership sync is deferred until `ContainerService` exists end to end. diff --git a/syva-adapter-k8s/README.md b/syva-adapter-k8s/README.md new file mode 100644 index 0000000..1f959f1 --- /dev/null +++ b/syva-adapter-k8s/README.md @@ -0,0 +1,17 @@ +# syva-adapter-k8s + +`syva-k8s` watches `SyvaZonePolicy` CRDs in one namespace and reconciles them +into zones in `syva-cp` for one configured team. + +Start: + +```bash +syva-k8s \ + --namespace syva-system \ + --cp-endpoint http://syva-cp.syva-system.svc:50051 \ + --team-id 00000000-0000-0000-0000-000000000000 +``` + +Notes: +- The CRD remains the source of truth; direct API edits will be overwritten by the watcher. +- Pod annotation / container membership sync is deferred until `ContainerService` exists end to end. diff --git a/syva-core/README.md b/syva-core/README.md index fca39d6..d6c45b2 100644 --- a/syva-core/README.md +++ b/syva-core/README.md @@ -1,29 +1,10 @@ # syva-core -Kernel enforcement engine for Syva. `syva-core` owns the in-process -`ZoneRegistry` and `EnforceEbpf` state and exposes a local gRPC surface for -adapters. +Kernel enforcement engine for Syva. After session 4b, `syva-core` has one +ingestion path only: it connects to `syva-cp`, registers as a node, subscribes +to `NodeAssignmentUpdate`, and reconciles its BPF maps to the desired state. -## Operational Modes - -`syva-core` runs in one of two modes: - -### Legacy Mode - -Adapters (`syva-adapter-file`, `syva-adapter-k8s`, `syva-adapter-api`) connect -to the local gRPC surface and push zones directly. 
- -Start: - -```bash -syva-core --socket-path /run/syva/syva-core.sock -``` - -### CP Mode - -`syva-core` connects to a remote `syva-cp`, registers as a node, subscribes to -assignment updates, and reconciles its BPF maps to match desired state. Legacy -adapters can still push to the local gRPC surface in addition. +## CP Mode Start: @@ -38,3 +19,6 @@ syva-core \ The node ID is persisted to `--node-id-path` so restarts appear as re-registration of the same node rather than fresh registration. + +There is no local adapter-facing gRPC surface anymore. Adapters now push zones +to `syva-cp` directly. From 0fc40dd9d35429c4d42661f4ddbe1362d35559a1 Mon Sep 17 00:00:00 2001 From: Yair Etziony Date: Sat, 25 Apr 2026 02:54:17 +0200 Subject: [PATCH 8/9] chore(workspace): fix legacy warnings for strict workspace verification --- syva-adapter-file/src/types.rs | 2 +- syva/src/ebpf.rs | 11 +++-------- syva/src/health.rs | 3 ++- syva/src/main.rs | 12 ++++-------- syva/src/mapper.rs | 2 ++ syva/src/types.rs | 16 +--------------- syva/src/zone.rs | 7 ++++--- 7 files changed, 17 insertions(+), 36 deletions(-) diff --git a/syva-adapter-file/src/types.rs b/syva-adapter-file/src/types.rs index c9b2080..ce61740 100644 --- a/syva-adapter-file/src/types.rs +++ b/syva-adapter-file/src/types.rs @@ -410,7 +410,7 @@ deny = [] #[test] fn network_mode_rejects_pascalcase() { #[derive(Deserialize)] - struct T { mode: NetworkMode } + struct T { #[serde(rename = "mode")] _mode: NetworkMode } assert!(toml::from_str::<T>("mode = \"Bridged\"").is_err()); } } diff --git a/syva/src/ebpf.rs b/syva/src/ebpf.rs index 423c73b..61bce4d 100644 --- a/syva/src/ebpf.rs +++ b/syva/src/ebpf.rs @@ -3,7 +3,6 @@ //! Loads and attaches the 5 LSM programs. Provides typed //! wrappers for BPF map operations (zone membership, policy, comms). 
-use std::collections::HashMap; use std::fs; use std::os::unix::fs::MetadataExt; use std::path::{Path, PathBuf}; @@ -11,7 +10,7 @@ use std::path::{Path, PathBuf}; use aya::maps::HashMap as AyaHashMap; use aya::maps::RingBuf; use aya::programs::Lsm; -use aya::{Bpf, BpfLoader, Btf}; +use aya::{Ebpf, EbpfLoader, Btf}; use crate::types::{ZonePolicy, ZoneType, NetworkMode}; use syva_ebpf_common::{ ZoneInfoKernel, ZonePolicyKernel, ZoneCommKey, SelfTestResult, SelfTestInodeResult, @@ -44,7 +43,7 @@ const MAP_NAMES: &[&str] = &[ /// eBPF manager for the standalone enforce agent. pub struct EnforceEbpf { - bpf: Bpf, + bpf: Ebpf, pin_path: PathBuf, } @@ -83,7 +82,7 @@ impl EnforceEbpf { let obj_data = fs::read(&obj_path) .map_err(|e| anyhow::anyhow!("failed to read eBPF object {}: {e}", obj_path.display()))?; - let mut loader = BpfLoader::new(); + let mut loader = EbpfLoader::new(); loader.btf(Some(&btf)).map_pin_path(&pin_path); for (name, val) in &offsets { @@ -406,8 +405,6 @@ impl EnforceEbpf { .filter(|k| k.src_zone == zone_id || k.dst_zone == zone_id) .collect(); - drop(map); - let mut map: AyaHashMap<_, ZoneCommKey, u8> = AyaHashMap::try_from( self.bpf.map_mut("ZONE_ALLOWED_COMMS") .ok_or_else(|| anyhow::anyhow!("ZONE_ALLOWED_COMMS map not found"))?, @@ -433,8 +430,6 @@ impl EnforceEbpf { .map(|(k, _)| k) .collect(); - drop(map); - let mut map: AyaHashMap<_, u64, u32> = AyaHashMap::try_from( self.bpf.map_mut("INODE_ZONE_MAP") .ok_or_else(|| anyhow::anyhow!("INODE_ZONE_MAP map not found"))?, diff --git a/syva/src/health.rs b/syva/src/health.rs index 8189a93..0224c04 100644 --- a/syva/src/health.rs +++ b/syva/src/health.rs @@ -121,7 +121,8 @@ pub fn render_metrics(health: &HealthState) -> String { // Per-hook enforcement counters — always emitted (default 0 before first // snapshot) so Prometheus series exist from the start. 
let hook_names = &crate::events::HOOK_NAMES; - let metrics: [(&str, &str, fn(&HookCounters) -> u64); 4] = [ + type HookMetric = (&'static str, &'static str, fn(&HookCounters) -> u64); + let metrics: [HookMetric; 4] = [ ("syva_hook_allow_total", "Events allowed per hook", |c: &HookCounters| c.allow), ("syva_hook_deny_total", "Events denied per hook", |c: &HookCounters| c.deny), ("syva_hook_error_total", "Hook errors (fail-open) per hook", |c: &HookCounters| c.error), diff --git a/syva/src/main.rs b/syva/src/main.rs index 129099e..dcdf989 100644 --- a/syva/src/main.rs +++ b/syva/src/main.rs @@ -414,7 +414,6 @@ async fn cmd_status() -> anyhow::Result<()> { Ok(map) => { println!(" hooks:"); let mut total_errors: u64 = 0; - let mut total_lost: u64 = 0; let mut had_read_error = false; for (idx, hook) in events::HOOK_NAMES.iter().enumerate() { match map.get(&(idx as u32), 0) { @@ -427,7 +426,6 @@ async fn cmd_status() -> anyhow::Result<()> { total.lost += cpu_val.lost; } total_errors += total.error; - total_lost += total.lost; let flag = if total.error > 0 || total.lost > 0 { " ⚠" } else { "" }; println!( " {:<16} allow={:<8} deny={:<8} error={:<6} lost={}{}", @@ -580,7 +578,6 @@ async fn cmd_verify(policy_dir: PathBuf) -> anyhow::Result<()> { // 2. Resolve BTF offsets (read-only — no BPF loaded). 
println!(); let btf_path = std::path::Path::new("/sys/kernel/btf/vmlinux"); - let mut btf_ok = true; if btf_path.exists() { match btf::BtfData::from_sys_fs() { Ok(btf_data) => { @@ -602,22 +599,21 @@ async fn cmd_verify(policy_dir: PathBuf) -> anyhow::Result<()> { } None => { println!(" {:<40} NOT FOUND \u{2717}", format!("{struct_name}.{field_name}")); - btf_ok = false; } } } - if !btf_ok { + if offset_defs.iter().any(|(struct_name, field_name)| { + btf_data.struct_field_offset(struct_name, field_name).is_none() + }) { errors.push("some BTF offsets could not be resolved".to_string()); } } Err(e) => { - btf_ok = false; errors.push(format!("BTF resolution failed: {e}")); println!("BTF resolution: FAILED \u{2014} {e}"); } } } else { - btf_ok = false; errors.push("BTF not available \u{2014} enforcement will use default offsets".to_string()); println!("BTF: not available at {} (will use defaults)", btf_path.display()); } @@ -653,7 +649,7 @@ async fn cmd_verify(policy_dir: PathBuf) -> anyhow::Result<()> { // 4. Summary. println!(); - let valid = policies.len() > 0 && errors.is_empty(); + let valid = !policies.is_empty() && errors.is_empty(); if valid { println!("Result: \u{2713} VALID \u{2014} ready to deploy"); } else { diff --git a/syva/src/mapper.rs b/syva/src/mapper.rs index 015a305..dedb2e5 100644 --- a/syva/src/mapper.rs +++ b/syva/src/mapper.rs @@ -5,11 +5,13 @@ /// Label keys for zone assignment. pub const LABEL_ZONE: &str = "syva.dev/zone"; +#[allow(dead_code)] pub const LABEL_POLICY: &str = "syva.dev/policy"; /// Determine the zone name from container labels. /// /// Returns None if the container has no `syva.dev/zone` label (global/unzoned). 
+#[allow(dead_code)] pub fn zone_from_labels(labels: &std::collections::HashMap) -> Option { labels.get(LABEL_ZONE).cloned() } diff --git a/syva/src/types.rs b/syva/src/types.rs index 81da073..c9b2080 100644 --- a/syva/src/types.rs +++ b/syva/src/types.rs @@ -104,7 +104,7 @@ pub struct ZoneMetadata { } /// Declarative policy defining what a zone can do. -#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize, Default)] #[serde(deny_unknown_fields)] pub struct ZonePolicy { /// Optional metadata section — parsed but not used for enforcement. @@ -118,20 +118,6 @@ pub struct ZonePolicy { pub syscalls: SyscallPolicy, } -impl Default for ZonePolicy { - fn default() -> Self { - Self { - zone: ZoneMetadata::default(), - capabilities: CapabilityPolicy::default(), - resources: ResourcePolicy::default(), - network: NetworkPolicy::default(), - filesystem: FilesystemPolicy::default(), - devices: DevicePolicy::default(), - syscalls: SyscallPolicy::default(), - } - } -} - impl ZonePolicy { /// Validate policy values against kernel constraints. pub fn validate(&self, zone_name: &str) -> anyhow::Result<()> { diff --git a/syva/src/zone.rs b/syva/src/zone.rs index 9227e07..c44767d 100644 --- a/syva/src/zone.rs +++ b/syva/src/zone.rs @@ -232,6 +232,7 @@ impl ZoneRegistry { } /// All registered zone names and their IDs. + #[allow(dead_code)] pub fn all_zones(&self) -> impl Iterator { self.zones.iter().map(|(name, entry)| (name.as_str(), entry.zone_id)) } @@ -347,7 +348,7 @@ mod tests { #[test] fn duplicate_container_id_returns_error() { let mut reg = ZoneRegistry::new(); - reg.register_zone("frontend"); + let _ = reg.register_zone("frontend"); reg.add_container("c1", "frontend", 1000).unwrap(); // Second add with same container_id must fail. 
@@ -361,8 +362,8 @@ mod tests { #[test] fn duplicate_container_id_different_zone_returns_error() { let mut reg = ZoneRegistry::new(); - reg.register_zone("frontend"); - reg.register_zone("database"); + let _ = reg.register_zone("frontend"); + let _ = reg.register_zone("database"); reg.add_container("c1", "frontend", 1000).unwrap(); // Same container_id in a different zone must also fail. From fdc6ea2f3363dd5fc83bea779e06183ab9e92ed6 Mon Sep 17 00:00:00 2001 From: Yair Etziony Date: Sat, 25 Apr 2026 03:21:01 +0200 Subject: [PATCH 9/9] fix(session-4b): address adapter and CP-mode review feedback --- Cargo.lock | 3 ++ syva-adapter-api/Cargo.toml | 1 + syva-adapter-api/src/routes.rs | 24 +++++++-- syva-adapter-file/Cargo.toml | 1 + syva-adapter-file/src/run.rs | 65 +++++++++++++++++++++- syva-adapter-file/src/translate.rs | 6 ++- syva-adapter-k8s/Cargo.toml | 1 + syva-adapter-k8s/src/watcher.rs | 87 +++++++++++++++++++++++++++--- syva-core/src/cp_reconcile/mod.rs | 4 +- syva-core/src/main.rs | 47 ++++++++++++---- syva-cp-client/src/client.rs | 13 +++-- 11 files changed, 218 insertions(+), 34 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index e2caeab..7e4efb3 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3029,6 +3029,7 @@ dependencies = [ "serde_json", "syva-cp-client", "tokio", + "tonic", "tracing", "tracing-subscriber", "uuid", @@ -3046,6 +3047,7 @@ dependencies = [ "syva-ebpf-common", "tokio", "toml", + "tonic", "tracing", "tracing-subscriber", "uuid", @@ -3065,6 +3067,7 @@ dependencies = [ "serde_json", "syva-cp-client", "tokio", + "tonic", "tracing", "tracing-subscriber", "uuid", diff --git a/syva-adapter-api/Cargo.toml b/syva-adapter-api/Cargo.toml index eba10da..c49d9b8 100644 --- a/syva-adapter-api/Cargo.toml +++ b/syva-adapter-api/Cargo.toml @@ -17,4 +17,5 @@ serde = { workspace = true } serde_json = { workspace = true } clap = { workspace = true } axum = { workspace = true } +tonic = { workspace = true } uuid = { version = "1", features = ["v4", "serde"] } 
diff --git a/syva-adapter-api/src/routes.rs b/syva-adapter-api/src/routes.rs index 05261ea..498c872 100644 --- a/syva-adapter-api/src/routes.rs +++ b/syva-adapter-api/src/routes.rs @@ -279,10 +279,26 @@ impl ApiError { } fn from_cp(error: syva_cp_client::CpClientError) -> Self { - Self { - status: StatusCode::BAD_GATEWAY, - message: error.to_string(), - } + let status = match &error { + syva_cp_client::CpClientError::Grpc(grpc) => match grpc.code() { + tonic::Code::InvalidArgument => StatusCode::BAD_REQUEST, + tonic::Code::NotFound => StatusCode::NOT_FOUND, + tonic::Code::AlreadyExists + | tonic::Code::FailedPrecondition + | tonic::Code::Aborted => StatusCode::CONFLICT, + tonic::Code::Unavailable | tonic::Code::DeadlineExceeded => { + StatusCode::SERVICE_UNAVAILABLE + } + _ => StatusCode::BAD_GATEWAY, + }, + syva_cp_client::CpClientError::InvalidEndpoint(_) + | syva_cp_client::CpClientError::Serde(_) + | syva_cp_client::CpClientError::Internal(_) => StatusCode::BAD_GATEWAY, + syva_cp_client::CpClientError::Connection(_) => StatusCode::SERVICE_UNAVAILABLE, + syva_cp_client::CpClientError::NotRegistered => StatusCode::INTERNAL_SERVER_ERROR, + }; + + Self { status, message: error.to_string() } } } diff --git a/syva-adapter-file/Cargo.toml b/syva-adapter-file/Cargo.toml index 7431d53..467baf0 100644 --- a/syva-adapter-file/Cargo.toml +++ b/syva-adapter-file/Cargo.toml @@ -21,5 +21,6 @@ serde = { workspace = true } serde_json = { workspace = true } toml = { workspace = true } clap = { workspace = true } +tonic = { workspace = true } uuid = { version = "1", features = ["v4", "serde"] } syva-ebpf-common = { path = "../syva-ebpf-common", features = ["userspace"] } diff --git a/syva-adapter-file/src/run.rs b/syva-adapter-file/src/run.rs index 7fe3f2b..af3effb 100644 --- a/syva-adapter-file/src/run.rs +++ b/syva-adapter-file/src/run.rs @@ -1,4 +1,4 @@ -use crate::policy::load_policies_from_dir; +use crate::policy::{load_policies_from_dir, FilePolicy}; use 
crate::translate::{policy_to_create_args, policy_to_update_args}; use anyhow::{Context, Result}; use std::collections::HashMap; @@ -111,7 +111,7 @@ async fn reconcile_once(cp: &CpClient, config: &Config) -> Result warn!(zone = %name, error = %error, "create_zone failed"), }, Some(snapshot) => match policy_to_update_args(&snapshot, policy)? { - Some(args) => match cp.update_zone(args).await { + Some(args) => match update_zone_with_refresh(cp, config.team_id, name, policy, args).await { Ok(output) => { stats.updated += 1; stats.changed += 1; @@ -147,9 +147,70 @@ async fn reconcile_once(cp: &CpClient, config: &Config) -> Result { + match cp.get_zone_by_name(config.team_id, name).await? { + Some(refreshed) if refreshed.status != "deleted" => { + match cp + .delete_zone(DeleteZoneArgs { + zone_id: refreshed.zone_id, + if_version: refreshed.version, + drain: true, + }) + .await + { + Ok(()) => { + stats.deleted += 1; + stats.changed += 1; + info!(zone = %name, "zone deletion requested (drain) after refresh"); + } + Err(retry_error) => { + warn!(zone = %name, error = %retry_error, "delete_zone failed after refresh"); + } + } + } + _ => {} + } + } Err(error) => warn!(zone = %name, error = %error, "delete_zone failed"), } } Ok(stats) } + +async fn update_zone_with_refresh( + cp: &CpClient, + team_id: Uuid, + name: &str, + policy: &FilePolicy, + args: syva_cp_client::UpdateZoneArgs, +) -> Result { + match cp.update_zone(args).await { + Ok(output) => Ok(output), + Err(error) if is_retryable_conflict(&error) => { + let Some(refreshed) = cp.get_zone_by_name(team_id, name).await? else { + return Err(anyhow::anyhow!("zone disappeared during update retry")); + }; + let Some(retry_args) = policy_to_update_args(&refreshed, policy)? 
else { + return Ok(syva_cp_client::UpdatedZone { + zone_id: refreshed.zone_id, + version: refreshed.version, + new_policy_id: refreshed.current_policy_id, + new_policy_version: None, + }); + }; + cp.update_zone(retry_args).await.map_err(Into::into) + } + Err(error) => Err(error.into()), + } +} + +fn is_retryable_conflict(error: &syva_cp_client::CpClientError) -> bool { + match error { + syva_cp_client::CpClientError::Grpc(status) => matches!( + status.code(), + tonic::Code::AlreadyExists | tonic::Code::Aborted | tonic::Code::FailedPrecondition + ), + _ => false, + } +} diff --git a/syva-adapter-file/src/translate.rs b/syva-adapter-file/src/translate.rs index e39f51a..01e53f8 100644 --- a/syva-adapter-file/src/translate.rs +++ b/syva-adapter-file/src/translate.rs @@ -32,9 +32,11 @@ pub fn policy_to_update_args( .map(|current| current == &desired_policy_json) .unwrap_or(false); let selector_matches = snapshot.selector_json == desired_selector_json; - let display_name_matches = snapshot.display_name == policy.display_name; - if policy_matches && selector_matches && display_name_matches { + // ZoneService::UpdateZone does not currently accept display_name updates. + // Ignore display_name drift here so the adapter does not generate a + // perpetual no-op update loop it can never resolve. 
+ if policy_matches && selector_matches { return Ok(None); } diff --git a/syva-adapter-k8s/Cargo.toml b/syva-adapter-k8s/Cargo.toml index b96a588..9f9a7cc 100644 --- a/syva-adapter-k8s/Cargo.toml +++ b/syva-adapter-k8s/Cargo.toml @@ -17,6 +17,7 @@ serde = { workspace = true } serde_json = { workspace = true } clap = { workspace = true } futures = { workspace = true } +tonic = { workspace = true } kube = { version = "0.95", features = ["runtime", "derive"] } k8s-openapi = { version = "0.23", features = ["v1_29"] } schemars = "0.8" diff --git a/syva-adapter-k8s/src/watcher.rs b/syva-adapter-k8s/src/watcher.rs index 09fbcf5..be8d568 100644 --- a/syva-adapter-k8s/src/watcher.rs +++ b/syva-adapter-k8s/src/watcher.rs @@ -107,7 +107,7 @@ async fn initial_reconcile( } Some(snapshot) => { if let Some(args) = spec_to_update_args(&snapshot, crd)? { - match cp.update_zone(args).await { + match update_zone_with_refresh(cp, team_id, &name, crd, args).await { Ok(_) => info!(zone = %name, "zone updated from CRD (initial)"), Err(error) => warn!(zone = %name, error = %error, "initial update failed"), } @@ -129,6 +129,24 @@ async fn initial_reconcile( .await { Ok(()) => info!(zone = %name, "zone deleted (no matching CRD)"), + Err(error) if is_retryable_conflict(&error) => { + match cp.get_zone_by_name(team_id, name).await? { + Some(refreshed) if refreshed.status != "deleted" => { + match cp + .delete_zone(DeleteZoneArgs { + zone_id: refreshed.zone_id, + if_version: refreshed.version, + drain: true, + }) + .await + { + Ok(()) => info!(zone = %name, "zone deleted (no matching CRD) after refresh"), + Err(retry_error) => warn!(zone = %name, error = %retry_error, "initial delete failed after refresh"), + } + } + _ => {} + } + } Err(error) => warn!(zone = %name, error = %error, "initial delete failed"), } } @@ -151,7 +169,7 @@ async fn handle_apply(cp: &CpClient, team_id: Uuid, crd: &SyvaZonePolicy) -> Res } Some(snapshot) => { if let Some(args) = spec_to_update_args(&snapshot, crd)? 
{ - cp.update_zone(args).await?; + update_zone_with_refresh(cp, team_id, &name, crd, args).await?; info!(zone = %name, "zone updated from CRD"); } } @@ -171,12 +189,65 @@ async fn handle_delete(cp: &CpClient, team_id: Uuid, crd: &SyvaZonePolicy) -> Re return Ok(()); }; - cp.delete_zone(DeleteZoneArgs { - zone_id: snapshot.zone_id, - if_version: snapshot.version, - drain: true, - }) - .await?; + match cp + .delete_zone(DeleteZoneArgs { + zone_id: snapshot.zone_id, + if_version: snapshot.version, + drain: true, + }) + .await + { + Ok(()) => {} + Err(error) if is_retryable_conflict(&error) => { + let Some(refreshed) = cp.get_zone_by_name(team_id, &name).await? else { + return Ok(()); + }; + cp.delete_zone(DeleteZoneArgs { + zone_id: refreshed.zone_id, + if_version: refreshed.version, + drain: true, + }) + .await?; + } + Err(error) => return Err(error.into()), + } info!(zone = %name, "zone deleted (CRD removed)"); Ok(()) } + +async fn update_zone_with_refresh( + cp: &CpClient, + team_id: Uuid, + name: &str, + crd: &SyvaZonePolicy, + args: syva_cp_client::UpdateZoneArgs, +) -> Result { + match cp.update_zone(args).await { + Ok(output) => Ok(output), + Err(error) if is_retryable_conflict(&error) => { + let Some(refreshed) = cp.get_zone_by_name(team_id, name).await? else { + anyhow::bail!("zone disappeared during update retry"); + }; + let Some(retry_args) = spec_to_update_args(&refreshed, crd)? 
else { + return Ok(syva_cp_client::UpdatedZone { + zone_id: refreshed.zone_id, + version: refreshed.version, + new_policy_id: refreshed.current_policy_id, + new_policy_version: None, + }); + }; + cp.update_zone(retry_args).await.map_err(Into::into) + } + Err(error) => Err(error.into()), + } +} + +fn is_retryable_conflict(error: &syva_cp_client::CpClientError) -> bool { + match error { + syva_cp_client::CpClientError::Grpc(status) => matches!( + status.code(), + tonic::Code::AlreadyExists | tonic::Code::Aborted | tonic::Code::FailedPrecondition + ), + _ => false, + } +} diff --git a/syva-core/src/cp_reconcile/mod.rs b/syva-core/src/cp_reconcile/mod.rs index 738f46a..ab2929a 100644 --- a/syva-core/src/cp_reconcile/mod.rs +++ b/syva-core/src/cp_reconcile/mod.rs @@ -144,6 +144,8 @@ impl Reconciler { ); } + self.sync_allowed_comms().await; + applied_reports.push(AppliedReport { assignment_id, actual_zone_version: assignment.desired_zone_version, @@ -166,8 +168,6 @@ impl Reconciler { } } - self.sync_allowed_comms().await; - if !applied_reports.is_empty() || !failed_reports.is_empty() { if let Err(error) = self .cp diff --git a/syva-core/src/main.rs b/syva-core/src/main.rs index 0fb1645..1622cc9 100644 --- a/syva-core/src/main.rs +++ b/syva-core/src/main.rs @@ -19,6 +19,7 @@ mod zone; use std::path::PathBuf; use std::sync::Arc; +use std::time::Duration; use clap::{Parser, Subcommand}; use syva_cp_client::CpClientConfig; @@ -157,7 +158,7 @@ async fn cmd_run(config: Cli) -> anyhow::Result<()> { // Shutdown on SIGINT (ctrl-c) or SIGTERM (Kubernetes pod termination). let mut sigterm = tokio::signal::unix::signal( tokio::signal::unix::SignalKind::terminate(), - ).expect("failed to register SIGTERM handler"); + )?; // Periodic error monitoring task. 
let monitor_ebpf = ebpf.clone(); @@ -222,17 +223,11 @@ async fn cmd_run(config: Cli) -> anyhow::Result<()> { fingerprint: read_fingerprint(&config.fingerprint_path), labels: parse_labels(&config.node_labels), node_id_path: config.node_id_path.clone(), - heartbeat_interval: std::time::Duration::from_secs(config.heartbeat_secs), + heartbeat_interval: Duration::from_secs(config.heartbeat_secs), ..Default::default() }; - let cp = syva_cp_client::CpClient::connect(cp_config) - .await - .map_err(|error| anyhow::anyhow!("connect to syva-cp at {}: {error}", config.cp_endpoint))?; - let registration = cp - .register() - .await - .map_err(|error| anyhow::anyhow!("register with syva-cp: {error}"))?; + let (cp, registration) = connect_and_register_with_retry(cp_config).await; tracing::info!(node_id = %registration.node_id, "registered with syva-cp"); let _heartbeat = cp.spawn_heartbeat_loop(); @@ -266,6 +261,40 @@ async fn cmd_run(config: Cli) -> anyhow::Result<()> { Ok(()) } +async fn connect_and_register_with_retry( + config: CpClientConfig, +) -> (syva_cp_client::CpClient, syva_cp_client::NodeRegistration) { + let mut backoff = Duration::from_millis(250); + let max_backoff = Duration::from_secs(30); + + loop { + match syva_cp_client::CpClient::connect(config.clone()).await { + Ok(cp) => match cp.register().await { + Ok(registration) => return (cp, registration), + Err(error) => { + tracing::warn!( + endpoint = %config.endpoint, + error = %error, + backoff_ms = backoff.as_millis(), + "could not register with syva-cp; retrying" + ); + } + }, + Err(error) => { + tracing::warn!( + endpoint = %config.endpoint, + error = %error, + backoff_ms = backoff.as_millis(), + "could not connect to syva-cp; retrying" + ); + } + } + + tokio::time::sleep(backoff).await; + backoff = (backoff * 2).min(max_backoff); + } +} + pub(crate) fn parse_labels(entries: &[String]) -> std::collections::BTreeMap { let mut labels = std::collections::BTreeMap::new(); diff --git a/syva-cp-client/src/client.rs 
b/syva-cp-client/src/client.rs index ae28dc7..5a760f6 100644 --- a/syva-cp-client/src/client.rs +++ b/syva-cp-client/src/client.rs @@ -13,9 +13,9 @@ use syva_proto::syva_control::v1::node_service_client::NodeServiceClient; use syva_proto::syva_control::v1::zone_service_client::ZoneServiceClient; use syva_proto::syva_control::v1::{ get_zone_request::Identifier as GetZoneIdentifier, AppliedAssignment, CreateZoneRequest, - FailedAssignment, GetZoneRequest, HeartbeatRequest, ListZonesRequest, + DeleteZoneRequest, FailedAssignment, GetZoneRequest, HeartbeatRequest, ListZonesRequest, NodeAssignmentUpdate, RegisterNodeRequest, ReportAssignmentStateRequest, - SubscribeAssignmentsRequest, UpdateZoneRequest, DeleteZoneRequest, + SubscribeAssignmentsRequest, UpdateZoneRequest, }; #[derive(Debug, Clone)] @@ -490,13 +490,12 @@ pub struct FailedReport { } fn parse_uuid(value: &str, field: &str) -> Result { - Uuid::parse_str(value) - .map_err(|error| CpClientError::Internal(format!("could not parse {field} as UUID: {error}"))) + Uuid::parse_str(value).map_err(|error| { + CpClientError::Internal(format!("could not parse {field} as UUID: {error}")) + }) } -fn parse_optional_json( - value: &str, -) -> Result, CpClientError> { +fn parse_optional_json(value: &str) -> Result, CpClientError> { if value.is_empty() { return Ok(None); }