From f435f7064504931f6be1d4005d8b814dbf49018f Mon Sep 17 00:00:00 2001 From: Peyton Date: Mon, 9 Mar 2026 11:18:46 -0700 Subject: [PATCH 1/9] feat(kvp): add store trait and Hyper-V KVP storage crate --- Cargo.toml | 1 + doc/libazurekvp.md | 166 ++++------ libazureinit-kvp/Cargo.toml | 20 ++ libazureinit-kvp/src/hyperv.rs | 578 +++++++++++++++++++++++++++++++++ libazureinit-kvp/src/lib.rs | 121 +++++++ 5 files changed, 785 insertions(+), 101 deletions(-) create mode 100644 libazureinit-kvp/Cargo.toml create mode 100644 libazureinit-kvp/src/hyperv.rs create mode 100644 libazureinit-kvp/src/lib.rs diff --git a/Cargo.toml b/Cargo.toml index 31693d8e..83d7e630 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -50,6 +50,7 @@ path = "tests/functional_tests.rs" [workspace] members = [ "libazureinit", + "libazureinit-kvp", ] [features] diff --git a/doc/libazurekvp.md b/doc/libazurekvp.md index 476bdcb1..8b34fb54 100644 --- a/doc/libazurekvp.md +++ b/doc/libazurekvp.md @@ -1,126 +1,90 @@ -# Azure-init Tracing System +# `libazureinit-kvp` -## Overview +`libazureinit-kvp` is the storage layer for Hyper-V KVP (Key-Value Pair) +pool files used by Azure guests. -Azure-init implements a comprehensive tracing system that captures detailed information about the provisioning process. -This information is crucial for monitoring, debugging, and troubleshooting VM provisioning issues in Azure environments. -The tracing system is built on a multi-layered architecture that provides flexibility and robustness. +It defines: +- `KvpStore`: storage trait with explicit read/write/delete semantics. +- `HyperVKvpStore`: production implementation backed by the Hyper-V + binary pool file format. +- `KvpLimits`: exported key/value byte limits for Hyper-V and Azure. 
-## Architecture +## Record Format -The tracing architecture consists of four specialized layers, each handling a specific aspect of the tracing process: +The Hyper-V pool file record format is fixed width: +- Key field: 512 bytes +- Value field: 2048 bytes +- Total record size: 2560 bytes -### 1. EmitKVPLayer +Records are appended to the file and zero-padded to fixed widths. -**Purpose**: Processes spans and events by capturing metadata, generating key-value pairs (KVPs), and writing to Hyper-V's data exchange file. +## Store Semantics -**Key Functions**: -- Captures span lifecycle events (creation, entry, exit, closing) -- Processes emitted events within spans -- Formats data as KVPs for Hyper-V consumption -- Writes encoded data to `/var/lib/hyperv/.kvp_pool_1` +### `write(key, value)` -Additionally, events emitted with a `health_report` field are written as special provisioning reports using the key `PROVISIONING_REPORT`. +- Append-only behavior: each call appends one new record. +- Duplicate keys are allowed in the file. +- Returns an error when: + - key is empty + - key byte length exceeds `max_key_size` + - value byte length exceeds `max_value_size` + - an I/O error occurs +- Oversized values are rejected by the store (no silent truncation). + Higher layers are responsible for chunking/splitting when required. -**Integration with Azure**: -- The `/var/lib/hyperv/.kvp_pool_1` file is monitored by the Hyper-V `hv_kvp_daemon` service -- This enables key metrics and logs to be transferred from the VM to the Azure platform -- Administrators can access this data through the Azure portal or API +### `read(key)` -### 2. OpenTelemetryLayer +- Scans records and returns the value from the most recent matching key + (last-write-wins). +- Returns `Ok(None)` when the key is missing or file does not exist. -**Purpose**: Propagates tracing context and prepares span data for export. 
+### `entries()` -**Key Functions**: -- Maintains distributed tracing context across service boundaries -- Exports standardized trace data to compatible backends -- Enables integration with broader monitoring ecosystems +- Returns `HashMap`. +- Deduplicates duplicate keys using last-write-wins, matching `read`. +- This exposes a logical unique-key view even though the file itself is + append-only and may contain multiple records per key. -### 3. Stderr Layer +### `delete(key)` -**Purpose**: Formats and logs trace data to stderr. +- Rewrites the file without any matching key records. +- Returns `true` if at least one record was removed, else `false`. -**Key Functions**: -- Provides human-readable logging for immediate inspection -- Supports debugging during development -- Captures trace events even when other layers might fail +## Truncate Semantics (`truncate_if_stale`) -### 4. File Layer +`HyperVKvpStore::truncate_if_stale` clears stale records from previous +boots by comparing file `mtime` to the current boot timestamp. -**Purpose**: Writes formatted logs to a file (default path: `/var/log/azure-init.log`). +- If file predates boot: truncate to zero length. +- If file is current: leave unchanged. +- If lock contention occurs (`WouldBlock`): return `Ok(())` and skip. +- Non-contention lock failures are returned as errors. -**Key Functions**: -- Provides a persistent log for post-provisioning inspection -- Uses file permissions `0600` when possible -- Log level controlled by `AZURE_INIT_LOG` (defaults to `info` for the file layer) +## Limits and Azure Compatibility -## How the Layers Work Together +`KvpLimits` is exported so callers (including diagnostics layers) can +enforce and reuse exact bounds. 
-Despite operating independently, these layers collaborate to provide comprehensive tracing: +- `KvpLimits::hyperv()` + - `max_key_size = 512` + - `max_value_size = 2048` +- `KvpLimits::azure()` + - `max_key_size = 512` + - `max_value_size = 1022` (UTF-16: 511 characters + null terminator) -1. **Independent Processing**: Each layer processes spans and events without dependencies on other layers -2. **Ordered Execution**: Layers are executed in the order they are registered in `setup_layers` (stderr, OpenTelemetry, KVP if enabled, file if available) -3. **Complementary Functions**: Each layer serves a specific purpose in the tracing ecosystem: - - `EmitKVPLayer` focuses on Azure Hyper-V integration - - `OpenTelemetryLayer` handles standardized tracing and exports - - `Stderr Layer` provides immediate visibility for debugging +Why Azure limit is lower for values: +- Hyper-V record format allows 2048-byte values. +- Azure host handling is stricter; values beyond 1022 bytes are + silently truncated by host-side consumers. +- For Azure VMs, use `KvpLimits::azure()` and rely on higher-level + chunking when larger payloads must be preserved. -### Configuration +## Record Count Behavior -The tracing system's behavior is controlled through configuration files and environment variables, allowing more control over what data is captured and where it's sent: +There is no explicit record-count cap in this storage layer. +The file grows with each append until external constraints (disk space, +retention policy, or caller behavior) are applied. -- `telemetry.kvp_diagnostics` (config): Enables/disables KVP emission. Default: `true`. -- `telemetry.kvp_filter` (config): Optional `EnvFilter`-style directives to select which spans/events go to KVP. -- `azure_init_log_path.path` (config): Target path for the file layer. Default: `/var/log/azure-init.log`. -- `AZURE_INIT_KVP_FILTER` (env): Overrides `telemetry.kvp_filter`. Precedence: env > config > default. 
-- `AZURE_INIT_LOG` (env): Controls stderr and file fmt layers’ levels (defaults: stderr=`error`, file=`info`). +## References -The KVP layer uses a conservative default filter aimed at essential provisioning signals; adjust that via the settings above as needed. -For more on how to use these configuration variables, see the [configuration documentation](./configuration.md#complete-configuration-example). - -## Practical Usage - -### Instrumenting Functions - -To instrument code with tracing, use the `#[instrument]` attribute on functions: - -```rust -use tracing::{instrument, Level, event}; - -#[instrument(fields(user_id = ?user.id))] -async fn provision_user(user: User) -> Result<(), Error> { - event!(Level::INFO, "Starting user provisioning"); - - // Function logic - - event!(Level::INFO, "User provisioning completed successfully"); - Ok(()) -} -``` - -### Emitting Events - -To record specific points within a span: - -```rust -use tracing::{event, Level}; - -fn configure_ssh_keys(user: &str, keys: &[String]) { - event!(Level::INFO, user = user, key_count = keys.len(), "Configuring SSH keys"); - - for (i, key) in keys.iter().enumerate() { - event!(Level::DEBUG, user = user, key_index = i, "Processing SSH key"); - // Process each key - } - - event!(Level::INFO, user = user, "SSH keys configured successfully"); -} -``` - -## Reference Documentation - -For more details on how the Hyper-V Data Exchange Service works, refer to the official documentation: -[Hyper-V Data Exchange Service (KVP)](https://learn.microsoft.com/en-us/virtualization/hyper-v-on-windows/reference/integration-services#hyper-v-data-exchange-service-kvp) - -For OpenTelemetry integration details: -[OpenTelemetry for Rust](https://opentelemetry.io/docs/instrumentation/rust/) \ No newline at end of file +- [Hyper-V Data Exchange Service (KVP)](https://learn.microsoft.com/en-us/virtualization/hyper-v-on-windows/reference/integration-services#hyper-v-data-exchange-service-kvp) \ No newline at end of 
file diff --git a/libazureinit-kvp/Cargo.toml b/libazureinit-kvp/Cargo.toml new file mode 100644 index 00000000..6d5a8106 --- /dev/null +++ b/libazureinit-kvp/Cargo.toml @@ -0,0 +1,20 @@ +[package] +name = "libazureinit-kvp" +version = "0.1.0" +edition = "2021" +rust-version = "1.88" +repository = "https://github.com/Azure/azure-init/" +homepage = "https://github.com/Azure/azure-init/" +license = "MIT" +description = "Hyper-V KVP (Key-Value Pair) storage library for azure-init." + +[dependencies] +fs2 = "0.4" +sysinfo = "0.38" + +[dev-dependencies] +tempfile = "3" + +[lib] +name = "libazureinit_kvp" +path = "src/lib.rs" diff --git a/libazureinit-kvp/src/hyperv.rs b/libazureinit-kvp/src/hyperv.rs new file mode 100644 index 00000000..c0be3429 --- /dev/null +++ b/libazureinit-kvp/src/hyperv.rs @@ -0,0 +1,578 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +//! Hyper-V-backed [`KvpStore`] implementation. +//! +//! ## Record format +//! - Fixed-size records of [`RECORD_SIZE`] bytes (512-byte key + +//! 2,048-byte value), zero-padded on disk. +//! - No record-count header or explicit record cap in this layer. +//! +//! ## Behavior summary +//! - **`write`**: append-only; one record appended per call. +//! - **`read`**: last-write-wins for duplicate keys. +//! - **`entries`**: returns a deduplicated `HashMap` with last-write-wins. +//! - **`delete`**: rewrites file and removes all records for the key. +//! - **`truncate_if_stale`**: truncates if file predates boot; on lock +//! contention (`WouldBlock`) it returns `Ok(())` and skips. +//! +//! ## Limits +//! Writes validate key/value byte lengths using [`KvpLimits`] and return +//! errors for empty/oversized keys or oversized values. The on-disk +//! format is always 512 + 2,048 bytes; limits only constrain what may +//! be written (for example, Azure's 1,024-byte value limit). +//! +//! Higher layers are responsible for splitting/chunking oversized +//! 
diagnostics payloads before calling this store.
+//!
+//! ## Reference
+//! - [Hyper-V Data Exchange Service (KVP)](https://learn.microsoft.com/en-us/virtualization/hyper-v-on-windows/reference/integration-services#hyper-v-data-exchange-service-kvp)
+
+use std::collections::HashMap;
+use std::fs::{File, OpenOptions};
+use std::io::{self, ErrorKind, Read, Seek, Write};
+use std::os::unix::fs::MetadataExt;
+use std::path::{Path, PathBuf};
+use std::time::{Duration, SystemTime, UNIX_EPOCH};
+
+use fs2::FileExt;
+use sysinfo::System;
+
+use crate::{
+    KvpLimits, KvpStore, HYPERV_MAX_KEY_BYTES, HYPERV_MAX_VALUE_BYTES,
+};
+
+/// Key field width in the on-disk record format (bytes).
+const HV_KVP_EXCHANGE_MAX_KEY_SIZE: usize = HYPERV_MAX_KEY_BYTES;
+
+/// Value field width in the on-disk record format (bytes).
+const HV_KVP_EXCHANGE_MAX_VALUE_SIZE: usize = HYPERV_MAX_VALUE_BYTES;
+
+/// Total size of one on-disk record (key + value).
+const RECORD_SIZE: usize =
+    HV_KVP_EXCHANGE_MAX_KEY_SIZE + HV_KVP_EXCHANGE_MAX_VALUE_SIZE;
+
+/// Hyper-V KVP pool file store.
+///
+/// Reads and writes the binary Hyper-V KVP format: fixed-size records
+/// of [`RECORD_SIZE`] bytes (512-byte key + 2,048-byte value) with
+/// flock-based concurrency control.
+///
+/// Constructed via [`HyperVKvpStore::new`] with a file path and a
+/// [`KvpLimits`] that determines the maximum allowed key and value
+/// byte lengths for writes.
+#[derive(Clone, Debug)]
+pub struct HyperVKvpStore {
+    path: PathBuf,
+    limits: KvpLimits,
+}
+
+impl HyperVKvpStore {
+    /// Create a store backed by the pool file at `path`.
+    ///
+    /// The file is created on first write if it does not already exist.
+    /// `limits` controls the maximum key and value sizes that
+    /// [`write`](KvpStore::write) will accept.
+    pub fn new(path: impl Into<PathBuf>, limits: KvpLimits) -> Self {
+        Self {
+            path: path.into(),
+            limits,
+        }
+    }
+
+    /// Return a reference to the pool file path.
+    pub fn path(&self) -> &Path {
+        &self.path
+    }
+
+    /// Truncate the file when its mtime predates the current boot
+    /// (stale-data guard).
+    ///
+    /// An exclusive `flock` is held while checking metadata and
+    /// truncating so that concurrent processes don't race on the same
+    /// check-then-truncate sequence. If the lock cannot be acquired
+    /// immediately (another client holds it), the call returns `Ok(())`
+    /// without blocking.
+    pub fn truncate_if_stale(&self) -> io::Result<()> {
+        let boot_time = SystemTime::now()
+            .duration_since(UNIX_EPOCH)
+            .map_err(|e| io::Error::other(format!("clock error: {e}")))?
+            .as_secs()
+            .saturating_sub(get_uptime().as_secs())
+            as i64;
+
+        let file =
+            match OpenOptions::new().read(true).write(true).open(&self.path) {
+                Ok(f) => f,
+                Err(ref e) if e.kind() == ErrorKind::NotFound => {
+                    return Ok(());
+                }
+                Err(e) => return Err(e),
+            };
+
+        if FileExt::try_lock_exclusive(&file).is_err() {
+            return Ok(());
+        }
+
+        let result = (|| -> io::Result<()> {
+            let metadata = file.metadata()?;
+            if metadata.mtime() < boot_time {
+                file.set_len(0)?;
+            }
+            Ok(())
+        })();
+
+        let _ = FileExt::unlock(&file);
+        result
+    }
+
+    fn open_for_append(&self) -> io::Result<File> {
+        OpenOptions::new()
+            .append(true)
+            .create(true)
+            .open(&self.path)
+    }
+
+    fn open_for_read(&self) -> io::Result<File> {
+        OpenOptions::new().read(true).open(&self.path)
+    }
+
+    fn open_for_read_write(&self) -> io::Result<File> {
+        OpenOptions::new().read(true).write(true).open(&self.path)
+    }
+
+    fn validate_key_value(&self, key: &str, value: &str) -> io::Result<()> {
+        if key.is_empty() {
+            return Err(io::Error::other("KVP key must not be empty"));
+        }
+        if key.len() > self.limits.max_key_size {
+            return Err(io::Error::other(format!(
+                "KVP key length ({}) exceeds maximum ({})",
+                key.len(),
+                self.limits.max_key_size
+            )));
+        }
+        if value.len() > self.limits.max_value_size {
+            return Err(io::Error::other(format!(
+                "KVP value length ({}) exceeds maximum ({})",
+                value.len(),
+                self.limits.max_value_size
+            )));
+        }
+        Ok(())
+    }
+}
+
+/// Encode a key-value pair into a single fixed-size record.
+///
+/// The key is zero-padded to [`HV_KVP_EXCHANGE_MAX_KEY_SIZE`] bytes
+/// and the value is zero-padded to [`HV_KVP_EXCHANGE_MAX_VALUE_SIZE`]
+/// bytes. The caller is responsible for ensuring the key and value do
+/// not exceed the on-disk field widths; if they do, only the first N
+/// bytes are written (no error is raised at this level -- validation
+/// happens in [`HyperVKvpStore::write`]).
+pub(crate) fn encode_record(key: &str, value: &str) -> Vec<u8> {
+    let mut buf = vec![0u8; RECORD_SIZE];
+
+    let key_bytes = key.as_bytes();
+    let key_len = key_bytes.len().min(HV_KVP_EXCHANGE_MAX_KEY_SIZE);
+    buf[..key_len].copy_from_slice(&key_bytes[..key_len]);
+
+    let val_bytes = value.as_bytes();
+    let val_len = val_bytes.len().min(HV_KVP_EXCHANGE_MAX_VALUE_SIZE);
+    buf[HV_KVP_EXCHANGE_MAX_KEY_SIZE..HV_KVP_EXCHANGE_MAX_KEY_SIZE + val_len]
+        .copy_from_slice(&val_bytes[..val_len]);
+
+    buf
+}
+
+/// Decode a fixed-size record into its key and value strings.
+///
+/// Trailing null bytes are stripped from both fields. Returns an error
+/// if `data` is not exactly [`RECORD_SIZE`] bytes.
+pub(crate) fn decode_record(data: &[u8]) -> io::Result<(String, String)> {
+    if data.len() != RECORD_SIZE {
+        return Err(io::Error::other(format!(
+            "record size mismatch: expected {RECORD_SIZE}, got {}",
+            data.len()
+        )));
+    }
+
+    let key = String::from_utf8(data[..HV_KVP_EXCHANGE_MAX_KEY_SIZE].to_vec())
+        .unwrap_or_default()
+        .trim_end_matches('\0')
+        .to_string();
+
+    let value =
+        String::from_utf8(data[HV_KVP_EXCHANGE_MAX_KEY_SIZE..].to_vec())
+            .unwrap_or_default()
+            .trim_end_matches('\0')
+            .to_string();
+
+    Ok((key, value))
+}
+
+/// Read all records from a file that is already open and locked.
+fn read_all_records(file: &mut File) -> io::Result<Vec<(String, String)>> {
+    let mut contents = Vec::new();
+    file.read_to_end(&mut contents)?;
+
+    if contents.is_empty() {
+        return Ok(Vec::new());
+    }
+
+    if contents.len() % RECORD_SIZE != 0 {
+        return Err(io::Error::other(format!(
+            "file size ({}) is not a multiple of record size ({RECORD_SIZE})",
+            contents.len()
+        )));
+    }
+
+    contents
+        .chunks_exact(RECORD_SIZE)
+        .map(decode_record)
+        .collect()
+}
+
+impl KvpStore for HyperVKvpStore {
+    fn limits(&self) -> KvpLimits {
+        self.limits
+    }
+
+    /// Append one fixed-size record to the pool file.
+    ///
+    /// Validates key and value against the configured [`KvpLimits`],
+    /// acquires an exclusive flock, writes the record, flushes, and
+    /// releases the lock.
+    fn write(&self, key: &str, value: &str) -> io::Result<()> {
+        self.validate_key_value(key, value)?;
+
+        let mut file = self.open_for_append()?;
+        let record = encode_record(key, value);
+
+        FileExt::lock_exclusive(&file).map_err(|e| {
+            io::Error::other(format!("failed to lock KVP file: {e}"))
+        })?;
+
+        let write_result = file.write_all(&record).and_then(|_| file.flush());
+
+        let unlock_result = FileExt::unlock(&file).map_err(|e| {
+            io::Error::other(format!("failed to unlock KVP file: {e}"))
+        });
+
+        if let Err(err) = write_result {
+            let _ = unlock_result;
+            return Err(err);
+        }
+        unlock_result
+    }
+
+    /// Scan all records and return the value of the last record
+    /// matching `key` (last-write-wins).
+    ///
+    /// Acquires a shared flock during the scan. Returns `Ok(None)` if
+    /// the pool file does not exist or no record matches.
+    fn read(&self, key: &str) -> io::Result<Option<String>> {
+        let mut file = match self.open_for_read() {
+            Ok(f) => f,
+            Err(ref e) if e.kind() == ErrorKind::NotFound => {
+                return Ok(None);
+            }
+            Err(e) => return Err(e),
+        };
+
+        FileExt::lock_shared(&file).map_err(|e| {
+            io::Error::other(format!("failed to lock KVP file: {e}"))
+        })?;
+
+        let records = read_all_records(&mut file);
+        let _ = FileExt::unlock(&file);
+        let records = records?;
+
+        Ok(records
+            .into_iter()
+            .rev()
+            .find(|(k, _)| k == key)
+            .map(|(_, v)| v))
+    }
+
+    /// Return all key-value pairs as a deduplicated `HashMap`.
+    ///
+    /// Duplicate keys are resolved by last-write-wins, matching
+    /// [`read`](KvpStore::read) semantics. Acquires a shared flock
+    /// during the scan. Returns an empty map if the pool file does
+    /// not exist.
+    fn entries(&self) -> io::Result<HashMap<String, String>> {
+        let mut file = match self.open_for_read() {
+            Ok(f) => f,
+            Err(ref e) if e.kind() == ErrorKind::NotFound => {
+                return Ok(HashMap::new());
+            }
+            Err(e) => return Err(e),
+        };
+
+        FileExt::lock_shared(&file).map_err(|e| {
+            io::Error::other(format!("failed to lock KVP file: {e}"))
+        })?;
+
+        let records = read_all_records(&mut file);
+        let _ = FileExt::unlock(&file);
+        let records = records?;
+
+        let mut map = HashMap::new();
+        for (k, v) in records {
+            map.insert(k, v);
+        }
+        Ok(map)
+    }
+
+    /// Rewrite the pool file without the record(s) matching `key`.
+    ///
+    /// Acquires an exclusive flock for the duration. Returns `true` if
+    /// at least one record was removed, `false` if the key was not
+    /// found. Returns `Ok(false)` if the pool file does not exist.
+    fn delete(&self, key: &str) -> io::Result<bool> {
+        let mut file = match self.open_for_read_write() {
+            Ok(f) => f,
+            Err(ref e) if e.kind() == ErrorKind::NotFound => {
+                return Ok(false);
+            }
+            Err(e) => return Err(e),
+        };
+
+        FileExt::lock_exclusive(&file).map_err(|e| {
+            io::Error::other(format!("failed to lock KVP file: {e}"))
+        })?;
+
+        let result = (|| -> io::Result<bool> {
+            let records = read_all_records(&mut file)?;
+            let original_count = records.len();
+            let kept: Vec<_> =
+                records.into_iter().filter(|(k, _)| k != key).collect();
+
+            if kept.len() == original_count {
+                return Ok(false);
+            }
+
+            file.set_len(0)?;
+            file.seek(io::SeekFrom::Start(0))?;
+
+            for (k, v) in &kept {
+                file.write_all(&encode_record(k, v))?;
+            }
+            file.flush()?;
+            Ok(true)
+        })();
+
+        let _ = FileExt::unlock(&file);
+        result
+    }
+}
+
+fn get_uptime() -> Duration {
+    let mut system = System::new();
+    system.refresh_memory();
+    system.refresh_cpu_usage();
+    Duration::from_secs(System::uptime())
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use tempfile::NamedTempFile;
+
+    fn truncate_with_boot_time(path: &Path, boot_time: i64) -> io::Result<()> {
+        let file = OpenOptions::new().read(true).write(true).open(path)?;
+        let metadata = file.metadata()?;
+        if metadata.mtime() < boot_time {
+            file.set_len(0)?;
+        }
+        Ok(())
+    }
+
+    fn hyperv_store(path: &Path) -> HyperVKvpStore {
+        HyperVKvpStore::new(path, KvpLimits::hyperv())
+    }
+
+    fn azure_store(path: &Path) -> HyperVKvpStore {
+        HyperVKvpStore::new(path, KvpLimits::azure())
+    }
+
+    #[test]
+    fn test_encode_decode_roundtrip() {
+        let key = "test_key";
+        let value = "test_value";
+        let record = encode_record(key, value);
+
+        assert_eq!(record.len(), RECORD_SIZE);
+
+        let (decoded_key, decoded_value) =
+            decode_record(&record).expect("decode failed");
+        assert_eq!(decoded_key, key);
+        assert_eq!(decoded_value, value);
+    }
+
+    #[test]
+    fn test_write_rejects_empty_key() {
+        let tmp = NamedTempFile::new().unwrap();
+        let store =
hyperv_store(tmp.path()); + + let err = store.write("", "value").unwrap_err(); + assert!( + err.to_string().contains("empty"), + "expected empty-key error, got: {err}" + ); + } + + #[test] + fn test_write_rejects_oversized_key() { + let tmp = NamedTempFile::new().unwrap(); + let store = hyperv_store(tmp.path()); + + let key = "K".repeat(HV_KVP_EXCHANGE_MAX_KEY_SIZE + 1); + let err = store.write(&key, "v").unwrap_err(); + assert!( + err.to_string().contains("key length"), + "expected key-length error, got: {err}" + ); + } + + #[test] + fn test_write_rejects_oversized_value_hyperv() { + let tmp = NamedTempFile::new().unwrap(); + let store = hyperv_store(tmp.path()); + + let value = "V".repeat(HV_KVP_EXCHANGE_MAX_VALUE_SIZE + 1); + let err = store.write("k", &value).unwrap_err(); + assert!( + err.to_string().contains("value length"), + "expected value-length error, got: {err}" + ); + } + + #[test] + fn test_azure_limits_reject_long_value() { + let tmp = NamedTempFile::new().unwrap(); + let store = azure_store(tmp.path()); + + let value = "V".repeat(1023); + let err = store.write("k", &value).unwrap_err(); + assert!( + err.to_string().contains("value length"), + "expected value-length error with azure limits, got: {err}" + ); + } + + #[test] + fn test_write_and_read() { + let tmp = NamedTempFile::new().unwrap(); + let store = hyperv_store(tmp.path()); + + store.write("key1", "value1").unwrap(); + store.write("key2", "value2").unwrap(); + + assert_eq!(store.read("key1").unwrap(), Some("value1".to_string())); + assert_eq!(store.read("key2").unwrap(), Some("value2".to_string())); + assert_eq!(store.read("nonexistent").unwrap(), None); + } + + #[test] + fn test_read_returns_last_match() { + let tmp = NamedTempFile::new().unwrap(); + let store = hyperv_store(tmp.path()); + + store.write("key", "first").unwrap(); + store.write("key", "second").unwrap(); + store.write("key", "third").unwrap(); + + assert_eq!(store.read("key").unwrap(), Some("third".to_string())); + } + + 
#[test] + fn test_entries_deduplicates_with_last_write_wins() { + let tmp = NamedTempFile::new().unwrap(); + let store = hyperv_store(tmp.path()); + + store.write("key", "v1").unwrap(); + store.write("key", "v2").unwrap(); + store.write("other", "v3").unwrap(); + + let entries = store.entries().unwrap(); + assert_eq!(entries.len(), 2); + assert_eq!(entries.get("key"), Some(&"v2".to_string())); + assert_eq!(entries.get("other"), Some(&"v3".to_string())); + } + + #[test] + fn test_truncate_if_stale_truncates_when_file_is_older_than_boot() { + let tmp = NamedTempFile::new().unwrap(); + let store = hyperv_store(tmp.path()); + + store.write("key", "value").unwrap(); + assert!(tmp.path().metadata().unwrap().len() > 0); + + truncate_with_boot_time(tmp.path(), i64::MAX).unwrap(); + assert_eq!(tmp.path().metadata().unwrap().len(), 0); + } + + #[test] + fn test_truncate_if_stale_keeps_file_when_newer_than_boot() { + let tmp = NamedTempFile::new().unwrap(); + let store = hyperv_store(tmp.path()); + + store.write("key", "value").unwrap(); + let len_before = tmp.path().metadata().unwrap().len(); + + // Epoch boot time ensures any current file mtime is considered fresh. 
+ truncate_with_boot_time(tmp.path(), 0).unwrap(); + assert_eq!(tmp.path().metadata().unwrap().len(), len_before); + } + + #[test] + fn test_delete_removes_all_matches() { + let tmp = NamedTempFile::new().unwrap(); + let store = hyperv_store(tmp.path()); + + store.write("key", "v1").unwrap(); + store.write("key", "v2").unwrap(); + store.write("other", "v3").unwrap(); + + assert!(store.delete("key").unwrap()); + assert_eq!(store.read("key").unwrap(), None); + assert_eq!(store.read("other").unwrap(), Some("v3".to_string())); + + let entries = store.entries().unwrap(); + assert_eq!(entries.len(), 1); + } + + #[test] + fn test_multi_thread_concurrent_writes() { + let tmp = NamedTempFile::new().unwrap(); + let path = tmp.path().to_path_buf(); + + let num_threads: usize = 20; + let iterations: usize = 1_000; + + let handles: Vec<_> = (0..num_threads) + .map(|tid| { + let p = path.clone(); + std::thread::spawn(move || { + let store = HyperVKvpStore::new(&p, KvpLimits::hyperv()); + for i in 0..iterations { + let key = format!("thread-{tid}-iter-{i}"); + let value = format!("value-{tid}-{i}"); + store.write(&key, &value).expect("write failed"); + } + }) + }) + .collect(); + + for h in handles { + h.join().expect("thread panicked"); + } + + let store = HyperVKvpStore::new(&path, KvpLimits::hyperv()); + let entries = store.entries().unwrap(); + assert_eq!(entries.len(), num_threads * iterations); + } +} diff --git a/libazureinit-kvp/src/lib.rs b/libazureinit-kvp/src/lib.rs new file mode 100644 index 00000000..6d8b4dff --- /dev/null +++ b/libazureinit-kvp/src/lib.rs @@ -0,0 +1,121 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +//! `libazureinit-kvp` provides a storage trait and Hyper-V-backed +//! implementation for KVP pool files. +//! +//! - [`KvpStore`]: storage interface used by higher layers. +//! - [`HyperVKvpStore`]: production implementation. +//! - [`KvpLimits`]: exported Hyper-V and Azure byte limits. 
+ +use std::collections::HashMap; +use std::io; + +pub mod hyperv; + +pub use hyperv::HyperVKvpStore; + +/// Hyper-V key limit in bytes (policy/default preset). +pub const HYPERV_MAX_KEY_BYTES: usize = 512; +/// Hyper-V value limit in bytes (policy/default preset). +pub const HYPERV_MAX_VALUE_BYTES: usize = 2048; +/// Azure key limit in bytes (policy/default preset). +pub const AZURE_MAX_KEY_BYTES: usize = 512; +/// Azure value limit in bytes (UTF-16: 511 characters + null terminator). +pub const AZURE_MAX_VALUE_BYTES: usize = 1022; + +/// Storage abstraction for KVP backends. +/// +/// Semantics: +/// - `write`: stores one key/value or returns validation/I/O error. +/// - `read`: returns the most recent value for a key (last-write-wins). +/// - `entries`: returns deduplicated key/value pairs as `HashMap`. +/// - `delete`: removes all records for a key and reports whether any were removed. +/// - `limits`: returns the [`KvpLimits`] that govern maximum key/value +/// sizes for this store, allowing consumers to chunk or validate +/// data generically. +pub trait KvpStore: Send + Sync { + /// The key and value byte-size limits for this store. + /// + /// Consumers (e.g. diagnostics, tracing layers) should call this + /// instead of hardcoding size constants, so the limits stay correct + /// regardless of the underlying implementation. + fn limits(&self) -> KvpLimits; + + /// Write a key-value pair into the store. + /// + /// Returns an error if: + /// - The key is empty. + /// - The key exceeds the configured maximum key size. + /// - The value exceeds the configured maximum value size. + /// - An I/O error occurs during the write. + fn write(&self, key: &str, value: &str) -> io::Result<()>; + + /// Read the value for a given key, returning `None` if absent. + /// + /// When multiple records exist for the same key (append-only + /// storage), the value from the most recent record is returned + /// (last-write-wins). 
+    fn read(&self, key: &str) -> io::Result<Option<String>>;
+
+    /// Return all key-value pairs currently in the store.
+    ///
+    /// Keys are deduplicated using last-write-wins semantics, matching
+    /// the behavior of [`read`](KvpStore::read).
+    fn entries(&self) -> io::Result<HashMap<String, String>>;
+
+    /// Remove all records matching `key`.
+    ///
+    /// Returns `true` if at least one record was removed, `false` if
+    /// the key was not found.
+    fn delete(&self, key: &str) -> io::Result<bool>;
+}
+
+/// Configurable key/value byte limits for writes.
+///
+/// Presets:
+/// - [`KvpLimits::hyperv`]: [`HYPERV_MAX_KEY_BYTES`] /
+///   [`HYPERV_MAX_VALUE_BYTES`].
+/// - [`KvpLimits::azure`]: [`AZURE_MAX_KEY_BYTES`] /
+///   [`AZURE_MAX_VALUE_BYTES`].
+///
+/// Use `azure()` for Azure guests, where host-side consumers are stricter
+/// on value byte length than raw Hyper-V format.
+#[derive(Clone, Copy, Debug, PartialEq, Eq)]
+pub struct KvpLimits {
+    pub max_key_size: usize,
+    pub max_value_size: usize,
+}
+
+impl KvpLimits {
+    /// Raw Hyper-V wire format limits.
+    ///
+    /// - Max key size: 512 bytes
+    /// - Max value size: 2,048 bytes
+    ///
+    /// Use this when writing to a Hyper-V KVP pool file that will only
+    /// be consumed by Hyper-V tooling (not the Azure host agent).
+    pub const fn hyperv() -> Self {
+        Self {
+            max_key_size: HYPERV_MAX_KEY_BYTES,
+            max_value_size: HYPERV_MAX_VALUE_BYTES,
+        }
+    }
+
+    /// Azure platform limits.
+    ///
+    /// - Max key size: 512 bytes
+    /// - Max value size: 1,022 bytes (UTF-16: 511 characters + null
+    ///   terminator)
+    ///
+    /// The Azure host agent reads KVP records from the guest but is
+    /// stricter than the underlying Hyper-V format. Values beyond
+    /// 1,022 bytes are silently truncated by the host. Use this preset
+    /// for any code running on Azure VMs.
+ pub const fn azure() -> Self { + Self { + max_key_size: AZURE_MAX_KEY_BYTES, + max_value_size: AZURE_MAX_VALUE_BYTES, + } + } +} From 5226e32e9256c1b14918bd5e5fb7c7ded3655b9a Mon Sep 17 00:00:00 2001 From: Peyton Date: Mon, 9 Mar 2026 13:04:10 -0700 Subject: [PATCH 2/9] add Azure autodetect limits selection for HyperVKvpStore --- doc/libazurekvp.md | 2 +- libazureinit-kvp/src/hyperv.rs | 79 ++++++++++++++++++++++++++++++++-- libazureinit-kvp/src/lib.rs | 35 +++------------ 3 files changed, 81 insertions(+), 35 deletions(-) diff --git a/doc/libazurekvp.md b/doc/libazurekvp.md index 8b34fb54..2bef650c 100644 --- a/doc/libazurekvp.md +++ b/doc/libazurekvp.md @@ -70,7 +70,7 @@ enforce and reuse exact bounds. - `max_value_size = 2048` - `KvpLimits::azure()` - `max_key_size = 512` - - `max_value_size = 1022` (UTF-16: 511 characters + null terminator) + - `max_value_size = 1022` Why Azure limit is lower for values: - Hyper-V record format allows 2048-byte values. diff --git a/libazureinit-kvp/src/hyperv.rs b/libazureinit-kvp/src/hyperv.rs index c0be3429..7e300f04 100644 --- a/libazureinit-kvp/src/hyperv.rs +++ b/libazureinit-kvp/src/hyperv.rs @@ -20,7 +20,7 @@ //! Writes validate key/value byte lengths using [`KvpLimits`] and return //! errors for empty/oversized keys or oversized values. The on-disk //! format is always 512 + 2,048 bytes; limits only constrain what may -//! be written (for example, Azure's 1,024-byte value limit). +//! be written (for example, Azure's 1,022-byte value limit). //! //! Higher layers are responsible for splitting/chunking oversized //! diagnostics payloads before calling this store. @@ -42,6 +42,18 @@ use crate::{ KvpLimits, KvpStore, HYPERV_MAX_KEY_BYTES, HYPERV_MAX_VALUE_BYTES, }; +/// DMI chassis asset tag used to identify Azure VMs. 
+const AZURE_CHASSIS_ASSET_TAG: &str = "7783-7084-3265-9085-8269-3286-77";
+const AZURE_CHASSIS_ASSET_TAG_PATH: &str =
+    "/sys/class/dmi/id/chassis_asset_tag";
+
+fn is_azure_vm(tag_path: Option<&str>) -> bool {
+    let path = tag_path.unwrap_or(AZURE_CHASSIS_ASSET_TAG_PATH);
+    std::fs::read_to_string(path)
+        .map(|s| s.trim() == AZURE_CHASSIS_ASSET_TAG)
+        .unwrap_or(false)
+}
+
 /// Key field width in the on-disk record format (bytes).
 const HV_KVP_EXCHANGE_MAX_KEY_SIZE: usize = HYPERV_MAX_KEY_BYTES;
@@ -68,11 +80,10 @@ pub struct HyperVKvpStore {
 }
 
 impl HyperVKvpStore {
-    /// Create a store backed by the pool file at `path`.
+    /// Create a store with explicit limits.
     ///
     /// The file is created on first write if it does not already exist.
-    /// `limits` controls the maximum key and value sizes that
-    /// [`write`](KvpStore::write) will accept.
+    /// Use [`HyperVKvpStore::new_autodetect`] to choose limits automatically.
     pub fn new(path: impl Into<PathBuf>, limits: KvpLimits) -> Self {
         Self {
             path: path.into(),
@@ -80,6 +91,38 @@ impl HyperVKvpStore {
         }
     }
 
+    /// Create a store with limits chosen from host platform detection.
+    ///
+    /// If the Azure DMI asset tag is present, uses [`KvpLimits::azure`].
+    /// Otherwise, uses [`KvpLimits::hyperv`].
+    pub fn new_autodetect(path: impl Into<PathBuf>) -> Self {
+        let limits = if is_azure_vm(None) {
+            KvpLimits::azure()
+        } else {
+            KvpLimits::hyperv()
+        };
+        Self {
+            path: path.into(),
+            limits,
+        }
+    }
+
+    #[cfg(test)]
+    fn new_autodetect_with_tag_path(
+        path: impl Into<PathBuf>,
+        tag_path: &str,
+    ) -> Self {
+        let limits = if is_azure_vm(Some(tag_path)) {
+            KvpLimits::azure()
+        } else {
+            KvpLimits::hyperv()
+        };
+        Self {
+            path: path.into(),
+            limits,
+        }
+    }
+
+    /// Return a reference to the pool file path.
pub fn path(&self) -> &Path { &self.path @@ -398,6 +441,34 @@ mod tests { HyperVKvpStore::new(path, KvpLimits::azure()) } + #[test] + fn test_autodetect_uses_azure_limits_on_azure_host() { + let tag_file = NamedTempFile::new().unwrap(); + let pool_file = NamedTempFile::new().unwrap(); + // DMI files on Linux have a trailing newline. + std::fs::write(tag_file.path(), format!("{AZURE_CHASSIS_ASSET_TAG}\n")) + .unwrap(); + + let store = HyperVKvpStore::new_autodetect_with_tag_path( + pool_file.path(), + tag_file.path().to_str().unwrap(), + ); + assert_eq!(store.limits(), KvpLimits::azure()); + } + + #[test] + fn test_autodetect_uses_hyperv_limits_on_bare_hyperv() { + let tag_file = NamedTempFile::new().unwrap(); + let pool_file = NamedTempFile::new().unwrap(); + std::fs::write(tag_file.path(), "bare-hyperv-tag").unwrap(); + + let store = HyperVKvpStore::new_autodetect_with_tag_path( + pool_file.path(), + tag_file.path().to_str().unwrap(), + ); + assert_eq!(store.limits(), KvpLimits::hyperv()); + } + #[test] fn test_encode_decode_roundtrip() { let key = "test_key"; diff --git a/libazureinit-kvp/src/lib.rs b/libazureinit-kvp/src/lib.rs index 6d8b4dff..89fef04f 100644 --- a/libazureinit-kvp/src/lib.rs +++ b/libazureinit-kvp/src/lib.rs @@ -21,7 +21,7 @@ pub const HYPERV_MAX_KEY_BYTES: usize = 512; pub const HYPERV_MAX_VALUE_BYTES: usize = 2048; /// Azure key limit in bytes (policy/default preset). pub const AZURE_MAX_KEY_BYTES: usize = 512; -/// Azure value limit in bytes (UTF-16: 511 characters + null terminator). +/// Azure value limit in bytes, matching Azure host behavior. pub const AZURE_MAX_VALUE_BYTES: usize = 1022; /// Storage abstraction for KVP backends. @@ -32,8 +32,7 @@ pub const AZURE_MAX_VALUE_BYTES: usize = 1022; /// - `entries`: returns deduplicated key/value pairs as `HashMap`. /// - `delete`: removes all records for a key and reports whether any were removed. 
/// - `limits`: returns the [`KvpLimits`] that govern maximum key/value -/// sizes for this store, allowing consumers to chunk or validate -/// data generically. +/// sizes for this store. pub trait KvpStore: Send + Sync { /// The key and value byte-size limits for this store. /// @@ -71,16 +70,7 @@ pub trait KvpStore: Send + Sync { fn delete(&self, key: &str) -> io::Result; } -/// Configurable key/value byte limits for writes. -/// -/// Presets: -/// - [`KvpLimits::hyperv`]: [`HYPERV_MAX_KEY_BYTES`] / -/// [`HYPERV_MAX_VALUE_BYTES`]. -/// - [`KvpLimits::azure`]: [`AZURE_MAX_KEY_BYTES`] / -/// [`AZURE_MAX_VALUE_BYTES`]. -/// -/// Use `azure()` for Azure guests, where host-side consumers are stricter -/// on value byte length than raw Hyper-V format. +/// Key/value byte limits for write validation. #[derive(Clone, Copy, Debug, PartialEq, Eq)] pub struct KvpLimits { pub max_key_size: usize, @@ -88,13 +78,7 @@ pub struct KvpLimits { } impl KvpLimits { - /// Raw Hyper-V wire format limits. - /// - /// - Max key size: 512 bytes - /// - Max value size: 2,048 bytes - /// - /// Use this when writing to a Hyper-V KVP pool file that will only - /// be consumed by Hyper-V tooling (not the Azure host agent). + /// Hyper-V limits (512-byte key, 2,048-byte value). pub const fn hyperv() -> Self { Self { max_key_size: HYPERV_MAX_KEY_BYTES, @@ -102,16 +86,7 @@ impl KvpLimits { } } - /// Azure platform limits. - /// - /// - Max key size: 512 bytes - /// - Max value size: 1,022 bytes (UTF-16: 511 characters + null - /// terminator) - /// - /// The Azure host agent reads KVP records from the guest but is - /// stricter than the underlying Hyper-V format. Values beyond - /// 1,022 bytes are silently truncated by the host. Use this preset - /// for any code running on Azure VMs. + /// Azure limits (512-byte key, 1,022-byte value). 
pub const fn azure() -> Self { Self { max_key_size: AZURE_MAX_KEY_BYTES, From c1440c960c1a9d4c7dc052227b35ec1e05803152 Mon Sep 17 00:00:00 2001 From: Peyton Date: Mon, 9 Mar 2026 15:04:21 -0700 Subject: [PATCH 3/9] Improving libazurekvp.md clarity --- doc/libazurekvp.md | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/doc/libazurekvp.md b/doc/libazurekvp.md index 2bef650c..6fe7bbba 100644 --- a/doc/libazurekvp.md +++ b/doc/libazurekvp.md @@ -29,8 +29,7 @@ Records are appended to the file and zero-padded to fixed widths. - key byte length exceeds `max_key_size` - value byte length exceeds `max_value_size` - an I/O error occurs -- Oversized values are rejected by the store (no silent truncation). - Higher layers are responsible for chunking/splitting when required. +- This store never truncates: oversized values cause `write()` to return an error. ### `read(key)` @@ -74,10 +73,9 @@ enforce and reuse exact bounds. Why Azure limit is lower for values: - Hyper-V record format allows 2048-byte values. -- Azure host handling is stricter; values beyond 1022 bytes are - silently truncated by host-side consumers. -- For Azure VMs, use `KvpLimits::azure()` and rely on higher-level - chunking when larger payloads must be preserved. +- On Azure, host-side consumers truncate values beyond 1022 bytes, so + `KvpLimits::azure()` caps at 1022. Layers above this crate (e.g. + diagnostics/tracing) handle chunking of larger payloads. 
## Record Count Behavior From d63dd42f74144c6acce895db7066c13700eee11c Mon Sep 17 00:00:00 2001 From: peytonr18 Date: Tue, 10 Mar 2026 09:28:53 -0700 Subject: [PATCH 4/9] feat(kvp): harden Hyper-V record decode and refactor stale truncate tests --- libazureinit-kvp/src/hyperv.rs | 95 ++++++++++++++++++++++------------ 1 file changed, 61 insertions(+), 34 deletions(-) diff --git a/libazureinit-kvp/src/hyperv.rs b/libazureinit-kvp/src/hyperv.rs index 7e300f04..4d4a47a3 100644 --- a/libazureinit-kvp/src/hyperv.rs +++ b/libazureinit-kvp/src/hyperv.rs @@ -144,6 +144,10 @@ impl HyperVKvpStore { .saturating_sub(get_uptime().as_secs()) as i64; + self.truncate_if_stale_at_boot(boot_time) + } + + fn truncate_if_stale_at_boot(&self, boot_time: i64) -> io::Result<()> { let file = match OpenOptions::new().read(true).write(true).open(&self.path) { Ok(f) => f, @@ -153,8 +157,11 @@ impl HyperVKvpStore { Err(e) => return Err(e), }; - if FileExt::try_lock_exclusive(&file).is_err() { - return Ok(()); + if let Err(e) = FileExt::try_lock_exclusive(&file) { + if e.kind() == ErrorKind::WouldBlock { + return Ok(()); + } + return Err(e); } let result = (|| -> io::Result<()> { @@ -232,7 +239,8 @@ pub(crate) fn encode_record(key: &str, value: &str) -> Vec { /// Decode a fixed-size record into its key and value strings. /// /// Trailing null bytes are stripped from both fields. Returns an error -/// if `data` is not exactly [`RECORD_SIZE`] bytes. +/// if `data` is not exactly [`RECORD_SIZE`] bytes or if either field +/// contains invalid UTF-8. 
pub(crate) fn decode_record(data: &[u8]) -> io::Result<(String, String)> { if data.len() != RECORD_SIZE { return Err(io::Error::other(format!( @@ -241,40 +249,49 @@ pub(crate) fn decode_record(data: &[u8]) -> io::Result<(String, String)> { ))); } - let key = String::from_utf8(data[..HV_KVP_EXCHANGE_MAX_KEY_SIZE].to_vec()) - .unwrap_or_default() + let (key_bytes, value_bytes) = data.split_at(HV_KVP_EXCHANGE_MAX_KEY_SIZE); + + let key = std::str::from_utf8(key_bytes) + .map_err(|e| io::Error::new(ErrorKind::InvalidData, e))? .trim_end_matches('\0') .to_string(); - let value = - String::from_utf8(data[HV_KVP_EXCHANGE_MAX_KEY_SIZE..].to_vec()) - .unwrap_or_default() - .trim_end_matches('\0') - .to_string(); + let value = std::str::from_utf8(value_bytes) + .map_err(|e| io::Error::new(ErrorKind::InvalidData, e))? + .trim_end_matches('\0') + .to_string(); Ok((key, value)) } /// Read all records from a file that is already open and locked. fn read_all_records(file: &mut File) -> io::Result> { - let mut contents = Vec::new(); - file.read_to_end(&mut contents)?; + let metadata = file.metadata()?; + let len = metadata.len() as usize; - if contents.is_empty() { + if len == 0 { return Ok(Vec::new()); } - if contents.len() % RECORD_SIZE != 0 { + if len % RECORD_SIZE != 0 { return Err(io::Error::other(format!( - "file size ({}) is not a multiple of record size ({RECORD_SIZE})", - contents.len() + "file size ({len}) is not a multiple of record size ({RECORD_SIZE})" ))); } - contents - .chunks_exact(RECORD_SIZE) - .map(decode_record) - .collect() + // Ensure we start reading from the beginning of the file. 
+ file.seek(io::SeekFrom::Start(0))?; + + let record_count = len / RECORD_SIZE; + let mut records = Vec::with_capacity(record_count); + let mut buf = [0u8; RECORD_SIZE]; + + for _ in 0..record_count { + file.read_exact(&mut buf)?; + records.push(decode_record(&buf)?); + } + + Ok(records) } impl KvpStore for HyperVKvpStore { @@ -424,15 +441,6 @@ mod tests { use super::*; use tempfile::NamedTempFile; - fn truncate_with_boot_time(path: &Path, boot_time: i64) -> io::Result<()> { - let file = OpenOptions::new().read(true).write(true).open(path)?; - let metadata = file.metadata()?; - if metadata.mtime() < boot_time { - file.set_len(0)?; - } - Ok(()) - } - fn hyperv_store(path: &Path) -> HyperVKvpStore { HyperVKvpStore::new(path, KvpLimits::hyperv()) } @@ -575,28 +583,47 @@ mod tests { } #[test] - fn test_truncate_if_stale_truncates_when_file_is_older_than_boot() { + fn test_truncate_if_stale_at_boot_truncates_when_file_is_older_than_boot() { let tmp = NamedTempFile::new().unwrap(); let store = hyperv_store(tmp.path()); store.write("key", "value").unwrap(); assert!(tmp.path().metadata().unwrap().len() > 0); - truncate_with_boot_time(tmp.path(), i64::MAX).unwrap(); + store.truncate_if_stale_at_boot(i64::MAX).unwrap(); assert_eq!(tmp.path().metadata().unwrap().len(), 0); } #[test] - fn test_truncate_if_stale_keeps_file_when_newer_than_boot() { + fn test_truncate_if_stale_at_boot_keeps_file_when_newer_than_boot() { + let tmp = NamedTempFile::new().unwrap(); + let store = hyperv_store(tmp.path()); + + store.write("key", "value").unwrap(); + let len_before = tmp.path().metadata().unwrap().len(); + + store.truncate_if_stale_at_boot(0).unwrap(); + assert_eq!(tmp.path().metadata().unwrap().len(), len_before); + } + + #[test] + fn test_truncate_if_stale_keeps_fresh_file() { let tmp = NamedTempFile::new().unwrap(); let store = hyperv_store(tmp.path()); store.write("key", "value").unwrap(); let len_before = tmp.path().metadata().unwrap().len(); + assert!(len_before > 0); - // Epoch 
boot time ensures any current file mtime is considered fresh. - truncate_with_boot_time(tmp.path(), 0).unwrap(); + store.truncate_if_stale().unwrap(); assert_eq!(tmp.path().metadata().unwrap().len(), len_before); + assert_eq!(store.read("key").unwrap(), Some("value".to_string())); + } + + #[test] + fn test_truncate_if_stale_ok_when_file_missing() { + let store = hyperv_store(Path::new("/tmp/nonexistent-kvp-pool")); + store.truncate_if_stale().unwrap(); } #[test] From 62a52fede6b86e0891b9d224b3b13b4a5a5ae6ed Mon Sep 17 00:00:00 2001 From: Peyton Date: Tue, 10 Mar 2026 17:17:07 -0700 Subject: [PATCH 5/9] Refactor libazureinit-kvp: KvpStore trait, KvpError, HyperV/Azure split, clear() API --- doc/libazurekvp.md | 88 -------- libazureinit-kvp/src/azure.rs | 134 ++++++++++++ libazureinit-kvp/src/hyperv.rs | 386 ++++++++++++++------------------- libazureinit-kvp/src/lib.rs | 205 +++++++++++------ 4 files changed, 443 insertions(+), 370 deletions(-) delete mode 100644 doc/libazurekvp.md create mode 100644 libazureinit-kvp/src/azure.rs diff --git a/doc/libazurekvp.md b/doc/libazurekvp.md deleted file mode 100644 index 6fe7bbba..00000000 --- a/doc/libazurekvp.md +++ /dev/null @@ -1,88 +0,0 @@ -# `libazureinit-kvp` - -`libazureinit-kvp` is the storage layer for Hyper-V KVP (Key-Value Pair) -pool files used by Azure guests. - -It defines: -- `KvpStore`: storage trait with explicit read/write/delete semantics. -- `HyperVKvpStore`: production implementation backed by the Hyper-V - binary pool file format. -- `KvpLimits`: exported key/value byte limits for Hyper-V and Azure. - -## Record Format - -The Hyper-V pool file record format is fixed width: -- Key field: 512 bytes -- Value field: 2048 bytes -- Total record size: 2560 bytes - -Records are appended to the file and zero-padded to fixed widths. - -## Store Semantics - -### `write(key, value)` - -- Append-only behavior: each call appends one new record. -- Duplicate keys are allowed in the file. 
-- Returns an error when: - - key is empty - - key byte length exceeds `max_key_size` - - value byte length exceeds `max_value_size` - - an I/O error occurs -- This store never truncates: oversized values cause `write()` to return an error. - -### `read(key)` - -- Scans records and returns the value from the most recent matching key - (last-write-wins). -- Returns `Ok(None)` when the key is missing or file does not exist. - -### `entries()` - -- Returns `HashMap`. -- Deduplicates duplicate keys using last-write-wins, matching `read`. -- This exposes a logical unique-key view even though the file itself is - append-only and may contain multiple records per key. - -### `delete(key)` - -- Rewrites the file without any matching key records. -- Returns `true` if at least one record was removed, else `false`. - -## Truncate Semantics (`truncate_if_stale`) - -`HyperVKvpStore::truncate_if_stale` clears stale records from previous -boots by comparing file `mtime` to the current boot timestamp. - -- If file predates boot: truncate to zero length. -- If file is current: leave unchanged. -- If lock contention occurs (`WouldBlock`): return `Ok(())` and skip. -- Non-contention lock failures are returned as errors. - -## Limits and Azure Compatibility - -`KvpLimits` is exported so callers (including diagnostics layers) can -enforce and reuse exact bounds. - -- `KvpLimits::hyperv()` - - `max_key_size = 512` - - `max_value_size = 2048` -- `KvpLimits::azure()` - - `max_key_size = 512` - - `max_value_size = 1022` - -Why Azure limit is lower for values: -- Hyper-V record format allows 2048-byte values. -- On Azure, host-side consumers truncate values beyond 1022 bytes, so - `KvpLimits::azure()` caps at 1022. Layers above this crate (e.g. - diagnostics/tracing) handle chunking of larger payloads. - -## Record Count Behavior - -There is no explicit record-count cap in this storage layer. 
-The file grows with each append until external constraints (disk space, -retention policy, or caller behavior) are applied. - -## References - -- [Hyper-V Data Exchange Service (KVP)](https://learn.microsoft.com/en-us/virtualization/hyper-v-on-windows/reference/integration-services#hyper-v-data-exchange-service-kvp) \ No newline at end of file diff --git a/libazureinit-kvp/src/azure.rs b/libazureinit-kvp/src/azure.rs new file mode 100644 index 00000000..7c35c9c9 --- /dev/null +++ b/libazureinit-kvp/src/azure.rs @@ -0,0 +1,134 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +//! Azure-specific KVP store. +//! +//! Wraps [`HyperVKvpStore`] with the stricter value-size limit imposed +//! by the Azure host (1,022 bytes). All other behavior — record +//! format, file locking, append-only writes — is inherited from the +//! underlying Hyper-V pool file implementation. + +use std::collections::HashMap; +use std::path::{Path, PathBuf}; + +use crate::hyperv::HyperVKvpStore; +use crate::{KvpError, KvpStore}; + +/// Azure host-side value limit (values beyond this are truncated). +const AZURE_MAX_VALUE_BYTES: usize = 1022; + +/// Azure KVP store backed by a Hyper-V pool file. +/// +/// Identical to [`HyperVKvpStore`] except that +/// [`MAX_VALUE_SIZE`](KvpStore::MAX_VALUE_SIZE) is set to 1,022 bytes, +/// matching the Azure host's truncation behavior. +#[derive(Clone, Debug)] +pub struct AzureKvpStore { + inner: HyperVKvpStore, +} + +impl AzureKvpStore { + /// Create a new Azure KVP store backed by the pool file at `path`. + /// + /// When `truncate_on_stale` is `true` the constructor checks + /// whether the pool file predates the current boot and, if so, + /// truncates it before returning. + pub fn new( + path: impl Into, + truncate_on_stale: bool, + ) -> Result { + Ok(Self { + inner: HyperVKvpStore::new(path, truncate_on_stale)?, + }) + } + + /// Return a reference to the pool file path. 
+    pub fn path(&self) -> &Path {
+        self.inner.path()
+    }
+}
+
+impl KvpStore for AzureKvpStore {
+    const MAX_KEY_SIZE: usize = HyperVKvpStore::MAX_KEY_SIZE;
+    const MAX_VALUE_SIZE: usize = AZURE_MAX_VALUE_BYTES;
+
+    fn backend_read(&self, key: &str) -> Result<Option<String>, KvpError> {
+        self.inner.backend_read(key)
+    }
+
+    fn backend_write(&self, key: &str, value: &str) -> Result<(), KvpError> {
+        self.inner.backend_write(key, value)
+    }
+
+    fn entries(&self) -> Result<HashMap<String, String>, KvpError> {
+        self.inner.entries()
+    }
+
+    fn entries_raw(&self) -> Result<Vec<(String, String)>, KvpError> {
+        self.inner.entries_raw()
+    }
+
+    fn delete(&self, key: &str) -> Result<bool, KvpError> {
+        self.inner.delete(key)
+    }
+
+    fn backend_clear(&self) -> Result<(), KvpError> {
+        self.inner.backend_clear()
+    }
+
+    fn is_stale(&self) -> Result<bool, KvpError> {
+        self.inner.is_stale()
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use tempfile::NamedTempFile;
+
+    fn azure_store(path: &Path) -> AzureKvpStore {
+        AzureKvpStore::new(path, false).unwrap()
+    }
+
+    #[test]
+    fn test_azure_rejects_value_over_1022() {
+        let tmp = NamedTempFile::new().unwrap();
+        let store = azure_store(tmp.path());
+
+        let value = "V".repeat(AZURE_MAX_VALUE_BYTES + 1);
+        let err = store.write("k", &value).unwrap_err();
+        assert!(
+            matches!(err, KvpError::ValueTooLarge { ..
}), + "expected ValueTooLarge, got: {err}" + ); + } + + #[test] + fn test_azure_accepts_value_at_1022() { + let tmp = NamedTempFile::new().unwrap(); + let store = azure_store(tmp.path()); + + let value = "V".repeat(AZURE_MAX_VALUE_BYTES); + store.write("k", &value).unwrap(); + assert_eq!(store.read("k").unwrap(), Some(value)); + } + + #[test] + fn test_azure_write_and_read() { + let tmp = NamedTempFile::new().unwrap(); + let store = azure_store(tmp.path()); + + store.write("key", "value").unwrap(); + assert_eq!(store.read("key").unwrap(), Some("value".to_string())); + } + + #[test] + fn test_azure_clear() { + let tmp = NamedTempFile::new().unwrap(); + let store = azure_store(tmp.path()); + + store.write("key", "value").unwrap(); + store.clear().unwrap(); + assert_eq!(store.read("key").unwrap(), None); + } +} diff --git a/libazureinit-kvp/src/hyperv.rs b/libazureinit-kvp/src/hyperv.rs index 4d4a47a3..f2b359f5 100644 --- a/libazureinit-kvp/src/hyperv.rs +++ b/libazureinit-kvp/src/hyperv.rs @@ -1,29 +1,16 @@ // Copyright (c) Microsoft Corporation. // Licensed under the MIT License. -//! Hyper-V-backed [`KvpStore`] implementation. +//! Hyper-V KVP (Key-Value Pair) pool file backend. //! -//! ## Record format -//! - Fixed-size records of [`RECORD_SIZE`] bytes (512-byte key + -//! 2,048-byte value), zero-padded on disk. -//! - No record-count header or explicit record cap in this layer. +//! Hyper-V exposes a Data Exchange Service that lets a guest and its +//! host share key-value pairs through a set of pool files. Each pool +//! file is a flat sequence of fixed-size records (512-byte key + +//! 2,048-byte value, zero-padded). There is no record-count header; +//! the file grows by one record per write. //! -//! ## Behavior summary -//! - **`write`**: append-only; one record appended per call. -//! - **`read`**: last-write-wins for duplicate keys. -//! - **`entries`**: returns a deduplicated `HashMap` with last-write-wins. -//! 
- **`delete`**: rewrites file and removes all records for the key. -//! - **`truncate_if_stale`**: truncates if file predates boot; on lock -//! contention (`WouldBlock`) it returns `Ok(())` and skips. -//! -//! ## Limits -//! Writes validate key/value byte lengths using [`KvpLimits`] and return -//! errors for empty/oversized keys or oversized values. The on-disk -//! format is always 512 + 2,048 bytes; limits only constrain what may -//! be written (for example, Azure's 1,022-byte value limit). -//! -//! Higher layers are responsible for splitting/chunking oversized -//! diagnostics payloads before calling this store. +//! On Azure, the host-side KVP consumer truncates values beyond +//! 1,022 bytes, so Azure guests should use [`AzureKvpStore`](crate::AzureKvpStore). //! //! ## Reference //! - [Hyper-V Data Exchange Service (KVP)](https://learn.microsoft.com/en-us/virtualization/hyper-v-on-windows/reference/integration-services#hyper-v-data-exchange-service-kvp) @@ -38,27 +25,13 @@ use std::time::{Duration, SystemTime, UNIX_EPOCH}; use fs2::FileExt; use sysinfo::System; -use crate::{ - KvpLimits, KvpStore, HYPERV_MAX_KEY_BYTES, HYPERV_MAX_VALUE_BYTES, -}; - -/// DMI chassis asset tag used to identify Azure VMs. -const AZURE_CHASSIS_ASSET_TAG: &str = "7783-7084-3265-9085-8269-3286-77"; -const AZURE_CHASSIS_ASSET_TAG_PATH: &str = - "/sys/class/dmi/id/chassis_asset_tag"; - -fn is_azure_vm(tag_path: Option<&str>) -> bool { - let path = tag_path.unwrap_or(AZURE_CHASSIS_ASSET_TAG_PATH); - std::fs::read_to_string(path) - .map(|s| s.trim() == AZURE_CHASSIS_ASSET_TAG) - .unwrap_or(false) -} +use crate::{KvpError, KvpStore}; /// Key field width in the on-disk record format (bytes). -const HV_KVP_EXCHANGE_MAX_KEY_SIZE: usize = HYPERV_MAX_KEY_BYTES; +const HV_KVP_EXCHANGE_MAX_KEY_SIZE: usize = 512; /// Value field width in the on-disk record format (bytes). 
-const HV_KVP_EXCHANGE_MAX_VALUE_SIZE: usize = HYPERV_MAX_VALUE_BYTES; +const HV_KVP_EXCHANGE_MAX_VALUE_SIZE: usize = 2048; /// Total size of one on-disk record (key + value). const RECORD_SIZE: usize = @@ -69,58 +42,26 @@ const RECORD_SIZE: usize = /// Reads and writes the binary Hyper-V KVP format: fixed-size records /// of [`RECORD_SIZE`] bytes (512-byte key + 2,048-byte value) with /// flock-based concurrency control. -/// -/// Constructed via [`HyperVKvpStore::new`] with a file path and a -/// [`KvpLimits`] that determines the maximum allowed key and value -/// byte lengths for writes. #[derive(Clone, Debug)] pub struct HyperVKvpStore { path: PathBuf, - limits: KvpLimits, } impl HyperVKvpStore { - /// Create a store with explicit limits. - /// - /// The file is created on first write if it does not already exist. - /// Use [`HyperVKvpStore::new_autodetect`] to choose limits automatically. - pub fn new(path: impl Into, limits: KvpLimits) -> Self { - Self { - path: path.into(), - limits, - } - } - - /// Create a store with limits chosen from host platform detection. + /// Create a new store backed by the pool file at `path`. /// - /// If the Azure DMI asset tag is present, uses [`KvpLimits::azure`]. - /// Otherwise, uses [`KvpLimits::hyperv`]. - pub fn new_autodetect(path: impl Into) -> Self { - let limits = if is_azure_vm(None) { - KvpLimits::azure() - } else { - KvpLimits::hyperv() - }; - Self { - path: path.into(), - limits, - } - } - - #[cfg(test)] - fn new_autodetect_with_tag_path( + /// When `truncate_on_stale` is `true` the constructor checks + /// whether the pool file predates the current boot and, if so, + /// truncates it before returning. 
+ pub fn new( path: impl Into, - tag_path: &str, - ) -> Self { - let limits = if is_azure_vm(Some(tag_path)) { - KvpLimits::azure() - } else { - KvpLimits::hyperv() - }; - Self { - path: path.into(), - limits, + truncate_on_stale: bool, + ) -> Result { + let store = Self { path: path.into() }; + if truncate_on_stale && store.pool_is_stale()? { + store.truncate_pool()?; } + Ok(store) } /// Return a reference to the pool file path. @@ -128,49 +69,59 @@ impl HyperVKvpStore { &self.path } - /// Truncate the file when its mtime predates the current boot - /// (stale-data guard). - /// - /// An exclusive `flock` is held while checking metadata and - /// truncating so that concurrent processes don't race on the same - /// check-then-truncate sequence. If the lock cannot be acquired - /// immediately (another client holds it), the call returns `Ok(())` - /// without blocking. - pub fn truncate_if_stale(&self) -> io::Result<()> { - let boot_time = SystemTime::now() + // -- Private helpers ------------------------------------------------ + + fn boot_time() -> Result { + let now = SystemTime::now() .duration_since(UNIX_EPOCH) .map_err(|e| io::Error::other(format!("clock error: {e}")))? - .as_secs() - .saturating_sub(get_uptime().as_secs()) - as i64; + .as_secs(); + Ok(now.saturating_sub(get_uptime().as_secs()) as i64) + } - self.truncate_if_stale_at_boot(boot_time) + /// Check whether the pool file's mtime predates the current boot. + /// + /// Returns `false` if the file does not exist. 
+ fn pool_is_stale(&self) -> Result { + let metadata = match std::fs::metadata(&self.path) { + Ok(m) => m, + Err(ref e) if e.kind() == ErrorKind::NotFound => { + return Ok(false); + } + Err(e) => return Err(e.into()), + }; + let boot = Self::boot_time()?; + Ok(metadata.mtime() < boot) } - fn truncate_if_stale_at_boot(&self, boot_time: i64) -> io::Result<()> { + #[cfg(test)] + fn pool_is_stale_at_boot(&self, boot_time: i64) -> Result { + let metadata = match std::fs::metadata(&self.path) { + Ok(m) => m, + Err(ref e) if e.kind() == ErrorKind::NotFound => { + return Ok(false); + } + Err(e) => return Err(e.into()), + }; + Ok(metadata.mtime() < boot_time) + } + + /// Truncate the pool file to zero length under an exclusive flock. + fn truncate_pool(&self) -> Result<(), KvpError> { let file = match OpenOptions::new().read(true).write(true).open(&self.path) { Ok(f) => f, Err(ref e) if e.kind() == ErrorKind::NotFound => { return Ok(()); } - Err(e) => return Err(e), + Err(e) => return Err(e.into()), }; - if let Err(e) = FileExt::try_lock_exclusive(&file) { - if e.kind() == ErrorKind::WouldBlock { - return Ok(()); - } - return Err(e); - } + FileExt::lock_exclusive(&file).map_err(|e| { + io::Error::other(format!("failed to lock KVP file: {e}")) + })?; - let result = (|| -> io::Result<()> { - let metadata = file.metadata()?; - if metadata.mtime() < boot_time { - file.set_len(0)?; - } - Ok(()) - })(); + let result = file.set_len(0).map_err(KvpError::from); let _ = FileExt::unlock(&file); result @@ -190,27 +141,6 @@ impl HyperVKvpStore { fn open_for_read_write(&self) -> io::Result { OpenOptions::new().read(true).write(true).open(&self.path) } - - fn validate_key_value(&self, key: &str, value: &str) -> io::Result<()> { - if key.is_empty() { - return Err(io::Error::other("KVP key must not be empty")); - } - if key.len() > self.limits.max_key_size { - return Err(io::Error::other(format!( - "KVP key length ({}) exceeds maximum ({})", - key.len(), - self.limits.max_key_size - ))); - 
} - if value.len() > self.limits.max_value_size { - return Err(io::Error::other(format!( - "KVP value length ({}) exceeds maximum ({})", - value.len(), - self.limits.max_value_size - ))); - } - Ok(()) - } } /// Encode a key-value pair into a single fixed-size record. @@ -273,7 +203,7 @@ fn read_all_records(file: &mut File) -> io::Result> { return Ok(Vec::new()); } - if len % RECORD_SIZE != 0 { + if !len.is_multiple_of(RECORD_SIZE) { return Err(io::Error::other(format!( "file size ({len}) is not a multiple of record size ({RECORD_SIZE})" ))); @@ -295,18 +225,14 @@ fn read_all_records(file: &mut File) -> io::Result> { } impl KvpStore for HyperVKvpStore { - fn limits(&self) -> KvpLimits { - self.limits - } + const MAX_KEY_SIZE: usize = HV_KVP_EXCHANGE_MAX_KEY_SIZE; + const MAX_VALUE_SIZE: usize = HV_KVP_EXCHANGE_MAX_VALUE_SIZE; /// Append one fixed-size record to the pool file. /// - /// Validates key and value against the configured [`KvpLimits`], - /// acquires an exclusive flock, writes the record, flushes, and + /// Acquires an exclusive flock, writes the record, flushes, and /// releases the lock. - fn write(&self, key: &str, value: &str) -> io::Result<()> { - self.validate_key_value(key, value)?; - + fn backend_write(&self, key: &str, value: &str) -> Result<(), KvpError> { let mut file = self.open_for_append()?; let record = encode_record(key, value); @@ -322,9 +248,9 @@ impl KvpStore for HyperVKvpStore { if let Err(err) = write_result { let _ = unlock_result; - return Err(err); + return Err(err.into()); } - unlock_result + unlock_result.map_err(KvpError::from) } /// Scan all records and return the value of the last record @@ -332,13 +258,13 @@ impl KvpStore for HyperVKvpStore { /// /// Acquires a shared flock during the scan. Returns `Ok(None)` if /// the pool file does not exist or no record matches. 
- fn read(&self, key: &str) -> io::Result> { + fn backend_read(&self, key: &str) -> Result, KvpError> { let mut file = match self.open_for_read() { Ok(f) => f, Err(ref e) if e.kind() == ErrorKind::NotFound => { return Ok(None); } - Err(e) => return Err(e), + Err(e) => return Err(e.into()), }; FileExt::lock_shared(&file).map_err(|e| { @@ -358,17 +284,16 @@ impl KvpStore for HyperVKvpStore { /// Return all key-value pairs as a deduplicated `HashMap`. /// - /// Duplicate keys are resolved by last-write-wins, matching - /// [`read`](KvpStore::read) semantics. Acquires a shared flock - /// during the scan. Returns an empty map if the pool file does - /// not exist. - fn entries(&self) -> io::Result> { + /// Duplicate keys are resolved by last-write-wins. Acquires a + /// shared flock during the scan. Returns an empty map if the pool + /// file does not exist. + fn entries(&self) -> Result, KvpError> { let mut file = match self.open_for_read() { Ok(f) => f, Err(ref e) if e.kind() == ErrorKind::NotFound => { return Ok(HashMap::new()); } - Err(e) => return Err(e), + Err(e) => return Err(e.into()), }; FileExt::lock_shared(&file).map_err(|e| { @@ -386,25 +311,47 @@ impl KvpStore for HyperVKvpStore { Ok(map) } + /// Return all raw records without deduplication. + /// + /// Acquires a shared flock during the scan. Returns an empty list + /// if the pool file does not exist. + fn entries_raw(&self) -> Result, KvpError> { + let mut file = match self.open_for_read() { + Ok(f) => f, + Err(ref e) if e.kind() == ErrorKind::NotFound => { + return Ok(Vec::new()); + } + Err(e) => return Err(e.into()), + }; + + FileExt::lock_shared(&file).map_err(|e| { + io::Error::other(format!("failed to lock KVP file: {e}")) + })?; + + let records = read_all_records(&mut file); + let _ = FileExt::unlock(&file); + Ok(records?) + } + /// Rewrite the pool file without the record(s) matching `key`. /// /// Acquires an exclusive flock for the duration. 
Returns `true` if /// at least one record was removed, `false` if the key was not /// found. Returns `Ok(false)` if the pool file does not exist. - fn delete(&self, key: &str) -> io::Result { + fn delete(&self, key: &str) -> Result { let mut file = match self.open_for_read_write() { Ok(f) => f, Err(ref e) if e.kind() == ErrorKind::NotFound => { return Ok(false); } - Err(e) => return Err(e), + Err(e) => return Err(e.into()), }; FileExt::lock_exclusive(&file).map_err(|e| { io::Error::other(format!("failed to lock KVP file: {e}")) })?; - let result = (|| -> io::Result { + let result = (|| -> Result { let records = read_all_records(&mut file)?; let original_count = records.len(); let kept: Vec<_> = @@ -427,6 +374,16 @@ impl KvpStore for HyperVKvpStore { let _ = FileExt::unlock(&file); result } + + /// Truncate the pool file to zero length, removing all records. + fn backend_clear(&self) -> Result<(), KvpError> { + self.truncate_pool() + } + + /// Whether the pool file's mtime predates the current boot. + fn is_stale(&self) -> Result { + self.pool_is_stale() + } } fn get_uptime() -> Duration { @@ -442,39 +399,7 @@ mod tests { use tempfile::NamedTempFile; fn hyperv_store(path: &Path) -> HyperVKvpStore { - HyperVKvpStore::new(path, KvpLimits::hyperv()) - } - - fn azure_store(path: &Path) -> HyperVKvpStore { - HyperVKvpStore::new(path, KvpLimits::azure()) - } - - #[test] - fn test_autodetect_uses_azure_limits_on_azure_host() { - let tag_file = NamedTempFile::new().unwrap(); - let pool_file = NamedTempFile::new().unwrap(); - // DMI files on Linux have a trailing newline. 
- std::fs::write(tag_file.path(), format!("{AZURE_CHASSIS_ASSET_TAG}\n")) - .unwrap(); - - let store = HyperVKvpStore::new_autodetect_with_tag_path( - pool_file.path(), - tag_file.path().to_str().unwrap(), - ); - assert_eq!(store.limits(), KvpLimits::azure()); - } - - #[test] - fn test_autodetect_uses_hyperv_limits_on_bare_hyperv() { - let tag_file = NamedTempFile::new().unwrap(); - let pool_file = NamedTempFile::new().unwrap(); - std::fs::write(tag_file.path(), "bare-hyperv-tag").unwrap(); - - let store = HyperVKvpStore::new_autodetect_with_tag_path( - pool_file.path(), - tag_file.path().to_str().unwrap(), - ); - assert_eq!(store.limits(), KvpLimits::hyperv()); + HyperVKvpStore::new(path, false).unwrap() } #[test] @@ -498,47 +423,46 @@ mod tests { let err = store.write("", "value").unwrap_err(); assert!( - err.to_string().contains("empty"), - "expected empty-key error, got: {err}" + matches!(err, KvpError::EmptyKey), + "expected EmptyKey, got: {err}" ); } #[test] - fn test_write_rejects_oversized_key() { + fn test_write_rejects_null_in_key() { let tmp = NamedTempFile::new().unwrap(); let store = hyperv_store(tmp.path()); - let key = "K".repeat(HV_KVP_EXCHANGE_MAX_KEY_SIZE + 1); - let err = store.write(&key, "v").unwrap_err(); + let err = store.write("bad\0key", "value").unwrap_err(); assert!( - err.to_string().contains("key length"), - "expected key-length error, got: {err}" + matches!(err, KvpError::KeyContainsNull), + "expected KeyContainsNull, got: {err}" ); } #[test] - fn test_write_rejects_oversized_value_hyperv() { + fn test_write_rejects_oversized_key() { let tmp = NamedTempFile::new().unwrap(); let store = hyperv_store(tmp.path()); - let value = "V".repeat(HV_KVP_EXCHANGE_MAX_VALUE_SIZE + 1); - let err = store.write("k", &value).unwrap_err(); + let key = "K".repeat(HV_KVP_EXCHANGE_MAX_KEY_SIZE + 1); + let err = store.write(&key, "v").unwrap_err(); assert!( - err.to_string().contains("value length"), - "expected value-length error, got: {err}" + 
matches!(err, KvpError::KeyTooLarge { .. }), + "expected KeyTooLarge, got: {err}" ); } #[test] - fn test_azure_limits_reject_long_value() { + fn test_write_rejects_oversized_value_hyperv() { let tmp = NamedTempFile::new().unwrap(); - let store = azure_store(tmp.path()); + let store = hyperv_store(tmp.path()); - let value = "V".repeat(1023); + let value = "V".repeat(HV_KVP_EXCHANGE_MAX_VALUE_SIZE + 1); let err = store.write("k", &value).unwrap_err(); assert!( - err.to_string().contains("value length"), - "expected value-length error with azure limits, got: {err}" + matches!(err, KvpError::ValueTooLarge { .. }), + "expected ValueTooLarge, got: {err}" ); } @@ -583,47 +507,71 @@ mod tests { } #[test] - fn test_truncate_if_stale_at_boot_truncates_when_file_is_older_than_boot() { + fn test_entries_raw_preserves_duplicates() { + let tmp = NamedTempFile::new().unwrap(); + let store = hyperv_store(tmp.path()); + + store.write("key", "v1").unwrap(); + store.write("key", "v2").unwrap(); + store.write("other", "v3").unwrap(); + + let raw = store.entries_raw().unwrap(); + assert_eq!(raw.len(), 3); + assert_eq!(raw[0], ("key".to_string(), "v1".to_string())); + assert_eq!(raw[1], ("key".to_string(), "v2".to_string())); + assert_eq!(raw[2], ("other".to_string(), "v3".to_string())); + } + + #[test] + fn test_clear_empties_store() { let tmp = NamedTempFile::new().unwrap(); let store = hyperv_store(tmp.path()); store.write("key", "value").unwrap(); assert!(tmp.path().metadata().unwrap().len() > 0); - store.truncate_if_stale_at_boot(i64::MAX).unwrap(); + store.clear().unwrap(); assert_eq!(tmp.path().metadata().unwrap().len(), 0); + assert_eq!(store.read("key").unwrap(), None); } #[test] - fn test_truncate_if_stale_at_boot_keeps_file_when_newer_than_boot() { + fn test_is_stale_false_for_fresh_file() { let tmp = NamedTempFile::new().unwrap(); let store = hyperv_store(tmp.path()); store.write("key", "value").unwrap(); - let len_before = tmp.path().metadata().unwrap().len(); + 
assert!(!store.is_stale().unwrap()); + } - store.truncate_if_stale_at_boot(0).unwrap(); - assert_eq!(tmp.path().metadata().unwrap().len(), len_before); + #[test] + fn test_is_stale_false_when_file_missing() { + let store = hyperv_store(Path::new("/tmp/nonexistent-kvp-pool")); + assert!(!store.is_stale().unwrap()); } #[test] - fn test_truncate_if_stale_keeps_fresh_file() { + fn test_pool_is_stale_at_boot_detects_old_file() { let tmp = NamedTempFile::new().unwrap(); let store = hyperv_store(tmp.path()); store.write("key", "value").unwrap(); - let len_before = tmp.path().metadata().unwrap().len(); - assert!(len_before > 0); + assert!(store.pool_is_stale_at_boot(i64::MAX).unwrap()); + } + + #[test] + fn test_pool_is_stale_at_boot_keeps_new_file() { + let tmp = NamedTempFile::new().unwrap(); + let store = hyperv_store(tmp.path()); - store.truncate_if_stale().unwrap(); - assert_eq!(tmp.path().metadata().unwrap().len(), len_before); - assert_eq!(store.read("key").unwrap(), Some("value".to_string())); + store.write("key", "value").unwrap(); + assert!(!store.pool_is_stale_at_boot(0).unwrap()); } #[test] - fn test_truncate_if_stale_ok_when_file_missing() { + fn test_clear_ok_when_file_missing() { let store = hyperv_store(Path::new("/tmp/nonexistent-kvp-pool")); - store.truncate_if_stale().unwrap(); + store.clear().unwrap(); } #[test] @@ -655,7 +603,7 @@ mod tests { .map(|tid| { let p = path.clone(); std::thread::spawn(move || { - let store = HyperVKvpStore::new(&p, KvpLimits::hyperv()); + let store = HyperVKvpStore::new(&p, false).unwrap(); for i in 0..iterations { let key = format!("thread-{tid}-iter-{i}"); let value = format!("value-{tid}-{i}"); @@ -669,7 +617,7 @@ mod tests { h.join().expect("thread panicked"); } - let store = HyperVKvpStore::new(&path, KvpLimits::hyperv()); + let store = HyperVKvpStore::new(&path, false).unwrap(); let entries = store.entries().unwrap(); assert_eq!(entries.len(), num_threads * iterations); } diff --git a/libazureinit-kvp/src/lib.rs 
b/libazureinit-kvp/src/lib.rs index 89fef04f..a248b8ea 100644 --- a/libazureinit-kvp/src/lib.rs +++ b/libazureinit-kvp/src/lib.rs @@ -5,92 +5,171 @@ //! implementation for KVP pool files. //! //! - [`KvpStore`]: storage interface used by higher layers. -//! - [`HyperVKvpStore`]: production implementation. -//! - [`KvpLimits`]: exported Hyper-V and Azure byte limits. +//! - [`HyperVKvpStore`]: Hyper-V pool file implementation. +//! - [`AzureKvpStore`]: Azure-specific wrapper with stricter value limits. use std::collections::HashMap; +use std::fmt; use std::io; +pub mod azure; pub mod hyperv; +/// Errors returned by [`KvpStore`] operations. +#[derive(Debug)] +pub enum KvpError { + /// The key was empty. + EmptyKey, + /// The key exceeds the store's maximum key size. + KeyTooLarge { max: usize, actual: usize }, + /// The value exceeds the store's maximum value size. + ValueTooLarge { max: usize, actual: usize }, + /// The key contains a null byte, which is incompatible with the + /// on-disk format (null-padded fixed-width fields). + KeyContainsNull, + /// An underlying I/O error. 
+ Io(io::Error), +} + +impl fmt::Display for KvpError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Self::EmptyKey => write!(f, "KVP key must not be empty"), + Self::KeyTooLarge { max, actual } => { + write!(f, "KVP key length ({actual}) exceeds maximum ({max})") + } + Self::ValueTooLarge { max, actual } => { + write!(f, "KVP value length ({actual}) exceeds maximum ({max})") + } + Self::KeyContainsNull => { + write!(f, "KVP key must not contain null bytes") + } + Self::Io(e) => write!(f, "{e}"), + } + } +} + +impl std::error::Error for KvpError { + fn source(&self) -> Option<&(dyn std::error::Error + 'static)> { + match self { + Self::Io(e) => Some(e), + _ => None, + } + } +} + +impl From for KvpError { + fn from(err: io::Error) -> Self { + Self::Io(err) + } +} + +pub use azure::AzureKvpStore; pub use hyperv::HyperVKvpStore; -/// Hyper-V key limit in bytes (policy/default preset). -pub const HYPERV_MAX_KEY_BYTES: usize = 512; -/// Hyper-V value limit in bytes (policy/default preset). -pub const HYPERV_MAX_VALUE_BYTES: usize = 2048; -/// Azure key limit in bytes (policy/default preset). -pub const AZURE_MAX_KEY_BYTES: usize = 512; -/// Azure value limit in bytes, matching Azure host behavior. -pub const AZURE_MAX_VALUE_BYTES: usize = 1022; - -/// Storage abstraction for KVP backends. -/// -/// Semantics: -/// - `write`: stores one key/value or returns validation/I/O error. -/// - `read`: returns the most recent value for a key (last-write-wins). -/// - `entries`: returns deduplicated key/value pairs as `HashMap`. -/// - `delete`: removes all records for a key and reports whether any were removed. -/// - `limits`: returns the [`KvpLimits`] that govern maximum key/value -/// sizes for this store. +/// Key-value store that supports Hyper-V KVP semantics while being +/// generic enough for non-file-backed in-memory implementations +/// (e.g. tests). 
pub trait KvpStore: Send + Sync { - /// The key and value byte-size limits for this store. - /// - /// Consumers (e.g. diagnostics, tracing layers) should call this - /// instead of hardcoding size constants, so the limits stay correct - /// regardless of the underlying implementation. - fn limits(&self) -> KvpLimits; + /// Maximum key size in bytes for this store. + const MAX_KEY_SIZE: usize; + /// Maximum value size in bytes for this store. + const MAX_VALUE_SIZE: usize; - /// Write a key-value pair into the store. - /// - /// Returns an error if: - /// - The key is empty. - /// - The key exceeds the configured maximum key size. - /// - The value exceeds the configured maximum value size. - /// - An I/O error occurs during the write. - fn write(&self, key: &str, value: &str) -> io::Result<()>; + // -- Backend callouts for read/write (required) --------------------- - /// Read the value for a given key, returning `None` if absent. - /// - /// When multiple records exist for the same key (append-only - /// storage), the value from the most recent record is returned - /// (last-write-wins). - fn read(&self, key: &str) -> io::Result>; + /// Backend-specific read implementation. + fn backend_read(&self, key: &str) -> Result, KvpError>; + + /// Backend-specific write implementation. + fn backend_write(&self, key: &str, value: &str) -> Result<(), KvpError>; + + // -- Required methods (no shared validation wrapper) --------------- + + /// Return all key-value pairs, deduplicated with last-write-wins. + fn entries(&self) -> Result, KvpError>; - /// Return all key-value pairs currently in the store. + /// Return all raw key-value records without deduplication. /// - /// Keys are deduplicated using last-write-wins semantics, matching - /// the behavior of [`read`](KvpStore::read). - fn entries(&self) -> io::Result>; + /// Useful for testing or diagnostic dump commands where the full + /// record history is needed. 
+ fn entries_raw(&self) -> Result, KvpError>; /// Remove all records matching `key`. /// /// Returns `true` if at least one record was removed, `false` if /// the key was not found. - fn delete(&self, key: &str) -> io::Result; -} + fn delete(&self, key: &str) -> Result; -/// Key/value byte limits for write validation. -#[derive(Clone, Copy, Debug, PartialEq, Eq)] -pub struct KvpLimits { - pub max_key_size: usize, - pub max_value_size: usize, -} + /// Backend-specific clear implementation (empty the store). + fn backend_clear(&self) -> Result<(), KvpError>; + + // -- Public API with shared validation ---------------------------- + + /// Empty the store, removing all records. + fn clear(&self) -> Result<(), KvpError> { + self.backend_clear() + } + + /// Write a key-value pair into the store. + /// + /// # Errors + /// + /// Returns [`KvpError::EmptyKey`] if the key is empty, + /// [`KvpError::KeyContainsNull`] if the key contains a null byte, + /// [`KvpError::KeyTooLarge`] if the key exceeds [`Self::MAX_KEY_SIZE`], + /// [`KvpError::ValueTooLarge`] if the value exceeds + /// [`Self::MAX_VALUE_SIZE`], or [`KvpError::Io`] on I/O failure. + fn write(&self, key: &str, value: &str) -> Result<(), KvpError> { + Self::validate_key(key)?; + Self::validate_value(value)?; + self.backend_write(key, value) + } -impl KvpLimits { - /// Hyper-V limits (512-byte key, 2,048-byte value). - pub const fn hyperv() -> Self { - Self { - max_key_size: HYPERV_MAX_KEY_BYTES, - max_value_size: HYPERV_MAX_VALUE_BYTES, + /// Read the value for a given key, returning `None` if absent. + /// + /// When multiple records share the same key, the most recent value + /// is returned (last-write-wins). + fn read(&self, key: &str) -> Result, KvpError> { + Self::validate_key(key)?; + self.backend_read(key) + } + + /// Whether the store's data is stale (e.g. predates the current + /// boot). Defaults to `false`; file-backed stores can override. 
+ fn is_stale(&self) -> Result { + Ok(false) + } + + // -- Validation helpers ------------------------------------------- + + /// Validate a key against common constraints. + /// + /// Keys must be non-empty, must not contain null bytes (the on-disk + /// format uses null-padding), and must not exceed + /// [`Self::MAX_KEY_SIZE`] bytes. + fn validate_key(key: &str) -> Result<(), KvpError> { + if key.is_empty() { + return Err(KvpError::EmptyKey); + } + if key.as_bytes().contains(&0) { + return Err(KvpError::KeyContainsNull); + } + let actual = key.len(); + let max = Self::MAX_KEY_SIZE; + if actual > max { + return Err(KvpError::KeyTooLarge { max, actual }); } + Ok(()) } - /// Azure limits (512-byte key, 1,022-byte value). - pub const fn azure() -> Self { - Self { - max_key_size: AZURE_MAX_KEY_BYTES, - max_value_size: AZURE_MAX_VALUE_BYTES, + /// Validate a value against the store's size limit. + fn validate_value(value: &str) -> Result<(), KvpError> { + let actual = value.len(); + let max = Self::MAX_VALUE_SIZE; + if actual > max { + return Err(KvpError::ValueTooLarge { max, actual }); } + Ok(()) } } From 498d13604dc8db24cd9d836902a8d8bb7900414a Mon Sep 17 00:00:00 2001 From: Peyton Date: Thu, 19 Mar 2026 11:56:51 -0700 Subject: [PATCH 6/9] Flatten implementation such that there is one trait and one implementation; add PoolMode to handle size limits --- libazureinit-kvp/src/azure.rs | 134 ------- libazureinit-kvp/src/hyperv.rs | 624 ------------------------------- libazureinit-kvp/src/kvp_pool.rs | 598 +++++++++++++++++++++++++++++ libazureinit-kvp/src/lib.rs | 109 +++--- 4 files changed, 642 insertions(+), 823 deletions(-) delete mode 100644 libazureinit-kvp/src/azure.rs delete mode 100644 libazureinit-kvp/src/hyperv.rs create mode 100644 libazureinit-kvp/src/kvp_pool.rs diff --git a/libazureinit-kvp/src/azure.rs b/libazureinit-kvp/src/azure.rs deleted file mode 100644 index 7c35c9c9..00000000 --- a/libazureinit-kvp/src/azure.rs +++ /dev/null @@ -1,134 +0,0 @@ -// 
Copyright (c) Microsoft Corporation. -// Licensed under the MIT License. - -//! Azure-specific KVP store. -//! -//! Wraps [`HyperVKvpStore`] with the stricter value-size limit imposed -//! by the Azure host (1,022 bytes). All other behavior — record -//! format, file locking, append-only writes — is inherited from the -//! underlying Hyper-V pool file implementation. - -use std::collections::HashMap; -use std::path::{Path, PathBuf}; - -use crate::hyperv::HyperVKvpStore; -use crate::{KvpError, KvpStore}; - -/// Azure host-side value limit (values beyond this are truncated). -const AZURE_MAX_VALUE_BYTES: usize = 1022; - -/// Azure KVP store backed by a Hyper-V pool file. -/// -/// Identical to [`HyperVKvpStore`] except that -/// [`MAX_VALUE_SIZE`](KvpStore::MAX_VALUE_SIZE) is set to 1,022 bytes, -/// matching the Azure host's truncation behavior. -#[derive(Clone, Debug)] -pub struct AzureKvpStore { - inner: HyperVKvpStore, -} - -impl AzureKvpStore { - /// Create a new Azure KVP store backed by the pool file at `path`. - /// - /// When `truncate_on_stale` is `true` the constructor checks - /// whether the pool file predates the current boot and, if so, - /// truncates it before returning. - pub fn new( - path: impl Into, - truncate_on_stale: bool, - ) -> Result { - Ok(Self { - inner: HyperVKvpStore::new(path, truncate_on_stale)?, - }) - } - - /// Return a reference to the pool file path. 
- pub fn path(&self) -> &Path { - self.inner.path() - } -} - -impl KvpStore for AzureKvpStore { - const MAX_KEY_SIZE: usize = HyperVKvpStore::MAX_KEY_SIZE; - const MAX_VALUE_SIZE: usize = AZURE_MAX_VALUE_BYTES; - - fn backend_read(&self, key: &str) -> Result, KvpError> { - self.inner.backend_read(key) - } - - fn backend_write(&self, key: &str, value: &str) -> Result<(), KvpError> { - self.inner.backend_write(key, value) - } - - fn entries(&self) -> Result, KvpError> { - self.inner.entries() - } - - fn entries_raw(&self) -> Result, KvpError> { - self.inner.entries_raw() - } - - fn delete(&self, key: &str) -> Result { - self.inner.delete(key) - } - - fn backend_clear(&self) -> Result<(), KvpError> { - self.inner.backend_clear() - } - - fn is_stale(&self) -> Result { - self.inner.is_stale() - } -} - -#[cfg(test)] -mod tests { - use super::*; - use tempfile::NamedTempFile; - - fn azure_store(path: &Path) -> AzureKvpStore { - AzureKvpStore::new(path, false).unwrap() - } - - #[test] - fn test_azure_rejects_value_over_1022() { - let tmp = NamedTempFile::new().unwrap(); - let store = azure_store(tmp.path()); - - let value = "V".repeat(AZURE_MAX_VALUE_BYTES + 1); - let err = store.write("k", &value).unwrap_err(); - assert!( - matches!(err, KvpError::ValueTooLarge { .. 
}), - "expected ValueTooLarge, got: {err}" - ); - } - - #[test] - fn test_azure_accepts_value_at_1022() { - let tmp = NamedTempFile::new().unwrap(); - let store = azure_store(tmp.path()); - - let value = "V".repeat(AZURE_MAX_VALUE_BYTES); - store.write("k", &value).unwrap(); - assert_eq!(store.read("k").unwrap(), Some(value)); - } - - #[test] - fn test_azure_write_and_read() { - let tmp = NamedTempFile::new().unwrap(); - let store = azure_store(tmp.path()); - - store.write("key", "value").unwrap(); - assert_eq!(store.read("key").unwrap(), Some("value".to_string())); - } - - #[test] - fn test_azure_clear() { - let tmp = NamedTempFile::new().unwrap(); - let store = azure_store(tmp.path()); - - store.write("key", "value").unwrap(); - store.clear().unwrap(); - assert_eq!(store.read("key").unwrap(), None); - } -} diff --git a/libazureinit-kvp/src/hyperv.rs b/libazureinit-kvp/src/hyperv.rs deleted file mode 100644 index f2b359f5..00000000 --- a/libazureinit-kvp/src/hyperv.rs +++ /dev/null @@ -1,624 +0,0 @@ -// Copyright (c) Microsoft Corporation. -// Licensed under the MIT License. - -//! Hyper-V KVP (Key-Value Pair) pool file backend. -//! -//! Hyper-V exposes a Data Exchange Service that lets a guest and its -//! host share key-value pairs through a set of pool files. Each pool -//! file is a flat sequence of fixed-size records (512-byte key + -//! 2,048-byte value, zero-padded). There is no record-count header; -//! the file grows by one record per write. -//! -//! On Azure, the host-side KVP consumer truncates values beyond -//! 1,022 bytes, so Azure guests should use [`AzureKvpStore`](crate::AzureKvpStore). -//! -//! ## Reference -//! 
- [Hyper-V Data Exchange Service (KVP)](https://learn.microsoft.com/en-us/virtualization/hyper-v-on-windows/reference/integration-services#hyper-v-data-exchange-service-kvp) - -use std::collections::HashMap; -use std::fs::{File, OpenOptions}; -use std::io::{self, ErrorKind, Read, Seek, Write}; -use std::os::unix::fs::MetadataExt; -use std::path::{Path, PathBuf}; -use std::time::{Duration, SystemTime, UNIX_EPOCH}; - -use fs2::FileExt; -use sysinfo::System; - -use crate::{KvpError, KvpStore}; - -/// Key field width in the on-disk record format (bytes). -const HV_KVP_EXCHANGE_MAX_KEY_SIZE: usize = 512; - -/// Value field width in the on-disk record format (bytes). -const HV_KVP_EXCHANGE_MAX_VALUE_SIZE: usize = 2048; - -/// Total size of one on-disk record (key + value). -const RECORD_SIZE: usize = - HV_KVP_EXCHANGE_MAX_KEY_SIZE + HV_KVP_EXCHANGE_MAX_VALUE_SIZE; - -/// Hyper-V KVP pool file store. -/// -/// Reads and writes the binary Hyper-V KVP format: fixed-size records -/// of [`RECORD_SIZE`] bytes (512-byte key + 2,048-byte value) with -/// flock-based concurrency control. -#[derive(Clone, Debug)] -pub struct HyperVKvpStore { - path: PathBuf, -} - -impl HyperVKvpStore { - /// Create a new store backed by the pool file at `path`. - /// - /// When `truncate_on_stale` is `true` the constructor checks - /// whether the pool file predates the current boot and, if so, - /// truncates it before returning. - pub fn new( - path: impl Into, - truncate_on_stale: bool, - ) -> Result { - let store = Self { path: path.into() }; - if truncate_on_stale && store.pool_is_stale()? { - store.truncate_pool()?; - } - Ok(store) - } - - /// Return a reference to the pool file path. - pub fn path(&self) -> &Path { - &self.path - } - - // -- Private helpers ------------------------------------------------ - - fn boot_time() -> Result { - let now = SystemTime::now() - .duration_since(UNIX_EPOCH) - .map_err(|e| io::Error::other(format!("clock error: {e}")))? 
- .as_secs(); - Ok(now.saturating_sub(get_uptime().as_secs()) as i64) - } - - /// Check whether the pool file's mtime predates the current boot. - /// - /// Returns `false` if the file does not exist. - fn pool_is_stale(&self) -> Result { - let metadata = match std::fs::metadata(&self.path) { - Ok(m) => m, - Err(ref e) if e.kind() == ErrorKind::NotFound => { - return Ok(false); - } - Err(e) => return Err(e.into()), - }; - let boot = Self::boot_time()?; - Ok(metadata.mtime() < boot) - } - - #[cfg(test)] - fn pool_is_stale_at_boot(&self, boot_time: i64) -> Result { - let metadata = match std::fs::metadata(&self.path) { - Ok(m) => m, - Err(ref e) if e.kind() == ErrorKind::NotFound => { - return Ok(false); - } - Err(e) => return Err(e.into()), - }; - Ok(metadata.mtime() < boot_time) - } - - /// Truncate the pool file to zero length under an exclusive flock. - fn truncate_pool(&self) -> Result<(), KvpError> { - let file = - match OpenOptions::new().read(true).write(true).open(&self.path) { - Ok(f) => f, - Err(ref e) if e.kind() == ErrorKind::NotFound => { - return Ok(()); - } - Err(e) => return Err(e.into()), - }; - - FileExt::lock_exclusive(&file).map_err(|e| { - io::Error::other(format!("failed to lock KVP file: {e}")) - })?; - - let result = file.set_len(0).map_err(KvpError::from); - - let _ = FileExt::unlock(&file); - result - } - - fn open_for_append(&self) -> io::Result { - OpenOptions::new() - .append(true) - .create(true) - .open(&self.path) - } - - fn open_for_read(&self) -> io::Result { - OpenOptions::new().read(true).open(&self.path) - } - - fn open_for_read_write(&self) -> io::Result { - OpenOptions::new().read(true).write(true).open(&self.path) - } -} - -/// Encode a key-value pair into a single fixed-size record. -/// -/// The key is zero-padded to [`HV_KVP_EXCHANGE_MAX_KEY_SIZE`] bytes -/// and the value is zero-padded to [`HV_KVP_EXCHANGE_MAX_VALUE_SIZE`] -/// bytes. 
The caller is responsible for ensuring the key and value do -/// not exceed the on-disk field widths; if they do, only the first N -/// bytes are written (no error is raised at this level -- validation -/// happens in [`HyperVKvpStore::write`]). -pub(crate) fn encode_record(key: &str, value: &str) -> Vec { - let mut buf = vec![0u8; RECORD_SIZE]; - - let key_bytes = key.as_bytes(); - let key_len = key_bytes.len().min(HV_KVP_EXCHANGE_MAX_KEY_SIZE); - buf[..key_len].copy_from_slice(&key_bytes[..key_len]); - - let val_bytes = value.as_bytes(); - let val_len = val_bytes.len().min(HV_KVP_EXCHANGE_MAX_VALUE_SIZE); - buf[HV_KVP_EXCHANGE_MAX_KEY_SIZE..HV_KVP_EXCHANGE_MAX_KEY_SIZE + val_len] - .copy_from_slice(&val_bytes[..val_len]); - - buf -} - -/// Decode a fixed-size record into its key and value strings. -/// -/// Trailing null bytes are stripped from both fields. Returns an error -/// if `data` is not exactly [`RECORD_SIZE`] bytes or if either field -/// contains invalid UTF-8. -pub(crate) fn decode_record(data: &[u8]) -> io::Result<(String, String)> { - if data.len() != RECORD_SIZE { - return Err(io::Error::other(format!( - "record size mismatch: expected {RECORD_SIZE}, got {}", - data.len() - ))); - } - - let (key_bytes, value_bytes) = data.split_at(HV_KVP_EXCHANGE_MAX_KEY_SIZE); - - let key = std::str::from_utf8(key_bytes) - .map_err(|e| io::Error::new(ErrorKind::InvalidData, e))? - .trim_end_matches('\0') - .to_string(); - - let value = std::str::from_utf8(value_bytes) - .map_err(|e| io::Error::new(ErrorKind::InvalidData, e))? - .trim_end_matches('\0') - .to_string(); - - Ok((key, value)) -} - -/// Read all records from a file that is already open and locked. 
-fn read_all_records(file: &mut File) -> io::Result> { - let metadata = file.metadata()?; - let len = metadata.len() as usize; - - if len == 0 { - return Ok(Vec::new()); - } - - if !len.is_multiple_of(RECORD_SIZE) { - return Err(io::Error::other(format!( - "file size ({len}) is not a multiple of record size ({RECORD_SIZE})" - ))); - } - - // Ensure we start reading from the beginning of the file. - file.seek(io::SeekFrom::Start(0))?; - - let record_count = len / RECORD_SIZE; - let mut records = Vec::with_capacity(record_count); - let mut buf = [0u8; RECORD_SIZE]; - - for _ in 0..record_count { - file.read_exact(&mut buf)?; - records.push(decode_record(&buf)?); - } - - Ok(records) -} - -impl KvpStore for HyperVKvpStore { - const MAX_KEY_SIZE: usize = HV_KVP_EXCHANGE_MAX_KEY_SIZE; - const MAX_VALUE_SIZE: usize = HV_KVP_EXCHANGE_MAX_VALUE_SIZE; - - /// Append one fixed-size record to the pool file. - /// - /// Acquires an exclusive flock, writes the record, flushes, and - /// releases the lock. - fn backend_write(&self, key: &str, value: &str) -> Result<(), KvpError> { - let mut file = self.open_for_append()?; - let record = encode_record(key, value); - - FileExt::lock_exclusive(&file).map_err(|e| { - io::Error::other(format!("failed to lock KVP file: {e}")) - })?; - - let write_result = file.write_all(&record).and_then(|_| file.flush()); - - let unlock_result = FileExt::unlock(&file).map_err(|e| { - io::Error::other(format!("failed to unlock KVP file: {e}")) - }); - - if let Err(err) = write_result { - let _ = unlock_result; - return Err(err.into()); - } - unlock_result.map_err(KvpError::from) - } - - /// Scan all records and return the value of the last record - /// matching `key` (last-write-wins). - /// - /// Acquires a shared flock during the scan. Returns `Ok(None)` if - /// the pool file does not exist or no record matches. 
- fn backend_read(&self, key: &str) -> Result, KvpError> { - let mut file = match self.open_for_read() { - Ok(f) => f, - Err(ref e) if e.kind() == ErrorKind::NotFound => { - return Ok(None); - } - Err(e) => return Err(e.into()), - }; - - FileExt::lock_shared(&file).map_err(|e| { - io::Error::other(format!("failed to lock KVP file: {e}")) - })?; - - let records = read_all_records(&mut file); - let _ = FileExt::unlock(&file); - let records = records?; - - Ok(records - .into_iter() - .rev() - .find(|(k, _)| k == key) - .map(|(_, v)| v)) - } - - /// Return all key-value pairs as a deduplicated `HashMap`. - /// - /// Duplicate keys are resolved by last-write-wins. Acquires a - /// shared flock during the scan. Returns an empty map if the pool - /// file does not exist. - fn entries(&self) -> Result, KvpError> { - let mut file = match self.open_for_read() { - Ok(f) => f, - Err(ref e) if e.kind() == ErrorKind::NotFound => { - return Ok(HashMap::new()); - } - Err(e) => return Err(e.into()), - }; - - FileExt::lock_shared(&file).map_err(|e| { - io::Error::other(format!("failed to lock KVP file: {e}")) - })?; - - let records = read_all_records(&mut file); - let _ = FileExt::unlock(&file); - let records = records?; - - let mut map = HashMap::new(); - for (k, v) in records { - map.insert(k, v); - } - Ok(map) - } - - /// Return all raw records without deduplication. - /// - /// Acquires a shared flock during the scan. Returns an empty list - /// if the pool file does not exist. - fn entries_raw(&self) -> Result, KvpError> { - let mut file = match self.open_for_read() { - Ok(f) => f, - Err(ref e) if e.kind() == ErrorKind::NotFound => { - return Ok(Vec::new()); - } - Err(e) => return Err(e.into()), - }; - - FileExt::lock_shared(&file).map_err(|e| { - io::Error::other(format!("failed to lock KVP file: {e}")) - })?; - - let records = read_all_records(&mut file); - let _ = FileExt::unlock(&file); - Ok(records?) - } - - /// Rewrite the pool file without the record(s) matching `key`. 
- /// - /// Acquires an exclusive flock for the duration. Returns `true` if - /// at least one record was removed, `false` if the key was not - /// found. Returns `Ok(false)` if the pool file does not exist. - fn delete(&self, key: &str) -> Result { - let mut file = match self.open_for_read_write() { - Ok(f) => f, - Err(ref e) if e.kind() == ErrorKind::NotFound => { - return Ok(false); - } - Err(e) => return Err(e.into()), - }; - - FileExt::lock_exclusive(&file).map_err(|e| { - io::Error::other(format!("failed to lock KVP file: {e}")) - })?; - - let result = (|| -> Result { - let records = read_all_records(&mut file)?; - let original_count = records.len(); - let kept: Vec<_> = - records.into_iter().filter(|(k, _)| k != key).collect(); - - if kept.len() == original_count { - return Ok(false); - } - - file.set_len(0)?; - file.seek(io::SeekFrom::Start(0))?; - - for (k, v) in &kept { - file.write_all(&encode_record(k, v))?; - } - file.flush()?; - Ok(true) - })(); - - let _ = FileExt::unlock(&file); - result - } - - /// Truncate the pool file to zero length, removing all records. - fn backend_clear(&self) -> Result<(), KvpError> { - self.truncate_pool() - } - - /// Whether the pool file's mtime predates the current boot. 
- fn is_stale(&self) -> Result { - self.pool_is_stale() - } -} - -fn get_uptime() -> Duration { - let mut system = System::new(); - system.refresh_memory(); - system.refresh_cpu_usage(); - Duration::from_secs(System::uptime()) -} - -#[cfg(test)] -mod tests { - use super::*; - use tempfile::NamedTempFile; - - fn hyperv_store(path: &Path) -> HyperVKvpStore { - HyperVKvpStore::new(path, false).unwrap() - } - - #[test] - fn test_encode_decode_roundtrip() { - let key = "test_key"; - let value = "test_value"; - let record = encode_record(key, value); - - assert_eq!(record.len(), RECORD_SIZE); - - let (decoded_key, decoded_value) = - decode_record(&record).expect("decode failed"); - assert_eq!(decoded_key, key); - assert_eq!(decoded_value, value); - } - - #[test] - fn test_write_rejects_empty_key() { - let tmp = NamedTempFile::new().unwrap(); - let store = hyperv_store(tmp.path()); - - let err = store.write("", "value").unwrap_err(); - assert!( - matches!(err, KvpError::EmptyKey), - "expected EmptyKey, got: {err}" - ); - } - - #[test] - fn test_write_rejects_null_in_key() { - let tmp = NamedTempFile::new().unwrap(); - let store = hyperv_store(tmp.path()); - - let err = store.write("bad\0key", "value").unwrap_err(); - assert!( - matches!(err, KvpError::KeyContainsNull), - "expected KeyContainsNull, got: {err}" - ); - } - - #[test] - fn test_write_rejects_oversized_key() { - let tmp = NamedTempFile::new().unwrap(); - let store = hyperv_store(tmp.path()); - - let key = "K".repeat(HV_KVP_EXCHANGE_MAX_KEY_SIZE + 1); - let err = store.write(&key, "v").unwrap_err(); - assert!( - matches!(err, KvpError::KeyTooLarge { .. }), - "expected KeyTooLarge, got: {err}" - ); - } - - #[test] - fn test_write_rejects_oversized_value_hyperv() { - let tmp = NamedTempFile::new().unwrap(); - let store = hyperv_store(tmp.path()); - - let value = "V".repeat(HV_KVP_EXCHANGE_MAX_VALUE_SIZE + 1); - let err = store.write("k", &value).unwrap_err(); - assert!( - matches!(err, KvpError::ValueTooLarge { .. 
}), - "expected ValueTooLarge, got: {err}" - ); - } - - #[test] - fn test_write_and_read() { - let tmp = NamedTempFile::new().unwrap(); - let store = hyperv_store(tmp.path()); - - store.write("key1", "value1").unwrap(); - store.write("key2", "value2").unwrap(); - - assert_eq!(store.read("key1").unwrap(), Some("value1".to_string())); - assert_eq!(store.read("key2").unwrap(), Some("value2".to_string())); - assert_eq!(store.read("nonexistent").unwrap(), None); - } - - #[test] - fn test_read_returns_last_match() { - let tmp = NamedTempFile::new().unwrap(); - let store = hyperv_store(tmp.path()); - - store.write("key", "first").unwrap(); - store.write("key", "second").unwrap(); - store.write("key", "third").unwrap(); - - assert_eq!(store.read("key").unwrap(), Some("third".to_string())); - } - - #[test] - fn test_entries_deduplicates_with_last_write_wins() { - let tmp = NamedTempFile::new().unwrap(); - let store = hyperv_store(tmp.path()); - - store.write("key", "v1").unwrap(); - store.write("key", "v2").unwrap(); - store.write("other", "v3").unwrap(); - - let entries = store.entries().unwrap(); - assert_eq!(entries.len(), 2); - assert_eq!(entries.get("key"), Some(&"v2".to_string())); - assert_eq!(entries.get("other"), Some(&"v3".to_string())); - } - - #[test] - fn test_entries_raw_preserves_duplicates() { - let tmp = NamedTempFile::new().unwrap(); - let store = hyperv_store(tmp.path()); - - store.write("key", "v1").unwrap(); - store.write("key", "v2").unwrap(); - store.write("other", "v3").unwrap(); - - let raw = store.entries_raw().unwrap(); - assert_eq!(raw.len(), 3); - assert_eq!(raw[0], ("key".to_string(), "v1".to_string())); - assert_eq!(raw[1], ("key".to_string(), "v2".to_string())); - assert_eq!(raw[2], ("other".to_string(), "v3".to_string())); - } - - #[test] - fn test_clear_empties_store() { - let tmp = NamedTempFile::new().unwrap(); - let store = hyperv_store(tmp.path()); - - store.write("key", "value").unwrap(); - assert!(tmp.path().metadata().unwrap().len() 
> 0); - - store.clear().unwrap(); - assert_eq!(tmp.path().metadata().unwrap().len(), 0); - assert_eq!(store.read("key").unwrap(), None); - } - - #[test] - fn test_is_stale_false_for_fresh_file() { - let tmp = NamedTempFile::new().unwrap(); - let store = hyperv_store(tmp.path()); - - store.write("key", "value").unwrap(); - assert!(!store.is_stale().unwrap()); - } - - #[test] - fn test_is_stale_false_when_file_missing() { - let store = hyperv_store(Path::new("/tmp/nonexistent-kvp-pool")); - assert!(!store.is_stale().unwrap()); - } - - #[test] - fn test_pool_is_stale_at_boot_detects_old_file() { - let tmp = NamedTempFile::new().unwrap(); - let store = hyperv_store(tmp.path()); - - store.write("key", "value").unwrap(); - assert!(store.pool_is_stale_at_boot(i64::MAX).unwrap()); - } - - #[test] - fn test_pool_is_stale_at_boot_keeps_new_file() { - let tmp = NamedTempFile::new().unwrap(); - let store = hyperv_store(tmp.path()); - - store.write("key", "value").unwrap(); - assert!(!store.pool_is_stale_at_boot(0).unwrap()); - } - - #[test] - fn test_clear_ok_when_file_missing() { - let store = hyperv_store(Path::new("/tmp/nonexistent-kvp-pool")); - store.clear().unwrap(); - } - - #[test] - fn test_delete_removes_all_matches() { - let tmp = NamedTempFile::new().unwrap(); - let store = hyperv_store(tmp.path()); - - store.write("key", "v1").unwrap(); - store.write("key", "v2").unwrap(); - store.write("other", "v3").unwrap(); - - assert!(store.delete("key").unwrap()); - assert_eq!(store.read("key").unwrap(), None); - assert_eq!(store.read("other").unwrap(), Some("v3".to_string())); - - let entries = store.entries().unwrap(); - assert_eq!(entries.len(), 1); - } - - #[test] - fn test_multi_thread_concurrent_writes() { - let tmp = NamedTempFile::new().unwrap(); - let path = tmp.path().to_path_buf(); - - let num_threads: usize = 20; - let iterations: usize = 1_000; - - let handles: Vec<_> = (0..num_threads) - .map(|tid| { - let p = path.clone(); - std::thread::spawn(move || { - let 
store = HyperVKvpStore::new(&p, false).unwrap(); - for i in 0..iterations { - let key = format!("thread-{tid}-iter-{i}"); - let value = format!("value-{tid}-{i}"); - store.write(&key, &value).expect("write failed"); - } - }) - }) - .collect(); - - for h in handles { - h.join().expect("thread panicked"); - } - - let store = HyperVKvpStore::new(&path, false).unwrap(); - let entries = store.entries().unwrap(); - assert_eq!(entries.len(), num_threads * iterations); - } -} diff --git a/libazureinit-kvp/src/kvp_pool.rs b/libazureinit-kvp/src/kvp_pool.rs new file mode 100644 index 00000000..0cb55932 --- /dev/null +++ b/libazureinit-kvp/src/kvp_pool.rs @@ -0,0 +1,598 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +//! Unified KVP pool file backend for Hyper-V and Azure guests. +//! +//! The on-disk pool format is fixed-width and identical across +//! environments: +//! - key field: 512 bytes +//! - value field: 2048 bytes +//! - record size: 2560 bytes +//! +//! [`PoolMode`] selects which size limits are enforced on writes: +//! - [`Restricted`](PoolMode::Restricted) (default): key <= 254 bytes, +//! value <= 1022 bytes +//! - [`Full`](PoolMode::Full): key <= 512 bytes, value <= 2048 bytes +//! +//! ## Reference +//! - [Hyper-V Data Exchange Service (KVP)](https://learn.microsoft.com/en-us/virtualization/hyper-v-on-windows/reference/integration-services#hyper-v-data-exchange-service-kvp) + +use std::collections::{HashMap, HashSet}; +use std::fs::{File, OpenOptions}; +use std::io::{self, ErrorKind, Read, Seek, Write}; +use std::os::unix::fs::MetadataExt; +use std::path::{Path, PathBuf}; +use std::time::{Duration, SystemTime, UNIX_EPOCH}; + +use fs2::FileExt; +use sysinfo::System; + +use crate::{KvpError, KvpStore}; + +/// Default KVP pool path when none is provided. 
+pub const DEFAULT_KVP_POOL_PATH: &str = "/var/lib/hyperv/.kvp_pool_1"; + +const WIRE_MAX_KEY_BYTES: usize = 512; +const WIRE_MAX_VALUE_BYTES: usize = 2048; +const SAFE_MAX_KEY_BYTES: usize = 254; +const SAFE_MAX_VALUE_BYTES: usize = 1022; + +/// Maximum number of unique keys allowed in the pool. +const MAX_UNIQUE_KEYS: usize = 1024; + +const RECORD_SIZE: usize = WIRE_MAX_KEY_BYTES + WIRE_MAX_VALUE_BYTES; + +/// Policy mode controlling key/value size limits for writes. +/// +/// The on-disk record format is always 512 + 2048 bytes regardless of +/// mode; the mode only determines the validation ceiling for incoming +/// writes. +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub enum PoolMode { + /// Conservative limits for Azure compatibility + /// (key <= 254 bytes, value <= 1022 bytes). + Restricted, + /// Full Hyper-V wire-format limits + /// (key <= 512 bytes, value <= 2048 bytes). + Full, +} + +impl PoolMode { + fn max_key_size(self) -> usize { + match self { + Self::Restricted => SAFE_MAX_KEY_BYTES, + Self::Full => WIRE_MAX_KEY_BYTES, + } + } + + fn max_value_size(self) -> usize { + match self { + Self::Restricted => SAFE_MAX_VALUE_BYTES, + Self::Full => WIRE_MAX_VALUE_BYTES, + } + } +} + +/// Unified KVP pool file store. +#[derive(Clone, Debug)] +pub struct KvpPoolStore { + path: PathBuf, + mode: PoolMode, +} + +impl KvpPoolStore { + /// Create a new KVP pool store. + /// + /// - `path`: pool file path, defaults to [`DEFAULT_KVP_POOL_PATH`]. + /// - `mode`: size-limit policy (see [`PoolMode`]). + /// - `truncate_on_stale`: if `true`, clears stale data from a + /// previous boot. + pub fn new( + path: Option, + mode: PoolMode, + truncate_on_stale: bool, + ) -> Result { + let store = Self { + path: path.unwrap_or_else(|| PathBuf::from(DEFAULT_KVP_POOL_PATH)), + mode, + }; + if truncate_on_stale && store.pool_is_stale()? { + store.truncate_pool()?; + } + Ok(store) + } + + /// Return a reference to the pool path. 
+ pub fn path(&self) -> &Path { + &self.path + } + + /// The policy mode this store was created with. + pub fn mode(&self) -> PoolMode { + self.mode + } + + fn boot_time() -> Result { + let now = SystemTime::now() + .duration_since(UNIX_EPOCH) + .map_err(|e| io::Error::other(format!("clock error: {e}")))? + .as_secs(); + Ok(now.saturating_sub(get_uptime().as_secs()) as i64) + } + + fn pool_is_stale(&self) -> Result { + let metadata = match std::fs::metadata(&self.path) { + Ok(m) => m, + Err(ref e) if e.kind() == ErrorKind::NotFound => return Ok(false), + Err(e) => return Err(e.into()), + }; + let boot = Self::boot_time()?; + Ok(metadata.mtime() < boot) + } + + #[cfg(test)] + fn pool_is_stale_at_boot(&self, boot_time: i64) -> Result { + let metadata = match std::fs::metadata(&self.path) { + Ok(m) => m, + Err(ref e) if e.kind() == ErrorKind::NotFound => return Ok(false), + Err(e) => return Err(e.into()), + }; + Ok(metadata.mtime() < boot_time) + } + + fn truncate_pool(&self) -> Result<(), KvpError> { + let file = + match OpenOptions::new().read(true).write(true).open(&self.path) { + Ok(f) => f, + Err(ref e) if e.kind() == ErrorKind::NotFound => return Ok(()), + Err(e) => return Err(e.into()), + }; + + FileExt::lock_exclusive(&file).map_err(|e| { + io::Error::other(format!("failed to lock KVP file: {e}")) + })?; + let result = file.set_len(0).map_err(KvpError::from); + let _ = FileExt::unlock(&file); + result + } + + fn open_for_read(&self) -> io::Result { + OpenOptions::new().read(true).open(&self.path) + } + + fn open_for_read_write(&self) -> io::Result { + OpenOptions::new().read(true).write(true).open(&self.path) + } + + fn open_for_read_append_create(&self) -> io::Result { + OpenOptions::new() + .read(true) + .append(true) + .create(true) + .open(&self.path) + } +} + +pub(crate) fn encode_record(key: &str, value: &str) -> Vec { + let mut buf = vec![0u8; RECORD_SIZE]; + + let key_bytes = key.as_bytes(); + let key_len = key_bytes.len().min(WIRE_MAX_KEY_BYTES); + 
buf[..key_len].copy_from_slice(&key_bytes[..key_len]); + + let value_bytes = value.as_bytes(); + let value_len = value_bytes.len().min(WIRE_MAX_VALUE_BYTES); + buf[WIRE_MAX_KEY_BYTES..WIRE_MAX_KEY_BYTES + value_len] + .copy_from_slice(&value_bytes[..value_len]); + + buf +} + +pub(crate) fn decode_record(data: &[u8]) -> io::Result<(String, String)> { + if data.len() != RECORD_SIZE { + return Err(io::Error::other(format!( + "record size mismatch: expected {RECORD_SIZE}, got {}", + data.len() + ))); + } + + let (key_bytes, value_bytes) = data.split_at(WIRE_MAX_KEY_BYTES); + + let key = std::str::from_utf8(key_bytes) + .map_err(|e| io::Error::new(ErrorKind::InvalidData, e))? + .trim_end_matches('\0') + .to_string(); + let value = std::str::from_utf8(value_bytes) + .map_err(|e| io::Error::new(ErrorKind::InvalidData, e))? + .trim_end_matches('\0') + .to_string(); + + Ok((key, value)) +} + +fn read_all_records(file: &mut File) -> io::Result> { + let len = file.metadata()?.len() as usize; + if len == 0 { + return Ok(Vec::new()); + } + + if len % RECORD_SIZE != 0 { + return Err(io::Error::other(format!( + "file size ({len}) is not a multiple of record size ({RECORD_SIZE})" + ))); + } + + file.seek(io::SeekFrom::Start(0))?; + let record_count = len / RECORD_SIZE; + let mut records = Vec::with_capacity(record_count); + let mut buf = [0u8; RECORD_SIZE]; + + for _ in 0..record_count { + file.read_exact(&mut buf)?; + records.push(decode_record(&buf)?); + } + + Ok(records) +} + +impl KvpStore for KvpPoolStore { + fn max_key_size(&self) -> usize { + self.mode.max_key_size() + } + + fn max_value_size(&self) -> usize { + self.mode.max_value_size() + } + + fn backend_write(&self, key: &str, value: &str) -> Result<(), KvpError> { + let mut file = self.open_for_read_append_create()?; + FileExt::lock_exclusive(&file).map_err(|e| { + io::Error::other(format!("failed to lock KVP file: {e}")) + })?; + + let result = (|| -> Result<(), KvpError> { + let records = read_all_records(&mut 
file)?; + let mut unique_keys = HashSet::with_capacity(records.len()); + for (record_key, _) in records { + unique_keys.insert(record_key); + } + + if !unique_keys.contains(key) + && unique_keys.len() >= MAX_UNIQUE_KEYS + { + return Err(KvpError::MaxUniqueKeysExceeded { + max: MAX_UNIQUE_KEYS, + }); + } + + let record = encode_record(key, value); + file.write_all(&record)?; + file.flush()?; + Ok(()) + })(); + + let _ = FileExt::unlock(&file); + result + } + + fn backend_read(&self, key: &str) -> Result, KvpError> { + let mut file = match self.open_for_read() { + Ok(f) => f, + Err(ref e) if e.kind() == ErrorKind::NotFound => return Ok(None), + Err(e) => return Err(e.into()), + }; + + FileExt::lock_shared(&file).map_err(|e| { + io::Error::other(format!("failed to lock KVP file: {e}")) + })?; + let records = read_all_records(&mut file); + let _ = FileExt::unlock(&file); + let records = records?; + + Ok(records + .into_iter() + .rev() + .find(|(k, _)| k == key) + .map(|(_, v)| v)) + } + + fn entries(&self) -> Result, KvpError> { + let mut file = match self.open_for_read() { + Ok(f) => f, + Err(ref e) if e.kind() == ErrorKind::NotFound => { + return Ok(HashMap::new()) + } + Err(e) => return Err(e.into()), + }; + + FileExt::lock_shared(&file).map_err(|e| { + io::Error::other(format!("failed to lock KVP file: {e}")) + })?; + let records = read_all_records(&mut file); + let _ = FileExt::unlock(&file); + let records = records?; + + Ok(records.into_iter().collect()) + } + + fn entries_raw(&self) -> Result, KvpError> { + let mut file = match self.open_for_read() { + Ok(f) => f, + Err(ref e) if e.kind() == ErrorKind::NotFound => { + return Ok(Vec::new()) + } + Err(e) => return Err(e.into()), + }; + + FileExt::lock_shared(&file).map_err(|e| { + io::Error::other(format!("failed to lock KVP file: {e}")) + })?; + let records = read_all_records(&mut file); + let _ = FileExt::unlock(&file); + Ok(records?) 
+ } + + fn delete(&self, key: &str) -> Result { + let mut file = match self.open_for_read_write() { + Ok(f) => f, + Err(ref e) if e.kind() == ErrorKind::NotFound => return Ok(false), + Err(e) => return Err(e.into()), + }; + + FileExt::lock_exclusive(&file).map_err(|e| { + io::Error::other(format!("failed to lock KVP file: {e}")) + })?; + + let result = (|| -> Result { + let records = read_all_records(&mut file)?; + let original_count = records.len(); + let kept: Vec<_> = + records.into_iter().filter(|(k, _)| k != key).collect(); + + if kept.len() == original_count { + return Ok(false); + } + + file.set_len(0)?; + file.seek(io::SeekFrom::Start(0))?; + for (k, v) in &kept { + file.write_all(&encode_record(k, v))?; + } + file.flush()?; + Ok(true) + })(); + + let _ = FileExt::unlock(&file); + result + } + + fn backend_clear(&self) -> Result<(), KvpError> { + self.truncate_pool() + } + + fn is_stale(&self) -> Result { + self.pool_is_stale() + } +} + +fn get_uptime() -> Duration { + Duration::from_secs(System::uptime()) +} + +#[cfg(test)] +mod tests { + use super::*; + use tempfile::NamedTempFile; + + fn restricted_store(path: &Path) -> KvpPoolStore { + KvpPoolStore::new(Some(path.to_path_buf()), PoolMode::Restricted, false) + .unwrap() + } + + fn full_store(path: &Path) -> KvpPoolStore { + KvpPoolStore::new(Some(path.to_path_buf()), PoolMode::Full, false) + .unwrap() + } + + #[test] + fn test_write_rejects_empty_key() { + let tmp = NamedTempFile::new().unwrap(); + let store = restricted_store(tmp.path()); + + let err = store.write("", "value").unwrap_err(); + assert!(matches!(err, KvpError::EmptyKey)); + } + + #[test] + fn test_write_rejects_null_in_key() { + let tmp = NamedTempFile::new().unwrap(); + let store = restricted_store(tmp.path()); + + let err = store.write("bad\0key", "value").unwrap_err(); + assert!(matches!(err, KvpError::KeyContainsNull)); + } + + #[test] + fn test_default_path_is_used() { + let store = + KvpPoolStore::new(None, PoolMode::Restricted, 
false).unwrap(); + assert_eq!(store.path(), Path::new(DEFAULT_KVP_POOL_PATH)); + } + + #[test] + fn test_explicit_path_is_used() { + let tmp = NamedTempFile::new().unwrap(); + let store = KvpPoolStore::new( + Some(tmp.path().to_path_buf()), + PoolMode::Restricted, + false, + ) + .unwrap(); + assert_eq!(store.path(), tmp.path()); + } + + #[test] + fn test_restricted_limits_key_254_pass_255_fail() { + let tmp = NamedTempFile::new().unwrap(); + let store = restricted_store(tmp.path()); + + let ok_key = "k".repeat(254); + store.write(&ok_key, "v").unwrap(); + + let bad_key = "k".repeat(255); + let err = store.write(&bad_key, "v").unwrap_err(); + assert!(matches!(err, KvpError::KeyTooLarge { .. })); + } + + #[test] + fn test_restricted_limits_value_1022_pass_1023_fail() { + let tmp = NamedTempFile::new().unwrap(); + let store = restricted_store(tmp.path()); + + let ok_val = "v".repeat(1022); + store.write("k", &ok_val).unwrap(); + + let bad_val = "v".repeat(1023); + let err = store.write("k2", &bad_val).unwrap_err(); + assert!(matches!(err, KvpError::ValueTooLarge { .. })); + } + + #[test] + fn test_full_limits_key_512_pass_513_fail() { + let tmp = NamedTempFile::new().unwrap(); + let store = full_store(tmp.path()); + + let ok_key = "k".repeat(512); + store.write(&ok_key, "v").unwrap(); + + let bad_key = "k".repeat(513); + let err = store.write(&bad_key, "v").unwrap_err(); + assert!(matches!(err, KvpError::KeyTooLarge { .. })); + } + + #[test] + fn test_full_limits_value_2048_pass_2049_fail() { + let tmp = NamedTempFile::new().unwrap(); + let store = full_store(tmp.path()); + + let ok_val = "v".repeat(2048); + store.write("k", &ok_val).unwrap(); + + let bad_val = "v".repeat(2049); + let err = store.write("k2", &bad_val).unwrap_err(); + assert!(matches!(err, KvpError::ValueTooLarge { .. 
})); + } + + #[test] + fn test_entries_raw_preserves_duplicates() { + let tmp = NamedTempFile::new().unwrap(); + let store = restricted_store(tmp.path()); + + store.write("key", "v1").unwrap(); + store.write("key", "v2").unwrap(); + store.write("other", "v3").unwrap(); + + let raw = store.entries_raw().unwrap(); + assert_eq!(raw.len(), 3); + assert_eq!(raw[0], ("key".to_string(), "v1".to_string())); + assert_eq!(raw[1], ("key".to_string(), "v2".to_string())); + assert_eq!(raw[2], ("other".to_string(), "v3".to_string())); + } + + #[test] + fn test_entries_deduplicates_last_write_wins() { + let tmp = NamedTempFile::new().unwrap(); + let store = restricted_store(tmp.path()); + + store.write("key", "v1").unwrap(); + store.write("key", "v2").unwrap(); + store.write("other", "v3").unwrap(); + + let entries = store.entries().unwrap(); + assert_eq!(entries.len(), 2); + assert_eq!(entries.get("key"), Some(&"v2".to_string())); + assert_eq!(entries.get("other"), Some(&"v3".to_string())); + } + + #[test] + fn test_unique_key_cap_allows_1024_then_rejects_1025th() { + let tmp = NamedTempFile::new().unwrap(); + let store = restricted_store(tmp.path()); + + for i in 0..MAX_UNIQUE_KEYS { + store.write(&format!("k{i}"), "v").unwrap(); + } + let err = store.write("overflow", "v").unwrap_err(); + assert!(matches!(err, KvpError::MaxUniqueKeysExceeded { .. 
})); + } + + #[test] + fn test_unique_key_cap_allows_overwrite_at_limit() { + let tmp = NamedTempFile::new().unwrap(); + let store = restricted_store(tmp.path()); + + for i in 0..MAX_UNIQUE_KEYS { + store.write(&format!("k{i}"), "v").unwrap(); + } + store.write("k0", "updated").unwrap(); + assert_eq!(store.read("k0").unwrap(), Some("updated".to_string())); + } + + #[test] + fn test_unique_key_cap_allows_new_key_after_delete() { + let tmp = NamedTempFile::new().unwrap(); + let store = restricted_store(tmp.path()); + + for i in 0..MAX_UNIQUE_KEYS { + store.write(&format!("k{i}"), "v").unwrap(); + } + assert!(store.delete("k0").unwrap()); + store.write("new-key", "v").unwrap(); + } + + #[test] + fn test_clear_empties_store() { + let tmp = NamedTempFile::new().unwrap(); + let store = restricted_store(tmp.path()); + + store.write("key", "value").unwrap(); + store.clear().unwrap(); + assert_eq!(store.read("key").unwrap(), None); + } + + #[test] + fn test_delete_removes_all_matching_records() { + let tmp = NamedTempFile::new().unwrap(); + let store = restricted_store(tmp.path()); + + store.write("key", "v1").unwrap(); + store.write("key", "v2").unwrap(); + store.write("other", "v3").unwrap(); + + assert!(store.delete("key").unwrap()); + assert_eq!(store.read("key").unwrap(), None); + assert_eq!(store.read("other").unwrap(), Some("v3".to_string())); + } + + #[test] + fn test_is_stale_and_pool_is_stale_at_boot_helpers() { + let tmp = NamedTempFile::new().unwrap(); + let store = restricted_store(tmp.path()); + store.write("key", "value").unwrap(); + + assert!(!store.is_stale().unwrap()); + assert!(store.pool_is_stale_at_boot(i64::MAX).unwrap()); + assert!(!store.pool_is_stale_at_boot(0).unwrap()); + } + + #[test] + fn test_mode_getter() { + let tmp = NamedTempFile::new().unwrap(); + let restricted = restricted_store(tmp.path()); + assert_eq!(restricted.mode(), PoolMode::Restricted); + + let tmp2 = NamedTempFile::new().unwrap(); + let full = full_store(tmp2.path()); + 
assert_eq!(full.mode(), PoolMode::Full); + } +} diff --git a/libazureinit-kvp/src/lib.rs b/libazureinit-kvp/src/lib.rs index a248b8ea..bf342a3b 100644 --- a/libazureinit-kvp/src/lib.rs +++ b/libazureinit-kvp/src/lib.rs @@ -1,19 +1,18 @@ // Copyright (c) Microsoft Corporation. // Licensed under the MIT License. -//! `libazureinit-kvp` provides a storage trait and Hyper-V-backed -//! implementation for KVP pool files. +//! `libazureinit-kvp` provides a storage trait and unified KVP pool +//! implementation for Hyper-V/Azure guests. //! //! - [`KvpStore`]: storage interface used by higher layers. -//! - [`HyperVKvpStore`]: Hyper-V pool file implementation. -//! - [`AzureKvpStore`]: Azure-specific wrapper with stricter value limits. +//! - [`KvpPoolStore`]: KVP pool file implementation with +//! [`PoolMode`](kvp_pool::PoolMode)-based policy. use std::collections::HashMap; use std::fmt; use std::io; -pub mod azure; -pub mod hyperv; +pub mod kvp_pool; /// Errors returned by [`KvpStore`] operations. #[derive(Debug)] @@ -24,6 +23,8 @@ pub enum KvpError { KeyTooLarge { max: usize, actual: usize }, /// The value exceeds the store's maximum value size. ValueTooLarge { max: usize, actual: usize }, + /// The store already has the maximum allowed number of unique keys. + MaxUniqueKeysExceeded { max: usize }, /// The key contains a null byte, which is incompatible with the /// on-disk format (null-padded fixed-width fields). 
KeyContainsNull, @@ -41,6 +42,9 @@ impl fmt::Display for KvpError { Self::ValueTooLarge { max, actual } => { write!(f, "KVP value length ({actual}) exceeds maximum ({max})") } + Self::MaxUniqueKeysExceeded { max } => { + write!(f, "KVP unique key count exceeded maximum ({max})") + } Self::KeyContainsNull => { write!(f, "KVP key must not contain null bytes") } @@ -64,91 +68,66 @@ impl From for KvpError { } } -pub use azure::AzureKvpStore; -pub use hyperv::HyperVKvpStore; +pub use kvp_pool::KvpPoolStore; -/// Key-value store that supports Hyper-V KVP semantics while being -/// generic enough for non-file-backed in-memory implementations -/// (e.g. tests). +/// Key-value store with Hyper-V KVP semantics. +/// +/// The trait splits each operation into a `backend_*` method (raw I/O, +/// provided by the implementor) and a public method (`write`, `read`, +/// `clear`) that validates inputs then delegates to the backend. pub trait KvpStore: Send + Sync { /// Maximum key size in bytes for this store. - const MAX_KEY_SIZE: usize; - /// Maximum value size in bytes for this store. - const MAX_VALUE_SIZE: usize; - - // -- Backend callouts for read/write (required) --------------------- + fn max_key_size(&self) -> usize; - /// Backend-specific read implementation. - fn backend_read(&self, key: &str) -> Result, KvpError>; + /// Maximum value size in bytes for this store. + fn max_value_size(&self) -> usize; - /// Backend-specific write implementation. + /// Raw write — persist a key-value pair without validation. fn backend_write(&self, key: &str, value: &str) -> Result<(), KvpError>; - // -- Required methods (no shared validation wrapper) --------------- + /// Raw read — look up a key without validation. + fn backend_read(&self, key: &str) -> Result, KvpError>; /// Return all key-value pairs, deduplicated with last-write-wins. fn entries(&self) -> Result, KvpError>; - /// Return all raw key-value records without deduplication. 
- /// - /// Useful for testing or diagnostic dump commands where the full - /// record history is needed. + /// Return all raw records in file order, without deduplication. fn entries_raw(&self) -> Result, KvpError>; - /// Remove all records matching `key`. - /// - /// Returns `true` if at least one record was removed, `false` if - /// the key was not found. + /// Remove all records matching `key`. Returns `true` if any were removed. fn delete(&self, key: &str) -> Result; - /// Backend-specific clear implementation (empty the store). + /// Raw clear — remove all records without additional checks. fn backend_clear(&self) -> Result<(), KvpError>; - // -- Public API with shared validation ---------------------------- - - /// Empty the store, removing all records. - fn clear(&self) -> Result<(), KvpError> { - self.backend_clear() - } - - /// Write a key-value pair into the store. - /// - /// # Errors - /// - /// Returns [`KvpError::EmptyKey`] if the key is empty, - /// [`KvpError::KeyContainsNull`] if the key contains a null byte, - /// [`KvpError::KeyTooLarge`] if the key exceeds [`Self::MAX_KEY_SIZE`], - /// [`KvpError::ValueTooLarge`] if the value exceeds - /// [`Self::MAX_VALUE_SIZE`], or [`KvpError::Io`] on I/O failure. + /// Write a key-value pair after validation. fn write(&self, key: &str, value: &str) -> Result<(), KvpError> { - Self::validate_key(key)?; - Self::validate_value(value)?; + self.validate_key(key)?; + self.validate_value(value)?; self.backend_write(key, value) } - /// Read the value for a given key, returning `None` if absent. + /// Read the most recent value for a key (last-write-wins). /// - /// When multiple records share the same key, the most recent value - /// is returned (last-write-wins). + /// Returns `Ok(None)` when the key is absent. fn read(&self, key: &str) -> Result, KvpError> { - Self::validate_key(key)?; + self.validate_key(key)?; self.backend_read(key) } - /// Whether the store's data is stale (e.g. 
predates the current - /// boot). Defaults to `false`; file-backed stores can override. + /// Remove all records from the store. + fn clear(&self) -> Result<(), KvpError> { + self.backend_clear() + } + + /// Whether the store's data is stale (e.g. predates current boot). fn is_stale(&self) -> Result { Ok(false) } - // -- Validation helpers ------------------------------------------- - - /// Validate a key against common constraints. - /// - /// Keys must be non-empty, must not contain null bytes (the on-disk - /// format uses null-padding), and must not exceed - /// [`Self::MAX_KEY_SIZE`] bytes. - fn validate_key(key: &str) -> Result<(), KvpError> { + /// Validate a key: must be non-empty, no null bytes, within + /// [`max_key_size`](Self::max_key_size). + fn validate_key(&self, key: &str) -> Result<(), KvpError> { if key.is_empty() { return Err(KvpError::EmptyKey); } @@ -156,17 +135,17 @@ pub trait KvpStore: Send + Sync { return Err(KvpError::KeyContainsNull); } let actual = key.len(); - let max = Self::MAX_KEY_SIZE; + let max = self.max_key_size(); if actual > max { return Err(KvpError::KeyTooLarge { max, actual }); } Ok(()) } - /// Validate a value against the store's size limit. - fn validate_value(value: &str) -> Result<(), KvpError> { + /// Validate a value: must be within [`max_value_size`](Self::max_value_size). + fn validate_value(&self, value: &str) -> Result<(), KvpError> { let actual = value.len(); - let max = Self::MAX_VALUE_SIZE; + let max = self.max_value_size(); if actual > max { return Err(KvpError::ValueTooLarge { max, actual }); } From e681b592e23587460118220d5688fc75359b5ea1 Mon Sep 17 00:00:00 2001 From: Peyton Date: Thu, 26 Mar 2026 15:14:17 -0700 Subject: [PATCH 7/9] Refactor libazureinit-kvp: simplify KvpStore trait, add upsert semantics and JSON dump Overhaul the KvpStore trait into a pure customer-facing interface with no default implementations or backend_* methods. 
All logic (validation, file I/O, locking) now lives in the KvpPoolStore implementation. Key changes: - Replace append-based write with upsert (insert-or-update) so each key has exactly one record in the file. Eliminates entries_raw and last-write-wins deduplication. - Move validate_key/validate_value from trait defaults to private KvpPoolStore methods. - Decouple read validation from PoolMode: reads accept keys up to the full wire-format max (512 bytes) regardless of mode; only writes are restricted by PoolMode. - Add len(), is_empty(), and dump() (JSON via serde_json) to the trait. - Add 4 multi-threaded concurrency tests covering concurrent upserts to different keys, same key, mixed readers/writers, and key cap boundary. --- libazureinit-kvp/Cargo.toml | 2 + libazureinit-kvp/src/kvp_pool.rs | 450 +++++++++++++++++++++++++------ libazureinit-kvp/src/lib.rs | 84 ++---- 3 files changed, 383 insertions(+), 153 deletions(-) diff --git a/libazureinit-kvp/Cargo.toml b/libazureinit-kvp/Cargo.toml index 6d5a8106..91f95986 100644 --- a/libazureinit-kvp/Cargo.toml +++ b/libazureinit-kvp/Cargo.toml @@ -10,6 +10,8 @@ description = "Hyper-V KVP (Key-Value Pair) storage library for azure-init." [dependencies] fs2 = "0.4" +serde = { version = "1", features = ["derive"] } +serde_json = "1" sysinfo = "0.38" [dev-dependencies] diff --git a/libazureinit-kvp/src/kvp_pool.rs b/libazureinit-kvp/src/kvp_pool.rs index 0cb55932..ca62424a 100644 --- a/libazureinit-kvp/src/kvp_pool.rs +++ b/libazureinit-kvp/src/kvp_pool.rs @@ -17,7 +17,7 @@ //! ## Reference //! 
- [Hyper-V Data Exchange Service (KVP)](https://learn.microsoft.com/en-us/virtualization/hyper-v-on-windows/reference/integration-services#hyper-v-data-exchange-service-kvp) -use std::collections::{HashMap, HashSet}; +use std::collections::HashMap; use std::fs::{File, OpenOptions}; use std::io::{self, ErrorKind, Read, Seek, Write}; use std::os::unix::fs::MetadataExt; @@ -43,10 +43,6 @@ const MAX_UNIQUE_KEYS: usize = 1024; const RECORD_SIZE: usize = WIRE_MAX_KEY_BYTES + WIRE_MAX_VALUE_BYTES; /// Policy mode controlling key/value size limits for writes. -/// -/// The on-disk record format is always 512 + 2048 bytes regardless of -/// mode; the mode only determines the validation ceiling for incoming -/// writes. #[derive(Clone, Copy, Debug, PartialEq, Eq)] pub enum PoolMode { /// Conservative limits for Azure compatibility @@ -112,6 +108,49 @@ impl KvpPoolStore { self.mode } + fn validate_key(&self, key: &str) -> Result<(), KvpError> { + if key.is_empty() { + return Err(KvpError::EmptyKey); + } + if key.as_bytes().contains(&0) { + return Err(KvpError::KeyContainsNull); + } + let actual = key.len(); + let max = self.mode.max_key_size(); + if actual > max { + return Err(KvpError::KeyTooLarge { max, actual }); + } + Ok(()) + } + + /// Looser validation for reads: accepts keys up to the full + /// wire-format maximum regardless of [`PoolMode`]. 
+ fn validate_key_for_read(key: &str) -> Result<(), KvpError> { + if key.is_empty() { + return Err(KvpError::EmptyKey); + } + if key.as_bytes().contains(&0) { + return Err(KvpError::KeyContainsNull); + } + let actual = key.len(); + if actual > WIRE_MAX_KEY_BYTES { + return Err(KvpError::KeyTooLarge { + max: WIRE_MAX_KEY_BYTES, + actual, + }); + } + Ok(()) + } + + fn validate_value(&self, value: &str) -> Result<(), KvpError> { + let actual = value.len(); + let max = self.mode.max_value_size(); + if actual > max { + return Err(KvpError::ValueTooLarge { max, actual }); + } + Ok(()) + } + fn boot_time() -> Result { let now = SystemTime::now() .duration_since(UNIX_EPOCH) @@ -164,10 +203,10 @@ impl KvpPoolStore { OpenOptions::new().read(true).write(true).open(&self.path) } - fn open_for_read_append_create(&self) -> io::Result { + fn open_for_read_write_create(&self) -> io::Result { OpenOptions::new() .read(true) - .append(true) + .write(true) .create(true) .open(&self.path) } @@ -235,6 +274,19 @@ fn read_all_records(file: &mut File) -> io::Result> { Ok(records) } +fn rewrite_file( + file: &mut File, + map: &HashMap, +) -> Result<(), KvpError> { + file.set_len(0)?; + file.seek(io::SeekFrom::Start(0))?; + for (k, v) in map { + file.write_all(&encode_record(k, v))?; + } + file.flush()?; + Ok(()) +} + impl KvpStore for KvpPoolStore { fn max_key_size(&self) -> usize { self.mode.max_key_size() @@ -244,30 +296,28 @@ impl KvpStore for KvpPoolStore { self.mode.max_value_size() } - fn backend_write(&self, key: &str, value: &str) -> Result<(), KvpError> { - let mut file = self.open_for_read_append_create()?; + fn upsert(&self, key: &str, value: &str) -> Result<(), KvpError> { + self.validate_key(key)?; + self.validate_value(value)?; + + let mut file = self.open_for_read_write_create()?; FileExt::lock_exclusive(&file).map_err(|e| { io::Error::other(format!("failed to lock KVP file: {e}")) })?; let result = (|| -> Result<(), KvpError> { let records = read_all_records(&mut file)?; - 
let mut unique_keys = HashSet::with_capacity(records.len()); - for (record_key, _) in records { - unique_keys.insert(record_key); - } + let mut map: HashMap = + records.into_iter().collect(); - if !unique_keys.contains(key) - && unique_keys.len() >= MAX_UNIQUE_KEYS - { + if !map.contains_key(key) && map.len() >= MAX_UNIQUE_KEYS { return Err(KvpError::MaxUniqueKeysExceeded { max: MAX_UNIQUE_KEYS, }); } - let record = encode_record(key, value); - file.write_all(&record)?; - file.flush()?; + map.insert(key.to_string(), value.to_string()); + rewrite_file(&mut file, &map)?; Ok(()) })(); @@ -275,7 +325,9 @@ impl KvpStore for KvpPoolStore { result } - fn backend_read(&self, key: &str) -> Result, KvpError> { + fn read(&self, key: &str) -> Result, KvpError> { + Self::validate_key_for_read(key)?; + let mut file = match self.open_for_read() { Ok(f) => f, Err(ref e) if e.kind() == ErrorKind::NotFound => return Ok(None), @@ -289,11 +341,7 @@ impl KvpStore for KvpPoolStore { let _ = FileExt::unlock(&file); let records = records?; - Ok(records - .into_iter() - .rev() - .find(|(k, _)| k == key) - .map(|(_, v)| v)) + Ok(records.into_iter().find(|(k, _)| k == key).map(|(_, v)| v)) } fn entries(&self) -> Result, KvpError> { @@ -315,23 +363,6 @@ impl KvpStore for KvpPoolStore { Ok(records.into_iter().collect()) } - fn entries_raw(&self) -> Result, KvpError> { - let mut file = match self.open_for_read() { - Ok(f) => f, - Err(ref e) if e.kind() == ErrorKind::NotFound => { - return Ok(Vec::new()) - } - Err(e) => return Err(e.into()), - }; - - FileExt::lock_shared(&file).map_err(|e| { - io::Error::other(format!("failed to lock KVP file: {e}")) - })?; - let records = read_all_records(&mut file); - let _ = FileExt::unlock(&file); - Ok(records?) 
- } - fn delete(&self, key: &str) -> Result { let mut file = match self.open_for_read_write() { Ok(f) => f, @@ -366,13 +397,38 @@ impl KvpStore for KvpPoolStore { result } - fn backend_clear(&self) -> Result<(), KvpError> { + fn clear(&self) -> Result<(), KvpError> { self.truncate_pool() } + fn len(&self) -> Result { + match std::fs::metadata(&self.path) { + Ok(m) => Ok(m.len() as usize / RECORD_SIZE), + Err(ref e) if e.kind() == ErrorKind::NotFound => Ok(0), + Err(e) => Err(e.into()), + } + } + + fn is_empty(&self) -> Result { + match std::fs::metadata(&self.path) { + Ok(m) => Ok(m.len() == 0), + Err(ref e) if e.kind() == ErrorKind::NotFound => Ok(true), + Err(e) => Err(e.into()), + } + } + fn is_stale(&self) -> Result { self.pool_is_stale() } + + fn dump(&self, path: &Path) -> Result<(), KvpError> { + let entries = self.entries()?; + let json = serde_json::to_string_pretty(&entries).map_err(|e| { + io::Error::other(format!("JSON serialization failed: {e}")) + })?; + std::fs::write(path, json)?; + Ok(()) + } } fn get_uptime() -> Duration { @@ -395,20 +451,20 @@ mod tests { } #[test] - fn test_write_rejects_empty_key() { + fn test_upsert_rejects_empty_key() { let tmp = NamedTempFile::new().unwrap(); let store = restricted_store(tmp.path()); - let err = store.write("", "value").unwrap_err(); + let err = store.upsert("", "value").unwrap_err(); assert!(matches!(err, KvpError::EmptyKey)); } #[test] - fn test_write_rejects_null_in_key() { + fn test_upsert_rejects_null_in_key() { let tmp = NamedTempFile::new().unwrap(); let store = restricted_store(tmp.path()); - let err = store.write("bad\0key", "value").unwrap_err(); + let err = store.upsert("bad\0key", "value").unwrap_err(); assert!(matches!(err, KvpError::KeyContainsNull)); } @@ -437,10 +493,10 @@ mod tests { let store = restricted_store(tmp.path()); let ok_key = "k".repeat(254); - store.write(&ok_key, "v").unwrap(); + store.upsert(&ok_key, "v").unwrap(); let bad_key = "k".repeat(255); - let err = 
store.write(&bad_key, "v").unwrap_err(); + let err = store.upsert(&bad_key, "v").unwrap_err(); assert!(matches!(err, KvpError::KeyTooLarge { .. })); } @@ -450,10 +506,10 @@ mod tests { let store = restricted_store(tmp.path()); let ok_val = "v".repeat(1022); - store.write("k", &ok_val).unwrap(); + store.upsert("k", &ok_val).unwrap(); let bad_val = "v".repeat(1023); - let err = store.write("k2", &bad_val).unwrap_err(); + let err = store.upsert("k2", &bad_val).unwrap_err(); assert!(matches!(err, KvpError::ValueTooLarge { .. })); } @@ -463,10 +519,10 @@ mod tests { let store = full_store(tmp.path()); let ok_key = "k".repeat(512); - store.write(&ok_key, "v").unwrap(); + store.upsert(&ok_key, "v").unwrap(); let bad_key = "k".repeat(513); - let err = store.write(&bad_key, "v").unwrap_err(); + let err = store.upsert(&bad_key, "v").unwrap_err(); assert!(matches!(err, KvpError::KeyTooLarge { .. })); } @@ -476,37 +532,21 @@ mod tests { let store = full_store(tmp.path()); let ok_val = "v".repeat(2048); - store.write("k", &ok_val).unwrap(); + store.upsert("k", &ok_val).unwrap(); let bad_val = "v".repeat(2049); - let err = store.write("k2", &bad_val).unwrap_err(); + let err = store.upsert("k2", &bad_val).unwrap_err(); assert!(matches!(err, KvpError::ValueTooLarge { .. 
})); } #[test] - fn test_entries_raw_preserves_duplicates() { + fn test_upsert_overwrites_existing_key() { let tmp = NamedTempFile::new().unwrap(); let store = restricted_store(tmp.path()); - store.write("key", "v1").unwrap(); - store.write("key", "v2").unwrap(); - store.write("other", "v3").unwrap(); - - let raw = store.entries_raw().unwrap(); - assert_eq!(raw.len(), 3); - assert_eq!(raw[0], ("key".to_string(), "v1".to_string())); - assert_eq!(raw[1], ("key".to_string(), "v2".to_string())); - assert_eq!(raw[2], ("other".to_string(), "v3".to_string())); - } - - #[test] - fn test_entries_deduplicates_last_write_wins() { - let tmp = NamedTempFile::new().unwrap(); - let store = restricted_store(tmp.path()); - - store.write("key", "v1").unwrap(); - store.write("key", "v2").unwrap(); - store.write("other", "v3").unwrap(); + store.upsert("key", "v1").unwrap(); + store.upsert("key", "v2").unwrap(); + store.upsert("other", "v3").unwrap(); let entries = store.entries().unwrap(); assert_eq!(entries.len(), 2); @@ -520,9 +560,9 @@ mod tests { let store = restricted_store(tmp.path()); for i in 0..MAX_UNIQUE_KEYS { - store.write(&format!("k{i}"), "v").unwrap(); + store.upsert(&format!("k{i}"), "v").unwrap(); } - let err = store.write("overflow", "v").unwrap_err(); + let err = store.upsert("overflow", "v").unwrap_err(); assert!(matches!(err, KvpError::MaxUniqueKeysExceeded { .. 
})); } @@ -532,9 +572,9 @@ mod tests { let store = restricted_store(tmp.path()); for i in 0..MAX_UNIQUE_KEYS { - store.write(&format!("k{i}"), "v").unwrap(); + store.upsert(&format!("k{i}"), "v").unwrap(); } - store.write("k0", "updated").unwrap(); + store.upsert("k0", "updated").unwrap(); assert_eq!(store.read("k0").unwrap(), Some("updated".to_string())); } @@ -544,10 +584,10 @@ mod tests { let store = restricted_store(tmp.path()); for i in 0..MAX_UNIQUE_KEYS { - store.write(&format!("k{i}"), "v").unwrap(); + store.upsert(&format!("k{i}"), "v").unwrap(); } assert!(store.delete("k0").unwrap()); - store.write("new-key", "v").unwrap(); + store.upsert("new-key", "v").unwrap(); } #[test] @@ -555,30 +595,29 @@ mod tests { let tmp = NamedTempFile::new().unwrap(); let store = restricted_store(tmp.path()); - store.write("key", "value").unwrap(); + store.upsert("key", "value").unwrap(); store.clear().unwrap(); assert_eq!(store.read("key").unwrap(), None); } #[test] - fn test_delete_removes_all_matching_records() { + fn test_delete_removes_key() { let tmp = NamedTempFile::new().unwrap(); let store = restricted_store(tmp.path()); - store.write("key", "v1").unwrap(); - store.write("key", "v2").unwrap(); - store.write("other", "v3").unwrap(); + store.upsert("key", "v1").unwrap(); + store.upsert("other", "v2").unwrap(); assert!(store.delete("key").unwrap()); assert_eq!(store.read("key").unwrap(), None); - assert_eq!(store.read("other").unwrap(), Some("v3".to_string())); + assert_eq!(store.read("other").unwrap(), Some("v2".to_string())); } #[test] fn test_is_stale_and_pool_is_stale_at_boot_helpers() { let tmp = NamedTempFile::new().unwrap(); let store = restricted_store(tmp.path()); - store.write("key", "value").unwrap(); + store.upsert("key", "value").unwrap(); assert!(!store.is_stale().unwrap()); assert!(store.pool_is_stale_at_boot(i64::MAX).unwrap()); @@ -595,4 +634,239 @@ mod tests { let full = full_store(tmp2.path()); assert_eq!(full.mode(), PoolMode::Full); } + + #[test] + 
fn test_len_and_is_empty() { + let tmp = NamedTempFile::new().unwrap(); + let store = restricted_store(tmp.path()); + + assert!(store.is_empty().unwrap()); + assert_eq!(store.len().unwrap(), 0); + + store.upsert("key", "value").unwrap(); + assert!(!store.is_empty().unwrap()); + assert_eq!(store.len().unwrap(), 1); + + store.upsert("key2", "value2").unwrap(); + assert_eq!(store.len().unwrap(), 2); + + store.upsert("key", "updated").unwrap(); + assert_eq!(store.len().unwrap(), 2); + } + + #[test] + fn test_read_accepts_wire_max_key_in_restricted_mode() { + let tmp = NamedTempFile::new().unwrap(); + let store = restricted_store(tmp.path()); + + // Write a record using Full mode so a 300-byte key is accepted + let full = full_store(tmp.path()); + let long_key = "k".repeat(300); + full.upsert(&long_key, "val").unwrap(); + + // Restricted store can still read the 300-byte key (> 254 safe limit) + assert_eq!(store.read(&long_key).unwrap(), Some("val".to_string())); + + // But a key beyond the wire max (512) is rejected even for reads + let too_long = "k".repeat(513); + let err = store.read(&too_long).unwrap_err(); + assert!(matches!(err, KvpError::KeyTooLarge { .. 
})); + } + + #[test] + fn test_dump_writes_json() { + let tmp = NamedTempFile::new().unwrap(); + let store = restricted_store(tmp.path()); + + store.upsert("key1", "value1").unwrap(); + store.upsert("key2", "value2").unwrap(); + + let dump_file = NamedTempFile::new().unwrap(); + store.dump(dump_file.path()).unwrap(); + + let contents = std::fs::read_to_string(dump_file.path()).unwrap(); + let parsed: HashMap = + serde_json::from_str(&contents).unwrap(); + assert_eq!(parsed.len(), 2); + assert_eq!(parsed.get("key1"), Some(&"value1".to_string())); + assert_eq!(parsed.get("key2"), Some(&"value2".to_string())); + } + + #[test] + fn test_concurrent_upserts_to_different_keys() { + let tmp = NamedTempFile::new().unwrap(); + let path = tmp.path().to_path_buf(); + let threads: Vec<_> = (0..8) + .map(|t| { + let p = path.clone(); + std::thread::spawn(move || { + let store = + KvpPoolStore::new(Some(p), PoolMode::Restricted, false) + .unwrap(); + for i in 0..10 { + let key = format!("t{t}_k{i}"); + store.upsert(&key, &format!("val_{t}_{i}")).unwrap(); + } + }) + }) + .collect(); + + for t in threads { + t.join().unwrap(); + } + + let store = restricted_store(tmp.path()); + let entries = store.entries().unwrap(); + assert_eq!(entries.len(), 80); + for t in 0..8 { + for i in 0..10 { + let key = format!("t{t}_k{i}"); + assert_eq!( + entries.get(&key), + Some(&format!("val_{t}_{i}")), + "missing or wrong value for {key}" + ); + } + } + } + + #[test] + fn test_concurrent_upserts_to_same_key() { + let tmp = NamedTempFile::new().unwrap(); + let path = tmp.path().to_path_buf(); + let threads: Vec<_> = (0..8) + .map(|t| { + let p = path.clone(); + std::thread::spawn(move || { + let store = + KvpPoolStore::new(Some(p), PoolMode::Restricted, false) + .unwrap(); + for i in 0..10 { + store + .upsert("shared_key", &format!("t{t}_v{i}")) + .unwrap(); + } + }) + }) + .collect(); + + for t in threads { + t.join().unwrap(); + } + + let store = restricted_store(tmp.path()); + 
assert_eq!(store.len().unwrap(), 1); + let val = store.read("shared_key").unwrap().unwrap(); + assert!( + val.starts_with('t') && val.contains("_v"), + "unexpected value format: {val}" + ); + + // File must be exactly one record (no duplicates) + let file_len = std::fs::metadata(tmp.path()).unwrap().len() as usize; + assert_eq!(file_len, RECORD_SIZE); + } + + #[test] + fn test_concurrent_readers_and_writers() { + let tmp = NamedTempFile::new().unwrap(); + let path = tmp.path().to_path_buf(); + + // Seed some initial data + let store = restricted_store(tmp.path()); + for i in 0..10 { + store.upsert(&format!("k{i}"), &format!("v{i}")).unwrap(); + } + + let writer_threads: Vec<_> = (0..4) + .map(|t| { + let p = path.clone(); + std::thread::spawn(move || { + let store = + KvpPoolStore::new(Some(p), PoolMode::Restricted, false) + .unwrap(); + for round in 0..5 { + let key = format!("k{t}"); + store.upsert(&key, &format!("w{t}_r{round}")).unwrap(); + } + }) + }) + .collect(); + + let reader_threads: Vec<_> = (0..4) + .map(|_| { + let p = path.clone(); + std::thread::spawn(move || { + let store = + KvpPoolStore::new(Some(p), PoolMode::Restricted, false) + .unwrap(); + for _ in 0..20 { + let entries = store.entries().unwrap(); + // Should always have 10 keys, never more + assert!(entries.len() <= 10); + // File should be well-formed + for (k, v) in &entries { + assert!(!k.is_empty()); + assert!(!v.is_empty()); + } + } + }) + }) + .collect(); + + for t in writer_threads { + t.join().unwrap(); + } + for t in reader_threads { + t.join().unwrap(); + } + + let entries = store.entries().unwrap(); + assert_eq!(entries.len(), 10); + } + + #[test] + fn test_concurrent_writers_at_key_cap() { + let tmp = NamedTempFile::new().unwrap(); + let path = tmp.path().to_path_buf(); + + // Fill to just under the cap + let store = restricted_store(tmp.path()); + for i in 0..MAX_UNIQUE_KEYS - 4 { + store.upsert(&format!("pre{i}"), "v").unwrap(); + } + + // 4 threads each try to add one new 
unique key concurrently + let threads: Vec<_> = (0..4) + .map(|t| { + let p = path.clone(); + std::thread::spawn(move || { + let store = + KvpPoolStore::new(Some(p), PoolMode::Restricted, false) + .unwrap(); + store.upsert(&format!("new{t}"), "v") + }) + }) + .collect(); + + let results: Vec<_> = + threads.into_iter().map(|t| t.join().unwrap()).collect(); + + let successes = results.iter().filter(|r| r.is_ok()).count(); + let cap_errors = results + .iter() + .filter(|r| { + matches!(r, Err(KvpError::MaxUniqueKeysExceeded { .. })) + }) + .count(); + + // Exactly 4 should succeed (filling the last 4 slots) + assert_eq!(successes, 4); + assert_eq!(cap_errors, 0); + assert_eq!(store.len().unwrap(), MAX_UNIQUE_KEYS); + + // One more should be rejected + let err = store.upsert("one_too_many", "v").unwrap_err(); + assert!(matches!(err, KvpError::MaxUniqueKeysExceeded { .. })); + } } diff --git a/libazureinit-kvp/src/lib.rs b/libazureinit-kvp/src/lib.rs index bf342a3b..4c9aa844 100644 --- a/libazureinit-kvp/src/lib.rs +++ b/libazureinit-kvp/src/lib.rs @@ -11,6 +11,7 @@ use std::collections::HashMap; use std::fmt; use std::io; +use std::path::Path; pub mod kvp_pool; @@ -71,84 +72,37 @@ impl From for KvpError { pub use kvp_pool::KvpPoolStore; /// Key-value store with Hyper-V KVP semantics. -/// -/// The trait splits each operation into a `backend_*` method (raw I/O, -/// provided by the implementor) and a public method (`write`, `read`, -/// `clear`) that validates inputs then delegates to the backend. pub trait KvpStore: Send + Sync { - /// Maximum key size in bytes for this store. + /// Maximum key size in bytes for writes. fn max_key_size(&self) -> usize; - /// Maximum value size in bytes for this store. + /// Maximum value size in bytes for writes. fn max_value_size(&self) -> usize; - /// Raw write — persist a key-value pair without validation. 
- fn backend_write(&self, key: &str, value: &str) -> Result<(), KvpError>; + /// Insert a new key-value pair or update an existing key's value. + fn upsert(&self, key: &str, value: &str) -> Result<(), KvpError>; - /// Raw read — look up a key without validation. - fn backend_read(&self, key: &str) -> Result, KvpError>; + /// Read the value for a key. Returns `Ok(None)` when absent. + fn read(&self, key: &str) -> Result, KvpError>; - /// Return all key-value pairs, deduplicated with last-write-wins. - fn entries(&self) -> Result, KvpError>; - - /// Return all raw records in file order, without deduplication. - fn entries_raw(&self) -> Result, KvpError>; - - /// Remove all records matching `key`. Returns `true` if any were removed. + /// Remove a key. Returns `true` if the key was present. fn delete(&self, key: &str) -> Result; - /// Raw clear — remove all records without additional checks. - fn backend_clear(&self) -> Result<(), KvpError>; + /// Remove all entries from the store. + fn clear(&self) -> Result<(), KvpError>; - /// Write a key-value pair after validation. - fn write(&self, key: &str, value: &str) -> Result<(), KvpError> { - self.validate_key(key)?; - self.validate_value(value)?; - self.backend_write(key, value) - } + /// Return all key-value pairs. + fn entries(&self) -> Result, KvpError>; - /// Read the most recent value for a key (last-write-wins). - /// - /// Returns `Ok(None)` when the key is absent. - fn read(&self, key: &str) -> Result, KvpError> { - self.validate_key(key)?; - self.backend_read(key) - } + /// Return the number of entries in the store. + fn len(&self) -> Result; - /// Remove all records from the store. - fn clear(&self) -> Result<(), KvpError> { - self.backend_clear() - } + /// Return whether the store is empty. + fn is_empty(&self) -> Result; /// Whether the store's data is stale (e.g. predates current boot). 
- fn is_stale(&self) -> Result { - Ok(false) - } + fn is_stale(&self) -> Result; - /// Validate a key: must be non-empty, no null bytes, within - /// [`max_key_size`](Self::max_key_size). - fn validate_key(&self, key: &str) -> Result<(), KvpError> { - if key.is_empty() { - return Err(KvpError::EmptyKey); - } - if key.as_bytes().contains(&0) { - return Err(KvpError::KeyContainsNull); - } - let actual = key.len(); - let max = self.max_key_size(); - if actual > max { - return Err(KvpError::KeyTooLarge { max, actual }); - } - Ok(()) - } - - /// Validate a value: must be within [`max_value_size`](Self::max_value_size). - fn validate_value(&self, value: &str) -> Result<(), KvpError> { - let actual = value.len(); - let max = self.max_value_size(); - if actual > max { - return Err(KvpError::ValueTooLarge { max, actual }); - } - Ok(()) - } + /// Dump all entries to a JSON file at the given path. + fn dump(&self, path: &Path) -> Result<(), KvpError>; } From ae03af63f07c33778c205d32fd669a764380a2e1 Mon Sep 17 00:00:00 2001 From: Peyton Date: Fri, 27 Mar 2026 12:19:35 -0700 Subject: [PATCH 8/9] Refactor libazureinit-kvp: replace flock-based locking with fcntl OFD locks Use libc fcntl(F_OFD_SETLKW/F_OFD_SETLK) for shared/exclusive/unlock operations so pool file locking interoperates with Linux KVP consumers that use fcntl record locks. Also replace fs2 with libc and keep existing pool behavior/tests intact. 
--- doc/kvp_pr_description.md | 74 ++++++++++++++++++++++++++++++++ libazureinit-kvp/Cargo.toml | 2 +- libazureinit-kvp/src/kvp_pool.rs | 74 ++++++++++++++++++++++++++------ 3 files changed, 137 insertions(+), 13 deletions(-) create mode 100644 doc/kvp_pr_description.md diff --git a/doc/kvp_pr_description.md b/doc/kvp_pr_description.md new file mode 100644 index 00000000..b008dfb4 --- /dev/null +++ b/doc/kvp_pr_description.md @@ -0,0 +1,74 @@ +# PR: Add `libazureinit-kvp` with unified KVP pool store + +## Summary + +- Adds workspace member `libazureinit-kvp` in root `Cargo.toml` +- Adds `libazureinit-kvp/Cargo.toml` +- Adds `libazureinit-kvp/src/lib.rs` and `libazureinit-kvp/src/kvp_pool.rs` + +## Crate Design + +- One trait: `KvpStore` +- One implementation: `KvpPoolStore` + +`KvpStore` splits each operation into a `backend_*` method (raw I/O, +provided by the implementor) and a public method (`write`, `read`, +`clear`) that validates inputs then delegates to the backend. + +Public API: + +- `write`, `read` (validate then delegate to `backend_write`/`backend_read`) +- `entries`, `entries_raw` +- `delete`, `clear` +- `is_stale` + +Validation is centralized in trait default methods and policy-aware via: + +- `max_key_size(&self)` +- `max_value_size(&self)` + +`KvpPoolStore` is file-backed using Hyper-V KVP wire format +(fixed-size 512-byte key + 2048-byte value records), with lock-based concurrency. 
+ +## Policy and Limits + +Constructor: + +- `new(path: Option, mode: PoolMode, truncate_on_stale: bool)` + +`PoolMode`: + +- `Restricted` (default): key <= 254 bytes, value <= 1022 bytes +- `Full`: key <= 512 bytes, value <= 2048 bytes + +Behavior: + +- Default path when `None`: `/var/lib/hyperv/.kvp_pool_1` +- Unique key cap: 1024 + - new key beyond cap is rejected + - overwrite of existing key at cap is allowed +- `clear()` truncates the store +- `truncate_on_stale` keeps truncation caller-controlled + +## Errors + +`KvpError` includes explicit variants: + +- `EmptyKey` +- `KeyContainsNull` +- `KeyTooLarge { max, actual }` +- `ValueTooLarge { max, actual }` +- `MaxUniqueKeysExceeded { max }` +- `Io(io::Error)` + +## Testing + +17 tests covering: + +- restricted/full key/value boundary checks +- default and explicit path behavior +- mode getter +- unique-key cap behavior (including overwrite-at-cap and add-after-delete) +- `entries` last-write-wins and `entries_raw` duplicate preservation +- `delete`, `clear`, and stale checks +- key validation (empty and null-byte) diff --git a/libazureinit-kvp/Cargo.toml b/libazureinit-kvp/Cargo.toml index 91f95986..0ace7528 100644 --- a/libazureinit-kvp/Cargo.toml +++ b/libazureinit-kvp/Cargo.toml @@ -9,7 +9,7 @@ license = "MIT" description = "Hyper-V KVP (Key-Value Pair) storage library for azure-init." 
[dependencies] -fs2 = "0.4" +libc = "0.2" serde = { version = "1", features = ["derive"] } serde_json = "1" sysinfo = "0.38" diff --git a/libazureinit-kvp/src/kvp_pool.rs b/libazureinit-kvp/src/kvp_pool.rs index ca62424a..135db4ea 100644 --- a/libazureinit-kvp/src/kvp_pool.rs +++ b/libazureinit-kvp/src/kvp_pool.rs @@ -21,10 +21,10 @@ use std::collections::HashMap; use std::fs::{File, OpenOptions}; use std::io::{self, ErrorKind, Read, Seek, Write}; use std::os::unix::fs::MetadataExt; +use std::os::unix::io::AsRawFd; use std::path::{Path, PathBuf}; use std::time::{Duration, SystemTime, UNIX_EPOCH}; -use fs2::FileExt; use sysinfo::System; use crate::{KvpError, KvpStore}; @@ -42,6 +42,55 @@ const MAX_UNIQUE_KEYS: usize = 1024; const RECORD_SIZE: usize = WIRE_MAX_KEY_BYTES + WIRE_MAX_VALUE_BYTES; +/// Acquire an exclusive (write) lock on the entire file. +/// +/// Uses `fcntl(F_OFD_SETLKW)` — open file description locks that are +/// per-FD (safe for multi-threaded use) yet conflict with traditional +/// `fcntl` record locks used by `hv_kvp_daemon.c` and cloud-init. +fn fcntl_lock_exclusive(file: &File) -> io::Result<()> { + let fl = libc::flock { + l_type: libc::F_WRLCK as libc::c_short, + l_whence: libc::SEEK_SET as libc::c_short, + l_start: 0, + l_len: 0, + l_pid: 0, + }; + if unsafe { libc::fcntl(file.as_raw_fd(), libc::F_OFD_SETLKW, &fl) } == -1 { + return Err(io::Error::last_os_error()); + } + Ok(()) +} + +/// Acquire a shared (read) lock on the entire file. +fn fcntl_lock_shared(file: &File) -> io::Result<()> { + let fl = libc::flock { + l_type: libc::F_RDLCK as libc::c_short, + l_whence: libc::SEEK_SET as libc::c_short, + l_start: 0, + l_len: 0, + l_pid: 0, + }; + if unsafe { libc::fcntl(file.as_raw_fd(), libc::F_OFD_SETLKW, &fl) } == -1 { + return Err(io::Error::last_os_error()); + } + Ok(()) +} + +/// Release a lock on the entire file. 
+fn fcntl_unlock(file: &File) -> io::Result<()> { + let fl = libc::flock { + l_type: libc::F_UNLCK as libc::c_short, + l_whence: libc::SEEK_SET as libc::c_short, + l_start: 0, + l_len: 0, + l_pid: 0, + }; + if unsafe { libc::fcntl(file.as_raw_fd(), libc::F_OFD_SETLK, &fl) } == -1 { + return Err(io::Error::last_os_error()); + } + Ok(()) +} + /// Policy mode controlling key/value size limits for writes. #[derive(Clone, Copy, Debug, PartialEq, Eq)] pub enum PoolMode { @@ -187,11 +236,11 @@ impl KvpPoolStore { Err(e) => return Err(e.into()), }; - FileExt::lock_exclusive(&file).map_err(|e| { + fcntl_lock_exclusive(&file).map_err(|e| { io::Error::other(format!("failed to lock KVP file: {e}")) })?; let result = file.set_len(0).map_err(KvpError::from); - let _ = FileExt::unlock(&file); + let _ = fcntl_unlock(&file); result } @@ -208,6 +257,7 @@ impl KvpPoolStore { .read(true) .write(true) .create(true) + .truncate(false) .open(&self.path) } } @@ -255,7 +305,7 @@ fn read_all_records(file: &mut File) -> io::Result> { return Ok(Vec::new()); } - if len % RECORD_SIZE != 0 { + if !len.is_multiple_of(RECORD_SIZE) { return Err(io::Error::other(format!( "file size ({len}) is not a multiple of record size ({RECORD_SIZE})" ))); @@ -301,7 +351,7 @@ impl KvpStore for KvpPoolStore { self.validate_value(value)?; let mut file = self.open_for_read_write_create()?; - FileExt::lock_exclusive(&file).map_err(|e| { + fcntl_lock_exclusive(&file).map_err(|e| { io::Error::other(format!("failed to lock KVP file: {e}")) })?; @@ -321,7 +371,7 @@ impl KvpStore for KvpPoolStore { Ok(()) })(); - let _ = FileExt::unlock(&file); + let _ = fcntl_unlock(&file); result } @@ -334,11 +384,11 @@ impl KvpStore for KvpPoolStore { Err(e) => return Err(e.into()), }; - FileExt::lock_shared(&file).map_err(|e| { + fcntl_lock_shared(&file).map_err(|e| { io::Error::other(format!("failed to lock KVP file: {e}")) })?; let records = read_all_records(&mut file); - let _ = FileExt::unlock(&file); + let _ = 
fcntl_unlock(&file); let records = records?; Ok(records.into_iter().find(|(k, _)| k == key).map(|(_, v)| v)) @@ -353,11 +403,11 @@ impl KvpStore for KvpPoolStore { Err(e) => return Err(e.into()), }; - FileExt::lock_shared(&file).map_err(|e| { + fcntl_lock_shared(&file).map_err(|e| { io::Error::other(format!("failed to lock KVP file: {e}")) })?; let records = read_all_records(&mut file); - let _ = FileExt::unlock(&file); + let _ = fcntl_unlock(&file); let records = records?; Ok(records.into_iter().collect()) @@ -370,7 +420,7 @@ impl KvpStore for KvpPoolStore { Err(e) => return Err(e.into()), }; - FileExt::lock_exclusive(&file).map_err(|e| { + fcntl_lock_exclusive(&file).map_err(|e| { io::Error::other(format!("failed to lock KVP file: {e}")) })?; @@ -393,7 +443,7 @@ impl KvpStore for KvpPoolStore { Ok(true) })(); - let _ = FileExt::unlock(&file); + let _ = fcntl_unlock(&file); result } From 8445370d191fabdfd67f437fa2147dbe2ddbfa03 Mon Sep 17 00:00:00 2001 From: Peyton Date: Wed, 1 Apr 2026 15:38:38 -0700 Subject: [PATCH 9/9] feat(kvp): split insert and append semantics for pool store Introduce explicit insert (overwrite existing key) and append (always add record) APIs, refactor pool operations to iterator-based in-place updates/deletes, and document record-count vs unique-key behavior with expanded concurrency/integrity test coverage. --- libazureinit-kvp/src/kvp_pool.rs | 858 ++++++++++++++++++++++++++----- libazureinit-kvp/src/lib.rs | 11 +- 2 files changed, 726 insertions(+), 143 deletions(-) diff --git a/libazureinit-kvp/src/kvp_pool.rs b/libazureinit-kvp/src/kvp_pool.rs index 135db4ea..c68accb0 100644 --- a/libazureinit-kvp/src/kvp_pool.rs +++ b/libazureinit-kvp/src/kvp_pool.rs @@ -17,7 +17,7 @@ //! ## Reference //! 
- [Hyper-V Data Exchange Service (KVP)](https://learn.microsoft.com/en-us/virtualization/hyper-v-on-windows/reference/integration-services#hyper-v-data-exchange-service-kvp) -use std::collections::HashMap; +use std::collections::{HashMap, HashSet}; use std::fs::{File, OpenOptions}; use std::io::{self, ErrorKind, Read, Seek, Write}; use std::os::unix::fs::MetadataExt; @@ -260,6 +260,18 @@ impl KvpPoolStore { .truncate(false) .open(&self.path) } + + /// Create a read-only iterator over all records. + fn iter(&self) -> Result { + let file = self.open_for_read()?; + KvpPoolIter::new(file, false) + } + + /// Create a read-write iterator over all records. + fn iter_mut(&self) -> Result { + let file = self.open_for_read_write_create()?; + KvpPoolIter::new(file, true) + } } pub(crate) fn encode_record(key: &str, value: &str) -> Vec { @@ -299,42 +311,173 @@ pub(crate) fn decode_record(data: &[u8]) -> io::Result<(String, String)> { Ok((key, value)) } -fn read_all_records(file: &mut File) -> io::Result> { - let len = file.metadata()?.len() as usize; - if len == 0 { - return Ok(Vec::new()); +/// A record-at-a-time iterator over a KVP pool file. +/// +/// Uses file I/O with seek semantics for memory-efficient iteration +/// without loading all records into memory. Supports in-place value +/// overwrites and record removal for the last-yielded record. +/// +/// The underlying file is locked for the lifetime of the iterator; +/// the lock is released on drop. 
+#[derive(Debug)] +struct KvpPoolIter { + file: File, + record_count: usize, + current_index: usize, +} + +impl KvpPoolIter { + fn new(mut file: File, lock_exclusive: bool) -> Result { + let lock_result = if lock_exclusive { + fcntl_lock_exclusive(&file) + } else { + fcntl_lock_shared(&file) + }; + lock_result.map_err(|e| { + io::Error::other(format!("failed to lock KVP file: {e}")) + })?; + + let len = file.metadata()?.len() as usize; + if len > 0 && !len.is_multiple_of(RECORD_SIZE) { + let _ = fcntl_unlock(&file); + return Err(io::Error::other(format!( + "file size ({len}) is not a multiple of record size \ + ({RECORD_SIZE})" + )) + .into()); + } + + let record_count = len / RECORD_SIZE; + file.seek(io::SeekFrom::Start(0))?; + + Ok(Self { + file, + record_count, + current_index: 0, + }) } - if !len.is_multiple_of(RECORD_SIZE) { - return Err(io::Error::other(format!( - "file size ({len}) is not a multiple of record size ({RECORD_SIZE})" - ))); + /// The number of records in the file (updated by + /// [`append`](Self::append) and + /// [`remove_current`](Self::remove_current)). + fn record_count(&self) -> usize { + self.record_count } - file.seek(io::SeekFrom::Start(0))?; - let record_count = len / RECORD_SIZE; - let mut records = Vec::with_capacity(record_count); - let mut buf = [0u8; RECORD_SIZE]; + /// Overwrite the value field of the record last returned by + /// [`next()`](Iterator::next), zero-padding to fill the + /// fixed-width 2048-byte field. + /// + /// After the write the file position is at the start of the next + /// record, so iteration can continue normally. + /// + /// Returns `InvalidInput` if called before the first `next()`. 
+ fn overwrite_current_value(&mut self, value: &str) -> io::Result<()> { + if self.current_index == 0 { + return Err(io::Error::new( + ErrorKind::InvalidInput, + "no record has been read yet", + )); + } + + let record_start = (self.current_index - 1) as u64 * RECORD_SIZE as u64; + let value_offset = record_start + WIRE_MAX_KEY_BYTES as u64; - for _ in 0..record_count { - file.read_exact(&mut buf)?; - records.push(decode_record(&buf)?); + self.file.seek(io::SeekFrom::Start(value_offset))?; + + let mut buf = [0u8; WIRE_MAX_VALUE_BYTES]; + let bytes = value.as_bytes(); + let len = bytes.len().min(WIRE_MAX_VALUE_BYTES); + buf[..len].copy_from_slice(&bytes[..len]); + + self.file.write_all(&buf)?; + Ok(()) + } + + /// Append a new record at the end of the file, zero-padded to the + /// fixed-width wire format. + fn append(&mut self, key: &str, value: &str) -> io::Result<()> { + self.file.seek(io::SeekFrom::End(0))?; + self.file.write_all(&encode_record(key, value))?; + self.record_count += 1; + Ok(()) + } + + /// Remove the record last returned by [`next()`](Iterator::next) + /// by swapping it with the final record and truncating the file. + /// + /// After removal the file position rewinds so that the next call + /// to `next()` reads the record that was swapped into this slot + /// (unless the removed record was already the last one). + /// + /// Returns `InvalidInput` if called before the first `next()`. 
+ fn remove_current(&mut self) -> io::Result<()> { + if self.current_index == 0 { + return Err(io::Error::new( + ErrorKind::InvalidInput, + "no record has been read yet", + )); + } + + let delete_index = self.current_index - 1; + let last_index = self.record_count - 1; + + if delete_index != last_index { + self.file.seek(io::SeekFrom::Start( + last_index as u64 * RECORD_SIZE as u64, + ))?; + let mut buf = [0u8; RECORD_SIZE]; + self.file.read_exact(&mut buf)?; + + self.file.seek(io::SeekFrom::Start( + delete_index as u64 * RECORD_SIZE as u64, + ))?; + self.file.write_all(&buf)?; + + self.file.seek(io::SeekFrom::Start( + delete_index as u64 * RECORD_SIZE as u64, + ))?; + self.current_index = delete_index; + } + + self.file.set_len(last_index as u64 * RECORD_SIZE as u64)?; + self.record_count -= 1; + Ok(()) } - Ok(records) + /// Flush any buffered writes to the underlying file. + fn flush(&mut self) -> io::Result<()> { + self.file.flush() + } } -fn rewrite_file( - file: &mut File, - map: &HashMap, -) -> Result<(), KvpError> { - file.set_len(0)?; - file.seek(io::SeekFrom::Start(0))?; - for (k, v) in map { - file.write_all(&encode_record(k, v))?; +impl Drop for KvpPoolIter { + fn drop(&mut self) { + let _ = fcntl_unlock(&self.file); + } +} + +impl Iterator for KvpPoolIter { + type Item = io::Result<(String, String)>; + + fn next(&mut self) -> Option { + if self.current_index >= self.record_count { + return None; + } + let mut buf = [0u8; RECORD_SIZE]; + match self.file.read_exact(&mut buf) { + Ok(()) => { + self.current_index += 1; + Some(decode_record(&buf)) + } + Err(e) => Some(Err(e)), + } + } + + fn size_hint(&self) -> (usize, Option) { + let remaining = self.record_count - self.current_index; + (remaining, Some(remaining)) } - file.flush()?; - Ok(()) } impl KvpStore for KvpPoolStore { @@ -346,105 +489,109 @@ impl KvpStore for KvpPoolStore { self.mode.max_value_size() } - fn upsert(&self, key: &str, value: &str) -> Result<(), KvpError> { + fn insert(&self, key: 
&str, value: &str) -> Result<(), KvpError> { self.validate_key(key)?; self.validate_value(value)?; - let mut file = self.open_for_read_write_create()?; - fcntl_lock_exclusive(&file).map_err(|e| { - io::Error::other(format!("failed to lock KVP file: {e}")) - })?; - - let result = (|| -> Result<(), KvpError> { - let records = read_all_records(&mut file)?; - let mut map: HashMap = - records.into_iter().collect(); + let mut iter = self.iter_mut()?; + let mut found = false; + let mut unique_keys = HashSet::new(); + + while let Some(record) = iter.next() { + let (k, _) = record?; + let is_target = k == key; + unique_keys.insert(k); + if is_target { + iter.overwrite_current_value(value)?; + found = true; + break; + } + } - if !map.contains_key(key) && map.len() >= MAX_UNIQUE_KEYS { + if !found { + if unique_keys.len() >= MAX_UNIQUE_KEYS { return Err(KvpError::MaxUniqueKeysExceeded { max: MAX_UNIQUE_KEYS, }); } + iter.append(key, value)?; + } + + iter.flush()?; + Ok(()) + } - map.insert(key.to_string(), value.to_string()); - rewrite_file(&mut file, &map)?; - Ok(()) - })(); + fn append(&self, key: &str, value: &str) -> Result<(), KvpError> { + self.validate_key(key)?; + self.validate_value(value)?; - let _ = fcntl_unlock(&file); - result + let mut iter = self.iter_mut()?; + iter.append(key, value)?; + iter.flush()?; + Ok(()) } fn read(&self, key: &str) -> Result, KvpError> { Self::validate_key_for_read(key)?; - let mut file = match self.open_for_read() { - Ok(f) => f, - Err(ref e) if e.kind() == ErrorKind::NotFound => return Ok(None), - Err(e) => return Err(e.into()), + let iter = match self.iter() { + Ok(it) => it, + Err(KvpError::Io(e)) if e.kind() == ErrorKind::NotFound => { + return Ok(None); + } + Err(e) => return Err(e), }; - fcntl_lock_shared(&file).map_err(|e| { - io::Error::other(format!("failed to lock KVP file: {e}")) - })?; - let records = read_all_records(&mut file); - let _ = fcntl_unlock(&file); - let records = records?; + for record in iter { + let (k, v) = 
record?; + if k == key { + return Ok(Some(v)); + } + } - Ok(records.into_iter().find(|(k, _)| k == key).map(|(_, v)| v)) + Ok(None) } fn entries(&self) -> Result, KvpError> { - let mut file = match self.open_for_read() { - Ok(f) => f, - Err(ref e) if e.kind() == ErrorKind::NotFound => { - return Ok(HashMap::new()) + let iter = match self.iter() { + Ok(it) => it, + Err(KvpError::Io(e)) if e.kind() == ErrorKind::NotFound => { + return Ok(HashMap::new()); } - Err(e) => return Err(e.into()), + Err(e) => return Err(e), }; - fcntl_lock_shared(&file).map_err(|e| { - io::Error::other(format!("failed to lock KVP file: {e}")) - })?; - let records = read_all_records(&mut file); - let _ = fcntl_unlock(&file); - let records = records?; - - Ok(records.into_iter().collect()) + let mut map = HashMap::with_capacity(iter.record_count()); + for record in iter { + let (k, v) = record?; + map.entry(k).or_insert(v); + } + Ok(map) } fn delete(&self, key: &str) -> Result { - let mut file = match self.open_for_read_write() { + Self::validate_key_for_read(key)?; + + let file = match self.open_for_read_write() { Ok(f) => f, - Err(ref e) if e.kind() == ErrorKind::NotFound => return Ok(false), + Err(ref e) if e.kind() == ErrorKind::NotFound => { + return Ok(false); + } Err(e) => return Err(e.into()), }; - fcntl_lock_exclusive(&file).map_err(|e| { - io::Error::other(format!("failed to lock KVP file: {e}")) - })?; - - let result = (|| -> Result { - let records = read_all_records(&mut file)?; - let original_count = records.len(); - let kept: Vec<_> = - records.into_iter().filter(|(k, _)| k != key).collect(); - - if kept.len() == original_count { - return Ok(false); - } + let mut iter = KvpPoolIter::new(file, true)?; - file.set_len(0)?; - file.seek(io::SeekFrom::Start(0))?; - for (k, v) in &kept { - file.write_all(&encode_record(k, v))?; + while let Some(record) = iter.next() { + let (k, _) = record?; + if k == key { + iter.remove_current()?; + iter.flush()?; + return Ok(true); } - file.flush()?; 
- Ok(true) - })(); + } - let _ = fcntl_unlock(&file); - result + Ok(false) } fn clear(&self) -> Result<(), KvpError> { @@ -501,20 +648,20 @@ mod tests { } #[test] - fn test_upsert_rejects_empty_key() { + fn test_insert_rejects_empty_key() { let tmp = NamedTempFile::new().unwrap(); let store = restricted_store(tmp.path()); - let err = store.upsert("", "value").unwrap_err(); + let err = store.insert("", "value").unwrap_err(); assert!(matches!(err, KvpError::EmptyKey)); } #[test] - fn test_upsert_rejects_null_in_key() { + fn test_insert_rejects_null_in_key() { let tmp = NamedTempFile::new().unwrap(); let store = restricted_store(tmp.path()); - let err = store.upsert("bad\0key", "value").unwrap_err(); + let err = store.insert("bad\0key", "value").unwrap_err(); assert!(matches!(err, KvpError::KeyContainsNull)); } @@ -543,10 +690,10 @@ mod tests { let store = restricted_store(tmp.path()); let ok_key = "k".repeat(254); - store.upsert(&ok_key, "v").unwrap(); + store.insert(&ok_key, "v").unwrap(); let bad_key = "k".repeat(255); - let err = store.upsert(&bad_key, "v").unwrap_err(); + let err = store.insert(&bad_key, "v").unwrap_err(); assert!(matches!(err, KvpError::KeyTooLarge { .. })); } @@ -556,10 +703,10 @@ mod tests { let store = restricted_store(tmp.path()); let ok_val = "v".repeat(1022); - store.upsert("k", &ok_val).unwrap(); + store.insert("k", &ok_val).unwrap(); let bad_val = "v".repeat(1023); - let err = store.upsert("k2", &bad_val).unwrap_err(); + let err = store.insert("k2", &bad_val).unwrap_err(); assert!(matches!(err, KvpError::ValueTooLarge { .. })); } @@ -569,10 +716,10 @@ mod tests { let store = full_store(tmp.path()); let ok_key = "k".repeat(512); - store.upsert(&ok_key, "v").unwrap(); + store.insert(&ok_key, "v").unwrap(); let bad_key = "k".repeat(513); - let err = store.upsert(&bad_key, "v").unwrap_err(); + let err = store.insert(&bad_key, "v").unwrap_err(); assert!(matches!(err, KvpError::KeyTooLarge { .. 
})); } @@ -582,21 +729,21 @@ mod tests { let store = full_store(tmp.path()); let ok_val = "v".repeat(2048); - store.upsert("k", &ok_val).unwrap(); + store.insert("k", &ok_val).unwrap(); let bad_val = "v".repeat(2049); - let err = store.upsert("k2", &bad_val).unwrap_err(); + let err = store.insert("k2", &bad_val).unwrap_err(); assert!(matches!(err, KvpError::ValueTooLarge { .. })); } #[test] - fn test_upsert_overwrites_existing_key() { + fn test_insert_overwrites_existing_key() { let tmp = NamedTempFile::new().unwrap(); let store = restricted_store(tmp.path()); - store.upsert("key", "v1").unwrap(); - store.upsert("key", "v2").unwrap(); - store.upsert("other", "v3").unwrap(); + store.insert("key", "v1").unwrap(); + store.insert("key", "v2").unwrap(); + store.insert("other", "v3").unwrap(); let entries = store.entries().unwrap(); assert_eq!(entries.len(), 2); @@ -610,9 +757,9 @@ mod tests { let store = restricted_store(tmp.path()); for i in 0..MAX_UNIQUE_KEYS { - store.upsert(&format!("k{i}"), "v").unwrap(); + store.insert(&format!("k{i}"), "v").unwrap(); } - let err = store.upsert("overflow", "v").unwrap_err(); + let err = store.insert("overflow", "v").unwrap_err(); assert!(matches!(err, KvpError::MaxUniqueKeysExceeded { .. 
})); } @@ -622,22 +769,38 @@ mod tests { let store = restricted_store(tmp.path()); for i in 0..MAX_UNIQUE_KEYS { - store.upsert(&format!("k{i}"), "v").unwrap(); + store.insert(&format!("k{i}"), "v").unwrap(); } - store.upsert("k0", "updated").unwrap(); + store.insert("k0", "updated").unwrap(); assert_eq!(store.read("k0").unwrap(), Some("updated".to_string())); } + #[test] + fn test_insert_cap_uses_unique_keys_not_record_count() { + let tmp = NamedTempFile::new().unwrap(); + let store = restricted_store(tmp.path()); + + for i in 0..MAX_UNIQUE_KEYS - 1 { + store.insert(&format!("k{i}"), "v").unwrap(); + } + + store.append("k0", "dup").unwrap(); + assert_eq!(store.len().unwrap(), MAX_UNIQUE_KEYS); + + store.insert("new_unique", "v").unwrap(); + assert_eq!(store.read("new_unique").unwrap(), Some("v".to_string())); + } + #[test] fn test_unique_key_cap_allows_new_key_after_delete() { let tmp = NamedTempFile::new().unwrap(); let store = restricted_store(tmp.path()); for i in 0..MAX_UNIQUE_KEYS { - store.upsert(&format!("k{i}"), "v").unwrap(); + store.insert(&format!("k{i}"), "v").unwrap(); } assert!(store.delete("k0").unwrap()); - store.upsert("new-key", "v").unwrap(); + store.insert("new-key", "v").unwrap(); } #[test] @@ -645,7 +808,7 @@ mod tests { let tmp = NamedTempFile::new().unwrap(); let store = restricted_store(tmp.path()); - store.upsert("key", "value").unwrap(); + store.insert("key", "value").unwrap(); store.clear().unwrap(); assert_eq!(store.read("key").unwrap(), None); } @@ -655,8 +818,8 @@ mod tests { let tmp = NamedTempFile::new().unwrap(); let store = restricted_store(tmp.path()); - store.upsert("key", "v1").unwrap(); - store.upsert("other", "v2").unwrap(); + store.insert("key", "v1").unwrap(); + store.insert("other", "v2").unwrap(); assert!(store.delete("key").unwrap()); assert_eq!(store.read("key").unwrap(), None); @@ -667,7 +830,7 @@ mod tests { fn test_is_stale_and_pool_is_stale_at_boot_helpers() { let tmp = NamedTempFile::new().unwrap(); let store = 
restricted_store(tmp.path()); - store.upsert("key", "value").unwrap(); + store.insert("key", "value").unwrap(); assert!(!store.is_stale().unwrap()); assert!(store.pool_is_stale_at_boot(i64::MAX).unwrap()); @@ -693,14 +856,14 @@ mod tests { assert!(store.is_empty().unwrap()); assert_eq!(store.len().unwrap(), 0); - store.upsert("key", "value").unwrap(); + store.insert("key", "value").unwrap(); assert!(!store.is_empty().unwrap()); assert_eq!(store.len().unwrap(), 1); - store.upsert("key2", "value2").unwrap(); + store.insert("key2", "value2").unwrap(); assert_eq!(store.len().unwrap(), 2); - store.upsert("key", "updated").unwrap(); + store.insert("key", "updated").unwrap(); assert_eq!(store.len().unwrap(), 2); } @@ -712,7 +875,7 @@ mod tests { // Write a record using Full mode so a 300-byte key is accepted let full = full_store(tmp.path()); let long_key = "k".repeat(300); - full.upsert(&long_key, "val").unwrap(); + full.insert(&long_key, "val").unwrap(); // Restricted store can still read the 300-byte key (> 254 safe limit) assert_eq!(store.read(&long_key).unwrap(), Some("val".to_string())); @@ -728,8 +891,8 @@ mod tests { let tmp = NamedTempFile::new().unwrap(); let store = restricted_store(tmp.path()); - store.upsert("key1", "value1").unwrap(); - store.upsert("key2", "value2").unwrap(); + store.insert("key1", "value1").unwrap(); + store.insert("key2", "value2").unwrap(); let dump_file = NamedTempFile::new().unwrap(); store.dump(dump_file.path()).unwrap(); @@ -743,7 +906,7 @@ mod tests { } #[test] - fn test_concurrent_upserts_to_different_keys() { + fn test_concurrent_inserts_to_different_keys() { let tmp = NamedTempFile::new().unwrap(); let path = tmp.path().to_path_buf(); let threads: Vec<_> = (0..8) @@ -755,7 +918,7 @@ mod tests { .unwrap(); for i in 0..10 { let key = format!("t{t}_k{i}"); - store.upsert(&key, &format!("val_{t}_{i}")).unwrap(); + store.insert(&key, &format!("val_{t}_{i}")).unwrap(); } }) }) @@ -781,7 +944,7 @@ mod tests { } #[test] - fn 
test_concurrent_upserts_to_same_key() { + fn test_concurrent_inserts_to_same_key() { let tmp = NamedTempFile::new().unwrap(); let path = tmp.path().to_path_buf(); let threads: Vec<_> = (0..8) @@ -793,7 +956,7 @@ mod tests { .unwrap(); for i in 0..10 { store - .upsert("shared_key", &format!("t{t}_v{i}")) + .insert("shared_key", &format!("t{t}_v{i}")) .unwrap(); } }) @@ -812,7 +975,6 @@ mod tests { "unexpected value format: {val}" ); - // File must be exactly one record (no duplicates) let file_len = std::fs::metadata(tmp.path()).unwrap().len() as usize; assert_eq!(file_len, RECORD_SIZE); } @@ -822,10 +984,9 @@ mod tests { let tmp = NamedTempFile::new().unwrap(); let path = tmp.path().to_path_buf(); - // Seed some initial data let store = restricted_store(tmp.path()); for i in 0..10 { - store.upsert(&format!("k{i}"), &format!("v{i}")).unwrap(); + store.insert(&format!("k{i}"), &format!("v{i}")).unwrap(); } let writer_threads: Vec<_> = (0..4) @@ -837,7 +998,7 @@ mod tests { .unwrap(); for round in 0..5 { let key = format!("k{t}"); - store.upsert(&key, &format!("w{t}_r{round}")).unwrap(); + store.insert(&key, &format!("w{t}_r{round}")).unwrap(); } }) }) @@ -852,9 +1013,7 @@ mod tests { .unwrap(); for _ in 0..20 { let entries = store.entries().unwrap(); - // Should always have 10 keys, never more assert!(entries.len() <= 10); - // File should be well-formed for (k, v) in &entries { assert!(!k.is_empty()); assert!(!v.is_empty()); @@ -880,13 +1039,11 @@ mod tests { let tmp = NamedTempFile::new().unwrap(); let path = tmp.path().to_path_buf(); - // Fill to just under the cap let store = restricted_store(tmp.path()); for i in 0..MAX_UNIQUE_KEYS - 4 { - store.upsert(&format!("pre{i}"), "v").unwrap(); + store.insert(&format!("pre{i}"), "v").unwrap(); } - // 4 threads each try to add one new unique key concurrently let threads: Vec<_> = (0..4) .map(|t| { let p = path.clone(); @@ -894,7 +1051,7 @@ mod tests { let store = KvpPoolStore::new(Some(p), PoolMode::Restricted, false) 
.unwrap(); - store.upsert(&format!("new{t}"), "v") + store.insert(&format!("new{t}"), "v") }) }) .collect(); @@ -910,13 +1067,432 @@ mod tests { }) .count(); - // Exactly 4 should succeed (filling the last 4 slots) assert_eq!(successes, 4); assert_eq!(cap_errors, 0); assert_eq!(store.len().unwrap(), MAX_UNIQUE_KEYS); - // One more should be rejected - let err = store.upsert("one_too_many", "v").unwrap_err(); + let err = store.insert("one_too_many", "v").unwrap_err(); assert!(matches!(err, KvpError::MaxUniqueKeysExceeded { .. })); } + + #[test] + fn test_iter_yields_all_records_in_order() { + let tmp = NamedTempFile::new().unwrap(); + let store = restricted_store(tmp.path()); + + let expected: Vec<(&str, &str)> = + vec![("a", "1"), ("b", "2"), ("c", "3"), ("d", "4"), ("e", "5")]; + for (k, v) in &expected { + store.append(k, v).unwrap(); + } + + let iter = store.iter().unwrap(); + assert_eq!(iter.record_count(), 5); + + let records: Vec<(String, String)> = iter.map(|r| r.unwrap()).collect(); + for (i, (k, v)) in records.iter().enumerate() { + assert_eq!(k, expected[i].0); + assert_eq!(v, expected[i].1); + } + } + + #[test] + fn test_iter_empty_file() { + let tmp = NamedTempFile::new().unwrap(); + let store = restricted_store(tmp.path()); + + let iter = store.iter().unwrap(); + assert_eq!(iter.record_count(), 0); + assert_eq!(iter.count(), 0); + } + + #[test] + fn test_iter_malformed_file_error() { + let tmp = NamedTempFile::new().unwrap(); + std::fs::write(tmp.path(), [0u8; RECORD_SIZE + 1]).unwrap(); + + let store = restricted_store(tmp.path()); + let err = store.iter().unwrap_err(); + assert!(matches!(err, KvpError::Io(_))); + } + + #[test] + fn test_overwrite_adjacent_records_untouched() { + let tmp = NamedTempFile::new().unwrap(); + let store = restricted_store(tmp.path()); + store.append("first", "val_first").unwrap(); + store.append("middle", "val_middle").unwrap(); + store.append("last", "val_last").unwrap(); + + let raw_before = 
std::fs::read(tmp.path()).unwrap(); + let first_before = &raw_before[..RECORD_SIZE]; + let last_before = &raw_before[2 * RECORD_SIZE..3 * RECORD_SIZE]; + + { + let mut iter = store.iter_mut().unwrap(); + iter.next().unwrap().unwrap(); // first + let (k, _) = iter.next().unwrap().unwrap(); // middle + assert_eq!(k, "middle"); + iter.overwrite_current_value("UPDATED").unwrap(); + iter.flush().unwrap(); + } + + let raw_after = std::fs::read(tmp.path()).unwrap(); + assert_eq!(&raw_after[..RECORD_SIZE], first_before); + assert_eq!(&raw_after[2 * RECORD_SIZE..3 * RECORD_SIZE], last_before); + + assert_eq!(store.read("middle").unwrap(), Some("UPDATED".to_string())); + } + + #[test] + fn test_overwrite_multiple_in_one_pass() { + let tmp = NamedTempFile::new().unwrap(); + let store = restricted_store(tmp.path()); + store.append("a", "old_a").unwrap(); + store.append("b", "old_b").unwrap(); + store.append("c", "old_c").unwrap(); + + { + let mut iter = store.iter_mut().unwrap(); + let mut idx = 0; + while let Some(record) = iter.next() { + record.unwrap(); + if idx == 0 || idx == 2 { + iter.overwrite_current_value("new").unwrap(); + } + idx += 1; + } + iter.flush().unwrap(); + } + + assert_eq!(store.read("a").unwrap(), Some("new".to_string())); + assert_eq!(store.read("b").unwrap(), Some("old_b".to_string())); + assert_eq!(store.read("c").unwrap(), Some("new".to_string())); + } + + #[test] + fn test_remove_swap_and_continue() { + let tmp = NamedTempFile::new().unwrap(); + let store = restricted_store(tmp.path()); + store.append("A", "1").unwrap(); + store.append("B", "2").unwrap(); + store.append("C", "3").unwrap(); + store.append("D", "4").unwrap(); + + let mut collected = Vec::new(); + { + let mut iter = store.iter_mut().unwrap(); + while let Some(record) = iter.next() { + let (k, v) = record.unwrap(); + if k == "B" { + iter.remove_current().unwrap(); + } else { + collected.push((k, v)); + } + } + iter.flush().unwrap(); + } + + assert_eq!(collected.len(), 3); + 
assert!(collected.iter().any(|(k, _)| k == "A")); + assert!(collected.iter().any(|(k, _)| k == "C")); + assert!(collected.iter().any(|(k, _)| k == "D")); + + assert_eq!(store.len().unwrap(), 3); + assert_eq!(store.read("B").unwrap(), None); + } + + #[test] + fn test_remove_last_record_no_swap() { + let tmp = NamedTempFile::new().unwrap(); + let store = restricted_store(tmp.path()); + store.append("a", "1").unwrap(); + store.append("b", "2").unwrap(); + store.append("c", "3").unwrap(); + + { + let mut iter = store.iter_mut().unwrap(); + iter.next().unwrap().unwrap(); + iter.next().unwrap().unwrap(); + iter.next().unwrap().unwrap(); + iter.remove_current().unwrap(); + iter.flush().unwrap(); + } + + assert_eq!(store.len().unwrap(), 2); + assert_eq!(store.read("c").unwrap(), None); + assert_eq!(store.read("a").unwrap(), Some("1".to_string())); + assert_eq!(store.read("b").unwrap(), Some("2".to_string())); + } + + #[test] + fn test_remove_only_record() { + let tmp = NamedTempFile::new().unwrap(); + let store = restricted_store(tmp.path()); + store.append("solo", "val").unwrap(); + + { + let mut iter = store.iter_mut().unwrap(); + iter.next().unwrap().unwrap(); + iter.remove_current().unwrap(); + iter.flush().unwrap(); + } + + assert!(store.is_empty().unwrap()); + } + + #[test] + fn test_append_no_key_cap() { + let tmp = NamedTempFile::new().unwrap(); + let store = restricted_store(tmp.path()); + + for i in 0..MAX_UNIQUE_KEYS { + store.insert(&format!("k{i}"), "v").unwrap(); + } + store.append("extra", "val").unwrap(); + assert_eq!(store.len().unwrap(), MAX_UNIQUE_KEYS + 1); + } + + #[test] + fn test_append_validates_sizes() { + let tmp = NamedTempFile::new().unwrap(); + let store = restricted_store(tmp.path()); + + let big_key = "k".repeat(255); + let err = store.append(&big_key, "v").unwrap_err(); + assert!(matches!(err, KvpError::KeyTooLarge { .. 
})); + + let big_val = "v".repeat(1023); + let err = store.append("k", &big_val).unwrap_err(); + assert!(matches!(err, KvpError::ValueTooLarge { .. })); + } + + #[test] + fn test_append_then_read() { + let tmp = NamedTempFile::new().unwrap(); + let store = restricted_store(tmp.path()); + + store.append("diag_001", "event_data").unwrap(); + assert_eq!( + store.read("diag_001").unwrap(), + Some("event_data".to_string()) + ); + } + + #[test] + fn test_append_duplicate_key_semantics() { + let tmp = NamedTempFile::new().unwrap(); + let store = restricted_store(tmp.path()); + + store.append("X", "first").unwrap(); + store.append("X", "second").unwrap(); + + assert_eq!(store.read("X").unwrap(), Some("first".to_string())); + + let entries = store.entries().unwrap(); + assert_eq!(entries.get("X"), Some(&"first".to_string())); + } + + #[test] + fn test_insert_file_size_unchanged_on_overwrite() { + let tmp = NamedTempFile::new().unwrap(); + let store = restricted_store(tmp.path()); + + store.insert("key", "original").unwrap(); + let size_before = std::fs::metadata(tmp.path()).unwrap().len(); + + store.insert("key", "updated").unwrap(); + let size_after = std::fs::metadata(tmp.path()).unwrap().len(); + + assert_eq!(size_before, size_after); + assert_eq!(store.read("key").unwrap(), Some("updated".to_string())); + } + + #[test] + fn test_insert_file_size_grows_on_new_key() { + let tmp = NamedTempFile::new().unwrap(); + let store = restricted_store(tmp.path()); + + store.insert("k1", "v1").unwrap(); + let size_before = std::fs::metadata(tmp.path()).unwrap().len(); + + store.insert("k2", "v2").unwrap(); + let size_after = std::fs::metadata(tmp.path()).unwrap().len(); + + assert_eq!(size_after - size_before, RECORD_SIZE as u64); + } + + #[test] + fn test_insert_preserves_other_records() { + let tmp = NamedTempFile::new().unwrap(); + let store = restricted_store(tmp.path()); + + for i in 0..5 { + store.insert(&format!("k{i}"), &format!("v{i}")).unwrap(); + } + + let raw_before = 
std::fs::read(tmp.path()).unwrap(); + store.insert("k2", "CHANGED").unwrap(); + let raw_after = std::fs::read(tmp.path()).unwrap(); + + assert_eq!(raw_before.len(), raw_after.len()); + for i in 0..5 { + if i == 2 { + continue; + } + let start = i * RECORD_SIZE; + let end = start + RECORD_SIZE; + assert_eq!( + &raw_before[start..end], + &raw_after[start..end], + "record {i} was modified unexpectedly" + ); + } + } + + #[test] + fn test_file_integrity_after_mixed_ops() { + let tmp = NamedTempFile::new().unwrap(); + let store = restricted_store(tmp.path()); + + for i in 0..10 { + store.insert(&format!("u{i}"), &format!("uv{i}")).unwrap(); + } + for i in 0..5 { + store.append(&format!("a{i}"), &format!("av{i}")).unwrap(); + } + store.delete("u3").unwrap(); + store.delete("u7").unwrap(); + store.insert("u0", "overwritten").unwrap(); + + let raw = std::fs::read(tmp.path()).unwrap(); + assert_eq!(raw.len() % RECORD_SIZE, 0); + + let record_count = raw.len() / RECORD_SIZE; + for i in 0..record_count { + let start = i * RECORD_SIZE; + let end = start + RECORD_SIZE; + let result = decode_record(&raw[start..end]); + assert!( + result.is_ok(), + "record {i} failed to decode: {:?}", + result.err() + ); + } + } + + #[test] + fn test_concurrent_appends() { + let tmp = NamedTempFile::new().unwrap(); + let path = tmp.path().to_path_buf(); + + let threads: Vec<_> = (0..8) + .map(|t| { + let p = path.clone(); + std::thread::spawn(move || { + let store = + KvpPoolStore::new(Some(p), PoolMode::Restricted, false) + .unwrap(); + for i in 0..10 { + store + .append( + &format!("t{t}_d{i}"), + &format!("data_{t}_{i}"), + ) + .unwrap(); + } + }) + }) + .collect(); + + for t in threads { + t.join().unwrap(); + } + + let store = restricted_store(tmp.path()); + assert_eq!(store.len().unwrap(), 80); + + let entries = store.entries().unwrap(); + for t in 0..8 { + for i in 0..10 { + let key = format!("t{t}_d{i}"); + assert!(entries.contains_key(&key), "missing key {key}"); + } + } + } + + #[test] + 
fn test_concurrent_mixed_insert_and_append() { + let tmp = NamedTempFile::new().unwrap(); + let path = tmp.path().to_path_buf(); + + let store = restricted_store(tmp.path()); + for i in 0..4 { + store.insert(&format!("fixed{i}"), "init").unwrap(); + } + + let insert_threads: Vec<_> = (0..4) + .map(|t| { + let p = path.clone(); + std::thread::spawn(move || { + let store = + KvpPoolStore::new(Some(p), PoolMode::Restricted, false) + .unwrap(); + for round in 0..5 { + store + .insert(&format!("fixed{t}"), &format!("r{round}")) + .unwrap(); + } + }) + }) + .collect(); + + let append_threads: Vec<_> = (0..4) + .map(|t| { + let p = path.clone(); + std::thread::spawn(move || { + let store = + KvpPoolStore::new(Some(p), PoolMode::Restricted, false) + .unwrap(); + for i in 0..5 { + store + .append( + &format!("diag_t{t}_i{i}"), + &format!("d{i}"), + ) + .unwrap(); + } + }) + }) + .collect(); + + for t in insert_threads { + t.join().unwrap(); + } + for t in append_threads { + t.join().unwrap(); + } + + let entries = store.entries().unwrap(); + assert_eq!(entries.len(), 24); + for t in 0..4 { + assert!(entries.contains_key(&format!("fixed{t}"))); + } + } + + #[test] + fn test_iter_drop_mid_iteration_releases_lock() { + let tmp = NamedTempFile::new().unwrap(); + let store = restricted_store(tmp.path()); + store.insert("k1", "v1").unwrap(); + store.insert("k2", "v2").unwrap(); + + { + let mut iter = store.iter().unwrap(); + iter.next().unwrap().unwrap(); + } + + store.insert("k3", "v3").unwrap(); + assert_eq!(store.len().unwrap(), 3); + } } diff --git a/libazureinit-kvp/src/lib.rs b/libazureinit-kvp/src/lib.rs index 4c9aa844..85f18311 100644 --- a/libazureinit-kvp/src/lib.rs +++ b/libazureinit-kvp/src/lib.rs @@ -80,7 +80,10 @@ pub trait KvpStore: Send + Sync { fn max_value_size(&self) -> usize; /// Insert a new key-value pair or update an existing key's value. 
- fn upsert(&self, key: &str, value: &str) -> Result<(), KvpError>; + fn insert(&self, key: &str, value: &str) -> Result<(), KvpError>; + + /// Append a key-value pair without checking for an existing key, so + fn append(&self, key: &str, value: &str) -> Result<(), KvpError>; /// Read the value for a key. Returns `Ok(None)` when absent. fn read(&self, key: &str) -> Result, KvpError>; @@ -94,7 +97,11 @@ pub trait KvpStore: Send + Sync { /// Return all key-value pairs. fn entries(&self) -> Result, KvpError>; - /// Return the number of entries in the store. + /// Return the number of records in the store. + /// + /// This counts on-disk records, not unique keys. If [`append`](Self::append) + /// was used to write duplicate keys, this may exceed the number of + /// unique keys returned by [`entries`](Self::entries). fn len(&self) -> Result; /// Return whether the store is empty.