diff --git a/Cargo.lock b/Cargo.lock index ed0eebd1..fb996fa5 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3781,28 +3781,28 @@ checksum = "11d3d7f243d5c5a8b9bb5d6dd2b1602c0cb0b9db1621bafc7ed66e35ff9fe092" [[package]] name = "llama-cpp-bindings" -version = "0.6.0" +version = "0.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5c12d8de3511f1c3e3025e811ad2644d22d5b6657f18e9496ea3977c456eed8a" +checksum = "0d1144611b04160ced5626c50470aa83b24b739174f06b57d17fe0acb2e20dd0" dependencies = [ "encoding_rs", "enumflags2", "llama-cpp-bindings-sys", "llama-cpp-bindings-types", + "llama-cpp-log-decoder", "llguidance", + "log", "nom 8.0.0", "serde_json", "thiserror 2.0.18", "toktrie", - "tracing", - "tracing-core", ] [[package]] name = "llama-cpp-bindings-build" -version = "0.6.0" +version = "0.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "352fc011d0d723af3864d500d3a78b2d5ee0a5200993229f93421984d9abbfe3" +checksum = "d54fa3f1bf8856c5a080f76a9b595356de0199ff573c2605a6d47d0791401721" dependencies = [ "bindgen", "cc", @@ -3815,24 +3815,30 @@ dependencies = [ [[package]] name = "llama-cpp-bindings-sys" -version = "0.6.0" +version = "0.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ac6ce8ade04ae8cacd4ce4e627786c0a449c54d87b5e7287e936ab13fd8ceca8" +checksum = "1d615ae977c1f81cb87cf8b75a7977d3e5d30cf20723050e939657a473dad6ac" dependencies = [ "llama-cpp-bindings-build", ] [[package]] name = "llama-cpp-bindings-types" -version = "0.6.0" +version = "0.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a76906d544513079d6dbd299d9f0469f618077153aba8eeaf950727a06b45aac" +checksum = "aafa1ae3e0c87d06d44d6a1476307e069bac06bd0efdf374942fcbcc41aa8bfd" dependencies = [ "serde", "serde_json", "thiserror 2.0.18", ] +[[package]] +name = "llama-cpp-log-decoder" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "66433575f7419dd96c18928a4b56c1cd3a631f6b941515ff479c6896d713b8b3" + [[package]] name = "llguidance" version = "1.7.0" @@ -7353,7 +7359,6 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "db97caf9d906fbde555dd62fa95ddba9eecfd14cb388e4f491a66d74cd5fb79a" dependencies = [ "once_cell", - "valuable", ] [[package]] @@ -7651,12 +7656,6 @@ dependencies = [ "wasm-bindgen", ] -[[package]] -name = "valuable" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ba73ea9cf16a25df0c8caa16c51acb937d5712a8429db78a3ee29d5dcacd3a65" - [[package]] name = "vcpkg" version = "0.2.15" diff --git a/Cargo.toml b/Cargo.toml index 4be68b5f..ba364156 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -38,9 +38,9 @@ hf-hub = { version = "0.4", features = ["tokio"] } image = "0.25" indoc = "2" jsonschema = { version = "0.37", default-features = false } -llama-cpp-bindings = "=0.6.0" -llama-cpp-bindings-sys = "=0.6.0" -llama-cpp-bindings-types = "=0.6.0" +llama-cpp-bindings = "=0.7.0" +llama-cpp-bindings-sys = "=0.7.0" +llama-cpp-bindings-types = "=0.7.0" base64 = "0.22" log = "0.4" mime_guess = "2" diff --git a/Makefile b/Makefile index c9e8ea3b..73e0e1d8 100644 --- a/Makefile +++ b/Makefile @@ -10,21 +10,15 @@ FRONTEND_SOURCES := $(shell find resources -type f) $(wildcard jarmuz/*.mjs) # Real targets # ----------------------------------------------------------------------------- -package-lock.json: package.json - npm install --package-lock-only +esbuild-meta.json: $(FRONTEND_SOURCES) jarmuz-static.mjs tsconfig.json package.json node_modules + ./jarmuz-static.mjs node_modules: package-lock.json npm ci touch node_modules -esbuild-meta.json: $(FRONTEND_SOURCES) jarmuz-static.mjs tsconfig.json package.json node_modules - ./jarmuz-static.mjs - -target/debug/paddler: $(PADDLER_SOURCES) - cargo build -p paddler_cli - -target/release/paddler: $(PADDLER_SOURCES) esbuild-meta.json - cargo build --release -p paddler_cli --features web_admin_panel +package-lock.json: package.json + npm install --package-lock-only target/cuda/debug/paddler: $(PADDLER_SOURCES) esbuild-meta.json cargo build -p paddler_cli --features cuda,web_admin_panel --target-dir target/cuda @@ -32,22 +26,35 @@ target/cuda/debug/paddler: $(PADDLER_SOURCES) esbuild-meta.json target/cuda/release/paddler: $(PADDLER_SOURCES) esbuild-meta.json cargo build --release -p paddler_cli --features cuda,web_admin_panel --target-dir target/cuda +target/cuda/release/paddler_gui: $(PADDLER_SOURCES) esbuild-meta.json + cargo build --release -p paddler_gui --features cuda,web_admin_panel --target-dir target/cuda + +target/debug/paddler: $(PADDLER_SOURCES) + cargo build -p paddler_cli + target/metal/debug/paddler: $(PADDLER_SOURCES) esbuild-meta.json cargo build -p paddler_cli --features metal,web_admin_panel --target-dir target/metal target/metal/release/paddler: $(PADDLER_SOURCES) esbuild-meta.json cargo build --release -p paddler_cli --features metal,web_admin_panel --target-dir target/metal -target/vulkan/release/paddler: $(PADDLER_SOURCES) esbuild-meta.json - cargo build --release -p paddler_cli --features vulkan,web_admin_panel --target-dir target/vulkan +target/release/paddler: $(PADDLER_SOURCES) esbuild-meta.json + cargo build --release -p paddler_cli --features web_admin_panel target/release/paddler_gui: $(PADDLER_SOURCES) esbuild-meta.json cargo build --release -p paddler_gui --features web_admin_panel +target/vulkan/release/paddler: $(PADDLER_SOURCES) esbuild-meta.json + cargo build --release -p paddler_cli --features vulkan,web_admin_panel --target-dir target/vulkan + # ----------------------------------------------------------------------------- # Phony targets # ----------------------------------------------------------------------------- +.PHONY: build.client.js +build.client.js: node_modules + npm --workspace @intentee/paddler-client run build + .PHONY: clean clean: rm -rf esbuild-meta.json @@ -88,6 +95,10 @@ fmt: node_modules .PHONY: test test: test.client.js test.unit test.integration +.PHONY: test.client.js +test.client.js: node_modules + npm --workspace @intentee/paddler-client test + .PHONY: test.integration test.integration: target/debug/paddler cargo test -p paddler_tests --features tests_that_use_compiled_paddler,tests_that_use_in_process_cluster,tests_that_use_llms @@ -104,14 +115,6 @@ test.integration.metal: target/metal/debug/paddler test.unit: esbuild-meta.json cargo test --features web_admin_panel -.PHONY: build.client.js -build.client.js: node_modules - npm --workspace @intentee/paddler-client run build - -.PHONY: test.client.js -test.client.js: node_modules - npm --workspace @intentee/paddler-client test - .PHONY: watch watch: node_modules ./jarmuz-watch.mjs diff --git a/paddler/src/agent/continuous_batch_scheduler/decode_batch_phase.rs b/paddler/src/agent/continuous_batch_scheduler/decode_batch_phase.rs index d080bbba..10214fe8 100644 --- a/paddler/src/agent/continuous_batch_scheduler/decode_batch_phase.rs +++ b/paddler/src/agent/continuous_batch_scheduler/decode_batch_phase.rs @@ -4,5 +4,5 @@ use crate::agent::continuous_batch_scheduler::batch_pass::BatchPass; use crate::agent::continuous_batch_scheduler::decode_outcome::DecodeOutcome; pub fn run(pass: &mut BatchPass, context: &mut LlamaContext) -> DecodeOutcome { - DecodeOutcome::from_decode_result(&context.decode(&mut pass.batch)) + DecodeOutcome::from_decode_result(context.decode(&mut pass.batch)) } diff --git a/paddler/src/agent/continuous_batch_scheduler/decode_outcome.rs b/paddler/src/agent/continuous_batch_scheduler/decode_outcome.rs index 667d2821..edb40528 100644 --- a/paddler/src/agent/continuous_batch_scheduler/decode_outcome.rs +++ b/paddler/src/agent/continuous_batch_scheduler/decode_outcome.rs @@ -1,35 +1,35 @@ -use llama_cpp_bindings::DecodeError; +use llama_cpp_bindings::error::DecodeError; #[derive(Debug)] pub enum DecodeOutcome { Decoded, NeedsEviction, Aborted, - Errored(i32), + Errored(DecodeError), } impl DecodeOutcome { #[must_use] - pub const fn from_decode_result(result: &Result<(), DecodeError>) -> Self { + pub fn from_decode_result(result: Result<(), DecodeError>) -> Self { match result { Ok(()) => Self::Decoded, Err(DecodeError::NoKvCacheSlot) => Self::NeedsEviction, - Err(DecodeError::Aborted | DecodeError::NTokensZero) => Self::Aborted, - Err(DecodeError::Unknown(error_code)) => Self::Errored(*error_code), + Err(DecodeError::Aborted | DecodeError::BatchInvalid) => Self::Aborted, + Err(other) => Self::Errored(other), } } } #[cfg(test)] mod tests { - use llama_cpp_bindings::DecodeError; + use llama_cpp_bindings::error::DecodeError; use super::DecodeOutcome; #[test] fn ok_maps_to_decoded() { assert!(matches!( - DecodeOutcome::from_decode_result(&Ok(())), + DecodeOutcome::from_decode_result(Ok(())), DecodeOutcome::Decoded )); } @@ -37,7 +37,7 @@ mod tests { #[test] fn no_kv_cache_slot_maps_to_needs_eviction() { assert!(matches!( - DecodeOutcome::from_decode_result(&Err(DecodeError::NoKvCacheSlot)), + DecodeOutcome::from_decode_result(Err(DecodeError::NoKvCacheSlot)), DecodeOutcome::NeedsEviction )); } @@ -45,23 +45,27 @@ mod tests { #[test] fn aborted_maps_to_aborted() { assert!(matches!( - DecodeOutcome::from_decode_result(&Err(DecodeError::Aborted)), + DecodeOutcome::from_decode_result(Err(DecodeError::Aborted)), DecodeOutcome::Aborted )); } #[test] - fn n_tokens_zero_maps_to_aborted() { + fn batch_invalid_maps_to_aborted() { assert!(matches!( - DecodeOutcome::from_decode_result(&Err(DecodeError::NTokensZero)), + DecodeOutcome::from_decode_result(Err(DecodeError::BatchInvalid)), DecodeOutcome::Aborted )); } #[test] - fn unknown_carries_error_code() { - let outcome = DecodeOutcome::from_decode_result(&Err(DecodeError::Unknown(42))); + fn other_error_is_forwarded_as_errored() { + let outcome = + DecodeOutcome::from_decode_result(Err(DecodeError::UnknownStatus { code: 42 })); - assert!(matches!(outcome, DecodeOutcome::Errored(42))); + assert!(matches!( + outcome, + DecodeOutcome::Errored(DecodeError::UnknownStatus { code: 42 }) + )); } } diff --git a/paddler/src/agent/continuous_batch_scheduler/mod.rs b/paddler/src/agent/continuous_batch_scheduler/mod.rs index a21b5f2e..28809909 100644 --- a/paddler/src/agent/continuous_batch_scheduler/mod.rs +++ b/paddler/src/agent/continuous_batch_scheduler/mod.rs @@ -1006,10 +1006,8 @@ impl ContinuousBatchScheduler { DecodeOutcome::Aborted => { return Ok(()); } - DecodeOutcome::Errored(error_code) => { - return Err(anyhow!( - "Decode failed with unknown error code: {error_code}" - )); + DecodeOutcome::Errored(decode_error) => { + return Err(anyhow::Error::new(decode_error).context("decode failed")); } } } diff --git a/paddler/src/balancer/state_database_type.rs b/paddler/src/balancer/state_database_type.rs index b4c97c9c..0f22655d 100644 --- a/paddler/src/balancer/state_database_type.rs +++ b/paddler/src/balancer/state_database_type.rs @@ -76,11 +76,16 @@ mod tests { #[test] fn test_file_absolute_path() -> Result<()> { - let result = StateDatabaseType::from_str("file:///absolute/path")?; + #[cfg(unix)] + let (url, expected_path) = ("file:///absolute/path", "/absolute/path"); + #[cfg(windows)] + let (url, expected_path) = ("file://C:/absolute/path", "C:/absolute/path"); + + let result = StateDatabaseType::from_str(url)?; match result { StateDatabaseType::File(path) => { - assert_eq!(path, PathBuf::from("/absolute/path")); + assert_eq!(path, PathBuf::from(expected_path)); } StateDatabaseType::Memory(_) => { return Err(anyhow!("Expected File variant")); diff --git a/paddler/src/model_source/url.rs b/paddler/src/model_source/url.rs index 72992a5c..c5bacc07 100644 --- a/paddler/src/model_source/url.rs +++ b/paddler/src/model_source/url.rs @@ -533,8 +533,13 @@ mod tests { } #[test] - fn classify_cache_io_error_maps_enospc_to_cache_storage_is_full() { - let error = io::Error::from_raw_os_error(28); + fn classify_cache_io_error_maps_disk_full_errno_to_cache_storage_is_full() { + #[cfg(unix)] + const DISK_FULL_ERRNO: i32 = 28; + #[cfg(windows)] + const DISK_FULL_ERRNO: i32 = 112; + + let error = io::Error::from_raw_os_error(DISK_FULL_ERRNO); assert!(matches!( classify_cache_io_error(TEST_URL, &error), diff --git a/paddler_bootstrap/src/shutdown_signal/windows.rs b/paddler_bootstrap/src/shutdown_signal/windows.rs index b019c78f..4d9df50d 100644 --- a/paddler_bootstrap/src/shutdown_signal/windows.rs +++ b/paddler_bootstrap/src/shutdown_signal/windows.rs @@ -10,6 +10,10 @@ use tokio::signal::windows::ctrl_c; use tokio::signal::windows::ctrl_close; use tokio::signal::windows::ctrl_shutdown; +#[expect( + clippy::struct_field_names, + reason = "field names mirror the Windows console control event types they hold; the shared `ctrl_` prefix is part of the Windows API vocabulary, and `break` is a reserved keyword" +)] pub struct ShutdownSignals { ctrl_c: CtrlC, ctrl_break: CtrlBreak, diff --git a/paddler_tests/src/lib.rs b/paddler_tests/src/lib.rs index 1afef49d..6ef3f36f 100644 --- a/paddler_tests/src/lib.rs +++ b/paddler_tests/src/lib.rs @@ -26,7 +26,9 @@ pub mod openai_chat_completions_client; pub mod paddler_command; pub mod parse_test_device_value; pub mod qwen3_embedding_cluster_params; +#[cfg(any(target_os = "macos", target_os = "linux"))] pub mod resource_snapshot; +#[cfg(any(target_os = "macos", target_os = "linux"))] pub mod resource_snapshot_diff; pub mod spawn_agent_subprocess; pub mod spawn_agent_subprocess_params; diff --git a/paddler_tests/src/resource_snapshot.rs b/paddler_tests/src/resource_snapshot.rs index 0452ebb6..d9e71732 100644 --- a/paddler_tests/src/resource_snapshot.rs +++ b/paddler_tests/src/resource_snapshot.rs @@ -52,6 +52,3 @@ const fn open_descriptors_directory_path() -> &'static str { const fn open_descriptors_directory_path() -> &'static str { "/proc/self/fd" } - -#[cfg(not(any(target_os = "macos", target_os = "linux")))] -compile_error!("ResourceSnapshot is only implemented for macOS and Linux"); diff --git a/paddler_tests/tests/agent_controller_pool_signals_update_when_slot_guard_drops.rs b/paddler_tests/tests/agent_controller_pool_notifies_subscribers_when_slot_guard_drops.rs similarity index 92% rename from paddler_tests/tests/agent_controller_pool_signals_update_when_slot_guard_drops.rs rename to paddler_tests/tests/agent_controller_pool_notifies_subscribers_when_slot_guard_drops.rs index b28ea015..b4204622 100644 --- a/paddler_tests/tests/agent_controller_pool_signals_update_when_slot_guard_drops.rs +++ b/paddler_tests/tests/agent_controller_pool_notifies_subscribers_when_slot_guard_drops.rs @@ -8,7 +8,7 @@ use paddler::subscribes_to_updates::SubscribesToUpdates as _; use paddler_tests::make_agent_controller_without_remote_agent::make_agent_controller_without_remote_agent; #[test] -fn agent_controller_pool_signals_update_when_slot_guard_drops() -> Result<()> { +fn agent_controller_pool_notifies_subscribers_when_slot_guard_drops() -> Result<()> { let pool = AgentControllerPool::default(); let controller = Arc::new(make_agent_controller_without_remote_agent("test-agent")); diff --git a/paddler_tests/tests/in_process_cluster_shutdown_returns_fd_count_to_baseline.rs b/paddler_tests/tests/in_process_cluster_shutdown_returns_fd_count_to_baseline.rs index ede1b71e..e4669ac9 100644 --- a/paddler_tests/tests/in_process_cluster_shutdown_returns_fd_count_to_baseline.rs +++ b/paddler_tests/tests/in_process_cluster_shutdown_returns_fd_count_to_baseline.rs @@ -1,3 +1,5 @@ +#![cfg(any(target_os = "macos", target_os = "linux"))] + use anyhow::Result; use paddler_tests::in_process_cluster_params::InProcessClusterParams; use paddler_tests::resource_snapshot::ResourceSnapshot; diff --git a/paddler_tests/tests/management_two_agents_stream_subscribers_receive_slot_usage_updates.rs b/paddler_tests/tests/management_two_agents_stream_subscribers_receive_slot_usage_changes.rs similarity index 98% rename from paddler_tests/tests/management_two_agents_stream_subscribers_receive_slot_usage_updates.rs rename to paddler_tests/tests/management_two_agents_stream_subscribers_receive_slot_usage_changes.rs index b5a7ef4a..4c02687a 100644 --- a/paddler_tests/tests/management_two_agents_stream_subscribers_receive_slot_usage_updates.rs +++ b/paddler_tests/tests/management_two_agents_stream_subscribers_receive_slot_usage_changes.rs @@ -18,7 +18,7 @@ use reqwest::Client; #[serial_test::file_serial(model_load, path => "../target/model_load.lock")] #[tokio::test(flavor = "multi_thread")] -async fn management_two_agents_stream_subscribers_receive_slot_usage_updates() -> Result<()> { +async fn management_two_agents_stream_subscribers_receive_slot_usage_changes() -> Result<()> { let device = current_test_device()?; device.require_available()?; diff --git a/paddler_tests/tests/subprocess_cluster_shutdown_returns_fd_count_to_baseline.rs b/paddler_tests/tests/subprocess_cluster_shutdown_returns_fd_count_to_baseline.rs index 598a516e..f7f4e759 100644 --- a/paddler_tests/tests/subprocess_cluster_shutdown_returns_fd_count_to_baseline.rs +++ b/paddler_tests/tests/subprocess_cluster_shutdown_returns_fd_count_to_baseline.rs @@ -1,4 +1,7 @@ -#![cfg(feature = "tests_that_use_compiled_paddler")] +#![cfg(all( + feature = "tests_that_use_compiled_paddler", + any(target_os = "macos", target_os = "linux") +))] use anyhow::Result; use paddler_tests::resource_snapshot::ResourceSnapshot;