Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 16 additions & 17 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 3 additions & 3 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -38,9 +38,9 @@ hf-hub = { version = "0.4", features = ["tokio"] }
image = "0.25"
indoc = "2"
jsonschema = { version = "0.37", default-features = false }
llama-cpp-bindings = "=0.6.0"
llama-cpp-bindings-sys = "=0.6.0"
llama-cpp-bindings-types = "=0.6.0"
llama-cpp-bindings = "=0.7.0"
llama-cpp-bindings-sys = "=0.7.0"
llama-cpp-bindings-types = "=0.7.0"
base64 = "0.22"
log = "0.4"
mime_guess = "2"
Expand Down
43 changes: 23 additions & 20 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -10,44 +10,51 @@ FRONTEND_SOURCES := $(shell find resources -type f) $(wildcard jarmuz/*.mjs)
# Real targets
# -----------------------------------------------------------------------------

package-lock.json: package.json
npm install --package-lock-only
esbuild-meta.json: $(FRONTEND_SOURCES) jarmuz-static.mjs tsconfig.json package.json node_modules
./jarmuz-static.mjs

node_modules: package-lock.json
npm ci
touch node_modules

esbuild-meta.json: $(FRONTEND_SOURCES) jarmuz-static.mjs tsconfig.json package.json node_modules
./jarmuz-static.mjs

target/debug/paddler: $(PADDLER_SOURCES)
cargo build -p paddler_cli

target/release/paddler: $(PADDLER_SOURCES) esbuild-meta.json
cargo build --release -p paddler_cli --features web_admin_panel
package-lock.json: package.json
npm install --package-lock-only

target/cuda/debug/paddler: $(PADDLER_SOURCES) esbuild-meta.json
cargo build -p paddler_cli --features cuda,web_admin_panel --target-dir target/cuda

target/cuda/release/paddler: $(PADDLER_SOURCES) esbuild-meta.json
cargo build --release -p paddler_cli --features cuda,web_admin_panel --target-dir target/cuda

target/cuda/release/paddler_gui: $(PADDLER_SOURCES) esbuild-meta.json
cargo build --release -p paddler_gui --features cuda,web_admin_panel --target-dir target/cuda

target/debug/paddler: $(PADDLER_SOURCES)
cargo build -p paddler_cli

target/metal/debug/paddler: $(PADDLER_SOURCES) esbuild-meta.json
cargo build -p paddler_cli --features metal,web_admin_panel --target-dir target/metal

target/metal/release/paddler: $(PADDLER_SOURCES) esbuild-meta.json
cargo build --release -p paddler_cli --features metal,web_admin_panel --target-dir target/metal

target/vulkan/release/paddler: $(PADDLER_SOURCES) esbuild-meta.json
cargo build --release -p paddler_cli --features vulkan,web_admin_panel --target-dir target/vulkan
target/release/paddler: $(PADDLER_SOURCES) esbuild-meta.json
cargo build --release -p paddler_cli --features web_admin_panel

target/release/paddler_gui: $(PADDLER_SOURCES) esbuild-meta.json
cargo build --release -p paddler_gui --features web_admin_panel

target/vulkan/release/paddler: $(PADDLER_SOURCES) esbuild-meta.json
cargo build --release -p paddler_cli --features vulkan,web_admin_panel --target-dir target/vulkan

# -----------------------------------------------------------------------------
# Phony targets
# -----------------------------------------------------------------------------

.PHONY: build.client.js
build.client.js: node_modules
npm --workspace @intentee/paddler-client run build

.PHONY: clean
clean:
rm -rf esbuild-meta.json
Expand Down Expand Up @@ -88,6 +95,10 @@ fmt: node_modules
.PHONY: test
test: test.client.js test.unit test.integration

.PHONY: test.client.js
test.client.js: node_modules
npm --workspace @intentee/paddler-client test

.PHONY: test.integration
test.integration: target/debug/paddler
cargo test -p paddler_tests --features tests_that_use_compiled_paddler,tests_that_use_in_process_cluster,tests_that_use_llms
Expand All @@ -104,14 +115,6 @@ test.integration.metal: target/metal/debug/paddler
test.unit: esbuild-meta.json
cargo test --features web_admin_panel

.PHONY: build.client.js
build.client.js: node_modules
npm --workspace @intentee/paddler-client run build

.PHONY: test.client.js
test.client.js: node_modules
npm --workspace @intentee/paddler-client test

.PHONY: watch
watch: node_modules
./jarmuz-watch.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -4,5 +4,5 @@ use crate::agent::continuous_batch_scheduler::batch_pass::BatchPass;
use crate::agent::continuous_batch_scheduler::decode_outcome::DecodeOutcome;

pub fn run(pass: &mut BatchPass, context: &mut LlamaContext) -> DecodeOutcome {
DecodeOutcome::from_decode_result(&context.decode(&mut pass.batch))
DecodeOutcome::from_decode_result(context.decode(&mut pass.batch))
}
32 changes: 18 additions & 14 deletions paddler/src/agent/continuous_batch_scheduler/decode_outcome.rs
Original file line number Diff line number Diff line change
@@ -1,67 +1,71 @@
use llama_cpp_bindings::DecodeError;
use llama_cpp_bindings::error::DecodeError;

#[derive(Debug)]
pub enum DecodeOutcome {
Decoded,
NeedsEviction,
Aborted,
Errored(i32),
Errored(DecodeError),
}

impl DecodeOutcome {
#[must_use]
pub const fn from_decode_result(result: &Result<(), DecodeError>) -> Self {
pub fn from_decode_result(result: Result<(), DecodeError>) -> Self {
match result {
Ok(()) => Self::Decoded,
Err(DecodeError::NoKvCacheSlot) => Self::NeedsEviction,
Err(DecodeError::Aborted | DecodeError::NTokensZero) => Self::Aborted,
Err(DecodeError::Unknown(error_code)) => Self::Errored(*error_code),
Err(DecodeError::Aborted | DecodeError::BatchInvalid) => Self::Aborted,
Err(other) => Self::Errored(other),
}
}
}

#[cfg(test)]
mod tests {
use llama_cpp_bindings::DecodeError;
use llama_cpp_bindings::error::DecodeError;

use super::DecodeOutcome;

#[test]
fn ok_maps_to_decoded() {
assert!(matches!(
DecodeOutcome::from_decode_result(&Ok(())),
DecodeOutcome::from_decode_result(Ok(())),
DecodeOutcome::Decoded
));
}

#[test]
fn no_kv_cache_slot_maps_to_needs_eviction() {
assert!(matches!(
DecodeOutcome::from_decode_result(&Err(DecodeError::NoKvCacheSlot)),
DecodeOutcome::from_decode_result(Err(DecodeError::NoKvCacheSlot)),
DecodeOutcome::NeedsEviction
));
}

#[test]
fn aborted_maps_to_aborted() {
assert!(matches!(
DecodeOutcome::from_decode_result(&Err(DecodeError::Aborted)),
DecodeOutcome::from_decode_result(Err(DecodeError::Aborted)),
DecodeOutcome::Aborted
));
}

#[test]
fn n_tokens_zero_maps_to_aborted() {
fn batch_invalid_maps_to_aborted() {
assert!(matches!(
DecodeOutcome::from_decode_result(&Err(DecodeError::NTokensZero)),
DecodeOutcome::from_decode_result(Err(DecodeError::BatchInvalid)),
DecodeOutcome::Aborted
));
}

#[test]
fn unknown_carries_error_code() {
let outcome = DecodeOutcome::from_decode_result(&Err(DecodeError::Unknown(42)));
fn other_error_is_forwarded_as_errored() {
let outcome =
DecodeOutcome::from_decode_result(Err(DecodeError::UnknownStatus { code: 42 }));

assert!(matches!(outcome, DecodeOutcome::Errored(42)));
assert!(matches!(
outcome,
DecodeOutcome::Errored(DecodeError::UnknownStatus { code: 42 })
));
}
}
6 changes: 2 additions & 4 deletions paddler/src/agent/continuous_batch_scheduler/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1006,10 +1006,8 @@ impl ContinuousBatchScheduler {
DecodeOutcome::Aborted => {
return Ok(());
}
DecodeOutcome::Errored(error_code) => {
return Err(anyhow!(
"Decode failed with unknown error code: {error_code}"
));
DecodeOutcome::Errored(decode_error) => {
return Err(anyhow::Error::new(decode_error).context("decode failed"));
}
}
}
Expand Down
9 changes: 7 additions & 2 deletions paddler/src/balancer/state_database_type.rs
Original file line number Diff line number Diff line change
Expand Up @@ -76,11 +76,16 @@ mod tests {

#[test]
fn test_file_absolute_path() -> Result<()> {
let result = StateDatabaseType::from_str("file:///absolute/path")?;
#[cfg(unix)]
let (url, expected_path) = ("file:///absolute/path", "/absolute/path");
#[cfg(windows)]
let (url, expected_path) = ("file://C:/absolute/path", "C:/absolute/path");

let result = StateDatabaseType::from_str(url)?;

match result {
StateDatabaseType::File(path) => {
assert_eq!(path, PathBuf::from("/absolute/path"));
assert_eq!(path, PathBuf::from(expected_path));
}
StateDatabaseType::Memory(_) => {
return Err(anyhow!("Expected File variant"));
Expand Down
9 changes: 7 additions & 2 deletions paddler/src/model_source/url.rs
Original file line number Diff line number Diff line change
Expand Up @@ -533,8 +533,13 @@ mod tests {
}

#[test]
fn classify_cache_io_error_maps_enospc_to_cache_storage_is_full() {
let error = io::Error::from_raw_os_error(28);
fn classify_cache_io_error_maps_disk_full_errno_to_cache_storage_is_full() {
#[cfg(unix)]
const DISK_FULL_ERRNO: i32 = 28;
#[cfg(windows)]
const DISK_FULL_ERRNO: i32 = 112;

let error = io::Error::from_raw_os_error(DISK_FULL_ERRNO);

assert!(matches!(
classify_cache_io_error(TEST_URL, &error),
Expand Down
4 changes: 4 additions & 0 deletions paddler_bootstrap/src/shutdown_signal/windows.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,10 @@ use tokio::signal::windows::ctrl_c;
use tokio::signal::windows::ctrl_close;
use tokio::signal::windows::ctrl_shutdown;

#[expect(
clippy::struct_field_names,
reason = "field names mirror the Windows console control event types they hold; the shared `ctrl_` prefix is part of the Windows API vocabulary, and `break` is a reserved keyword"
)]
pub struct ShutdownSignals {
ctrl_c: CtrlC,
ctrl_break: CtrlBreak,
Expand Down
2 changes: 2 additions & 0 deletions paddler_tests/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,9 @@ pub mod openai_chat_completions_client;
pub mod paddler_command;
pub mod parse_test_device_value;
pub mod qwen3_embedding_cluster_params;
#[cfg(any(target_os = "macos", target_os = "linux"))]
pub mod resource_snapshot;
#[cfg(any(target_os = "macos", target_os = "linux"))]
pub mod resource_snapshot_diff;
pub mod spawn_agent_subprocess;
pub mod spawn_agent_subprocess_params;
Expand Down
3 changes: 0 additions & 3 deletions paddler_tests/src/resource_snapshot.rs
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,3 @@ const fn open_descriptors_directory_path() -> &'static str {
const fn open_descriptors_directory_path() -> &'static str {
"/proc/self/fd"
}

#[cfg(not(any(target_os = "macos", target_os = "linux")))]
compile_error!("ResourceSnapshot is only implemented for macOS and Linux");
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ use paddler::subscribes_to_updates::SubscribesToUpdates as _;
use paddler_tests::make_agent_controller_without_remote_agent::make_agent_controller_without_remote_agent;

#[test]
fn agent_controller_pool_signals_update_when_slot_guard_drops() -> Result<()> {
fn agent_controller_pool_notifies_subscribers_when_slot_guard_drops() -> Result<()> {
let pool = AgentControllerPool::default();
let controller = Arc::new(make_agent_controller_without_remote_agent("test-agent"));

Expand Down
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
#![cfg(any(target_os = "macos", target_os = "linux"))]

use anyhow::Result;
use paddler_tests::in_process_cluster_params::InProcessClusterParams;
use paddler_tests::resource_snapshot::ResourceSnapshot;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ use reqwest::Client;

#[serial_test::file_serial(model_load, path => "../target/model_load.lock")]
#[tokio::test(flavor = "multi_thread")]
async fn management_two_agents_stream_subscribers_receive_slot_usage_updates() -> Result<()> {
async fn management_two_agents_stream_subscribers_receive_slot_usage_changes() -> Result<()> {
let device = current_test_device()?;

device.require_available()?;
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@
#![cfg(feature = "tests_that_use_compiled_paddler")]
#![cfg(all(
feature = "tests_that_use_compiled_paddler",
any(target_os = "macos", target_os = "linux")
))]

use anyhow::Result;
use paddler_tests::resource_snapshot::ResourceSnapshot;
Expand Down
Loading