Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 0 additions & 2 deletions llama-cpp-bindings-build/src/android_ndk.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,6 @@ pub enum AndroidNdkDetectionError {
UnsupportedAndroidTarget { target_triple: String },
}

/// Consolidated Android NDK configuration, computed once and shared between
/// bindgen and `CMake` configuration steps.
#[derive(Debug)]
pub struct AndroidNdk {
pub ndk_path: String,
Expand Down
11 changes: 0 additions & 11 deletions llama-cpp-bindings-build/src/cmake_config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -217,9 +217,6 @@ fn configure_platform_specific(
}
}

/// Work around a cmake-rs bug where debug Rust builds under MSVC strip
/// optimization flags from Release-profile C/C++ builds.
/// See: <https://github.com/rust-lang/cmake-rs/issues/240>
fn configure_msvc_release_workaround(config: &mut Config, profile: &str) {
let is_release_profile = matches!(profile, "Release" | "RelWithDebInfo" | "MinSizeRel");

Expand Down Expand Up @@ -269,14 +266,6 @@ fn configure_android_cmake(config: &mut Config, ndk: &AndroidNdk, _target_triple
println!("cargo:rustc-link-lib=android");
}

/// macOS BSD ar (from cctools) does not accept GNU ar's `-D` (deterministic)
/// flag. cmake's default archive recipe is `<CMAKE_AR> qcD …`, which produces
/// `illegal option -- D` warnings during every static-library link.
///
/// We override the archive command for every language used by llama.cpp's
/// build — C, C++, Objective-C and Objective-C++ (the latter two appear once
/// `GGML_METAL=ON` enables the Metal backend). Plain `qc` keeps the
/// quick-create semantics; `<CMAKE_RANLIB>` still runs as ARCHIVE_FINISH.
fn override_archive_commands_for_apple_ar(config: &mut Config) {
for language in ["C", "CXX", "OBJC", "OBJCXX"] {
config.define(
Expand Down
6 changes: 0 additions & 6 deletions llama-cpp-bindings-build/src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
//! Build system for llama-cpp-bindings-sys FFI bindings to llama.cpp.

mod android_ndk;
mod bindgen_config;
mod cmake_config;
Expand Down Expand Up @@ -30,7 +28,6 @@ macro_rules! debug_log {
};
}

/// Shared state passed between build phases.
#[derive(Debug)]
pub struct BuildContext {
pub out_dir: PathBuf,
Expand Down Expand Up @@ -124,9 +121,6 @@ fn set_cmake_parallelism() {
}
}

/// Main entry point for the llama.cpp build system.
///
/// Call this from `build.rs` in `llama-cpp-bindings-sys`.
pub fn build() {
let context = BuildContext::detect();

Expand Down
7 changes: 0 additions & 7 deletions llama-cpp-bindings-build/src/stable_cmake_build_dir.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,13 +17,6 @@ const CMAKE_AFFECTING_FEATURES: &[(&str, bool)] = &[
("static-stdcxx", cfg!(feature = "static-stdcxx")),
];

/// Compute a stable, persistent cmake build directory under the workspace
/// `target/` tree, keyed only by inputs that materially change cmake compile
/// commands. Toggling features that don't affect cmake (e.g. `mtmd`, `llguidance`)
/// returns the same path, allowing cmake's incremental build (and ccache) to
/// reuse all prior artifacts — including `nvcc`-built CUDA kernels.
///
/// `LLAMA_CMAKE_BUILD_DIR_OVERRIDE` overrides the path entirely when set.
pub fn stable_cmake_build_dir(
target_dir: &Path,
target_triple: &str,
Expand Down
2 changes: 0 additions & 2 deletions llama-cpp-bindings-sys/src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
//! See [llama-cpp-bindings](https://crates.io/crates/llama-cpp-bindings) for a documented and safe API.
#![expect(
non_camel_case_types,
reason = "bindgen emits C struct and enum names verbatim and they don't follow Rust naming"
Expand Down
39 changes: 27 additions & 12 deletions llama-cpp-bindings-tests/src/classify_sample_loop.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,14 +7,6 @@ use llama_cpp_bindings::sampled_token::SampledToken;
use llama_cpp_bindings::sampled_token_classifier::SampledTokenClassifier;
use llama_cpp_bindings::sampling::LlamaSampler;

/// Drives a classifier through the full sample/decode/flush loop.
///
/// Suppresses EOG outcomes (so `generated_raw` and the per-section streams
/// never contain end-of-generation marker text) and captures per-section
/// counts. Tests that need to exercise classifier behaviour during real
/// inference should construct one of these and call
/// [`ClassifySampleLoop::run`] instead of re-implementing the loop. The
/// strict per-test assertions then run on [`ClassifySampleLoopOutcome`].
pub struct ClassifySampleLoop<'borrow, 'model, 'tokens> {
pub model: &'model LlamaModel,
pub classifier: &'borrow mut SampledTokenClassifier<'model>,
Expand Down Expand Up @@ -59,10 +51,6 @@ impl ClassifySampleLoop<'_, '_, '_> {
} else {
outcome.generated_raw.push_str(&ingest_outcome.raw_piece);
}
// Counters always include EOG so they match the classifier's
// internal usage counters (which include every sampled token).
// EOG text is suppressed from `generated_raw` and the per-section
// streams so callers can assert exact textual equality.
record_outcome(ingest_outcome, &mut outcome, is_eog);
}

Expand Down Expand Up @@ -115,3 +103,30 @@ fn record_outcome(ingest: &IngestOutcome, outcome: &mut ClassifySampleLoopOutcom
}
}
}

#[cfg(test)]
mod tests {
    use llama_cpp_bindings::ingest_outcome::IngestOutcome;
    use llama_cpp_bindings::sampled_token::SampledToken;
    use llama_cpp_bindings::token::LlamaToken;

    use super::ClassifySampleLoopOutcome;
    use super::record_outcome;

    /// A non-EOG tool-call token must bump only the tool-call counter and
    /// leave the content, reasoning and undeterminable counters at zero.
    #[test]
    fn record_outcome_tool_call_token() {
        // Start from all-default counters so every assertion below reflects
        // exactly one `record_outcome` call.
        let mut counters = ClassifySampleLoopOutcome::default();
        // Both text pieces are empty: this test only inspects the counters.
        let tool_call_ingest = IngestOutcome {
            sampled_token: SampledToken::ToolCall(LlamaToken(42)),
            visible_piece: String::new(),
            raw_piece: String::new(),
        };

        // `false` marks the token as not end-of-generation.
        record_outcome(&tool_call_ingest, &mut counters, false);

        assert_eq!(counters.observed_tool_call, 1);
        assert_eq!(counters.observed_content, 0);
        assert_eq!(counters.observed_reasoning, 0);
        assert_eq!(counters.observed_undeterminable, 0);
    }
}
8 changes: 1 addition & 7 deletions llama-cpp-bindings-tests/src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,8 +1,2 @@
//! Integration test fixtures for `llama-cpp-bindings`.
//!
//! This crate hosts test-only helpers used by the integration tests in `tests/`:
//! [`classify_sample_loop`] for sampling-loop drivers and [`test_model::fixtures_dir`]
//! for locating image fixtures.
pub mod classify_sample_loop;
pub mod test_model;
pub mod prime_kv_cache;
15 changes: 15 additions & 0 deletions llama-cpp-bindings-tests/src/prime_kv_cache.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
use anyhow::Result;
use llama_cpp_bindings::context::LlamaContext;
use llama_cpp_bindings::llama_batch::LlamaBatch;
use llama_cpp_bindings::model::AddBos;
use llama_cpp_test_harness::LlamaFixture;

/// Decodes the fixed prompt `"Hello world"` on `context` using the fixture's model.
///
/// The prompt is tokenized with [`AddBos::Always`], placed into a freshly
/// created [`LlamaBatch`], and run through [`LlamaContext::decode`] once.
/// NOTE(review): judging by the name, this is meant to leave the context with
/// a populated KV cache for the calling test — confirm against callers.
///
/// # Errors
/// Forwards tokenization, batch construction, and [`LlamaContext::decode`] errors verbatim.
pub fn prime_kv_cache(fixture: &LlamaFixture<'_>, context: &mut LlamaContext<'_>) -> Result<()> {
    let prompt_tokens = fixture.model.str_to_token("Hello world", AddBos::Always)?;

    // NOTE(review): presumably (token capacity, max sequences) — confirm
    // against `LlamaBatch::new`.
    let mut prompt_batch = LlamaBatch::new(512, 1)?;
    // NOTE(review): `0` looks like the sequence id and `false` like a
    // "logits for every token" switch — verify against `add_sequence`.
    prompt_batch.add_sequence(&prompt_tokens, 0, false)?;

    context.decode(&mut prompt_batch)?;
    Ok(())
}
Loading
Loading