intentee · mcharytoniuk · May 25, 2026 · May 25, 2026 · May 25, 2026 · May 25, 2026
diff --git a/llama-cpp-bindings-build/src/android_ndk.rs b/llama-cpp-bindings-build/src/android_ndk.rs
@@ -27,8 +27,6 @@ pub enum AndroidNdkDetectionError {
     UnsupportedAndroidTarget { target_triple: String },
 }
 
-/// Consolidated Android NDK configuration, computed once and shared between
-/// bindgen and `CMake` configuration steps.
 #[derive(Debug)]
 pub struct AndroidNdk {
     pub ndk_path: String,

diff --git a/llama-cpp-bindings-build/src/cmake_config.rs b/llama-cpp-bindings-build/src/cmake_config.rs
@@ -217,9 +217,6 @@ fn configure_platform_specific(
     }
 }
 
-/// Work around a cmake-rs bug where debug Rust builds under MSVC strip
-/// optimization flags from Release-profile C/C++ builds.
-/// See: <https://github.com/rust-lang/cmake-rs/issues/240>
 fn configure_msvc_release_workaround(config: &mut Config, profile: &str) {
     let is_release_profile = matches!(profile, "Release" | "RelWithDebInfo" | "MinSizeRel");
 
@@ -269,14 +266,6 @@ fn configure_android_cmake(config: &mut Config, ndk: &AndroidNdk, _target_triple
     println!("cargo:rustc-link-lib=android");
 }
 
-/// macOS BSD ar (from cctools) does not accept GNU ar's `-D` (deterministic)
-/// flag. cmake's default archive recipe is `<CMAKE_AR> qcD …`, which produces
-/// `illegal option -- D` warnings during every static-library link.
-///
-/// We override the archive command for every language used by llama.cpp's
-/// build — C, C++, Objective-C and Objective-C++ (the latter two appear once
-/// `GGML_METAL=ON` enables the Metal backend). Plain `qc` keeps the
-/// quick-create semantics; `<CMAKE_RANLIB>` still runs as ARCHIVE_FINISH.
 fn override_archive_commands_for_apple_ar(config: &mut Config) {
     for language in ["C", "CXX", "OBJC", "OBJCXX"] {
         config.define(

diff --git a/llama-cpp-bindings-build/src/lib.rs b/llama-cpp-bindings-build/src/lib.rs
@@ -1,5 +1,3 @@
-//! Build system for llama-cpp-bindings-sys FFI bindings to llama.cpp.
-
 mod android_ndk;
 mod bindgen_config;
 mod cmake_config;
@@ -30,7 +28,6 @@ macro_rules! debug_log {
     };
 }
 
-/// Shared state passed between build phases.
 #[derive(Debug)]
 pub struct BuildContext {
     pub out_dir: PathBuf,
@@ -124,9 +121,6 @@ fn set_cmake_parallelism() {
     }
 }
 
-/// Main entry point for the llama.cpp build system.
-///
-/// Call this from `build.rs` in `llama-cpp-bindings-sys`.
 pub fn build() {
     let context = BuildContext::detect();
 

diff --git a/llama-cpp-bindings-build/src/stable_cmake_build_dir.rs b/llama-cpp-bindings-build/src/stable_cmake_build_dir.rs
@@ -17,13 +17,6 @@ const CMAKE_AFFECTING_FEATURES: &[(&str, bool)] = &[
     ("static-stdcxx", cfg!(feature = "static-stdcxx")),
 ];
 
-/// Compute a stable, persistent cmake build directory under the workspace
-/// `target/` tree, keyed only by inputs that materially change cmake compile
-/// commands. Toggling features that don't affect cmake (e.g. `mtmd`, `llguidance`)
-/// returns the same path, allowing cmake's incremental build (and ccache) to
-/// reuse all prior artifacts — including `nvcc`-built CUDA kernels.
-///
-/// `LLAMA_CMAKE_BUILD_DIR_OVERRIDE` overrides the path entirely when set.
 pub fn stable_cmake_build_dir(
     target_dir: &Path,
     target_triple: &str,

diff --git a/llama-cpp-bindings-sys/src/lib.rs b/llama-cpp-bindings-sys/src/lib.rs
@@ -1,5 +1,3 @@
-//! See [llama-cpp-bindings](https://crates.io/crates/llama-cpp-bindings) for a documented and safe API.
-
 #![expect(
     non_camel_case_types,
     reason = "bindgen emits C struct and enum names verbatim and they don't follow Rust naming"

diff --git a/llama-cpp-bindings-tests/src/classify_sample_loop.rs b/llama-cpp-bindings-tests/src/classify_sample_loop.rs
@@ -7,14 +7,6 @@ use llama_cpp_bindings::sampled_token::SampledToken;
 use llama_cpp_bindings::sampled_token_classifier::SampledTokenClassifier;
 use llama_cpp_bindings::sampling::LlamaSampler;
 
-/// Drives a classifier through the full sample/decode/flush loop.
-///
-/// Suppresses EOG outcomes (so `generated_raw` and the per-section streams
-/// never contain end-of-generation marker text) and captures per-section
-/// counts. Tests that need to exercise classifier behaviour during real
-/// inference should construct one of these and call
-/// [`ClassifySampleLoop::run`] instead of re-implementing the loop. The
-/// strict per-test assertions then run on [`ClassifySampleLoopOutcome`].
 pub struct ClassifySampleLoop<'borrow, 'model, 'tokens> {
     pub model: &'model LlamaModel,
     pub classifier: &'borrow mut SampledTokenClassifier<'model>,
@@ -59,10 +51,6 @@ impl ClassifySampleLoop<'_, '_, '_> {
                 } else {
                     outcome.generated_raw.push_str(&ingest_outcome.raw_piece);
                 }
-                // Counters always include EOG so they match the classifier's
-                // internal usage counters (which include every sampled token).
-                // EOG text is suppressed from `generated_raw` and the per-section
-                // streams so callers can assert exact textual equality.
                 record_outcome(ingest_outcome, &mut outcome, is_eog);
             }
 
@@ -115,3 +103,30 @@ fn record_outcome(ingest: &IngestOutcome, outcome: &mut ClassifySampleLoopOutcom
         }
     }
 }
+
+#[cfg(test)]
+mod tests {
+    use llama_cpp_bindings::ingest_outcome::IngestOutcome;
+    use llama_cpp_bindings::sampled_token::SampledToken;
+    use llama_cpp_bindings::token::LlamaToken;
+
+    use super::ClassifySampleLoopOutcome;
+    use super::record_outcome;
+
+    // Recording one non-EOG tool-call token must bump only the tool-call counter.
+    #[test]
+    fn record_outcome_tool_call_token() {
+        let mut tally = ClassifySampleLoopOutcome::default();
+        let tool_call_ingest = IngestOutcome {
+            sampled_token: SampledToken::ToolCall(LlamaToken(42)),
+            visible_piece: String::new(),
+            raw_piece: String::new(),
+        };
+
+        record_outcome(&tool_call_ingest, &mut tally, false);
+        assert_eq!(tally.observed_tool_call, 1);
+        assert_eq!(tally.observed_content, 0);
+        assert_eq!(tally.observed_reasoning, 0);
+        assert_eq!(tally.observed_undeterminable, 0);
+    }
+}
diff --git a/llama-cpp-bindings-tests/src/lib.rs b/llama-cpp-bindings-tests/src/lib.rs
@@ -1,8 +1,2 @@
-//! Integration test fixtures for `llama-cpp-bindings`.
-//!
-//! This crate hosts test-only helpers used by the integration tests in `tests/`:
-//! [`classify_sample_loop`] for sampling-loop drivers and [`test_model::fixtures_dir`]
-//! for locating image fixtures.
-
 pub mod classify_sample_loop;
-pub mod test_model;
+pub mod prime_kv_cache;
diff --git a/llama-cpp-bindings-tests/src/prime_kv_cache.rs b/llama-cpp-bindings-tests/src/prime_kv_cache.rs
@@ -0,0 +1,15 @@
+use anyhow::Result;
+use llama_cpp_bindings::context::LlamaContext;
+use llama_cpp_bindings::llama_batch::LlamaBatch;
+use llama_cpp_bindings::model::AddBos;
+use llama_cpp_test_harness::LlamaFixture;
+
+/// # Errors
+/// Propagates any tokenization, batch construction, or [`LlamaContext::decode`] failure unchanged.
+pub fn prime_kv_cache(fixture: &LlamaFixture<'_>, context: &mut LlamaContext<'_>) -> Result<()> {
+    let prompt_tokens = fixture.model.str_to_token("Hello world", AddBos::Always)?;
+    let mut prompt_batch = LlamaBatch::new(512, 1)?;
+    prompt_batch.add_sequence(&prompt_tokens, 0, false)?;
+    context.decode(&mut prompt_batch)?;
+    Ok(())
+}