Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- **Spawner task-body fix** -- agents now spawned with TOML task body, not runtime task_string (Refs #1020)
- **OpenCode + Terraphim experiment results** documenting FFF vs Ripgrep performance comparison
- **Frontend developer role setup** experiment documentation and clarification
- **Compiled thesaurus cache invalidation** via SHA-256 source hash tracking on KG markdown edits (Refs #945)
- **`terraphim-agent cache flush`** CLI subcommand for manual cache eviction by role (Refs #945)

### Fixed

Expand Down
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

79 changes: 79 additions & 0 deletions crates/terraphim_agent/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -907,13 +907,29 @@ enum Command {
server: bool,
},

/// Manage the compiled thesaurus cache
Cache {
#[command(subcommand)]
sub: CacheSub,
},

/// Robot mode self-documentation commands
Robot {
#[command(subcommand)]
sub: RobotSub,
},
}

// Subcommands of `terraphim-agent cache`.
// NOTE: the `///` doc comments below are surfaced by clap as CLI help text,
// so they are part of the user-visible interface — edit with care.
#[derive(Subcommand, Debug)]
enum CacheSub {
    /// Flush (delete) compiled thesaurus cache entries
    Flush {
        /// Specific role to flush (if omitted, flushes all cached thesauri)
        #[arg(long)]
        role: Option<String>,
    },
}

#[derive(Subcommand, Debug)]
enum LearnSub {
/// Capture a failed command as a learning
Expand Down Expand Up @@ -1479,6 +1495,7 @@ fn build_cli_forgiving_parser() -> forgiving::ForgivingParser {
"update",
"learn",
"listen",
"cache",
];

#[cfg(feature = "llm")]
Expand Down Expand Up @@ -1990,6 +2007,11 @@ async fn run_offline_command(
return run_config_validate().await;
}

// Cache is stateless - handle before TuiService initialization
if let Command::Cache { sub } = &command {
return run_cache_command(sub).await;
}

// Learn is stateless - handle before TuiService initialization.
// Must be last early-return because it consumes `command` via destructuring.
if let Command::Learn { sub } = command {
Expand Down Expand Up @@ -3014,6 +3036,58 @@ async fn run_offline_command(
Command::Robot { .. } => {
unreachable!("Robot commands are handled in main()")
}
Command::Cache { .. } => {
unreachable!("Cache commands are handled before TuiService initialization")
}
}
}

/// Handle `terraphim-agent cache ...` subcommands.
///
/// Runs before `TuiService` initialization (the cache is stateless), talking
/// directly to the device's fastest persistence backend. On a failed flush the
/// process exits with status 1 after printing the error to stderr.
async fn run_cache_command(sub: &CacheSub) -> Result<()> {
    use terraphim_persistence::DeviceStorage;

    match sub {
        CacheSub::Flush { role } => {
            let storage = DeviceStorage::instance().await?;
            let backend = &storage.fastest_op;

            match role {
                // A specific role was requested: delete just its compiled
                // thesaurus entry. Key layout mirrors the writer side:
                // `thesaurus_<lowercased role>.json`.
                Some(role_name) => {
                    let key = format!("thesaurus_{}.json", role_name.to_lowercase());
                    if let Err(e) = backend.delete(&key).await {
                        eprintln!("Failed to flush cache for role '{}': {}", role_name, e);
                        std::process::exit(1);
                    }
                    println!("Flushed cache for role: {}", role_name);
                }
                // No role given: enumerate every `thesaurus_*` entry and
                // delete the JSON ones, counting successes.
                None => {
                    let prefix = "thesaurus_";
                    let entries = match backend.list(prefix).await {
                        Ok(entries) => entries,
                        Err(e) => {
                            eprintln!("Failed to list cache entries: {}", e);
                            std::process::exit(1);
                        }
                    };

                    let mut deleted = 0;
                    for entry in entries {
                        let path = entry.path();
                        if !path.ends_with(".json") {
                            continue;
                        }
                        // Individual delete failures are non-fatal: warn and
                        // keep flushing the remaining entries.
                        match backend.delete(path).await {
                            Ok(_) => deleted += 1,
                            Err(e) => {
                                log::warn!("Failed to delete '{}': {}", path, e);
                            }
                        }
                    }
                    println!("Flushed {} cached thesaurus entries", deleted);
                }
            }
            Ok(())
        }
    }
}

Expand Down Expand Up @@ -4759,6 +4833,11 @@ async fn run_server_command(
Command::Robot { .. } => {
unreachable!("Robot commands are handled in main()")
}
Command::Cache { .. } => {
eprintln!("error: cache commands are not available in server mode");
eprintln!("Cache management runs in offline mode only.");
std::process::exit(1);
}
}
}

Expand Down
1 change: 1 addition & 0 deletions crates/terraphim_automata/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ wasm-bindgen-futures = { version = "0.4.1", optional = true }
wasm-bindgen = { version = "0.2.51", optional = true }
cached = { version = "0.56.0", features = ["proc_macro", "async", "serde", "ahash"] }
tsify = { version = "0.5", features = ["js"], optional = true }
sha2 = "0.10"
walkdir = "2.5"
daachorse = { version = "1.0", optional = true }
zstd = { version = "0.13", optional = true }
Expand Down
156 changes: 155 additions & 1 deletion crates/terraphim_automata/src/builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,59 @@ pub enum BuilderError {

pub type Result<T> = std::result::Result<T, BuilderError>;

/// Compute a SHA-256 hash of all markdown files in a directory tree.
///
/// Files are processed in sorted order to ensure deterministic output.
/// The hash incorporates both the relative file path and file content,
/// so renames and edits are both detected.
///
/// # Errors
///
/// Returns any I/O error encountered while reading a markdown file.
///
/// Returns `Ok(None)` if the directory does not exist or contains no markdown files.
pub fn compute_kg_source_hash(dir: &std::path::Path) -> std::io::Result<Option<String>> {
    use sha2::{Digest, Sha256};

    if !dir.exists() {
        return Ok(None);
    }

    // Collect regular files with a case-insensitive `.md` extension.
    // The `is_file()` guard matters: a *directory* named e.g. `notes.md`
    // would pass the extension filter but fail the read below. Unreadable
    // directory entries are skipped (best effort), matching walkdir's
    // error-tolerant iteration.
    let mut entries: Vec<_> = walkdir::WalkDir::new(dir)
        .into_iter()
        .filter_map(|e| e.ok())
        .filter(|e| {
            e.file_type().is_file()
                && e.path()
                    .extension()
                    .map(|ext| ext.eq_ignore_ascii_case("md"))
                    .unwrap_or(false)
        })
        .collect();

    if entries.is_empty() {
        return Ok(None);
    }

    // Sort by path so the result is independent of traversal order.
    entries.sort_unstable_by(|a, b| a.path().cmp(b.path()));

    let mut hasher = Sha256::new();
    for entry in entries {
        let path = entry.path();
        // Hash the path *relative* to `dir` so the digest is stable across
        // machines/checkouts, while still changing when a file is renamed.
        let relative = path.strip_prefix(dir).unwrap_or(path);
        hasher.update(relative.to_string_lossy().as_bytes());
        // NUL separators disambiguate path bytes from content bytes.
        hasher.update(b"\0");
        hasher.update(&std::fs::read(path)?);
        hasher.update(b"\0");
    }

    Ok(Some(format!("{:x}", hasher.finalize())))
}

/// A ThesaurusBuilder receives a path containing
/// resources (e.g. files) with key-value pairs and returns a `Thesaurus`
/// (a dictionary with synonyms which map to higher-level concepts)
Expand Down Expand Up @@ -61,7 +114,26 @@ impl ThesaurusBuilder for Logseq {
.await?;
#[cfg(not(feature = "tokio-runtime"))]
let messages: Vec<Message> = Vec::new();
let thesaurus = index_inner(name, messages);
#[allow(unused_mut)]
let mut thesaurus = index_inner(name, messages);

// Compute source hash for cache invalidation
#[cfg(feature = "tokio-runtime")]
match compute_kg_source_hash(&haystack) {
Ok(Some(hash)) => {
thesaurus.source_hash = Some(hash);
}
Ok(None) => {
log::debug!(
"No markdown files found in {:?}, source_hash left empty",
haystack
);
}
Err(e) => {
log::warn!("Failed to compute source hash for {:?}: {}", haystack, e);
}
}

Ok(thesaurus)
}
}
Expand Down Expand Up @@ -316,3 +388,85 @@ pub fn json_decode(jsonlines: &str) -> Result<Vec<Message>> {
.into_iter()
.collect::<std::result::Result<Vec<Message>, serde_json::Error>>()?)
}

#[cfg(test)]
mod hash_tests {
    use crate::builder::compute_kg_source_hash;

    /// Write `content` to a file called `name` inside `dir`, returning its path.
    fn write_md(dir: &std::path::Path, name: &str, content: &[u8]) -> std::path::PathBuf {
        let path = dir.join(name);
        std::fs::write(&path, content).unwrap();
        path
    }

    #[test]
    fn compute_hash_empty_dir() {
        // A directory with no markdown files yields no hash at all.
        let tmp = tempfile::tempdir().unwrap();
        assert!(compute_kg_source_hash(tmp.path()).unwrap().is_none());
    }

    #[test]
    fn compute_hash_detects_content_change() {
        let tmp = tempfile::tempdir().unwrap();
        write_md(tmp.path(), "concept.md", b"synonyms:: foo, bar");
        let before = compute_kg_source_hash(tmp.path()).unwrap().unwrap();

        // Overwrite with different content and re-hash.
        write_md(tmp.path(), "concept.md", b"synonyms:: foo, baz");
        let after = compute_kg_source_hash(tmp.path()).unwrap().unwrap();

        assert_ne!(before, after, "Hash should change when file content changes");
    }

    #[test]
    fn compute_hash_detects_rename() {
        let tmp = tempfile::tempdir().unwrap();
        let original = write_md(tmp.path(), "concept.md", b"synonyms:: foo, bar");
        let before = compute_kg_source_hash(tmp.path()).unwrap().unwrap();

        // Same content, different name: the relative path feeds the hash.
        std::fs::rename(&original, tmp.path().join("renamed.md")).unwrap();
        let after = compute_kg_source_hash(tmp.path()).unwrap().unwrap();

        assert_ne!(before, after, "Hash should change when file is renamed");
    }

    #[test]
    fn compute_hash_ignores_non_md() {
        let tmp = tempfile::tempdir().unwrap();
        std::fs::write(tmp.path().join("readme.txt"), b"synonyms:: foo, bar").unwrap();

        let result = compute_kg_source_hash(tmp.path()).unwrap();
        assert!(result.is_none(), "Non-markdown files should be ignored");
    }

    #[test]
    fn compute_hash_stable_across_runs() {
        let tmp = tempfile::tempdir().unwrap();
        write_md(tmp.path(), "concept.md", b"synonyms:: foo, bar");

        // Two consecutive runs over unchanged files must agree.
        let first = compute_kg_source_hash(tmp.path()).unwrap().unwrap();
        let second = compute_kg_source_hash(tmp.path()).unwrap().unwrap();
        assert_eq!(
            first, second,
            "Hash should be stable when files don't change"
        );
    }
}
2 changes: 1 addition & 1 deletion crates/terraphim_automata/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@
//!
//! See the [WASM package](wasm/) for browser usage.

pub use self::builder::{Logseq, ThesaurusBuilder};
pub use self::builder::{Logseq, ThesaurusBuilder, compute_kg_source_hash};
pub mod autocomplete;
pub mod builder;
pub mod evaluation;
Expand Down
Loading
Loading