From e5b6bd626caf7ca79ee41dd488f8c906831048d7 Mon Sep 17 00:00:00 2001 From: Test User Date: Fri, 1 May 2026 23:20:48 +0100 Subject: [PATCH 1/2] feat(terraphim_dsm): add semantic module grouping tool Add terraphim_dsm as a sentrux companion tool for knowledge graph-based semantic module grouping. Features: - Load Terraphim knowledge graphs from ~/.config/terraphim/kg/ - Group module paths by domain concept using synonym matching - Output formats: text, JSON, CSV - Filter by specific domain concept - Show uncategorized modules Dependencies: clap, serde, serde_json, walkdir, tracing, tracing-subscriber, anyhow, log, dirs Refs #quality-gate --- crates/terraphim_dsm/Cargo.toml | 25 +++ crates/terraphim_dsm/src/knowledge.rs | 260 ++++++++++++++++++++++++++ crates/terraphim_dsm/src/main.rs | 245 ++++++++++++++++++++++++ crates/terraphim_dsm/src/models.rs | 21 +++ 4 files changed, 551 insertions(+) create mode 100644 crates/terraphim_dsm/Cargo.toml create mode 100644 crates/terraphim_dsm/src/knowledge.rs create mode 100644 crates/terraphim_dsm/src/main.rs create mode 100644 crates/terraphim_dsm/src/models.rs diff --git a/crates/terraphim_dsm/Cargo.toml b/crates/terraphim_dsm/Cargo.toml new file mode 100644 index 000000000..430b0e856 --- /dev/null +++ b/crates/terraphim_dsm/Cargo.toml @@ -0,0 +1,25 @@ +[package] +name = "terraphim_dsm" +version.workspace = true +edition.workspace = true +description = "Semantic module grouping using Terraphim knowledge graphs (sentrux companion tool)" +license = "MIT" +repository = "https://github.com/terraphim/terraphim-ai" + +[[bin]] +name = "terraphim_dsm" +path = "src/main.rs" + +[dependencies] +clap = { version = "4", features = ["derive"] } +serde = { version = "1", features = ["derive"] } +serde_json = "1" +walkdir = "2" +anyhow = "1" +tracing = "0.1" +tracing-subscriber = { version = "0.3", features = ["env-filter"] } +log = "0.4" +dirs = "5" + +[dev-dependencies] +tempfile = "3" diff --git a/crates/terraphim_dsm/src/knowledge.rs b/crates/terraphim_dsm/src/knowledge.rs new 
file mode 100644 index 000000000..3a346b36c --- /dev/null +++ b/crates/terraphim_dsm/src/knowledge.rs @@ -0,0 +1,260 @@ +use anyhow::Result; +use std::collections::HashMap; +use std::path::PathBuf; +use walkdir::WalkDir; + +/// Knowledge Graph integration for semantic module labeling +pub struct KnowledgeGraph { + /// Map of term -> domain concept + concepts: HashMap, + /// KG source directory + #[allow(dead_code)] + kg_path: PathBuf, +} + +#[derive(Debug, Clone)] +pub struct Concept { + pub name: String, + #[allow(dead_code)] + pub description: String, + pub synonyms: Vec, + #[allow(dead_code)] + pub related_concepts: Vec, + #[allow(dead_code)] + pub category: String, +} + +impl KnowledgeGraph { + pub fn new(kg_path: PathBuf) -> Self { + Self { + concepts: HashMap::new(), + kg_path, + } + } + + pub fn concept_count(&self) -> usize { + self.concepts.len() + } + + /// Load knowledge graph from ~/.config/terraphim/kg/ + pub fn load_default() -> Result { + let kg_path = dirs::home_dir() + .unwrap_or_default() + .join(".config/terraphim/kg"); + + let mut kg = Self::new(kg_path.clone()); + kg.load_from_directory(&kg_path)?; + Ok(kg) + } + + /// Load all markdown files from KG directory + pub fn load_from_directory(&mut self, path: &PathBuf) -> Result<()> { + if !path.exists() { + return Ok(()); + } + + for entry in WalkDir::new(path) + .follow_links(false) + .into_iter() + .filter_map(|e| e.ok()) + { + let path = entry.path(); + if path.extension().map_or(false, |ext| ext == "md") { + if let Ok(content) = std::fs::read_to_string(path) { + let concept = self.parse_concept_file(path, &content); + self.concepts.insert(concept.name.clone(), concept); + } + } + } + + Ok(()) + } + + fn parse_concept_file(&self, path: &std::path::Path, content: &str) -> Concept { + let name = path + .file_stem() + .unwrap_or_default() + .to_string_lossy() + .to_string(); + + let mut description = String::new(); + let mut synonyms = Vec::new(); + let mut related = Vec::new(); + let mut category 
= "general".to_string(); + + for line in content.lines() { + let line = line.trim(); + + if line.starts_with("synonyms::") { + synonyms = line + .trim_start_matches("synonyms::") + .split(',') + .map(|s| s.trim().to_lowercase()) + .collect(); + } else if line.starts_with("## Related Concepts") { + category = "related".to_string(); + } else if line.starts_with("-") && category == "related" { + related.push(line.trim_start_matches("-").trim().to_string()); + } else if !line.is_empty() && !line.starts_with("#") { + description.push_str(line); + description.push(' '); + } + } + + Concept { + name: name.clone(), + description: description.trim().to_string(), + synonyms, + related_concepts: related, + category, + } + } + + /// Match a module path against KG concepts + pub fn match_module(&self, module_path: &str) -> Vec<&Concept> { + let mut matches = Vec::new(); + let module_lower = module_path.to_lowercase(); + + for concept in self.concepts.values() { + // Check if concept name appears in module path + if module_lower.contains(&concept.name.to_lowercase()) { + matches.push(concept); + continue; + } + + // Check synonyms + for synonym in &concept.synonyms { + if module_lower.contains(synonym) { + matches.push(concept); + break; + } + } + } + + matches + } + + /// Get domain category for a module + pub fn get_module_category(&self, module_path: &str) -> String { + let matches = self.match_module(module_path); + + if matches.is_empty() { + return "uncategorized".to_string(); + } + + // Return the first matched concept's name as category + matches[0].name.clone() + } + + /// Group modules by domain concept + pub fn group_by_concept(&self, modules: &[String]) -> HashMap> { + let mut groups: HashMap> = HashMap::new(); + + for module in modules { + let category = self.get_module_category(module); + groups.entry(category).or_default().push(module.clone()); + } + + groups + } + + /// Check if two modules are semantically related + #[allow(dead_code)] + pub fn 
are_related(&self, module_a: &str, module_b: &str) -> bool { + let concepts_a = self.match_module(module_a); + let concepts_b = self.match_module(module_b); + + // Check if they share any concepts + for ca in &concepts_a { + for cb in &concepts_b { + if ca.name == cb.name { + return true; + } + // Check related concepts + if ca.related_concepts.contains(&cb.name) { + return true; + } + } + } + + false + } +} + +#[cfg(test)] +mod tests { + use super::*; + use std::io::Write; + use tempfile::TempDir; + + #[test] + fn test_knowledge_graph_loading() { + let temp_dir = TempDir::new().unwrap(); + let kg_dir = temp_dir.path().join("kg"); + std::fs::create_dir(&kg_dir).unwrap(); + + // Create a test concept file + let mut concept_file = std::fs::File::create(kg_dir.join("Authentication.md")).unwrap(); + writeln!(concept_file, "# Authentication").unwrap(); + writeln!(concept_file, "").unwrap(); + writeln!(concept_file, "Authentication and authorization concepts").unwrap(); + writeln!(concept_file, "").unwrap(); + writeln!(concept_file, "synonyms:: auth, login, identity").unwrap(); + writeln!(concept_file, "").unwrap(); + writeln!(concept_file, "## Related Concepts").unwrap(); + writeln!(concept_file, "- Security").unwrap(); + writeln!(concept_file, "- Identity").unwrap(); + + let mut kg = KnowledgeGraph::new(kg_dir.clone()); + kg.load_from_directory(&kg_dir).unwrap(); + + assert_eq!(kg.concepts.len(), 1); + assert!(kg.concepts.contains_key("Authentication")); + } + + #[test] + fn test_module_matching() { + let temp_dir = TempDir::new().unwrap(); + let kg_dir = temp_dir.path().join("kg"); + std::fs::create_dir(&kg_dir).unwrap(); + + let mut concept_file = std::fs::File::create(kg_dir.join("Authentication.md")).unwrap(); + writeln!(concept_file, "# Authentication").unwrap(); + writeln!(concept_file, "synonyms:: auth, login").unwrap(); + + let mut kg = KnowledgeGraph::new(kg_dir.clone()); + kg.load_from_directory(&kg_dir).unwrap(); + + let matches = 
kg.match_module("terraphim_service::auth_handler"); + assert_eq!(matches.len(), 1); + + let matches = kg.match_module("terraphim_service::login"); + assert_eq!(matches.len(), 1); + + let matches = kg.match_module("terraphim_service::unrelated"); + assert_eq!(matches.len(), 0); + } + + #[test] + fn test_semantic_relationship() { + let temp_dir = TempDir::new().unwrap(); + let kg_dir = temp_dir.path().join("kg"); + std::fs::create_dir(&kg_dir).unwrap(); + + let mut auth_file = std::fs::File::create(kg_dir.join("Authentication.md")).unwrap(); + writeln!(auth_file, "# Authentication").unwrap(); + writeln!(auth_file, "synonyms:: auth").unwrap(); + writeln!(auth_file, "## Related Concepts").unwrap(); + writeln!(auth_file, "- Security").unwrap(); + + let mut sec_file = std::fs::File::create(kg_dir.join("Security.md")).unwrap(); + writeln!(sec_file, "# Security").unwrap(); + writeln!(sec_file, "synonyms:: security").unwrap(); + + let mut kg = KnowledgeGraph::new(kg_dir.clone()); + kg.load_from_directory(&kg_dir).unwrap(); + + assert!(kg.are_related("auth_module", "security_handler")); + assert!(!kg.are_related("auth_module", "ui_component")); + } +} diff --git a/crates/terraphim_dsm/src/main.rs b/crates/terraphim_dsm/src/main.rs new file mode 100644 index 000000000..97269d8a4 --- /dev/null +++ b/crates/terraphim_dsm/src/main.rs @@ -0,0 +1,245 @@ +mod knowledge; +mod models; + +use anyhow::Result; +use clap::{Parser, ValueEnum}; +use std::fs::File; +use std::io::{self, BufRead, Write}; +use std::path::PathBuf; +use tracing::{info, warn}; + +use knowledge::KnowledgeGraph; +use models::{SemanticAnalysis, SemanticGroup}; + +#[derive(Parser)] +#[command(name = "terraphim_dsm")] +#[command(about = "Semantic module grouping using Terraphim knowledge graphs")] +#[command(version)] +struct Cli { + /// Input file with module paths (one per line). Defaults to stdin. 
+ #[arg(short, long)] + input: Option, + + /// Output format + #[arg(short, long, value_enum, default_value = "text")] + format: OutputFormat, + + /// Output file (default: stdout) + #[arg(short, long)] + output: Option, + + /// Knowledge graph path (default: ~/.config/terraphim/kg) + #[arg(long)] + kg_path: Option, + + /// Filter by specific domain concept + #[arg(short, long)] + concept: Option, + + /// Show uncategorized modules + #[arg(long)] + show_uncategorized: bool, + + /// Verbose logging + #[arg(short, long)] + verbose: bool, +} + +#[derive(Clone, ValueEnum)] +enum OutputFormat { + Text, + Json, + Csv, +} + +fn main() -> Result<()> { + let cli = Cli::parse(); + + // Initialize logging + let subscriber = tracing_subscriber::fmt() + .with_env_filter(if cli.verbose { + "terraphim_dsm=debug" + } else { + "terraphim_dsm=info" + }) + .finish(); + tracing::subscriber::set_global_default(subscriber)?; + + // Load knowledge graph + info!("Loading knowledge graph..."); + let kg = if let Some(kg_path) = &cli.kg_path { + let mut kg = KnowledgeGraph::new(kg_path.clone()); + kg.load_from_directory(kg_path)?; + kg + } else { + match KnowledgeGraph::load_default() { + Ok(kg) => kg, + Err(e) => { + warn!("Failed to load default KG: {}", e); + return Ok(()); + } + } + }; + + info!( + "Loaded {} concepts from knowledge graph", + kg.concept_count() + ); + + // Read module paths from input + let modules = read_module_paths(&cli.input)?; + info!("Read {} module paths", modules.len()); + + if modules.is_empty() { + warn!("No module paths provided"); + return Ok(()); + } + + // Group modules by domain concept + let groups = kg.group_by_concept(&modules); + + // Build analysis + let mut semantic_groups: Vec = Vec::new(); + let mut uncategorized_count = 0; + + for (concept, modules) in &groups { + if concept == "uncategorized" { + uncategorized_count = modules.len(); + } else { + semantic_groups.push(SemanticGroup { + concept: concept.clone(), + modules: modules.clone(), + count: 
modules.len(), + }); + } + } + + // Sort by module count (descending) + semantic_groups.sort_by(|a, b| b.count.cmp(&a.count)); + + // Filter by concept if specified + if let Some(filter) = &cli.concept { + semantic_groups.retain(|g| g.concept.to_lowercase() == filter.to_lowercase()); + } + + let analysis = SemanticAnalysis { + groups: semantic_groups, + total_modules: modules.len(), + uncategorized_count, + knowledge_graph_concepts: kg.concept_count(), + }; + + // Output + let mut output_writer: Box = if let Some(output_path) = &cli.output { + Box::new(File::create(output_path)?) + } else { + Box::new(io::stdout()) + }; + + match cli.format { + OutputFormat::Text => { + format_text( + &analysis, + &groups, + cli.show_uncategorized, + &mut output_writer, + )?; + } + OutputFormat::Json => { + let json = serde_json::to_string_pretty(&analysis)?; + output_writer.write_all(json.as_bytes())?; + } + OutputFormat::Csv => { + format_csv( + &analysis, + &groups, + cli.show_uncategorized, + &mut output_writer, + )?; + } + } + + if cli.output.is_some() { + info!("Output written to: {}", cli.output.unwrap().display()); + } + + Ok(()) +} + +fn read_module_paths(input: &Option) -> Result> { + let reader: Box = if let Some(path) = input { + Box::new(io::BufReader::new(File::open(path)?)) + } else { + Box::new(io::BufReader::new(io::stdin())) + }; + + let mut modules = Vec::new(); + for line in reader.lines() { + let line = line?; + let trimmed = line.trim(); + if !trimmed.is_empty() && !trimmed.starts_with('#') { + modules.push(trimmed.to_string()); + } + } + + Ok(modules) +} + +fn format_text( + analysis: &SemanticAnalysis, + groups: &std::collections::HashMap>, + show_uncategorized: bool, + writer: &mut dyn Write, +) -> Result<()> { + writeln!(writer, "=== Semantic Module Grouping ===")?; + writeln!( + writer, + "Modules: {} | KG Concepts: {} | Uncategorized: {}", + analysis.total_modules, analysis.knowledge_graph_concepts, analysis.uncategorized_count + )?; + writeln!(writer)?; 
+ + for group in &analysis.groups { + writeln!(writer, "[{}] {} modules", group.concept, group.count)?; + for module in &group.modules { + writeln!(writer, " - {}", module)?; + } + writeln!(writer)?; + } + + if show_uncategorized { + if let Some(uncategorized) = groups.get("uncategorized") { + writeln!(writer, "[uncategorized] {} modules", uncategorized.len())?; + for module in uncategorized { + writeln!(writer, " - {}", module)?; + } + } + } + + Ok(()) +} + +fn format_csv( + analysis: &SemanticAnalysis, + groups: &std::collections::HashMap>, + show_uncategorized: bool, + writer: &mut dyn Write, +) -> Result<()> { + writeln!(writer, "concept,module")?; + + for group in &analysis.groups { + for module in &group.modules { + writeln!(writer, "{},\"{}\"", group.concept, module)?; + } + } + + if show_uncategorized { + if let Some(uncategorized) = groups.get("uncategorized") { + for module in uncategorized { + writeln!(writer, "uncategorized,\"{}\"", module)?; + } + } + } + + Ok(()) +} diff --git a/crates/terraphim_dsm/src/models.rs b/crates/terraphim_dsm/src/models.rs new file mode 100644 index 000000000..ba5c3f681 --- /dev/null +++ b/crates/terraphim_dsm/src/models.rs @@ -0,0 +1,21 @@ +use serde::{Deserialize, Serialize}; + +/// Represents a semantic grouping of modules by domain concept +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct SemanticGroup { + /// Domain concept name + pub concept: String, + /// Module paths belonging to this concept + pub modules: Vec, + /// Number of modules in this group + pub count: usize, +} + +/// Complete semantic analysis result +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct SemanticAnalysis { + pub groups: Vec, + pub total_modules: usize, + pub uncategorized_count: usize, + pub knowledge_graph_concepts: usize, +} From 670bb4a919f6b4d7271c499974f56be02290c2ae Mon Sep 17 00:00:00 2001 From: Test User Date: Sat, 2 May 2026 09:03:20 +0100 Subject: [PATCH 2/2] fix(Database): use System config dir instead of 
relative path for settings Prevents settings.toml from being created in arbitrary working directories. Default now uses DeviceSettings::default_config_path() (~/.config/terraphim/). For local dev testing, override with: TERRAPHIM_SETTINGS_PATH=crates/terraphim_settings/default cargo test --- Cargo.lock | 16 ++++++++++++++++ crates/terraphim_persistence/src/lib.rs | 7 ++----- 2 files changed, 18 insertions(+), 5 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index ab9b56379..f73506d3c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -8787,6 +8787,22 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = "terraphim_dsm" +version = "1.17.0" +dependencies = [ + "anyhow", + "clap", + "dirs 5.0.1", + "log", + "serde", + "serde_json", + "tempfile", + "tracing", + "tracing-subscriber", + "walkdir", +] + [[package]] name = "terraphim_file_search" version = "1.17.0" diff --git a/crates/terraphim_persistence/src/lib.rs b/crates/terraphim_persistence/src/lib.rs index b46649807..3c2da14e4 100644 --- a/crates/terraphim_persistence/src/lib.rs +++ b/crates/terraphim_persistence/src/lib.rs @@ -101,13 +101,10 @@ impl DeviceStorage { } async fn init_device_storage() -> Result { - // Use local dev settings by default to avoid RocksDB lock issues + // Use platform config directory to avoid polluting arbitrary working directories let settings_path = std::env::var("TERRAPHIM_SETTINGS_PATH") .map(std::path::PathBuf::from) - .unwrap_or_else(|_| { - // Default to local dev settings directory (not file) - std::path::PathBuf::from("crates/terraphim_settings/default") - }); + .unwrap_or_else(|_| terraphim_settings::DeviceSettings::default_config_path()); log::debug!("Loading settings from: {:?}", settings_path); let settings = DeviceSettings::load_from_env_and_file(Some(settings_path.clone()))?;