diff --git a/Cargo.lock b/Cargo.lock index d4e26a2..468ecc0 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -67,6 +67,27 @@ version = "1.0.100" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a23eb6b1614318a8071c9b2521f36b424b2c83db5eb3a0fead4a6c0809af6e61" +[[package]] +name = "assert_cmd" +version = "2.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c5bcfa8749ac45dd12cb11055aeeb6b27a3895560d60d71e3c23bf979e60514" +dependencies = [ + "anstyle", + "bstr", + "libc", + "predicates", + "predicates-core", + "predicates-tree", + "wait-timeout", +] + +[[package]] +name = "autocfg" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" + [[package]] name = "base64" version = "0.22.1" @@ -166,6 +187,28 @@ dependencies = [ "memchr", ] +[[package]] +name = "crossbeam" +version = "0.8.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1137cd7e7fc0fb5d3c5a8678be38ec56e819125d8d7907411fe24ccb943faca8" +dependencies = [ + "crossbeam-channel", + "crossbeam-deque", + "crossbeam-epoch", + "crossbeam-queue", + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-channel" +version = "0.5.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "82b8f8f868b36967f9606790d1903570de9ceaf870a7bf9fbbd3016d636a2cb2" +dependencies = [ + "crossbeam-utils", +] + [[package]] name = "crossbeam-deque" version = "0.8.6" @@ -185,12 +228,27 @@ dependencies = [ "crossbeam-utils", ] +[[package]] +name = "crossbeam-queue" +version = "0.3.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0f58bbc28f91df819d0aa2a2c00cd19754769c2fad90579b3592b1c9ba7a3115" +dependencies = [ + "crossbeam-utils", +] + [[package]] name = "crossbeam-utils" version = "0.8.21" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28" +[[package]] +name = "difflib" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6184e33543162437515c2e2b48714794e37845ec9851711914eec9d308f6ebe8" + [[package]] name = "either" version = "1.15.0" @@ -247,6 +305,15 @@ version = "2.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be" +[[package]] +name = "float-cmp" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b09cf3155332e944990140d967ff5eceb70df778b34f77d8075db46e4704e6d8" +dependencies = [ + "num-traits", +] + [[package]] name = "getrandom" version = "0.3.4" @@ -261,26 +328,23 @@ dependencies = [ [[package]] name = "gitmelt" -version = "0.3.0" +version = "0.4.0" dependencies = [ "anyhow", + "assert_cmd", "clap", "content_inspector", + "crossbeam", + "crossbeam-channel", "env_logger", - "glob", "ignore", "log", + "predicates", "rayon", "tempfile", "tiktoken-rs", ] -[[package]] -name = "glob" -version = "0.3.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0cc23270f6e1808e30a928bdc84dea0b9b4136a8bc82338574f23baf47bbd280" - [[package]] name = "globset" version = "0.4.18" @@ -376,6 +440,21 @@ version = "2.7.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f52b00d39961fc5b2736ea853c9cc86238e165017a493d1d5c8eac6bdc4cc273" +[[package]] +name = "normalize-line-endings" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "61807f77802ff30975e01f4f071c8ba10c022052f98b3294119f3e615d13e5be" + +[[package]] +name = "num-traits" +version = "0.2.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" +dependencies = [ + "autocfg", +] + [[package]] name = "once_cell" version = "1.21.3" @@ -403,6 +482,36 @@ dependencies = [ "portable-atomic", ] +[[package]] +name = "predicates" +version = "3.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a5d19ee57562043d37e82899fade9a22ebab7be9cef5026b07fda9cdd4293573" +dependencies = [ + "anstyle", + "difflib", + "float-cmp", + "normalize-line-endings", + "predicates-core", + "regex", +] + +[[package]] +name = "predicates-core" +version = "1.0.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "727e462b119fe9c93fd0eb1429a5f7647394014cf3c04ab2c0350eeb09095ffa" + +[[package]] +name = "predicates-tree" +version = "1.0.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72dd2d6d381dfb73a193c7fca536518d7caee39fc8503f74e7dc0be0531b425c" +dependencies = [ + "predicates-core", + "termtree", +] + [[package]] name = "proc-macro2" version = "1.0.105" @@ -563,6 +672,12 @@ dependencies = [ "windows-sys", ] +[[package]] +name = "termtree" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f50febec83f5ee1df3015341d8bd429f2d1cc62bcba7ea2076759d315084683" + [[package]] name = "tiktoken-rs" version = "0.9.1" @@ -590,6 +705,15 @@ version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" +[[package]] +name = "wait-timeout" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09ac3b126d3914f9849036f826e054cbabdc8519970b8998ddaf3b5bd3c65f11" +dependencies = [ + "libc", +] + [[package]] name = "walkdir" version = "2.5.0" diff --git a/Cargo.toml b/Cargo.toml index 93fd3be..eea26af 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "gitmelt" -version = "0.3.0" +version = "0.4.0" edition = "2024" authors = ["qustrolabe "] description = "a tool to turn repository into single file text digest to conveniently feed into LLM" @@ -14,8 +14,9 @@ categories = ["command-line-utilities"] anyhow = "1.0.100" clap = { version = "4.5.54", features = ["derive"] } content_inspector = "0.2.4" +crossbeam = "0.8.4" +crossbeam-channel = "0.5.15" env_logger = "0.11.8" -glob = "0.3.3" ignore = "0.4.25" log = "0.4.29" rayon = "1.11.0" @@ -24,8 +25,10 @@ tiktoken-rs = "0.9.1" [dev-dependencies] tempfile = "3.24.0" +assert_cmd = "2.0.16" +predicates = "3.1.3" [profile.release] strip = true lto = true -codegen-units = 1 \ No newline at end of file +codegen-units = 1 diff --git a/README.md b/README.md index 606a203..be6c252 100644 --- a/README.md +++ b/README.md @@ -15,6 +15,6 @@ and `.toml` files in the current directory and its subdirectories ## Build ```bash -cargo build --relase +cargo build --release cargo install --path . ``` diff --git a/src/cloner.rs b/src/cloner.rs index f14ab70..12315e1 100644 --- a/src/cloner.rs +++ b/src/cloner.rs @@ -1,9 +1,19 @@ -use anyhow::{Context, Result}; +use anyhow::{Context, Result, bail}; use log::info; use std::process::Command; use tempfile::TempDir; +fn check_git_installed() -> Result<()> { + match Command::new("git").arg("--version").output() { + Ok(output) if output.status.success() => Ok(()), + _ => { + bail!("Git is not installed or not in PATH. Please install Git to clone repositories.") + } + } +} + pub fn clone_repo(url: &str, branch: Option<&str>) -> Result { + check_git_installed()?; let temp_dir = TempDir::new()?; let target_path = temp_dir.path(); diff --git a/src/decorator/file_tree.rs b/src/decorator/file_tree.rs index 0e6fa47..c927536 100644 --- a/src/decorator/file_tree.rs +++ b/src/decorator/file_tree.rs @@ -74,7 +74,7 @@ fn print_tree(node: &TreeNode, prefix: &str, output: &mut String) { prefix, connector, name, - if !child.is_file { "/" } else { "" } + if child.is_file { "" } else { "/" } ); if !child.children.is_empty() { diff --git a/src/decorator/markdown.rs b/src/decorator/markdown.rs new file mode 100644 index 0000000..d4cf4db --- /dev/null +++ b/src/decorator/markdown.rs @@ -0,0 +1,22 @@ +use super::{ContentDecorator, format_path}; +use std::path::Path; + +pub struct MarkdownDecorator; + +impl ContentDecorator for MarkdownDecorator { + fn before(&self, path: &Path) -> Option { + let path_str = format_path(path); + // Extract extension for syntax highlighting (e.g., "rs", "toml") + let ext = path.extension().and_then(|e| e.to_str()).unwrap_or(""); + + Some(format!("## File: {path_str}\n```{ext}")) + } + + fn after(&self, _path: &Path) -> Option { + Some("```".to_string()) + } + + fn transform(&self, _path: &Path, content: String) -> String { + content + } +} diff --git a/src/decorator/mod.rs b/src/decorator/mod.rs index c60236c..d71c8eb 100644 --- a/src/decorator/mod.rs +++ b/src/decorator/mod.rs @@ -2,10 +2,12 @@ use std::path::Path; pub mod default; pub mod file_tree; +pub mod markdown; pub mod xml; pub use default::DefaultDecorator; pub use file_tree::FileTreeDecorator; +pub use markdown::MarkdownDecorator; pub use xml::XmlDecorator; #[derive(clap::ValueEnum, Clone, Debug, Default, PartialEq)] diff --git a/src/ingest.rs b/src/ingest.rs index 1d8653d..b308e2b 100644 --- a/src/ingest.rs +++ b/src/ingest.rs @@ -1,11 +1,13 @@ use crate::decorator::{ContentDecorator, GlobalDecorator}; use anyhow::Result; +use crossbeam_channel::bounded; use log::{error, info, warn}; use rayon::prelude::*; +use std::collections::BTreeMap; use std::fs::File; -use std::io::{self, Write}; +use std::io::{self, BufWriter, Write}; use std::path::PathBuf; -use tiktoken_rs::{CoreBPE, cl100k_base}; +use tiktoken_rs::cl100k_base; pub const DIGEST_FILENAME: &str = "digest.txt"; @@ -17,9 +19,13 @@ pub enum OutputDestination { pub const MAX_FILE_SIZE: u64 = 10 * 1024 * 1024; // 10MB +pub struct IngestMetrics { + pub total_tokens: usize, +} + struct ProcessedFile { + index: usize, content: String, - tokens: usize, } pub fn ingest( @@ -27,69 +33,125 @@ pub fn ingest( output_dest: OutputDestination, content_decorator: &dyn ContentDecorator, global_decorator: Option<&dyn GlobalDecorator>, -) -> Result<()> { + count_tokens: bool, +) -> Result> { match &output_dest { OutputDestination::File(path) => info!("Writing digest to {}", path.display()), OutputDestination::Stdout => info!("Writing digest to stdout"), OutputDestination::Null => info!("Dry run: only token estimation will be performed"), } - // Pre-load tokenizer - let tokenizer = cl100k_base().ok(); + // Pre-load tokenizer if needed + let tokenizer = if count_tokens { + cl100k_base().ok() + } else { + None + }; - // Process files in parallel - let processed_results: Vec = files - .par_iter() - .filter_map(|path| process_single_file(path, content_decorator, tokenizer.as_ref())) - .collect(); + let (tx, rx) = bounded(32); // Buffer some results to keep cores busy + + let metrics = crossbeam::scope(|scope| -> Result { + // Spawn writer thread + let rx = rx; // Move rx into the scope, but it's shared + let writer_handle = scope.spawn(move |_| -> Result { + let mut writer: Option> = match output_dest { + OutputDestination::File(ref path) => { + Some(Box::new(BufWriter::new(File::create(path)?))) + } + OutputDestination::Stdout => Some(Box::new(io::stdout())), + OutputDestination::Null => None, + }; + + let mut total_tokens = 0; + let mut pending = BTreeMap::new(); + let mut next_index = 0; + + if let Some(prologue) = global_decorator.and_then(|g| g.prologue(files)) { + if let Some(ref t) = tokenizer { + total_tokens += t.encode_with_special_tokens(&prologue).len(); + } + if let Some(ref mut w) = writer { + writeln!(w, "{prologue}")?; + } + } + + while next_index < files.len() { + // Check if we already have the next segment + while let Some(processed) = pending.remove(&next_index) { + let processed: ProcessedFile = processed; + + if let Some(ref t) = tokenizer { + total_tokens += t.encode_with_special_tokens(&processed.content).len(); + } + + if let Some(ref mut w) = writer { + writeln!(w, "{}", processed.content)?; + } + + next_index += 1; + } + + if next_index >= files.len() { + break; + } + + // Wait for more results + if let Ok(processed) = rx.recv() { + let processed: ProcessedFile = processed; + pending.insert(processed.index, processed); + } else { + break; // Channel closed + } + } + + if let Some(ref mut w) = writer { + w.flush()?; + } + + Ok(total_tokens) + }); - let mut writer: Option> = match output_dest { - OutputDestination::File(path) => Some(Box::new(File::create(path)?)), - OutputDestination::Stdout => Some(Box::new(io::stdout())), - OutputDestination::Null => None, - }; + // Process files in parallel + files.par_iter().enumerate().for_each(|(idx, path)| { + if let Some(processed) = process_single_file(idx, path, content_decorator) { + let _ = tx.send(processed); + } + }); - let mut total_tokens = 0; + drop(tx); // Signal completion - if let Some(prologue) = global_decorator.and_then(|g| g.prologue(files)) { - if let Some(ref mut w) = writer { - writeln!(w, "{prologue}")?; - } - } + let total_tokens = writer_handle + .join() + .map_err(|_| anyhow::anyhow!("Writer thread panicked"))??; - // Write results sequentially to maintain order (files was sorted in traversal) - for processed in processed_results { - if let Some(ref mut w) = writer { - writeln!(w, "{}", processed.content)?; - } - total_tokens += processed.tokens; - } + Ok(IngestMetrics { total_tokens }) + }) + .map_err(|e| anyhow::anyhow!("Scope error: {e:?}"))??; - info!("Total estimated tokens: {total_tokens}"); - println!("Total estimated tokens: {total_tokens}"); + info!("Total estimated tokens: {}", metrics.total_tokens); + println!("Total estimated tokens: {}", metrics.total_tokens); - Ok(()) + Ok(Some(metrics)) } fn process_single_file( + index: usize, path: &PathBuf, content_decorator: &dyn ContentDecorator, - tokenizer: Option<&CoreBPE>, ) -> Option { // 1. Check file size - if let Ok(metadata) = std::fs::metadata(path) { - if metadata.len() > MAX_FILE_SIZE { + if let Ok(metadata) = std::fs::metadata(path) + && metadata.len() > MAX_FILE_SIZE { error!( "Skipping large file: {} ({} bytes)", path.display(), metadata.len() ); return Some(ProcessedFile { + index, content: format!("----- {} (Skipped: >10MB) -----", path.display()), - tokens: 0, }); } - } // 2. Check for binary content & Read let mut file = match File::open(path) { @@ -97,29 +159,29 @@ fn process_single_file( Err(e) => { error!("Error opening {}: {e}", path.display()); return Some(ProcessedFile { + index, content: format!("----- {} (Error opening file) -----", path.display()), - tokens: 0, }); } }; - let mut buffer = [0u8; 1024]; - let n = match std::io::Read::read(&mut file, &mut buffer) { + let mut prelude_buffer = [0u8; 1024]; + let n = match std::io::Read::read(&mut file, &mut prelude_buffer) { Ok(n) => n, Err(e) => { error!("Error reading prelude of {}: {e}", path.display()); return Some(ProcessedFile { + index, content: format!("----- {} (Error reading prelude) -----", path.display()), - tokens: 0, }); } }; - if n > 0 && content_inspector::inspect(&buffer[..n]).is_binary() { + if n > 0 && content_inspector::inspect(&prelude_buffer[..n]).is_binary() { warn!("Skipping binary file: {}", path.display()); return Some(ProcessedFile { + index, content: format!("----- {} (Skipped: Binary) -----", path.display()), - tokens: 0, }); } @@ -129,15 +191,17 @@ fn process_single_file( return None; } - let mut content = String::new(); - if let Err(e) = std::io::Read::read_to_string(&mut file, &mut content) { + let mut buffer = Vec::new(); + if let Err(e) = std::io::Read::read_to_end(&mut file, &mut buffer) { error!("Error reading {}: {e}", path.display()); return Some(ProcessedFile { + index, content: format!("----- {} (Error reading content) -----", path.display()), - tokens: 0, }); } + let content = String::from_utf8_lossy(&buffer).to_string(); + // Apply decoration let mut final_output = String::new(); if let Some(before) = content_decorator.before(path) { @@ -154,18 +218,9 @@ fn process_single_file( final_output.push('\n'); } - // Count tokens - let tokens = if let Some(tokenizer) = tokenizer { - tokenizer - .encode_with_special_tokens(&transformed_content) - .len() - } else { - 0 - }; - Some(ProcessedFile { + index, content: final_output.trim_end().to_string(), - tokens, }) } @@ -196,6 +251,7 @@ mod tests { OutputDestination::File(output_path.clone()), &decorator, None, + true, )?; assert!(output_path.exists()); diff --git a/src/main.rs b/src/main.rs index 9cbc8ab..45bb24a 100644 --- a/src/main.rs +++ b/src/main.rs @@ -8,20 +8,25 @@ use clap::Parser; use log::{LevelFilter, info}; use std::env; use std::path::PathBuf; +use std::time::Instant; use traversal::TraversalOptions; -use crate::decorator::{ContentDecorator, DefaultDecorator, FileTreeDecorator, XmlDecorator}; +use crate::decorator::{ + ContentDecorator, DefaultDecorator, FileTreeDecorator, MarkdownDecorator, XmlDecorator, +}; use crate::ingest::OutputDestination; #[derive(clap::ValueEnum, Clone, Debug)] enum Preset { Default, + Markdown, Xml, } #[derive(Parser)] #[command(name = "gitmelt")] #[command(about = "Concatenates file contents into a single digest file", long_about = None)] +#[allow(clippy::struct_excessive_bools)] struct Cli { /// Path to traverse or Git URL #[arg(default_value = ".")] @@ -62,6 +67,14 @@ struct Cli { /// Dry run (only token estimation) #[arg(long)] dry: bool, + + /// Disable token counting + #[arg(long)] + no_tokens: bool, + + /// Show detailed timing information + #[arg(short, long)] + timing: bool, } fn init_logger(verbose: bool) { @@ -79,6 +92,7 @@ fn init_logger(verbose: bool) { } fn main() -> Result<()> { + let global_start = Instant::now(); let cli = Cli::parse(); init_logger(cli.verbose); @@ -102,7 +116,9 @@ fn main() -> Result<()> { }; info!("Traversing files in {}", options.root.display()); + let discovery_start = Instant::now(); let files = traversal::traverse(&options)?; + let discovery_duration = discovery_start.elapsed(); info!("Found {} files", files.len()); if files.is_empty() { @@ -125,6 +141,7 @@ fn main() -> Result<()> { let content_decorator: Box = match cli.preset { Preset::Default => Box::new(DefaultDecorator), + Preset::Markdown => Box::new(MarkdownDecorator), Preset::Xml => Box::new(XmlDecorator), }; @@ -133,14 +150,26 @@ fn main() -> Result<()> { mode: cli.prologue, }; - ingest::ingest( + let ingest_start = Instant::now(); + let _ingest_metrics = ingest::ingest( &files, output_dest, content_decorator.as_ref(), Some(&global_decorator), + !cli.no_tokens, )?; + let ingest_duration = ingest_start.elapsed(); info!("Done!"); + if cli.timing { + println!("\nTiming Summary:"); + println!("----------------------------------------"); + println!("Discovery: {discovery_duration:?}"); + println!("Ingestion: {ingest_duration:?}"); + println!("Total Runtime: {:?}", global_start.elapsed()); + println!("----------------------------------------"); + } + Ok(()) } diff --git a/src/traversal.rs b/src/traversal.rs index 4cb4758..de7f44d 100644 --- a/src/traversal.rs +++ b/src/traversal.rs @@ -169,13 +169,12 @@ mod tests { let files = traverse(&options)?; for f in &files { - eprintln!("Found: {:?}", f); + eprintln!("Found: {f:?}"); } assert!( files.iter().any(|p| p.ends_with("main.rs")), - "Files found: {:?}", - files + "Files found: {files:?}" ); assert!(!files.iter().any(|p| p.ends_with("Cargo.lock"))); assert!(!files.iter().any(|p| p.ends_with("test.lock"))); diff --git a/tests/integration_tests.rs b/tests/integration_tests.rs new file mode 100644 index 0000000..dd8165c --- /dev/null +++ b/tests/integration_tests.rs @@ -0,0 +1,131 @@ +use assert_cmd::Command; +use predicates::prelude::*; +use std::fs::{self, File}; +use std::io::Write; +use tempfile::TempDir; + +#[test] +fn test_cli_ignores_binaries() -> Result<(), Box> { + let temp = TempDir::new()?; + let root = temp.path(); + + // 1. Create a binary file + let bin_path = root.join("program.exe"); + let mut f = File::create(&bin_path)?; + // Write null bytes to look like binary + f.write_all(&[0u8; 100])?; + + // 2. Create a text file + let text_path = root.join("readme.md"); + let mut f = File::create(&text_path)?; + writeln!(f, "Important Context")?; + + // 3. Run gitmelt + let mut cmd = Command::new(env!("CARGO_BIN_EXE_gitmelt")); + cmd.arg(root.to_str().unwrap()) + .arg("--stdout") + .arg("--no-tokens"); + + // 4. Verify output + cmd.assert() + .success() + .stdout(predicate::str::contains("Important Context")) + .stdout(predicate::str::contains("Skipped: Binary")); + + Ok(()) +} + +#[test] +fn test_gitignore_logic() -> Result<(), Box> { + let temp = TempDir::new()?; + let root = temp.path(); + + // Create .git directory to ensure ignore crate respects .gitignore + fs::create_dir(root.join(".git"))?; + + // Create .gitignore + let mut gitignore = File::create(root.join(".gitignore"))?; + writeln!(gitignore, "secret.txt")?; + + // Create files + File::create(root.join("secret.txt"))?; + let mut public = File::create(root.join("public.txt"))?; + writeln!(public, "Public info")?; + + let mut cmd = Command::new(env!("CARGO_BIN_EXE_gitmelt")); + cmd.arg(root.to_str().unwrap()) + .arg("--stdout") + .arg("--no-tokens"); + + cmd.assert() + .success() + .stdout(predicate::str::contains("public.txt")) + .stdout(predicate::str::contains("secret.txt").not()); + + Ok(()) +} + +#[test] +fn test_file_ordering() -> Result<(), Box> { + let temp = TempDir::new()?; + let root = temp.path(); + + fs::create_dir_all(root.join("a"))?; + fs::create_dir_all(root.join("b"))?; + + let mut az = File::create(root.join("a/z.txt"))?; + writeln!(az, "Content A/Z")?; + + let mut ba = File::create(root.join("b/a.txt"))?; + writeln!(ba, "Content B/A")?; + + let mut cmd = Command::new(env!("CARGO_BIN_EXE_gitmelt")); + cmd.arg(root.to_str().unwrap()) + .arg("--stdout") + .arg("--no-tokens"); + + let output = cmd.assert().success().get_output().stdout.clone(); + let output_str = String::from_utf8(output)?; + + let pos_az = output_str.find("a/z.txt").unwrap(); + let pos_ba = output_str.find("b/a.txt").unwrap(); + + assert!(pos_az < pos_ba, "a/z.txt should come before b/a.txt"); + + Ok(()) +} + +#[test] +fn test_include_exclude_complexity() -> Result<(), Box> { + let temp = TempDir::new()?; + let root = temp.path(); + + fs::create_dir_all(root.join("src"))?; + fs::create_dir_all(root.join("tests"))?; + + let mut main_rs = File::create(root.join("src/main.rs"))?; + writeln!(main_rs, "fn main() {{}}")?; + + let mut utils_rs = File::create(root.join("src/utils.rs"))?; + writeln!(utils_rs, "fn utils() {{}}")?; + + let mut test_rs = File::create(root.join("tests/main_test.rs"))?; + writeln!(test_rs, "test")?; + + let mut cmd = Command::new(env!("CARGO_BIN_EXE_gitmelt")); + cmd.arg(root.to_str().unwrap()) + .arg("--stdout") + .arg("--no-tokens") + .arg("--include") + .arg("src/*.rs") + .arg("--exclude") + .arg("utils.rs"); + + cmd.assert() + .success() + .stdout(predicate::str::contains("src/main.rs")) + .stdout(predicate::str::contains("src/utils.rs").not()) + .stdout(predicate::str::contains("tests/main_test.rs").not()); + + Ok(()) +}