diff --git a/src/discord.rs b/src/discord.rs index 749297b..a459ceb 100644 --- a/src/discord.rs +++ b/src/discord.rs @@ -172,7 +172,7 @@ impl EventHandler for Handler { msg.content.trim().to_string() }; - // No text and no image attachments → skip to avoid wasting session slots + // No text and no attachments → skip to avoid wasting session slots if prompt.is_empty() && msg.attachments.is_empty() { return; } @@ -204,9 +204,14 @@ impl EventHandler for Handler { text: prompt_with_sender.clone(), }); - // Process attachments: route by content type (audio → STT, image → encode) + // Process attachments: route by content type (audio → STT, text file → inline, image → encode) let mut audio_skipped = false; if !msg.attachments.is_empty() { + let mut text_file_bytes: u64 = 0; + let mut text_file_count: u32 = 0; + const TEXT_TOTAL_CAP: u64 = 1024 * 1024; // 1 MB total for all text file attachments + const TEXT_FILE_COUNT_CAP: u32 = 5; + for attachment in &msg.attachments { if is_audio_attachment(attachment) { if self.stt_config.enabled { @@ -220,6 +225,23 @@ impl EventHandler for Handler { warn!(filename = %attachment.filename, "skipping audio attachment (STT disabled)"); audio_skipped = true; } + } else if is_text_attachment(attachment) { + if text_file_count >= TEXT_FILE_COUNT_CAP { + warn!(filename = %attachment.filename, count = text_file_count, "text file count cap reached, skipping"); + continue; + } + // Pre-check with Discord-reported size (fast path, avoids unnecessary download). + // Running total uses actual downloaded bytes for accurate accounting. 
+ if text_file_bytes + u64::from(attachment.size) > TEXT_TOTAL_CAP { + warn!(filename = %attachment.filename, total = text_file_bytes, "text attachments total exceeds 1MB cap, skipping remaining"); + continue; + } + if let Some((content_block, actual_bytes)) = download_and_read_text_file(attachment).await { + text_file_bytes += actual_bytes; + text_file_count += 1; + debug!(filename = %attachment.filename, "adding text file attachment"); + content_blocks.push(content_block); + } } else if let Some(content_block) = download_and_encode_image(attachment).await { debug!(url = %attachment.url, filename = %attachment.filename, "adding image attachment"); content_blocks.push(content_block); @@ -329,6 +351,95 @@ impl EventHandler for Handler { } } +/// Extensions recognised as text-based files that can be inlined into the prompt. +const TEXT_EXTENSIONS: &[&str] = &[ + "txt", "csv", "log", "md", "json", "jsonl", "yaml", "yml", "toml", "xml", + "rs", "py", "js", "ts", "jsx", "tsx", "go", "java", "c", "cpp", "h", "hpp", + "rb", "sh", "bash", "zsh", "fish", "ps1", "bat", "sql", "html", "css", + "scss", "less", "ini", "cfg", "conf", "env", +]; + +/// Exact filenames (no extension) recognised as text files. +const TEXT_FILENAMES: &[&str] = &[ + "dockerfile", "makefile", "justfile", "rakefile", "gemfile", + "procfile", "vagrantfile", ".gitignore", ".dockerignore", ".editorconfig", +]; + +/// MIME types recognised as text-based (beyond `text/*`). +const TEXT_MIME_TYPES: &[&str] = &[ + "application/json", + "application/xml", + "application/javascript", + "application/x-yaml", + "application/x-sh", + "application/toml", + "application/x-toml", +]; + +/// Check if an attachment is a text-based file we can inline. 
+fn is_text_attachment(attachment: &serenity::model::channel::Attachment) -> bool { + let mime = attachment.content_type.as_deref().unwrap_or(""); + let mime_base = mime.split(';').next().unwrap_or(mime).trim(); + if mime_base.starts_with("text/") || TEXT_MIME_TYPES.contains(&mime_base) { + return true; + } + // Check extension + if attachment.filename.contains('.') { + if let Some(ext) = attachment.filename.rsplit('.').next() { + if TEXT_EXTENSIONS.contains(&ext.to_lowercase().as_str()) { + return true; + } + } + } + // Check exact filename (Dockerfile, Makefile, etc.) + TEXT_FILENAMES.contains(&attachment.filename.to_lowercase().as_str()) +} + +/// Download a text-based file attachment and return it as a ContentBlock::Text. +/// Files larger than 512 KB are skipped to avoid bloating the prompt. +/// +/// Note: the caller already guards total size via TEXT_TOTAL_CAP; the per-file +/// MAX_SIZE check here is intentional defense-in-depth so this function remains +/// self-contained and safe when called from other contexts. 
async fn download_and_read_text_file(
    attachment: &serenity::model::channel::Attachment,
) -> Option<(ContentBlock, u64)> {
    // Returns `Some((block, downloaded_byte_count))` on success. Returns `None`
    // (after logging a warning) when the file is over the size limit, the
    // request fails, the server answers with a non-success status, or the body
    // cannot be read.
    const MAX_SIZE: u64 = 512 * 1024; // 512 KB

    // Cheap pre-check against the size reported on the attachment, before any
    // network traffic happens.
    if u64::from(attachment.size) > MAX_SIZE {
        warn!(filename = %attachment.filename, size = attachment.size, "text file exceeds 512KB limit, skipping");
        return None;
    }

    // Transport errors are logged rather than silently discarded, matching how
    // HTTP status failures are reported below.
    let resp = match HTTP_CLIENT.get(&attachment.url).send().await {
        Ok(resp) => resp,
        Err(err) => {
            warn!(url = %attachment.url, error = %err, "text file download request failed");
            return None;
        }
    };
    if !resp.status().is_success() {
        warn!(url = %attachment.url, status = %resp.status(), "text file download failed");
        return None;
    }

    // If the server announces an oversized body, bail out before buffering it.
    // NOTE(review): assumes HTTP_CLIENT is a reqwest client, whose responses
    // expose `content_length()` - confirm against its definition.
    if let Some(declared) = resp.content_length().filter(|&len| len > MAX_SIZE) {
        warn!(filename = %attachment.filename, size = declared, "text file Content-Length exceeds 512KB limit, skipping");
        return None;
    }

    let bytes = match resp.bytes().await {
        Ok(bytes) => bytes,
        Err(err) => {
            warn!(url = %attachment.url, error = %err, "failed to read text file body");
            return None;
        }
    };
    // usize -> u64 is a widening conversion on supported targets.
    let actual_size = bytes.len() as u64;

    // Defense-in-depth: the body may have arrived without a Content-Length
    // header, so verify the size that was actually downloaded.
    if actual_size > MAX_SIZE {
        warn!(filename = %attachment.filename, size = actual_size, "downloaded text file exceeds 512KB limit, skipping");
        return None;
    }

    // Lossy decode: invalid UTF-8 sequences become U+FFFD instead of causing
    // the file to be rejected. `into_owned` yields an owned String either way.
    let text = String::from_utf8_lossy(&bytes).into_owned();

    // Dynamic fence: keep adding backticks until the fence doesn't appear in
    // the content, so the file cannot close the code block early.
    let mut fence = "```".to_string();
    while text.contains(fence.as_str()) {
        fence.push('`');
    }

    debug!(filename = %attachment.filename, bytes = text.len(), "text file inlined");
    Some((
        ContentBlock::Text {
            text: format!("[File: {}]\n{fence}\n{}\n{fence}", attachment.filename, text),
        },
        actual_size,
    ))
}

/// Check if an attachment is an audio file (voice messages are typically audio/ogg).
fn is_audio_attachment(attachment: &serenity::model::channel::Attachment) -> bool {
    let mime = attachment.content_type.as_deref().unwrap_or("");