From c4bcf20791da3e04cba7fdd79fa9e0fcdcea4a59 Mon Sep 17 00:00:00 2001 From: JARVIS-coding-Agent Date: Mon, 13 Apr 2026 12:20:13 +0000 Subject: [PATCH 1/4] feat(discord): inline text-based file attachments into prompt MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Download text-based file attachments (.txt, .csv, .log, .md, .json, .rs, .py, etc.) and inject their content as ContentBlock::Text blocks. - Add TEXT_EXTENSIONS whitelist and is_text_attachment() for MIME + ext detection - Add download_and_read_text_file() with 512KB size limit - Route: audio → text file → image → skip (non-image/non-text filtered) - UTF-8 with lossy fallback for non-UTF-8 files Closes #161 --- src/discord.rs | 55 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 55 insertions(+) diff --git a/src/discord.rs b/src/discord.rs index e267064e..e7a02f09 100644 --- a/src/discord.rs +++ b/src/discord.rs @@ -141,6 +141,11 @@ impl EventHandler for Handler { } else { debug!(filename = %attachment.filename, "skipping audio attachment (STT disabled)"); } + } else if is_text_attachment(attachment) { + if let Some(content_block) = download_and_read_text_file(attachment).await { + debug!(filename = %attachment.filename, "adding text file attachment"); + content_blocks.push(content_block); + } } else if let Some(content_block) = download_and_encode_image(attachment).await { debug!(url = %attachment.url, filename = %attachment.filename, "adding image attachment"); content_blocks.push(content_block); @@ -241,6 +246,56 @@ impl EventHandler for Handler { } } +/// Extensions recognised as text-based files that can be inlined into the prompt. 
+const TEXT_EXTENSIONS: &[&str] = &[ + "txt", "csv", "log", "md", "json", "jsonl", "yaml", "yml", "toml", "xml", + "rs", "py", "js", "ts", "jsx", "tsx", "go", "java", "c", "cpp", "h", "hpp", + "rb", "sh", "bash", "zsh", "fish", "ps1", "bat", "sql", "html", "css", + "scss", "less", "ini", "cfg", "conf", "env", "dockerfile", "makefile", +]; + +/// Check if an attachment is a text-based file we can inline. +fn is_text_attachment(attachment: &serenity::model::channel::Attachment) -> bool { + let mime = attachment.content_type.as_deref().unwrap_or(""); + if mime.starts_with("text/") || mime == "application/json" || mime == "application/xml" { + return true; + } + attachment + .filename + .rsplit('.') + .next() + .is_some_and(|ext| TEXT_EXTENSIONS.contains(&ext.to_lowercase().as_str())) +} + +/// Download a text-based file attachment and return it as a ContentBlock::Text. +/// Files larger than 512 KB are skipped to avoid bloating the prompt. +async fn download_and_read_text_file( + attachment: &serenity::model::channel::Attachment, +) -> Option { + const MAX_SIZE: u64 = 512 * 1024; // 512 KB + + if u64::from(attachment.size) > MAX_SIZE { + error!(filename = %attachment.filename, size = attachment.size, "text file exceeds 512KB limit"); + return None; + } + + let resp = HTTP_CLIENT.get(&attachment.url).send().await.ok()?; + if !resp.status().is_success() { + error!(url = %attachment.url, status = %resp.status(), "text file download failed"); + return None; + } + let bytes = resp.bytes().await.ok()?; + + let text = String::from_utf8(bytes.to_vec()).unwrap_or_else(|_| { + String::from_utf8_lossy(&bytes).into_owned() + }); + + debug!(filename = %attachment.filename, chars = text.len(), "text file inlined"); + Some(ContentBlock::Text { + text: format!("[File: {}]\n```\n{}\n```", attachment.filename, text), + }) +} + /// Check if an attachment is an audio file (voice messages are typically audio/ogg). 
fn is_audio_attachment(attachment: &serenity::model::channel::Attachment) -> bool { let mime = attachment.content_type.as_deref().unwrap_or(""); From 3ea3abf7a2ce0fa1ffb92c067bd758e366b1c077 Mon Sep 17 00:00:00 2001 From: JARVIS-coding-Agent Date: Mon, 13 Apr 2026 13:06:43 +0000 Subject: [PATCH 2/4] fix: address review feedback on text file attachments - Add 1MB total cap across all text file attachments to prevent prompt bloat - Verify actual download size (defense-in-depth, don't trust Discord metadata) - Expand MIME type coverage: application/javascript, x-yaml, x-sh, typescript, toml - Handle Dockerfile/Makefile via TEXT_FILENAMES exact match (no extension) - Downgrade error! to warn! for expected user-triggered size/download failures - Use 4 backticks when file content contains triple backticks --- src/discord.rs | 65 ++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 53 insertions(+), 12 deletions(-) diff --git a/src/discord.rs b/src/discord.rs index e7a02f09..f5a4834d 100644 --- a/src/discord.rs +++ b/src/discord.rs @@ -16,7 +16,7 @@ use serenity::prelude::*; use std::collections::HashSet; use std::sync::Arc; use tokio::sync::watch; -use tracing::{debug, error, info}; +use tracing::{debug, error, info, warn}; /// Reusable HTTP client for downloading Discord attachments. /// Built once with a 30s timeout and rustls TLS (no native-tls deps). 
@@ -127,8 +127,11 @@ impl EventHandler for Handler { text: prompt_with_sender.clone(), }); - // Process attachments: route by content type (audio → STT, image → encode) + // Process attachments: route by content type (audio → STT, text file → inline, image → encode) if !msg.attachments.is_empty() { + let mut text_file_bytes: u64 = 0; + const TEXT_TOTAL_CAP: u64 = 1024 * 1024; // 1 MB total for all text file attachments + for attachment in &msg.attachments { if is_audio_attachment(attachment) { if self.stt_config.enabled { @@ -142,7 +145,12 @@ impl EventHandler for Handler { debug!(filename = %attachment.filename, "skipping audio attachment (STT disabled)"); } } else if is_text_attachment(attachment) { + if text_file_bytes + u64::from(attachment.size) > TEXT_TOTAL_CAP { + warn!(filename = %attachment.filename, total = text_file_bytes, "text attachments total exceeds 1MB cap, skipping remaining"); + continue; + } if let Some(content_block) = download_and_read_text_file(attachment).await { + text_file_bytes += u64::from(attachment.size); debug!(filename = %attachment.filename, "adding text file attachment"); content_blocks.push(content_block); } @@ -251,20 +259,44 @@ const TEXT_EXTENSIONS: &[&str] = &[ "txt", "csv", "log", "md", "json", "jsonl", "yaml", "yml", "toml", "xml", "rs", "py", "js", "ts", "jsx", "tsx", "go", "java", "c", "cpp", "h", "hpp", "rb", "sh", "bash", "zsh", "fish", "ps1", "bat", "sql", "html", "css", - "scss", "less", "ini", "cfg", "conf", "env", "dockerfile", "makefile", + "scss", "less", "ini", "cfg", "conf", "env", +]; + +/// Exact filenames (no extension) recognised as text files. +const TEXT_FILENAMES: &[&str] = &[ + "dockerfile", "makefile", "justfile", "rakefile", "gemfile", + "procfile", "vagrantfile", ".gitignore", ".dockerignore", ".editorconfig", +]; + +/// MIME types recognised as text-based (beyond `text/*`). 
+const TEXT_MIME_TYPES: &[&str] = &[ + "application/json", + "application/xml", + "application/javascript", + "application/typescript", + "application/x-yaml", + "application/x-sh", + "application/toml", + "application/x-toml", ]; /// Check if an attachment is a text-based file we can inline. fn is_text_attachment(attachment: &serenity::model::channel::Attachment) -> bool { let mime = attachment.content_type.as_deref().unwrap_or(""); - if mime.starts_with("text/") || mime == "application/json" || mime == "application/xml" { + let mime_base = mime.split(';').next().unwrap_or(mime).trim(); + if mime_base.starts_with("text/") || TEXT_MIME_TYPES.contains(&mime_base) { return true; } - attachment - .filename - .rsplit('.') - .next() - .is_some_and(|ext| TEXT_EXTENSIONS.contains(&ext.to_lowercase().as_str())) + // Check extension + if attachment.filename.contains('.') { + if let Some(ext) = attachment.filename.rsplit('.').next() { + if TEXT_EXTENSIONS.contains(&ext.to_lowercase().as_str()) { + return true; + } + } + } + // Check exact filename (Dockerfile, Makefile, etc.) + TEXT_FILENAMES.contains(&attachment.filename.to_lowercase().as_str()) } /// Download a text-based file attachment and return it as a ContentBlock::Text. 
@@ -275,24 +307,33 @@ async fn download_and_read_text_file( const MAX_SIZE: u64 = 512 * 1024; // 512 KB if u64::from(attachment.size) > MAX_SIZE { - error!(filename = %attachment.filename, size = attachment.size, "text file exceeds 512KB limit"); + warn!(filename = %attachment.filename, size = attachment.size, "text file exceeds 512KB limit, skipping"); return None; } let resp = HTTP_CLIENT.get(&attachment.url).send().await.ok()?; if !resp.status().is_success() { - error!(url = %attachment.url, status = %resp.status(), "text file download failed"); + warn!(url = %attachment.url, status = %resp.status(), "text file download failed"); return None; } let bytes = resp.bytes().await.ok()?; + // Defense-in-depth: verify actual download size + if bytes.len() as u64 > MAX_SIZE { + warn!(filename = %attachment.filename, size = bytes.len(), "downloaded text file exceeds 512KB limit, skipping"); + return None; + } + let text = String::from_utf8(bytes.to_vec()).unwrap_or_else(|_| { String::from_utf8_lossy(&bytes).into_owned() }); + // Use enough backticks to avoid conflicts with content that contains triple backticks + let fence = if text.contains("```") { "````" } else { "```" }; + debug!(filename = %attachment.filename, chars = text.len(), "text file inlined"); Some(ContentBlock::Text { - text: format!("[File: {}]\n```\n{}\n```", attachment.filename, text), + text: format!("[File: {}]\n{fence}\n{}\n{fence}", attachment.filename, text), }) } From a5f5a476b9eff1bc1c5ed3cf7b9e7eaf249f61ea Mon Sep 17 00:00:00 2001 From: JARVIS-coding-Agent Date: Wed, 15 Apr 2026 06:36:49 +0000 Subject: [PATCH 3/4] fix(discord): address final review feedback on text file attachments - Remove application/typescript from TEXT_MIME_TYPES (not IANA-registered, .ts/.tsx covered by TEXT_EXTENSIONS) - Add comment explaining pre-check vs running total size accounting mismatch - Return actual downloaded bytes from download_and_read_text_file for accurate running total (was using Discord metadata) - Use 
from_utf8_lossy directly to eliminate double allocation - Dynamic fence escape: loop until fence absent from content - Add TEXT_FILE_COUNT_CAP = 5 to bound latency for many small files --- src/discord.rs | 39 +++++++++++++++++++++++++-------------- 1 file changed, 25 insertions(+), 14 deletions(-) diff --git a/src/discord.rs b/src/discord.rs index f5a4834d..9e9eda41 100644 --- a/src/discord.rs +++ b/src/discord.rs @@ -130,7 +130,9 @@ impl EventHandler for Handler { // Process attachments: route by content type (audio → STT, text file → inline, image → encode) if !msg.attachments.is_empty() { let mut text_file_bytes: u64 = 0; + let mut text_file_count: u32 = 0; const TEXT_TOTAL_CAP: u64 = 1024 * 1024; // 1 MB total for all text file attachments + const TEXT_FILE_COUNT_CAP: u32 = 5; for attachment in &msg.attachments { if is_audio_attachment(attachment) { @@ -145,12 +147,19 @@ impl EventHandler for Handler { debug!(filename = %attachment.filename, "skipping audio attachment (STT disabled)"); } } else if is_text_attachment(attachment) { + if text_file_count >= TEXT_FILE_COUNT_CAP { + warn!(filename = %attachment.filename, count = text_file_count, "text file count cap reached, skipping"); + continue; + } + // Pre-check with Discord-reported size (fast path, avoids unnecessary download). + // Running total uses actual downloaded bytes for accurate accounting. 
if text_file_bytes + u64::from(attachment.size) > TEXT_TOTAL_CAP { warn!(filename = %attachment.filename, total = text_file_bytes, "text attachments total exceeds 1MB cap, skipping remaining"); continue; } - if let Some(content_block) = download_and_read_text_file(attachment).await { - text_file_bytes += u64::from(attachment.size); + if let Some((content_block, actual_bytes)) = download_and_read_text_file(attachment).await { + text_file_bytes += actual_bytes; + text_file_count += 1; debug!(filename = %attachment.filename, "adding text file attachment"); content_blocks.push(content_block); } @@ -273,7 +282,6 @@ const TEXT_MIME_TYPES: &[&str] = &[ "application/json", "application/xml", "application/javascript", - "application/typescript", "application/x-yaml", "application/x-sh", "application/toml", @@ -303,7 +311,7 @@ fn is_text_attachment(attachment: &serenity::model::channel::Attachment) -> bool /// Files larger than 512 KB are skipped to avoid bloating the prompt. async fn download_and_read_text_file( attachment: &serenity::model::channel::Attachment, -) -> Option { +) -> Option<(ContentBlock, u64)> { const MAX_SIZE: u64 = 512 * 1024; // 512 KB if u64::from(attachment.size) > MAX_SIZE { @@ -317,24 +325,27 @@ async fn download_and_read_text_file( return None; } let bytes = resp.bytes().await.ok()?; + let actual_size = bytes.len() as u64; // Defense-in-depth: verify actual download size - if bytes.len() as u64 > MAX_SIZE { - warn!(filename = %attachment.filename, size = bytes.len(), "downloaded text file exceeds 512KB limit, skipping"); + if actual_size > MAX_SIZE { + warn!(filename = %attachment.filename, size = actual_size, "downloaded text file exceeds 512KB limit, skipping"); return None; } - let text = String::from_utf8(bytes.to_vec()).unwrap_or_else(|_| { - String::from_utf8_lossy(&bytes).into_owned() - }); + // from_utf8_lossy borrows valid UTF-8 (Cow::Borrowed) without copying; into_owned() then makes the single copy into an owned String + let text = String::from_utf8_lossy(&bytes).into_owned(); - // Use enough 
backticks to avoid conflicts with content that contains triple backticks - let fence = if text.contains("```") { "````" } else { "```" }; + // Dynamic fence: keep adding backticks until the fence doesn't appear in content + let mut fence = "```".to_string(); + while text.contains(fence.as_str()) { + fence.push('`'); + } - debug!(filename = %attachment.filename, chars = text.len(), "text file inlined"); - Some(ContentBlock::Text { + debug!(filename = %attachment.filename, bytes = text.len(), "text file inlined"); + Some((ContentBlock::Text { text: format!("[File: {}]\n{fence}\n{}\n{fence}", attachment.filename, text), - }) + }, actual_size)) } /// Check if an attachment is an audio file (voice messages are typically audio/ogg). From 6d206477d20b14d16df1f2c82d698b8a7563cff8 Mon Sep 17 00:00:00 2001 From: JARVIS-coding-Agent Date: Wed, 15 Apr 2026 12:56:31 +0000 Subject: [PATCH 4/4] fix(discord): address pre-review nits for text-file attachment feature MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Fix stale comment: "no image attachments" → "no attachments" - Add doc comment to download_and_read_text_file clarifying the per-file MAX_SIZE check is intentional defense-in-depth - Update PR description to document .env inclusion tradeoff --- src/discord.rs | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/discord.rs b/src/discord.rs index af0949f1..a459ceb0 100644 --- a/src/discord.rs +++ b/src/discord.rs @@ -172,7 +172,7 @@ impl EventHandler for Handler { msg.content.trim().to_string() }; - // No text and no image attachments → skip to avoid wasting session slots + // No text and no attachments → skip to avoid wasting session slots if prompt.is_empty() && msg.attachments.is_empty() { return; } @@ -397,6 +397,10 @@ fn is_text_attachment(attachment: &serenity::model::channel::Attachment) -> bool /// Download a text-based file attachment and return it as a ContentBlock::Text. 
/// Files larger than 512 KB are skipped to avoid bloating the prompt. +/// +/// Note: the caller already guards total size via TEXT_TOTAL_CAP; the per-file +/// MAX_SIZE check here is intentional defense-in-depth so this function remains +/// self-contained and safe when called from other contexts. async fn download_and_read_text_file( attachment: &serenity::model::channel::Attachment, ) -> Option<(ContentBlock, u64)> {