Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
115 changes: 113 additions & 2 deletions src/discord.rs
Original file line number Diff line number Diff line change
Expand Up @@ -172,7 +172,7 @@ impl EventHandler for Handler {
msg.content.trim().to_string()
};

// No text and no image attachments → skip to avoid wasting session slots
// No text and no attachments → skip to avoid wasting session slots
if prompt.is_empty() && msg.attachments.is_empty() {
return;
}
Expand Down Expand Up @@ -204,9 +204,14 @@ impl EventHandler for Handler {
text: prompt_with_sender.clone(),
});

// Process attachments: route by content type (audio → STT, image → encode)
// Process attachments: route by content type (audio → STT, text file → inline, image → encode)
let mut audio_skipped = false;
if !msg.attachments.is_empty() {
let mut text_file_bytes: u64 = 0;
let mut text_file_count: u32 = 0;
const TEXT_TOTAL_CAP: u64 = 1024 * 1024; // 1 MB total for all text file attachments
const TEXT_FILE_COUNT_CAP: u32 = 5;

for attachment in &msg.attachments {
if is_audio_attachment(attachment) {
if self.stt_config.enabled {
Expand All @@ -220,6 +225,23 @@ impl EventHandler for Handler {
warn!(filename = %attachment.filename, "skipping audio attachment (STT disabled)");
audio_skipped = true;
}
} else if is_text_attachment(attachment) {
if text_file_count >= TEXT_FILE_COUNT_CAP {
warn!(filename = %attachment.filename, count = text_file_count, "text file count cap reached, skipping");
continue;
}
// Pre-check with Discord-reported size (fast path, avoids unnecessary download).
// Running total uses actual downloaded bytes for accurate accounting.
if text_file_bytes + u64::from(attachment.size) > TEXT_TOTAL_CAP {
warn!(filename = %attachment.filename, total = text_file_bytes, "text attachments total exceeds 1MB cap, skipping remaining");
continue;
}
if let Some((content_block, actual_bytes)) = download_and_read_text_file(attachment).await {
text_file_bytes += actual_bytes;
text_file_count += 1;
debug!(filename = %attachment.filename, "adding text file attachment");
content_blocks.push(content_block);
}
} else if let Some(content_block) = download_and_encode_image(attachment).await {
debug!(url = %attachment.url, filename = %attachment.filename, "adding image attachment");
content_blocks.push(content_block);
Expand Down Expand Up @@ -329,6 +351,95 @@ impl EventHandler for Handler {
}
}

/// File extensions (lowercase, without the leading dot) treated as text that
/// can be inlined into the prompt.
///
/// The lookup lowercases the candidate extension before comparing, so every
/// entry here must be lowercase.
const TEXT_EXTENSIONS: &[&str] = &[
    // Plain text, documents and structured data
    "txt", "csv", "log", "md", "json", "jsonl", "yaml", "yml", "toml", "xml",
    // General-purpose programming languages
    "rs", "py", "js", "ts", "jsx", "tsx", "go", "java", "c", "cpp", "h", "hpp", "rb",
    // Shell and batch scripts
    "sh", "bash", "zsh", "fish", "ps1", "bat",
    // Query, markup and stylesheets
    "sql", "html", "css", "scss", "less",
    // Configuration
    "ini", "cfg", "conf", "env",
];

/// Whole filenames (lowercase) treated as text files even though they carry no
/// recognised extension.
///
/// The lookup compares against the lowercased attachment filename, so every
/// entry here must be lowercase.
const TEXT_FILENAMES: &[&str] = &[
    // Build and tooling manifests
    "dockerfile",
    "makefile",
    "justfile",
    "rakefile",
    "gemfile",
    "procfile",
    "vagrantfile",
    // Dotfiles
    ".gitignore",
    ".dockerignore",
    ".editorconfig",
];

/// Non-`text/*` MIME types that nevertheless denote text-based content.
///
/// Entries are compared against the attachment's MIME type after any `;`
/// parameters (e.g. a charset) have been stripped.
const TEXT_MIME_TYPES: &[&str] = &[
    // Structured data and markup
    "application/json", "application/xml",
    // Scripts
    "application/javascript",
    // Config formats and shell (registered and `x-` spellings)
    "application/x-yaml", "application/x-sh",
    "application/toml", "application/x-toml",
];

/// Check whether an attachment is a text-based file that can be inlined.
///
/// An attachment qualifies when any of the following holds (checked in order):
/// 1. its MIME type, with parameters such as `; charset=utf-8` stripped, starts
///    with `text/` or is listed in `TEXT_MIME_TYPES`;
/// 2. the part of its filename after the last `.` is listed in `TEXT_EXTENSIONS`;
/// 3. its whole filename is listed in `TEXT_FILENAMES`.
///
/// All comparisons are case-insensitive. Media type and subtype names are
/// case-insensitive (RFC 2045 §5.1), so `Text/Plain` must match exactly like
/// `text/plain`.
fn is_text_attachment(attachment: &serenity::model::channel::Attachment) -> bool {
    let mime = attachment.content_type.as_deref().unwrap_or("");
    // Drop any parameters and normalise case before matching.
    let mime_base = mime
        .split(';')
        .next()
        .unwrap_or(mime)
        .trim()
        .to_ascii_lowercase();
    if mime_base.starts_with("text/") || TEXT_MIME_TYPES.contains(&mime_base.as_str()) {
        return true;
    }

    // Lowercase once; both the extension and the exact-name lookups need it.
    let filename = attachment.filename.to_lowercase();

    // Extension check: only applies when the name actually contains a dot.
    if let Some((_, ext)) = filename.rsplit_once('.') {
        if TEXT_EXTENSIONS.contains(&ext) {
            return true;
        }
    }

    // Exact filename check (Dockerfile, Makefile, etc.).
    TEXT_FILENAMES.contains(&filename.as_str())
}

/// Download a text-based file attachment and return it as a ContentBlock::Text.
/// Files larger than 512 KB are skipped to avoid bloating the prompt.
///
/// Note: the caller already guards total size via TEXT_TOTAL_CAP; the per-file
/// MAX_SIZE check here is intentional defense-in-depth so this function remains
/// self-contained and safe when called from other contexts.
async fn download_and_read_text_file(
attachment: &serenity::model::channel::Attachment,
) -> Option<(ContentBlock, u64)> {
const MAX_SIZE: u64 = 512 * 1024; // 512 KB

if u64::from(attachment.size) > MAX_SIZE {
warn!(filename = %attachment.filename, size = attachment.size, "text file exceeds 512KB limit, skipping");
return None;
}

let resp = HTTP_CLIENT.get(&attachment.url).send().await.ok()?;
if !resp.status().is_success() {
warn!(url = %attachment.url, status = %resp.status(), "text file download failed");
return None;
}
let bytes = resp.bytes().await.ok()?;
let actual_size = bytes.len() as u64;

// Defense-in-depth: verify actual download size
if actual_size > MAX_SIZE {
warn!(filename = %attachment.filename, size = actual_size, "downloaded text file exceeds 512KB limit, skipping");
return None;
}

// from_utf8_lossy returns Cow::Borrowed for valid UTF-8 (zero-copy)
let text = String::from_utf8_lossy(&bytes).into_owned();

// Dynamic fence: keep adding backticks until the fence doesn't appear in content
let mut fence = "```".to_string();
while text.contains(fence.as_str()) {
fence.push('`');
}

debug!(filename = %attachment.filename, bytes = text.len(), "text file inlined");
Some((ContentBlock::Text {
text: format!("[File: {}]\n{fence}\n{}\n{fence}", attachment.filename, text),
}, actual_size))
}

/// Check if an attachment is an audio file (voice messages are typically audio/ogg).
fn is_audio_attachment(attachment: &serenity::model::channel::Attachment) -> bool {
let mime = attachment.content_type.as_deref().unwrap_or("");
Expand Down
Loading