Skip to content

Commit a5f5a47

Browse files
fix(discord): address final review feedback on text file attachments
- Remove application/typescript from TEXT_MIME_TYPES (not IANA-registered, .ts/.tsx covered by TEXT_EXTENSIONS) - Add comment explaining pre-check vs running total size accounting mismatch - Return actual downloaded bytes from download_and_read_text_file for accurate running total (was using Discord metadata) - Use from_utf8_lossy directly to eliminate double allocation - Dynamic fence escape: loop until fence absent from content - Add TEXT_FILE_COUNT_CAP = 5 to bound latency for many small files
1 parent 3ea3abf commit a5f5a47

1 file changed

Lines changed: 25 additions & 14 deletions

File tree

src/discord.rs

Lines changed: 25 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -130,7 +130,9 @@ impl EventHandler for Handler {
130130
// Process attachments: route by content type (audio → STT, text file → inline, image → encode)
131131
if !msg.attachments.is_empty() {
132132
let mut text_file_bytes: u64 = 0;
133+
let mut text_file_count: u32 = 0;
133134
const TEXT_TOTAL_CAP: u64 = 1024 * 1024; // 1 MB total for all text file attachments
135+
const TEXT_FILE_COUNT_CAP: u32 = 5;
134136

135137
for attachment in &msg.attachments {
136138
if is_audio_attachment(attachment) {
@@ -145,12 +147,19 @@ impl EventHandler for Handler {
145147
debug!(filename = %attachment.filename, "skipping audio attachment (STT disabled)");
146148
}
147149
} else if is_text_attachment(attachment) {
150+
if text_file_count >= TEXT_FILE_COUNT_CAP {
151+
warn!(filename = %attachment.filename, count = text_file_count, "text file count cap reached, skipping");
152+
continue;
153+
}
154+
// Pre-check with Discord-reported size (fast path, avoids unnecessary download).
155+
// Running total uses actual downloaded bytes for accurate accounting.
148156
if text_file_bytes + u64::from(attachment.size) > TEXT_TOTAL_CAP {
149157
warn!(filename = %attachment.filename, total = text_file_bytes, "text attachments total exceeds 1MB cap, skipping remaining");
150158
continue;
151159
}
152-
if let Some(content_block) = download_and_read_text_file(attachment).await {
153-
text_file_bytes += u64::from(attachment.size);
160+
if let Some((content_block, actual_bytes)) = download_and_read_text_file(attachment).await {
161+
text_file_bytes += actual_bytes;
162+
text_file_count += 1;
154163
debug!(filename = %attachment.filename, "adding text file attachment");
155164
content_blocks.push(content_block);
156165
}
@@ -273,7 +282,6 @@ const TEXT_MIME_TYPES: &[&str] = &[
273282
"application/json",
274283
"application/xml",
275284
"application/javascript",
276-
"application/typescript",
277285
"application/x-yaml",
278286
"application/x-sh",
279287
"application/toml",
@@ -303,7 +311,7 @@ fn is_text_attachment(attachment: &serenity::model::channel::Attachment) -> bool
303311
/// Files larger than 512 KB are skipped to avoid bloating the prompt.
304312
async fn download_and_read_text_file(
305313
attachment: &serenity::model::channel::Attachment,
306-
) -> Option<ContentBlock> {
314+
) -> Option<(ContentBlock, u64)> {
307315
const MAX_SIZE: u64 = 512 * 1024; // 512 KB
308316

309317
if u64::from(attachment.size) > MAX_SIZE {
@@ -317,24 +325,27 @@ async fn download_and_read_text_file(
317325
return None;
318326
}
319327
let bytes = resp.bytes().await.ok()?;
328+
let actual_size = bytes.len() as u64;
320329

321330
// Defense-in-depth: verify actual download size
322-
if bytes.len() as u64 > MAX_SIZE {
323-
warn!(filename = %attachment.filename, size = bytes.len(), "downloaded text file exceeds 512KB limit, skipping");
331+
if actual_size > MAX_SIZE {
332+
warn!(filename = %attachment.filename, size = actual_size, "downloaded text file exceeds 512KB limit, skipping");
324333
return None;
325334
}
326335

327-
let text = String::from_utf8(bytes.to_vec()).unwrap_or_else(|_| {
328-
String::from_utf8_lossy(&bytes).into_owned()
329-
});
336+
// from_utf8_lossy returns Cow::Borrowed for valid UTF-8 (zero-copy)
337+
let text = String::from_utf8_lossy(&bytes).into_owned();
330338

331-
// Use enough backticks to avoid conflicts with content that contains triple backticks
332-
let fence = if text.contains("```") { "````" } else { "```" };
339+
// Dynamic fence: keep adding backticks until the fence doesn't appear in content
340+
let mut fence = "```".to_string();
341+
while text.contains(fence.as_str()) {
342+
fence.push('`');
343+
}
333344

334-
debug!(filename = %attachment.filename, chars = text.len(), "text file inlined");
335-
Some(ContentBlock::Text {
345+
debug!(filename = %attachment.filename, bytes = text.len(), "text file inlined");
346+
Some((ContentBlock::Text {
336347
text: format!("[File: {}]\n{fence}\n{}\n{fence}", attachment.filename, text),
337-
})
348+
}, actual_size))
338349
}
339350

340351
/// Check if an attachment is an audio file (voice messages are typically audio/ogg).

0 commit comments

Comments
 (0)