Skip to content

Commit cebeb41

Browse files
committed
fix(discord): address remaining review feedback on text file attachments
- Dynamic fence: loop until fence string absent from content, handles files with 4+ consecutive backticks (replaces static "````" check) - Eliminate double allocation: use from_utf8_lossy directly (zero-copy for valid UTF-8) instead of from_utf8(bytes.to_vec()) fallback chain - Accurate size accounting: download_and_read_text_file now returns Option<(ContentBlock, u64)> with actual bytes.len(), replacing attachment.size (Discord metadata) for text_file_bytes tracking - Explicit file count cap: TEXT_FILE_COUNT_CAP = 5 to bound latency when many small files are attached, alongside the existing 1 MB cap
1 parent 3ea3abf commit cebeb41

1 file changed

Lines changed: 23 additions & 12 deletions

File tree

src/discord.rs

Lines changed: 23 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -130,7 +130,9 @@ impl EventHandler for Handler {
130130
// Process attachments: route by content type (audio → STT, text file → inline, image → encode)
131131
if !msg.attachments.is_empty() {
132132
let mut text_file_bytes: u64 = 0;
133+
let mut text_file_count: usize = 0;
133134
const TEXT_TOTAL_CAP: u64 = 1024 * 1024; // 1 MB total for all text file attachments
135+
const TEXT_FILE_COUNT_CAP: usize = 5;
134136

135137
for attachment in &msg.attachments {
136138
if is_audio_attachment(attachment) {
@@ -145,12 +147,17 @@ impl EventHandler for Handler {
145147
debug!(filename = %attachment.filename, "skipping audio attachment (STT disabled)");
146148
}
147149
} else if is_text_attachment(attachment) {
150+
if text_file_count >= TEXT_FILE_COUNT_CAP {
151+
warn!(filename = %attachment.filename, count = text_file_count, "text file count cap reached, skipping");
152+
continue;
153+
}
148154
if text_file_bytes + u64::from(attachment.size) > TEXT_TOTAL_CAP {
149155
warn!(filename = %attachment.filename, total = text_file_bytes, "text attachments total exceeds 1MB cap, skipping remaining");
150156
continue;
151157
}
152-
if let Some(content_block) = download_and_read_text_file(attachment).await {
153-
text_file_bytes += u64::from(attachment.size);
158+
if let Some((content_block, actual_bytes)) = download_and_read_text_file(attachment).await {
159+
text_file_bytes += actual_bytes;
160+
text_file_count += 1;
154161
debug!(filename = %attachment.filename, "adding text file attachment");
155162
content_blocks.push(content_block);
156163
}
@@ -299,11 +306,12 @@ fn is_text_attachment(attachment: &serenity::model::channel::Attachment) -> bool
299306
TEXT_FILENAMES.contains(&attachment.filename.to_lowercase().as_str())
300307
}
301308

302-
/// Download a text-based file attachment and return it as a ContentBlock::Text.
309+
/// Download a text-based file attachment and return it as a ContentBlock::Text
310+
/// along with the actual downloaded byte count.
303311
/// Files larger than 512 KB are skipped to avoid bloating the prompt.
304312
async fn download_and_read_text_file(
305313
attachment: &serenity::model::channel::Attachment,
306-
) -> Option<ContentBlock> {
314+
) -> Option<(ContentBlock, u64)> {
307315
const MAX_SIZE: u64 = 512 * 1024; // 512 KB
308316

309317
if u64::from(attachment.size) > MAX_SIZE {
@@ -319,22 +327,25 @@ async fn download_and_read_text_file(
319327
let bytes = resp.bytes().await.ok()?;
320328

321329
// Defense-in-depth: verify actual download size
322-
if bytes.len() as u64 > MAX_SIZE {
330+
let actual_bytes = bytes.len() as u64;
331+
if actual_bytes > MAX_SIZE {
323332
warn!(filename = %attachment.filename, size = bytes.len(), "downloaded text file exceeds 512KB limit, skipping");
324333
return None;
325334
}
326335

327-
let text = String::from_utf8(bytes.to_vec()).unwrap_or_else(|_| {
328-
String::from_utf8_lossy(&bytes).into_owned()
329-
});
336+
// from_utf8_lossy returns Cow::Borrowed for valid UTF-8 (zero-copy path)
337+
let text = String::from_utf8_lossy(&bytes).into_owned();
330338

331-
// Use enough backticks to avoid conflicts with content that contains triple backticks
332-
let fence = if text.contains("```") { "````" } else { "```" };
339+
// Dynamically extend the fence until it no longer appears in the file content
340+
let mut fence = "```".to_string();
341+
while text.contains(fence.as_str()) {
342+
fence.push('`');
343+
}
333344

334345
debug!(filename = %attachment.filename, chars = text.len(), "text file inlined");
335-
Some(ContentBlock::Text {
346+
Some((ContentBlock::Text {
336347
text: format!("[File: {}]\n{fence}\n{}\n{fence}", attachment.filename, text),
337-
})
348+
}, actual_bytes))
338349
}
339350

340351
/// Check if an attachment is an audio file (voice messages are typically audio/ogg).

0 commit comments

Comments
 (0)