Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -146,4 +146,5 @@ data/
sessions/
backups/
uploads/
.uploads/
config/mcp.json
37 changes: 34 additions & 3 deletions src/bot/orchestrator.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
import re
import time
from dataclasses import dataclass, field
from datetime import UTC, datetime
from pathlib import Path
from typing import Any, Callable, Dict, List, Optional

Expand Down Expand Up @@ -1159,6 +1160,29 @@ async def agentic_text(
success=success,
)

async def _save_pdf_and_build_prompt(
self, document: Any, caption: Optional[str]
) -> str:
"""Save PDF to <approved_directory>/.uploads/ and build a prompt for Claude.

Returns prompt with an absolute path so Claude's Read tool works regardless
of cwd. Filename is prefixed with millisecond timestamp to avoid collisions.
"""
uploads_dir = Path(self.settings.approved_directory) / ".uploads"
uploads_dir.mkdir(parents=True, exist_ok=True)

timestamp = datetime.now(UTC).strftime("%Y%m%d-%H%M%S-%f")[:-3]
safe_name = f"{timestamp}-{document.file_name}"
target = uploads_dir / safe_name

tg_file = await document.get_file()
await tg_file.download_to_drive(str(target))

return (
f"{caption or 'PDF uploaded:'}\n\n"
f"File: `{target}`. Read it via Read tool and answer the user's question."
)

async def agentic_document(
self, update: Update, context: ContextTypes.DEFAULT_TYPE
) -> None:
Expand Down Expand Up @@ -1192,12 +1216,19 @@ async def agentic_document(
await chat.send_action("typing")
progress_msg = await update.message.reply_text("Working...")

prompt: Optional[str] = None

# Binary document formats are saved to disk for Claude to read via Read tool.
if document.file_name and document.file_name.lower().endswith(".pdf"):
prompt = await self._save_pdf_and_build_prompt(
document, update.message.caption
)

# Try enhanced file handler, fall back to basic
features = context.bot_data.get("features")
file_handler = features.get_file_handler() if features else None
prompt: Optional[str] = None

if file_handler:
if prompt is None and file_handler:
try:
processed_file = await file_handler.handle_document_upload(
document,
Expand All @@ -1208,7 +1239,7 @@ async def agentic_document(
except Exception:
file_handler = None

if not file_handler:
if prompt is None and not file_handler:
file = await document.get_file()
file_bytes = await file.download_as_bytearray()
try:
Expand Down
2 changes: 2 additions & 0 deletions src/security/validators.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,8 @@ class SecurityValidator:
".vue",
".svelte",
".lock",
# Document formats (binary, saved to disk for downstream tools)
".pdf",
}

# Forbidden filenames and patterns
Expand Down
62 changes: 62 additions & 0 deletions tests/unit/test_orchestrator.py
Original file line number Diff line number Diff line change
Expand Up @@ -367,6 +367,68 @@ async def test_agentic_document_rejects_large_files(agentic_settings, deps):
assert "too large" in call_args.args[0].lower()


async def test_agentic_document_pdf_saves_and_prompts_read(agentic_settings, deps):
    """A PDF upload is written to <approved_directory>/.uploads/ and the prompt
    handed to Claude carries the saved path, the caption, and a Read hint."""
    pdf_bytes = b"%PDF-1.4\nSMOKE-TOKEN-42\n%%EOF\n"
    uploads_dir = Path(agentic_settings.approved_directory) / ".uploads"

    async def write_payload(destination):
        # Stand-in for the Telegram download: drop the bytes at the given path.
        Path(destination).write_bytes(pdf_bytes)

    # Telegram file handle whose download writes the fake PDF to disk.
    telegram_file = MagicMock()
    telegram_file.download_to_drive = AsyncMock(side_effect=write_payload)

    # Message returned by reply_text ("Working..." progress message).
    status_message = AsyncMock()
    status_message.edit_text = AsyncMock()
    status_message.delete = AsyncMock()

    # Incoming update carrying the PDF document and its caption.
    update = MagicMock()
    update.effective_user.id = 123
    incoming = update.message
    incoming.document.file_name = "ticket.pdf"
    incoming.document.file_size = len(pdf_bytes)
    incoming.document.get_file = AsyncMock(return_value=telegram_file)
    incoming.caption = "ticket attached"
    incoming.chat.send_action = AsyncMock()
    incoming.reply_text = AsyncMock(return_value=status_message)

    # Canned Claude response.
    claude_reply = MagicMock()
    claude_reply.session_id = "pdf-session-1"
    claude_reply.content = "Read the PDF. Found SMOKE-TOKEN-42."
    claude_reply.tools_used = []

    claude_integration = AsyncMock()
    claude_integration.run_command = AsyncMock(return_value=claude_reply)

    context = MagicMock()
    context.user_data = {}
    context.bot_data = {
        "settings": agentic_settings,
        "security_validator": None,
        "features": None,
        "claude_integration": claude_integration,
    }

    orchestrator = MessageOrchestrator(agentic_settings, deps)
    await orchestrator.agentic_document(update, context)

    # Exactly one timestamp-prefixed copy with the original bytes must exist.
    saved_files = list(uploads_dir.glob("*-ticket.pdf"))
    assert len(saved_files) == 1, f"expected one saved PDF, got {saved_files}"
    stored_pdf = saved_files[0]
    assert stored_pdf.read_bytes() == pdf_bytes

    # The prompt may be passed by keyword or as the first positional argument.
    claude_integration.run_command.assert_awaited_once()
    invocation = claude_integration.run_command.call_args
    prompt = invocation.kwargs.get("prompt") or invocation.args[0]
    assert str(stored_pdf) in prompt
    assert "Read" in prompt
    assert "ticket attached" in prompt


async def test_agentic_voice_calls_claude(agentic_settings, deps):
"""Agentic voice handler transcribes and routes prompt to Claude."""
orchestrator = MessageOrchestrator(agentic_settings, deps)
Expand Down
7 changes: 7 additions & 0 deletions tests/unit/test_security/test_validators.py
Original file line number Diff line number Diff line change
Expand Up @@ -139,13 +139,20 @@ def test_filename_validation_valid(self, validator):
"style.css",
"data.sql",
"build.sh",
"report.pdf",
]

for filename in valid_filenames:
valid, error = validator.validate_filename(filename)
assert valid is True
assert error is None

def test_filename_pdf_exe_suffix_blocked(self, validator):
    """Regression guard: a `.pdf.exe` double extension must still be rejected."""
    outcome = validator.validate_filename("ticket.pdf.exe")
    is_valid, message = outcome
    # Allowing `.pdf` must not let an executable suffix slip through.
    assert is_valid is False
    assert "not allowed" in message

def test_filename_validation_invalid_extensions(self, validator):
"""Test rejection of invalid file extensions."""
invalid_filenames = [
Expand Down