diff --git a/apps/web/src/components/attachment-chip.tsx b/apps/web/src/components/attachment-chip.tsx new file mode 100644 index 0000000..d68d83b --- /dev/null +++ b/apps/web/src/components/attachment-chip.tsx @@ -0,0 +1,55 @@ +import { FileText, Loader2, Music, X } from "lucide-react"; +import type { PendingAttachment } from "../hooks/use-file-attachments"; + +export function AttachmentChip({ + attachment, + onRemove, +}: { + attachment: PendingAttachment; + onRemove: () => void; +}) { + const isImage = attachment.mimeType.startsWith("image/"); + const isPdf = attachment.mimeType === "application/pdf"; + const isAudio = attachment.mimeType.startsWith("audio/"); + + const Icon = isAudio ? Music : FileText; + + return ( +
+ {isImage && attachment.previewUrl ? ( + {attachment.fileName} + ) : (isPdf || isAudio) ? ( +
+ + + {attachment.fileName} + +
+ ) : null} + + {attachment.status === "uploading" && ( +
+ +
+ )} + + {attachment.status === "error" && ( +
+ Error +
+ )} + + +
+ ); +} diff --git a/apps/web/src/components/message-attachments.tsx b/apps/web/src/components/message-attachments.tsx new file mode 100644 index 0000000..d01222c --- /dev/null +++ b/apps/web/src/components/message-attachments.tsx @@ -0,0 +1,102 @@ +import { convexQuery } from "@convex-dev/react-query"; +import { api } from "@harness/convex-backend/convex/_generated/api"; +import type { Id } from "@harness/convex-backend/convex/_generated/dataModel"; +import { useQuery } from "@tanstack/react-query"; +import { FileText, Music } from "lucide-react"; +import { + Dialog, + DialogContent, + DialogTrigger, +} from "./ui/dialog"; + +interface Attachment { + storageId: Id<"_storage">; + mimeType: string; + fileName: string; + fileSize: number; +} + +function AttachmentItem({ attachment }: { attachment: Attachment }) { + const { data: url } = useQuery( + convexQuery(api.files.getFileUrl, { storageId: attachment.storageId }), + ); + + if (!url) return null; + + const square = "h-16 w-16 shrink-0 overflow-hidden rounded-lg border border-border"; + + if (attachment.mimeType.startsWith("image/")) { + return ( + + + + + + {attachment.fileName} + + + ); + } + + if (attachment.mimeType === "application/pdf") { + return ( + + + + {attachment.fileName} + + + ); + } + + if (attachment.mimeType.startsWith("audio/")) { + return ( +
+
+ + + {attachment.fileName} + +
+ {/* eslint-disable-next-line jsx-a11y/media-has-caption */} +
+ ); + } + + return null; +} + +export function MessageAttachments({ + attachments, +}: { + attachments: Attachment[]; +}) { + if (attachments.length === 0) return null; + + return ( +
+ {attachments.map((a) => ( + + ))} +
+ ); +} diff --git a/apps/web/src/hooks/use-file-attachments.ts b/apps/web/src/hooks/use-file-attachments.ts new file mode 100644 index 0000000..65c1015 --- /dev/null +++ b/apps/web/src/hooks/use-file-attachments.ts @@ -0,0 +1,165 @@ +import { api } from "@harness/convex-backend/convex/_generated/api"; +import type { Id } from "@harness/convex-backend/convex/_generated/dataModel"; +import { useConvex } from "convex/react"; +import { useCallback, useRef, useState } from "react"; +import toast from "react-hot-toast"; + +const MAX_IMAGE_BYTES = 10 * 1024 * 1024; // 10 MB +const MAX_PDF_BYTES = 20 * 1024 * 1024; // 20 MB +const MAX_AUDIO_BYTES = 25 * 1024 * 1024; // 25 MB +const MAX_ATTACHMENTS = 5; + +function getMaxBytes(mimeType: string): number { + if (mimeType === "application/pdf") return MAX_PDF_BYTES; + if (mimeType.startsWith("audio/")) return MAX_AUDIO_BYTES; + return MAX_IMAGE_BYTES; +} + +function getSizeLabel(mimeType: string): string { + if (mimeType === "application/pdf") return "20 MB"; + if (mimeType.startsWith("audio/")) return "25 MB"; + return "10 MB"; +} + +export interface PendingAttachment { + localId: string; + previewUrl: string | null; // object URL for images, null for PDFs/audio + mimeType: string; + status: "uploading" | "ready" | "error"; + storageId?: string; + fileName: string; + fileSize: number; +} + +/** + * @param allowedMimes – the set of MIME types the current model accepts. + * Passed in from the caller so validation stays in sync with model capabilities. + */ +export function useFileAttachments(allowedMimes: Set) { + const [attachments, setAttachments] = useState([]); + const convex = useConvex(); + const localIdCounter = useRef(0); + + const uploadOne = useCallback( + async (file: File, localId: string) => { + try { + const uploadUrl = await convex.mutation( + api.files.generateUploadUrl, + {}, + ); + const res = await fetch(uploadUrl, { + method: "POST", + headers: { "Content-Type": file.type }, + body: file, + }); + if (!res.ok) throw new Error("Upload failed"); + const { storageId } = await res.json(); + setAttachments((prev) => + prev.map((a) => + a.localId === localId ? { ...a, status: "ready", storageId } : a, + ), + ); + } catch { + setAttachments((prev) => + prev.map((a) => + a.localId === localId ? { ...a, status: "error" } : a, + ), + ); + toast.error(`Failed to upload ${file.name}`); + } + }, + [convex], + ); + + const addFiles = useCallback( + (files: File[]) => { + let current = attachments.length; + for (const file of files) { + if (current >= MAX_ATTACHMENTS) { + toast.error(`Maximum ${MAX_ATTACHMENTS} attachments per message`); + break; + } + if (!allowedMimes.has(file.type)) { + toast.error(`${file.name}: not supported by this model`); + continue; + } + const maxBytes = getMaxBytes(file.type); + if (file.size > maxBytes) { + toast.error(`${file.name}: exceeds ${getSizeLabel(file.type)} limit`); + continue; + } + + const localId = String(++localIdCounter.current); + const previewUrl = file.type.startsWith("image/") + ? URL.createObjectURL(file) + : null; + + setAttachments((prev) => [ + ...prev, + { + localId, + previewUrl, + mimeType: file.type, + status: "uploading", + fileName: file.name, + fileSize: file.size, + }, + ]); + + // Fire and forget — state updates happen inside uploadOne + uploadOne(file, localId); + current++; + } + }, + [attachments.length, uploadOne, allowedMimes], + ); + + const removeAttachment = useCallback((localId: string) => { + setAttachments((prev) => { + const target = prev.find((a) => a.localId === localId); + if (target?.previewUrl) URL.revokeObjectURL(target.previewUrl); + return prev.filter((a) => a.localId !== localId); + }); + }, []); + + const clearAttachments = useCallback(() => { + setAttachments((prev) => { + for (const a of prev) { + if (a.previewUrl) URL.revokeObjectURL(a.previewUrl); + } + return []; + }); + }, []); + + const hasUploading = attachments.some((a) => a.status === "uploading"); + + const resolveSignedUrls = useCallback( + async ( + readyAttachments: Array<{ + storageId: string; + mimeType: string; + fileName: string; + }>, + ): Promise> => { + const results = await Promise.all( + readyAttachments.map(async (a) => { + const url = await convex.query(api.files.getFileUrl, { + storageId: a.storageId as Id<"_storage">, + }); + return url ? { url, mime_type: a.mimeType, file_name: a.fileName } : null; + }), + ); + return results.filter((r): r is NonNullable => r !== null); + }, + [convex], + ); + + return { + attachments, + addFiles, + removeAttachment, + clearAttachments, + hasUploading, + resolveSignedUrls, + }; +} diff --git a/apps/web/src/lib/models.ts b/apps/web/src/lib/models.ts index 72fcc48..188c958 100644 --- a/apps/web/src/lib/models.ts +++ b/apps/web/src/lib/models.ts @@ -1,20 +1,84 @@ -export const MODELS = [ - { value: "openai/gpt-5.4", label: "GPT-5.4" }, - { value: "gpt-4o", label: "GPT-4o" }, - { value: "gpt-4.1", label: "GPT-4.1" }, - { value: "gpt-4.1-mini", label: "GPT-4.1 Mini" }, - { value: "claude-sonnet-4", label: "Claude Sonnet 4" }, - { value: "claude-sonnet-4-thinking", label: "Claude Sonnet 4 (Thinking)" }, - { value: "claude-opus-4", label: "Claude Opus 4" }, - { value: "claude-opus-4-thinking", label: "Claude Opus 4 (Thinking)" }, - { - value: "google/gemini-3.1-flash-lite-preview", - label: "Gemini 3.1 Flash Lite Preview", - }, - { value: "gemini-2.5-pro", label: "Gemini 2.5 Pro" }, - { value: "gemini-2.5-flash", label: "Gemini 2.5 Flash" }, - { value: "deepseek-r1", label: "DeepSeek R1" }, - { value: "deepseek-v3", label: "DeepSeek V3" }, - { value: "grok-3", label: "Grok 3" }, - { value: "grok-3-mini", label: "Grok 3 Mini" }, +// Per-model modality support on OpenRouter +// "image" = image inputs, "pdf" = PDF inputs, "audio" = audio inputs +type Modality = "image" | "pdf" | "audio"; + +export const MODELS: Array<{ + value: string; + label: string; + modalities: Modality[]; +}> = [ + // Audio input: only Gemini models are confirmed on OpenRouter + { value: "openai/gpt-5.4", label: "GPT-5.4", modalities: ["image", "pdf"] }, + { value: "gpt-4o", label: "GPT-4o", modalities: ["image", "pdf"] }, + { value: "gpt-4.1", label: "GPT-4.1", modalities: ["image", "pdf"] }, + { value: "gpt-4.1-mini", label: "GPT-4.1 Mini", modalities: ["image", "pdf"] }, + { value: "claude-sonnet-4", label: "Claude Sonnet 4", modalities: ["image", "pdf"] }, + { value: "claude-sonnet-4-thinking", label: "Claude Sonnet 4 (Thinking)", modalities: ["image", "pdf"] }, + { value: "claude-opus-4", label: "Claude Opus 4", modalities: ["image", "pdf"] }, + { value: "claude-opus-4-thinking", label: "Claude Opus 4 (Thinking)", modalities: ["image", "pdf"] }, + { value: "google/gemini-3.1-flash-lite-preview", label: "Gemini 3.1 Flash Lite Preview", modalities: ["image", "pdf", "audio"] }, + { value: "gemini-2.5-pro", label: "Gemini 2.5 Pro", modalities: ["image", "pdf", "audio"] }, + { value: "gemini-2.5-flash", label: "Gemini 2.5 Flash", modalities: ["image", "pdf", "audio"] }, + { value: "deepseek-r1", label: "DeepSeek R1", modalities: [] }, + { value: "deepseek-v3", label: "DeepSeek V3", modalities: [] }, + { value: "grok-3", label: "Grok 3", modalities: ["image"] }, + { value: "grok-3-mini", label: "Grok 3 Mini", modalities: [] }, ]; + +// Lookup index built once from the MODELS array +const modalityIndex = new Map(MODELS.map((m) => [m.value, new Set(m.modalities)])); + +function modelHas(model: string | undefined, modality: Modality): boolean { + if (!model) return false; + return modalityIndex.get(model)?.has(modality) ?? false; +} + +export function modelSupportsMedia(model: string | undefined): boolean { + return modelHas(model, "image"); +} + +export function modelSupportsAudio(model: string | undefined): boolean { + return modelHas(model, "audio"); +} + +// ── MIME mappings ──────────────────────────────────────────────────── + +const IMAGE_MIMES = ["image/png", "image/jpeg", "image/gif", "image/webp"]; +const PDF_MIMES = ["application/pdf"]; +const AUDIO_MIMES = [ + "audio/wav", "audio/mpeg", "audio/mp3", "audio/aiff", + "audio/aac", "audio/ogg", "audio/flac", "audio/mp4", + "audio/x-m4a", "audio/webm", +]; + +/** Returns the set of MIME types the model can accept, or empty set if none. */ +export function allowedMimeTypes(model: string | undefined): Set { + const mimes: string[] = []; + if (modelHas(model, "image")) mimes.push(...IMAGE_MIMES); + if (modelHas(model, "pdf")) mimes.push(...PDF_MIMES); + if (modelHas(model, "audio")) mimes.push(...AUDIO_MIMES); + return new Set(mimes); +} + +/** Returns an `accept` string for based on model capabilities. */ +export function acceptString(model: string | undefined): string { + return [...allowedMimeTypes(model)].join(","); +} + +/** Map MIME type to OpenRouter audio format identifier */ +const AUDIO_FORMAT_MAP: Record = { + "audio/wav": "wav", + "audio/mpeg": "mp3", + "audio/mp3": "mp3", + "audio/aiff": "aiff", + "audio/aac": "aac", + "audio/ogg": "ogg", + "audio/flac": "flac", + "audio/mp4": "m4a", + "audio/x-m4a": "m4a", + "audio/webm": "webm", +}; + +export function mimeToAudioFormat(mime: string): string { + return AUDIO_FORMAT_MAP[mime] ?? "wav"; +} diff --git a/apps/web/src/lib/multimodal.ts b/apps/web/src/lib/multimodal.ts new file mode 100644 index 0000000..9d7c864 --- /dev/null +++ b/apps/web/src/lib/multimodal.ts @@ -0,0 +1,64 @@ +import { mimeToAudioFormat } from "./models"; + +type ContentPart = Record; +export type MessageContent = string | ContentPart[]; + +/** Read a Blob into a raw base64 string (no data-URL prefix). */ +function blobToBase64(blob: Blob): Promise { + return new Promise((resolve, reject) => { + const reader = new FileReader(); + reader.onloadend = () => { + const result = reader.result as string; + resolve(result.split(",")[1]); + }; + reader.onerror = () => reject(new Error("Failed to read audio data")); + reader.readAsDataURL(blob); + }); +} + +type ResolveSignedUrls = ( + atts: Array<{ storageId: string; mimeType: string; fileName: string }>, +) => Promise>; + +/** + * Build an OpenRouter multimodal content array from text + attachment metadata. + * Returns a plain string when there are no attachments. + */ +export async function buildMultimodalContent( + text: string, + atts: Array<{ storageId: string; mimeType: string; fileName: string }>, + resolveSignedUrls: ResolveSignedUrls, +): Promise { + const signed = await resolveSignedUrls(atts); + if (signed.length === 0) return text; + + const parts: ContentPart[] = []; + if (text) parts.push({ type: "text", text }); + + for (const a of signed) { + if (a.mime_type.startsWith("image/")) { + parts.push({ type: "image_url", image_url: { url: a.url } }); + } else if (a.mime_type === "application/pdf") { + parts.push({ type: "file", file: { filename: a.file_name, file_data: a.url } }); + } else if (a.mime_type.startsWith("audio/")) { + try { + const res = await fetch(a.url); + if (!res.ok) throw new Error(`HTTP ${res.status}`); + const blob = await res.blob(); + const base64 = await blobToBase64(blob); + parts.push({ + type: "input_audio", + input_audio: { + data: base64, + format: mimeToAudioFormat(a.mime_type), + }, + }); + } catch (err) { + const msg = err instanceof Error ? err.message : "unknown error"; + throw new Error(`Failed to encode audio "${a.file_name}": ${msg}`); + } + } + } + + return parts; +} diff --git a/apps/web/src/lib/use-chat-stream.ts b/apps/web/src/lib/use-chat-stream.ts index f44d8e5..fb9c918 100644 --- a/apps/web/src/lib/use-chat-stream.ts +++ b/apps/web/src/lib/use-chat-stream.ts @@ -59,8 +59,10 @@ interface UseChatStreamCallbacks { onAbort?: (conversationId: string) => void; } +export type MessageContent = string | Array>; + export interface ChatStreamRequest { - messages: Array<{ role: string; content: string }>; + messages: Array<{ role: string; content: MessageContent }>; harness: { model: string; mcp_servers: Array<{ diff --git a/apps/web/src/routes/chat/index.tsx b/apps/web/src/routes/chat/index.tsx index 45d42a4..72df8c0 100644 --- a/apps/web/src/routes/chat/index.tsx +++ b/apps/web/src/routes/chat/index.tsx @@ -21,8 +21,10 @@ import { Loader2, LogOut, MessageSquare, + Mic, PanelLeftClose, PanelLeftOpen, + Paperclip, Plus, Search, // Icon for search Settings, @@ -94,6 +96,9 @@ import { TooltipTrigger, } from "../../components/ui/tooltip"; import { env } from "../../env"; +import { acceptString, allowedMimeTypes, modelSupportsAudio, modelSupportsMedia } from "../../lib/models"; +import { buildMultimodalContent } from "../../lib/multimodal"; +import { useFileAttachments } from "../../hooks/use-file-attachments"; import { type ConvoStreamState, type StreamPart, @@ -102,6 +107,8 @@ import { useChatStream, } from "../../lib/use-chat-stream"; import { cn } from "../../lib/utils"; +import { AttachmentChip } from "../../components/attachment-chip"; +import { MessageAttachments } from "../../components/message-attachments"; export const Route = createFileRoute("/chat/")({ validateSearch: (search: Record) => ({ @@ -1754,6 +1761,12 @@ function ChatMessages({ }; model?: string; interrupted?: boolean; + attachments?: Array<{ + storageId: Id<"_storage">; + mimeType: string; + fileName: string; + fileSize: number; + }>; }>; streamingContent: string | null; streamingReasoning: string | null; @@ -2012,7 +2025,13 @@ function ChatMessages({ )} -
+
+ {msg.role === "user" && msg.attachments && msg.attachments.length > 0 && ( + + )}
) => void; isStreaming: boolean; onStream: (body: { - messages: Array<{ role: string; content: string }>; + messages: Array<{ role: string; content: string | Array> }>; harness: { model: string; mcp_servers: Array<{ @@ -2586,7 +2605,16 @@ function ChatInput({ }) => Promise; onInterrupt: (convoId: string) => void; onEnqueue: (content: string) => void; - messages?: Array<{ role: string; content: string }>; + messages?: Array<{ + role: string; + content: string; + attachments?: Array<{ + storageId: Id<"_storage">; + mimeType: string; + fileName: string; + fileSize: number; + }>; + }>; messageQueue: { id: number; content: string }[]; onDequeue: (index: number) => void; onSendNow: (index: number) => void; @@ -2595,6 +2623,55 @@ function ChatInput({ }) { const [text, setText] = useState(""); const textareaRef = useRef(null); + const fileInputRef = useRef(null); + const [isDragOver, setIsDragOver] = useState(false); + + const supportsMedia = modelSupportsMedia(activeHarness?.model); + const supportsAudio = modelSupportsAudio(activeHarness?.model); + const supportsAnyAttachment = supportsMedia || supportsAudio; + const modelAccept = acceptString(activeHarness?.model); + const modelAllowedMimes = useMemo(() => allowedMimeTypes(activeHarness?.model), [activeHarness?.model]); + + const { attachments, addFiles, removeAttachment, clearAttachments, hasUploading, resolveSignedUrls } = + useFileAttachments(modelAllowedMimes); + + // Clear attachments if the active model switches to one that doesn't support media + useEffect(() => { + if (!supportsAnyAttachment) clearAttachments(); + }, [supportsAnyAttachment, clearAttachments]); + + // ── Voice recording ────────────────────────────────────────────── + const [isRecording, setIsRecording] = useState(false); + const mediaRecorderRef = useRef(null); + const chunksRef = useRef([]); + + const startRecording = useCallback(async () => { + try { + const stream = await navigator.mediaDevices.getUserMedia({ audio: true }); + const recorder = new MediaRecorder(stream, { mimeType: "audio/webm" }); + chunksRef.current = []; + recorder.ondataavailable = (e) => { + if (e.data.size > 0) chunksRef.current.push(e.data); + }; + recorder.onstop = () => { + const blob = new Blob(chunksRef.current, { type: "audio/webm" }); + const file = new File([blob], `recording-${Date.now()}.webm`, { type: "audio/webm" }); + addFiles([file]); + stream.getTracks().forEach((t) => t.stop()); + }; + mediaRecorderRef.current = recorder; + recorder.start(); + setIsRecording(true); + } catch { + toast.error("Microphone access denied"); + } + }, [addFiles]); + + const stopRecording = useCallback(() => { + mediaRecorderRef.current?.stop(); + mediaRecorderRef.current = null; + setIsRecording(false); + }, []); // Fill input from suggested prompt click useEffect(() => { @@ -2648,6 +2725,7 @@ function ChatInput({ setText(""); setHistoryIndex(-1); setDraft(""); + clearAttachments(); // If streaming, just enqueue — don't interrupt if (isStreaming && conversationId) { @@ -2677,21 +2755,41 @@ function ChatInput({ onConvoCreated(newId); } + // Snapshot ready attachments from the current render's state (clearAttachments above is async) + const readyAttachments = attachments + .filter((a) => a.status === "ready" && a.storageId) + .map((a) => ({ + storageId: a.storageId as Id<"_storage">, + mimeType: a.mimeType, + fileName: a.fileName, + fileSize: a.fileSize, + })); + // Save user message to Convex await sendMessage.mutateAsync({ conversationId: convoId, role: "user", content, harnessId: activeHarness._id, + ...(readyAttachments.length > 0 ? { attachments: readyAttachments } : {}), }); - // Build message history for the LLM - const history: Array<{ role: string; content: string }> = - existingMessages?.map((m) => ({ - role: m.role, - content: m.content, - })) ?? []; - history.push({ role: "user", content }); + // Build message history for the LLM (with multimodal content where applicable) + const history: Array<{ role: string; content: string | Array> }> = []; + for (const m of existingMessages ?? []) { + if (m.role === "user" && m.attachments && m.attachments.length > 0) { + history.push({ role: m.role, content: await buildMultimodalContent(m.content, m.attachments, resolveSignedUrls) }); + } else { + history.push({ role: m.role, content: m.content }); + } + } + + // Add the new user message (with any current attachments) + if (readyAttachments.length > 0) { + history.push({ role: "user", content: await buildMultimodalContent(content, readyAttachments, resolveSignedUrls) }); + } else { + history.push({ role: "user", content }); + } // Start streaming from FastAPI onStream({ @@ -2742,11 +2840,75 @@ function ChatInput({ } }; + const handlePaste = (e: React.ClipboardEvent) => { + if (!supportsAnyAttachment) return; + const files = Array.from(e.clipboardData.files).filter( + (f) => f.type.startsWith("image/") || f.type === "application/pdf" || f.type.startsWith("audio/"), + ); + if (files.length > 0) { + e.preventDefault(); + addFiles(files); + } + }; + + const handleDragOver = (e: React.DragEvent) => { + e.preventDefault(); + if (supportsAnyAttachment && e.dataTransfer.types.includes("Files")) { + setIsDragOver(true); + } + }; + + const handleDragLeave = (e: React.DragEvent) => { + // Only clear if leaving the container entirely (not moving to a child) + if (!e.currentTarget.contains(e.relatedTarget as Node)) { + setIsDragOver(false); + } + }; + + const handleDrop = (e: React.DragEvent) => { + e.preventDefault(); + setIsDragOver(false); + if (!supportsAnyAttachment) return; + const files = Array.from(e.dataTransfer.files); + if (files.length > 0) addFiles(files); + }; + const showStopButton = isStreaming && !text.trim(); return ( -
-
+
+ {/* Drop overlay */} + {isDragOver && ( +
+
+ + Drop files to attach +
+
+ )} + + {/* Hidden file input */} + { + if (e.target.files) addFiles(Array.from(e.target.files)); + e.target.value = ""; + }} + /> + +
{/* Queued messages as chips above the input */} {messageQueue.length > 0 && ( @@ -2799,7 +2961,63 @@ function ChatInput({ )} -
+ {/* Attachment preview strip */} + + {attachments.length > 0 && ( + + {attachments.map((attachment) => ( + removeAttachment(attachment.localId)} + /> + ))} + + )} + + +
+ {/* Attach button — hidden for models that don't support media */} + {supportsAnyAttachment && ( + + + + + Attach files + + )} + + {supportsAudio && ( + + + + + {isRecording ? "Stop recording" : "Record audio"} + + )} +