From 3506958ddd64e5bfbd9a79da1aeba209620beb79 Mon Sep 17 00:00:00 2001 From: CTO Agent Date: Tue, 14 Apr 2026 21:47:19 +0000 Subject: [PATCH] feat: add edit vs generate logic for content thread replies (REC-68) When a user tags the Recoup Content Agent in the thread of existing content, the bot now: - Classifies user intent as "edit" or "generate" using an AI agent - For edits: parses natural-language edit instructions into ffmpeg operations (trim, crop, resize, overlay_text) and triggers the ffmpeg-edit task with the previously generated video URL - For new generation: re-runs the full content creation pipeline with new parameters parsed from the reply Key changes: - ContentAgentThreadState stores videoUrls on completion - handleContentAgentCallback persists video URLs in thread state - New AI agents: createContentIntentAgent (edit vs generate), createEditOperationsAgent (NL to ffmpeg ops) - registerOnSubscribedMessage handles completed/failed/timeout states - triggerEditContent wraps ffmpeg-edit task triggering Co-Authored-By: Paperclip --- .../content/createContentIntentAgent.ts | 47 +++ .../content/createEditOperationsAgent.ts | 87 ++++++ .../content/handleContentAgentCallback.ts | 8 +- .../handlers/registerOnSubscribedMessage.ts | 284 +++++++++++++++++- lib/agents/content/parseContentIntent.ts | 29 ++ lib/agents/content/parseEditOperations.ts | 22 ++ lib/agents/content/types.ts | 4 + lib/trigger/triggerEditContent.ts | 28 ++ 8 files changed, 507 insertions(+), 2 deletions(-) create mode 100644 lib/agents/content/createContentIntentAgent.ts create mode 100644 lib/agents/content/createEditOperationsAgent.ts create mode 100644 lib/agents/content/parseContentIntent.ts create mode 100644 lib/agents/content/parseEditOperations.ts create mode 100644 lib/trigger/triggerEditContent.ts diff --git a/lib/agents/content/createContentIntentAgent.ts b/lib/agents/content/createContentIntentAgent.ts new file mode 100644 index 000000000..a595e2927 --- /dev/null +++ 
b/lib/agents/content/createContentIntentAgent.ts @@ -0,0 +1,47 @@ +import { Output, ToolLoopAgent, stepCountIs } from "ai"; +import { z } from "zod"; +import { LIGHTWEIGHT_MODEL } from "@/lib/const"; + +export const contentIntentSchema = z.object({ + action: z + .enum(["edit", "generate"]) + .describe( + 'Whether the user wants to edit/modify the existing content ("edit") or create entirely new content from scratch ("generate"). Use "edit" when the user references changing, adjusting, trimming, cropping, resizing, or adding overlays to the existing video. Use "generate" when the user wants something completely new or different.', + ), +}); + +export type ContentIntent = z.infer<typeof contentIntentSchema>; + +const instructions = `You classify whether a user's message in a content thread is requesting an edit to existing content or a brand new generation. + +Context: The user previously generated content (videos/images) in this Slack thread. They are now replying with a new request. You must decide if they want to modify what was already created or start fresh. + +Classify as "edit" when the user wants to: +- Trim, shorten, or change the duration +- Crop or change the aspect ratio +- Resize the video +- Add or change text overlays / captions +- Make adjustments to the existing content +- Phrases like "make it shorter", "add text", "crop to square", "resize to 1080x1920" + +Classify as "generate" when the user wants to: +- Create entirely new content with a different template, artist, or style +- Generate more videos +- Phrases like "make another one", "try a different template", "generate 3 more" + +When in doubt, prefer "edit" since the user is replying in an existing content thread.`; + +/** + * Creates a ToolLoopAgent that classifies user intent as "edit" or "generate" + * based on a message in an existing content thread. + * + * @returns A configured ToolLoopAgent for intent classification.
+ */ +export function createContentIntentAgent() { + return new ToolLoopAgent({ + model: LIGHTWEIGHT_MODEL, + instructions, + output: Output.object({ schema: contentIntentSchema }), + stopWhen: stepCountIs(1), + }); +} diff --git a/lib/agents/content/createEditOperationsAgent.ts b/lib/agents/content/createEditOperationsAgent.ts new file mode 100644 index 000000000..5a5379be3 --- /dev/null +++ b/lib/agents/content/createEditOperationsAgent.ts @@ -0,0 +1,87 @@ +import { Output, ToolLoopAgent, stepCountIs } from "ai"; +import { z } from "zod"; +import { LIGHTWEIGHT_MODEL } from "@/lib/const"; +import { TEMPLATE_IDS } from "@/lib/content/templates"; + +const editOperationSchema = z.discriminatedUnion("type", [ + z.object({ + type: z.literal("trim"), + start: z.number().nonnegative().describe("Start time in seconds."), + duration: z.number().positive().describe("Duration in seconds."), + }), + z.object({ + type: z.literal("crop"), + aspect: z.string().optional().describe('Aspect ratio like "9:16", "1:1", "16:9".'), + width: z.number().int().positive().optional(), + height: z.number().int().positive().optional(), + }), + z.object({ + type: z.literal("resize"), + width: z.number().int().positive().optional(), + height: z.number().int().positive().optional(), + }), + z.object({ + type: z.literal("overlay_text"), + content: z.string().min(1).describe("The text to overlay on the video."), + color: z.string().optional().default("white"), + stroke_color: z.string().optional().default("black"), + max_font_size: z.number().positive().optional().default(42), + position: z.enum(["top", "center", "bottom"]).optional().default("bottom"), + }), +]); + +export const editOperationsResultSchema = z.object({ + template: z + .enum(TEMPLATE_IDS) + .optional() + .describe( + "If the user wants to apply a named template instead of explicit operations, set this. Otherwise omit.", + ), + operations: z + .array(editOperationSchema) + .describe( + "Ordered list of edit operations to apply. 
Empty array if a template is used instead.", + ), +}); + +export type EditOperationsResult = z.infer<typeof editOperationsResultSchema>; + +const templateList = TEMPLATE_IDS.map(id => `- "${id}"`).join("\n"); + +const instructions = `You extract video edit operations from a natural-language request. + +The user wants to modify an existing video. Parse their request into a list of edit operations. + +Available operations: +- "trim": Cut the video. Requires start (seconds) and duration (seconds). +- "crop": Change aspect ratio or dimensions. Use aspect (e.g. "9:16") or width/height. +- "resize": Change output dimensions. Provide width and/or height. +- "overlay_text": Add text on top of the video. Provide the text content, position (top/center/bottom), and optionally color. + +Available templates (use instead of operations when user references a template name): +${templateList} + +If the user mentions a template name, set "template" and leave "operations" empty. +Otherwise, extract explicit operations from the request. + +Examples: +- "make it 10 seconds" → trim with start=0, duration=10 +- "crop to square" → crop with aspect="1:1" +- "add 'New Release' text at the top" → overlay_text with content="New Release", position="top" +- "resize to 1080x1920" → resize with width=1080, height=1920 +- "apply the bedroom template" → template="artist-caption-bedroom"`; + +/** + * Creates a ToolLoopAgent that parses natural-language edit instructions + * into structured ffmpeg operations. + * + * @returns A configured ToolLoopAgent for edit operations parsing.
+ */ +export function createEditOperationsAgent() { + return new ToolLoopAgent({ + model: LIGHTWEIGHT_MODEL, + instructions, + output: Output.object({ schema: editOperationsResultSchema }), + stopWhen: stepCountIs(1), + }); +} diff --git a/lib/agents/content/handleContentAgentCallback.ts b/lib/agents/content/handleContentAgentCallback.ts index 52892dd3c..8729c68ae 100644 --- a/lib/agents/content/handleContentAgentCallback.ts +++ b/lib/agents/content/handleContentAgentCallback.ts @@ -68,7 +68,13 @@ export async function handleContentAgentCallback(request: Request): Promise v.videoUrl).filter(Boolean) as string[]; + + await thread.setState({ + status: "completed", + ...(videoUrls.length > 0 && { videoUrls }), + }); break; } diff --git a/lib/agents/content/handlers/registerOnSubscribedMessage.ts b/lib/agents/content/handlers/registerOnSubscribedMessage.ts index 5371a24bf..69cf1f804 100644 --- a/lib/agents/content/handlers/registerOnSubscribedMessage.ts +++ b/lib/agents/content/handlers/registerOnSubscribedMessage.ts @@ -1,8 +1,42 @@ import type { ContentAgentBot } from "../bot"; +import type { ContentAgentThreadState } from "../types"; +import { parseContentIntent } from "../parseContentIntent"; +import { parseEditOperations } from "../parseEditOperations"; +import { parseContentPrompt } from "../parseContentPrompt"; +import { extractMessageAttachments } from "../extractMessageAttachments"; +import { triggerEditContent } from "@/lib/trigger/triggerEditContent"; +import { triggerCreateContent } from "@/lib/trigger/triggerCreateContent"; +import { triggerPollContentRun } from "@/lib/trigger/triggerPollContentRun"; +import { resolveArtistSlug } from "@/lib/content/resolveArtistSlug"; +import { getArtistContentReadiness } from "@/lib/content/getArtistContentReadiness"; +import { selectAccountSnapshots } from "@/lib/supabase/account_snapshots/selectAccountSnapshots"; +import { loadTemplate } from "@/lib/content/templates"; +import { buildTaskCard } from 
"@/lib/agents/buildTaskCard"; + +/** Minimal thread interface used by helper functions. */ +interface ContentThread { + id: string; + post: (msg: unknown) => Promise; + setState: (s: Partial) => Promise; +} + +/** Minimal message shape compatible with extractMessageAttachments. */ +interface ContentMessage { + text: string; + attachments?: Array<{ + type: "image" | "file" | "video" | "audio"; + mimeType?: string; + name?: string; + url?: string; + data?: Buffer | Blob; + fetchData?: () => Promise; + }>; +} /** * Registers the onSubscribedMessage handler for the content agent. - * Handles replies in active threads while content is being generated. + * Handles replies in active threads — determines whether to edit + * existing content or generate new content based on AI classification. * * @param bot - The content agent bot instance to register the handler on */ @@ -15,6 +49,254 @@ export function registerOnSubscribedMessage(bot: ContentAgentBot) { if (state?.status === "running") { await thread.post("Still generating your content. I'll reply here when it's ready."); + return; + } + + if (state?.status === "completed") { + await handleCompletedThreadReply(thread, message, state); + return; + } + + // For failed/timeout states, suggest starting a new thread + if (state?.status === "failed" || state?.status === "timeout") { + await thread.post( + "The previous generation failed or timed out. Please start a new thread by mentioning me again.", + ); + } + }); +} + +/** + * Handles a reply in a thread where content was previously generated. + * Classifies intent as edit or generate and dispatches accordingly. 
+ * + * @param thread - The active thread to post to and manage state + * @param message - The incoming user message + * @param message.text - The raw text of the user's message + * @param state - Current thread state with previous generation details + */ +async function handleCompletedThreadReply( + thread: ContentThread, + message: ContentMessage, + state: ContentAgentThreadState, +) { + try { + const threadContext = buildThreadContext(state); + const intent = await parseContentIntent(message.text, threadContext); + + if (intent.action === "edit") { + await handleEditFlow(thread, message.text, state); + } else { + await handleGenerateFlow(thread, message, state); + } + } catch (error) { + console.error("[content-agent] handleCompletedThreadReply error:", error); + await thread.post("Something went wrong processing your request. Please try again."); + } +} + +/** + * Builds a human-readable summary of the thread's previous generation + * for the intent classifier. + * + * @param state - The thread state to summarise. + * @returns A one-line context string. + */ +function buildThreadContext(state: ContentAgentThreadState): string { + const parts = [ + `Previously generated ${state.batch} video(s)`, + `template: ${state.template}`, + `lipsync: ${state.lipsync}`, + ]; + if (state.videoUrls?.length) { + parts.push(`${state.videoUrls.length} video(s) available for editing`); + } + return parts.join(", "); +} + +/** + * Handles the edit flow: parses edit operations, triggers ffmpeg-edit, + * and starts polling for results. + * + * @param thread - The active thread to post to and manage state + * @param messageText - The user's edit instruction text + * @param state - Current thread state with video URLs + */ +async function handleEditFlow( + thread: ContentThread, + messageText: string, + state: ContentAgentThreadState, +) { + const videoUrl = state.videoUrls?.[0]; + if (!videoUrl) { + await thread.post( + "No video URL found from the previous generation. 
Please start a new thread to generate content first.", + ); + return; + } + + const editResult = await parseEditOperations(messageText); + if (!editResult) { + await thread.post( + "Could not parse edit instructions. Try something like: trim to 10 seconds, crop to square, or add text overlay.", + ); + return; + } + + // Resolve operations from template if needed + let operations = editResult.operations; + if (editResult.template && (!operations || operations.length === 0)) { + const template = loadTemplate(editResult.template); + if (template?.edit.operations) { + operations = template.edit.operations as typeof operations; } + } + + if (!operations || operations.length === 0) { + await thread.post( + "No edit operations could be determined from your message. Try: trim, crop, resize, or add text overlay.", + ); + return; + } + + const accountId = "fb678396-a68f-4294-ae50-b8cacf9ce77b"; + + const handle = await triggerEditContent({ + accountId, + video_url: videoUrl, + operations, }); + + const card = buildTaskCard( + "Content Edit Started", + `Editing video with ${operations.length} operation(s)...\n\nI'll reply here when ready.`, + handle.id, + ); + await thread.post({ card }); + + await thread.setState({ status: "running", runIds: [handle.id] }); + + try { + await triggerPollContentRun({ + runIds: [handle.id], + callbackThreadId: thread.id, + }); + } catch (pollError) { + console.error("[content-agent] triggerPollContentRun (edit) failed:", pollError); + await thread.setState({ status: "failed" }); + await thread.post("Failed to start edit polling. Please try again."); + } +} + +/** + * Handles the generate-new flow: parses content prompt flags, + * triggers content creation, and starts polling. 
+ * + * @param thread - The active thread to post to and manage state + * @param message - The incoming user message with potential attachments + * @param message.text - The raw text of the user's message + * @param state - Current thread state with artist details + */ +async function handleGenerateFlow( + thread: ContentThread, + message: ContentMessage, + state: ContentAgentThreadState, +) { + const accountId = "fb678396-a68f-4294-ae50-b8cacf9ce77b"; + const artistAccountId = state.artistAccountId; + + const { lipsync, batch, captionLength, upscale, template, songs } = await parseContentPrompt( + message.text, + ); + + const { songUrl, imageUrls } = await extractMessageAttachments(message); + + const artistSlug = await resolveArtistSlug(artistAccountId); + if (!artistSlug) { + await thread.post(`Artist not found for account ID \`${artistAccountId}\`.`); + return; + } + + let githubRepo: string; + try { + const readiness = await getArtistContentReadiness({ + accountId, + artistAccountId, + artistSlug, + }); + githubRepo = readiness.githubRepo; + } catch { + const snapshots = await selectAccountSnapshots(artistAccountId); + const repo = snapshots?.[0]?.github_repo; + if (!repo) { + await thread.post( + `No GitHub repository found for artist \`${artistSlug}\`. Content creation requires a configured repo.`, + ); + return; + } + githubRepo = repo; + } + + const allSongs = [...(songs ?? []), ...(songUrl ? 
[songUrl] : [])]; + + const payload = { + accountId, + artistSlug, + template, + lipsync, + captionLength, + upscale, + githubRepo, + ...(allSongs.length > 0 && { songs: allSongs }), + ...(imageUrls.length > 0 && { images: imageUrls }), + }; + + const results = await Promise.allSettled( + Array.from({ length: batch }, () => triggerCreateContent(payload)), + ); + const runIds = results + .filter(r => r.status === "fulfilled") + .map(r => (r as PromiseFulfilledResult<{ id: string }>).value.id); + + if (runIds.length === 0) { + await thread.post("Failed to trigger content creation. Please try again."); + return; + } + + const details = [ + `- Artist: *${artistSlug}*`, + `- Template: ${template}`, + `- Videos: ${batch}`, + `- Lipsync: ${lipsync ? "yes" : "no"}`, + ]; + if (songs && songs.length > 0) details.push(`- Songs: ${songs.join(", ")}`); + if (songUrl) details.push("- Audio: attached file"); + if (imageUrls.length > 0) details.push(`- Images: ${imageUrls.length} attached`); + + const card = buildTaskCard( + "Content Generation Started", + `Generating content for *${artistSlug}*...\n${details.join("\n")}\n\nI'll reply here when ready (~5-10 min).`, + runIds[0], + ); + await thread.post({ card }); + + await thread.setState({ + status: "running", + artistAccountId, + template, + lipsync, + batch, + runIds, + }); + + try { + await triggerPollContentRun({ + runIds, + callbackThreadId: thread.id, + }); + } catch (pollError) { + console.error("[content-agent] triggerPollContentRun (generate) failed:", pollError); + await thread.setState({ status: "failed" }); + await thread.post("Failed to start content polling. 
Please try again."); + } } diff --git a/lib/agents/content/parseContentIntent.ts b/lib/agents/content/parseContentIntent.ts new file mode 100644 index 000000000..08b514d78 --- /dev/null +++ b/lib/agents/content/parseContentIntent.ts @@ -0,0 +1,29 @@ +import { createContentIntentAgent } from "./createContentIntentAgent"; +import type { ContentIntent } from "./createContentIntentAgent"; + +export type { ContentIntent }; + +/** + * Uses AI to classify whether a thread reply is requesting an edit to + * existing content or a new generation. + * + * @param prompt - The user's reply message text. + * @param threadContext - Summary of what was previously generated (template, video count, etc.). + * @returns The classified intent, defaulting to "edit" on failure. + */ +export async function parseContentIntent( + prompt: string, + threadContext: string, +): Promise<ContentIntent> { + try { + const agent = createContentIntentAgent(); + const { output } = await agent.generate({ + prompt: `Thread context: ${threadContext}\n\nUser's new message: ${prompt}`, + }); + + return output ?? { action: "edit" }; + } catch (error) { + console.error("[content-agent] parseContentIntent failed:", error); + return { action: "edit" }; + } +} diff --git a/lib/agents/content/parseEditOperations.ts b/lib/agents/content/parseEditOperations.ts new file mode 100644 index 000000000..61656cbdc --- /dev/null +++ b/lib/agents/content/parseEditOperations.ts @@ -0,0 +1,22 @@ +import { createEditOperationsAgent } from "./createEditOperationsAgent"; +import type { EditOperationsResult } from "./createEditOperationsAgent"; + +export type { EditOperationsResult }; + +/** + * Parses a natural-language edit request into structured ffmpeg operations + * or a template reference. + * + * @param prompt - The user's edit instructions. + * @returns Parsed edit operations, or null if parsing fails.
+ */ +export async function parseEditOperations(prompt: string): Promise<EditOperationsResult | null> { + try { + const agent = createEditOperationsAgent(); + const { output } = await agent.generate({ prompt }); + return output ?? null; + } catch (error) { + console.error("[content-agent] parseEditOperations failed:", error); + return null; + } +} diff --git a/lib/agents/content/types.ts b/lib/agents/content/types.ts index bae358625..b63c7d49e 100644 --- a/lib/agents/content/types.ts +++ b/lib/agents/content/types.ts @@ -9,4 +9,8 @@ export interface ContentAgentThreadState { lipsync: boolean; batch: number; runIds: string[]; + /** URLs of successfully generated videos, stored on completion for edit flows. */ + videoUrls?: string[]; + /** URLs of successfully generated images, stored on completion for edit flows. */ + imageUrls?: string[]; } diff --git a/lib/trigger/triggerEditContent.ts b/lib/trigger/triggerEditContent.ts new file mode 100644 index 000000000..6bc26474a --- /dev/null +++ b/lib/trigger/triggerEditContent.ts @@ -0,0 +1,28 @@ +import { tasks } from "@trigger.dev/sdk"; + +interface EditOperation { + [key: string]: unknown; + type: string; +} + +export interface TriggerEditContentPayload { + accountId: string; + video_url: string; + operations: EditOperation[]; + output_format?: string; +} + +/** + * Triggers the ffmpeg-edit task in Trigger.dev to apply edit operations + * to an existing video. + * + * @param payload - The edit payload with video URL and operations. + * @returns The task handle with runId. + */ +export async function triggerEditContent(payload: TriggerEditContentPayload) { + const handle = await tasks.trigger("ffmpeg-edit", { + ...payload, + output_format: payload.output_format ?? "mp4", + }); + return handle; +}