From e44e9272c46b9834917334b924333286f3113191 Mon Sep 17 00:00:00 2001 From: Eric Allam Date: Sun, 10 May 2026 22:26:11 +0100 Subject: [PATCH 1/9] =?UTF-8?q?feat:=20Sessions=20primitive=20=E2=80=94=20?= =?UTF-8?q?durable=20run-aware=20streams=20+=20dashboard?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds Sessions, a durable, run-aware stream primitive that scopes session.in / session.out records to a session (not a single run). Records survive run boundaries; reconnect-from-last-event-id is built in. Server foundation: - New /realtime/v1/sessions/:session/:io/append + /records routes - sessionRunManager + sessionsRepository + clickhouseSessionsRepository - mintRunToken for short-lived per-session tokens - s2Append retry-with-backoff + undici cause diagnostics - /api/v[12]/packets/* exempt from customer rate limits - BackgroundWorker schema gains taskKind enum (TASK, AGENT, SCHEDULED) - TaskRun.taskKind column + clickhouse 029_add_task_kind_to_task_runs_v2 Core types: - new sessionStreams, inputStreams, realtimeStreams packages in @trigger.dev/core - session-streams-api / realtime-streams-api surface Sessions dashboard UI (the primitive's own viewer): - /sessions index + detail routes - SessionsTable, SessionFilters, SessionStatus, CloseSessionDialog - AGENT/SCHEDULED filter in RunFilters + TaskTriggerSource Includes the sessions-primitive changeset. 
--- .changeset/chat-ready-core-additions.md | 5 + .gitignore | 4 + CLAUDE.md | 2 + .../app/components/BlankStatePanels.tsx | 23 + .../components/BulkActionFilterSummary.tsx | 13 + .../app/components/runs/v3/RunFilters.tsx | 117 ++- .../app/components/runs/v3/TaskRunsTable.tsx | 6 + .../components/runs/v3/TaskTriggerSource.tsx | 10 +- .../sessions/v1/CloseSessionDialog.tsx | 72 ++ .../components/sessions/v1/SessionFilters.tsx | 764 ++++++++++++++++++ .../components/sessions/v1/SessionStatus.tsx | 89 ++ .../components/sessions/v1/SessionsTable.tsx | 224 +++++ .../app/presenters/RunFilters.server.ts | 2 + .../app/presenters/SessionFilters.server.ts | 18 + .../v3/ApiRunListPresenter.server.ts | 4 + .../v3/NextRunListPresenter.server.ts | 6 + .../v3/SessionListPresenter.server.ts | 219 +++++ .../presenters/v3/TaskListPresenter.server.ts | 1 + .../app/presenters/v3/TestPresenter.server.ts | 21 +- .../presenters/v3/TestTaskPresenter.server.ts | 4 + .../route.tsx | 107 +++ .../route.tsx | 10 + .../app/routes/api.v1.deployments.current.ts | 55 ++ ...altime.v1.sessions.$session.$io.records.ts | 97 +++ .../realtime.v1.streams.$runId.$streamId.ts | 3 +- .../resources.sessions.$sessionParam.close.ts | 98 +++ apps/webapp/app/routes/runs.$runParam.ts | 12 +- .../app/runEngine/concerns/queues.server.ts | 41 +- .../runEngine/services/triggerTask.server.ts | 89 +- apps/webapp/app/runEngine/types.ts | 35 +- .../app/services/apiRateLimit.server.ts | 7 + .../services/realtime/mintRunToken.server.ts | 41 + .../realtime/s2realtimeStreams.server.ts | 107 ++- .../realtime/sessionRunManager.server.ts | 130 ++- .../services/runsReplicationService.server.ts | 1 + .../clickhouseRunsRepository.server.ts | 19 + .../runsRepository/runsRepository.server.ts | 3 + .../clickhouseSessionsRepository.server.ts | 1 + .../sessionsRepository.server.ts | 1 + apps/webapp/app/utils/pathBuilder.ts | 42 + .../services/createBackgroundWorker.server.ts | 10 +- .../029_add_task_kind_to_task_runs_v2.sql | 7 + 
.../clickhouse/src/taskRuns.test.ts | 4 + internal-packages/clickhouse/src/taskRuns.ts | 4 + .../migration.sql | 5 + .../migration.sql | 34 + .../migration.sql | 3 + .../database/prisma/schema.prisma | 53 +- packages/core/src/v3/apiClient/errors.ts | 12 + packages/core/src/v3/apiClient/index.ts | 311 +++++++ .../core/src/v3/apiClient/runStream.test.ts | 444 ++++++++++ packages/core/src/v3/apiClient/runStream.ts | 199 ++++- packages/core/src/v3/errors.ts | 20 + packages/core/src/v3/index.ts | 2 + packages/core/src/v3/inputStreams/index.ts | 12 + packages/core/src/v3/inputStreams/manager.ts | 29 + .../core/src/v3/inputStreams/noopManager.ts | 6 + packages/core/src/v3/inputStreams/types.ts | 22 + packages/core/src/v3/realtime-streams-api.ts | 2 + packages/core/src/v3/realtimeStreams/index.ts | 6 + .../core/src/v3/realtimeStreams/manager.ts | 3 +- .../src/v3/realtimeStreams/noopManager.ts | 2 +- .../realtimeStreams/sessionStreamInstance.ts | 103 +++ .../src/v3/realtimeStreams/streamInstance.ts | 7 +- .../src/v3/realtimeStreams/streamsWriterV1.ts | 7 +- .../realtimeStreams/streamsWriterV2.test.ts | 150 ++++ .../src/v3/realtimeStreams/streamsWriterV2.ts | 45 +- packages/core/src/v3/realtimeStreams/types.ts | 42 +- packages/core/src/v3/schemas/api.ts | 31 + packages/core/src/v3/schemas/build.ts | 11 +- packages/core/src/v3/schemas/resources.ts | 7 + packages/core/src/v3/schemas/runEngine.ts | 4 + packages/core/src/v3/schemas/schemas.ts | 27 + .../core/src/v3/semanticInternalAttributes.ts | 1 + packages/core/src/v3/session-streams-api.ts | 7 + packages/core/src/v3/sessionStreams/index.ts | 89 ++ .../src/v3/sessionStreams/manager.test.ts | 151 ++++ .../core/src/v3/sessionStreams/manager.ts | 412 ++++++++++ .../core/src/v3/sessionStreams/noopManager.ts | 51 ++ packages/core/src/v3/sessionStreams/types.ts | 76 ++ .../src/v3/test/test-input-stream-manager.ts | 219 +++++ .../v3/test/test-realtime-streams-manager.ts | 169 ++++ .../src/v3/test/test-run-metadata-manager.ts | 103 
+++ .../v3/test/test-session-stream-manager.ts | 287 +++++++ packages/core/src/v3/types/tasks.ts | 40 + packages/core/src/v3/utils/globals.ts | 2 + packages/core/src/v3/workers/index.ts | 1 + packages/core/src/v3/workers/taskExecutor.ts | 2 + packages/core/test/runStream.test.ts | 44 +- packages/trigger-sdk/src/v3/shared.ts | 212 ++++- packages/trigger-sdk/src/v3/streams.ts | 121 ++- packages/trigger-sdk/src/v3/tasks.ts | 2 + 92 files changed, 5933 insertions(+), 183 deletions(-) create mode 100644 .changeset/chat-ready-core-additions.md create mode 100644 apps/webapp/app/components/sessions/v1/CloseSessionDialog.tsx create mode 100644 apps/webapp/app/components/sessions/v1/SessionFilters.tsx create mode 100644 apps/webapp/app/components/sessions/v1/SessionStatus.tsx create mode 100644 apps/webapp/app/components/sessions/v1/SessionsTable.tsx create mode 100644 apps/webapp/app/presenters/SessionFilters.server.ts create mode 100644 apps/webapp/app/presenters/v3/SessionListPresenter.server.ts create mode 100644 apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.sessions._index/route.tsx create mode 100644 apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.sessions/route.tsx create mode 100644 apps/webapp/app/routes/api.v1.deployments.current.ts create mode 100644 apps/webapp/app/routes/realtime.v1.sessions.$session.$io.records.ts create mode 100644 apps/webapp/app/routes/resources.sessions.$sessionParam.close.ts create mode 100644 apps/webapp/app/services/realtime/mintRunToken.server.ts create mode 100644 internal-packages/clickhouse/schema/029_add_task_kind_to_task_runs_v2.sql create mode 100644 internal-packages/database/prisma/migrations/20260329100903_add_agent_trigger_source_and_task_config/migration.sql create mode 100644 internal-packages/database/prisma/migrations/20260330113734_add_playground_conversation/migration.sql create mode 100644 
internal-packages/database/prisma/migrations/20260330135232_add_messages_and_last_event_id_to_playground/migration.sql create mode 100644 packages/core/src/v3/apiClient/runStream.test.ts create mode 100644 packages/core/src/v3/realtimeStreams/sessionStreamInstance.ts create mode 100644 packages/core/src/v3/realtimeStreams/streamsWriterV2.test.ts create mode 100644 packages/core/src/v3/session-streams-api.ts create mode 100644 packages/core/src/v3/sessionStreams/index.ts create mode 100644 packages/core/src/v3/sessionStreams/manager.test.ts create mode 100644 packages/core/src/v3/sessionStreams/manager.ts create mode 100644 packages/core/src/v3/sessionStreams/noopManager.ts create mode 100644 packages/core/src/v3/sessionStreams/types.ts create mode 100644 packages/core/src/v3/test/test-input-stream-manager.ts create mode 100644 packages/core/src/v3/test/test-realtime-streams-manager.ts create mode 100644 packages/core/src/v3/test/test-run-metadata-manager.ts create mode 100644 packages/core/src/v3/test/test-session-stream-manager.ts diff --git a/.changeset/chat-ready-core-additions.md b/.changeset/chat-ready-core-additions.md new file mode 100644 index 00000000000..e06db5e2c9f --- /dev/null +++ b/.changeset/chat-ready-core-additions.md @@ -0,0 +1,5 @@ +--- +"@trigger.dev/core": patch +--- + +Add `ChatChunkTooLargeError` and ApiClient methods for subscribing to session streams. Lays the groundwork for the upcoming `chat.agent`. 
diff --git a/.gitignore b/.gitignore index 5f6adddba0a..d071d5ae4e3 100644 --- a/.gitignore +++ b/.gitignore @@ -65,6 +65,10 @@ apps/**/public/build /packages/trigger-sdk/src/package.json /packages/python/src/package.json **/.claude/settings.local.json +.claude/architecture/ +.claude/docs-plans/ +.claude/review-guides/ +.claude/scheduled_tasks.lock .mcp.log .mcp.json .cursor/debug.log diff --git a/CLAUDE.md b/CLAUDE.md index 53348d012a2..28650fd08aa 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -6,6 +6,8 @@ This file provides guidance to Claude Code when working with this repository. Su This is a pnpm 10.33.2 monorepo using Turborepo. Run commands from root with `pnpm run`. +**Adding dependencies:** Edit `package.json` directly instead of using `pnpm add`, then run `pnpm i` from the repo root. See `.claude/rules/package-installation.md` for the full process. + ```bash pnpm run docker # Start Docker services (PostgreSQL, Redis, Electric) pnpm run db:migrate # Run database migrations diff --git a/apps/webapp/app/components/BlankStatePanels.tsx b/apps/webapp/app/components/BlankStatePanels.tsx index fe39f6785c5..9a4884e09d3 100644 --- a/apps/webapp/app/components/BlankStatePanels.tsx +++ b/apps/webapp/app/components/BlankStatePanels.tsx @@ -1,4 +1,5 @@ import { + ArrowsRightLeftIcon, BeakerIcon, BellAlertIcon, BookOpenIcon, @@ -189,6 +190,28 @@ export function BatchesNone() { ); } +export function SessionsNone() { + return ( + + Sessions docs + + } + > + + You have no sessions in this environment. Sessions are durable, typed, bidirectional I/O + primitives that outlive a single run — used by chat.agent and any + long-running task that needs streaming input and output. 
+ + + ); +} + export function TestHasNoTasks() { const organization = useOrganization(); const project = useProject(); diff --git a/apps/webapp/app/components/BulkActionFilterSummary.tsx b/apps/webapp/app/components/BulkActionFilterSummary.tsx index a230e70b346..c5d1a2f48d7 100644 --- a/apps/webapp/app/components/BulkActionFilterSummary.tsx +++ b/apps/webapp/app/components/BulkActionFilterSummary.tsx @@ -240,6 +240,19 @@ export function BulkActionFilterSummary({ /> ); } + case "sources": { + const values = Array.isArray(value) ? value : [`${value}`]; + return ( + + ); + } default: { assertNever(typedKey); } diff --git a/apps/webapp/app/components/runs/v3/RunFilters.tsx b/apps/webapp/app/components/runs/v3/RunFilters.tsx index c02e93a5c6e..c27ac1bc187 100644 --- a/apps/webapp/app/components/runs/v3/RunFilters.tsx +++ b/apps/webapp/app/components/runs/v3/RunFilters.tsx @@ -2,6 +2,7 @@ import * as Ariakit from "@ariakit/react"; import { CalendarIcon, ClockIcon, + CpuChipIcon, FingerPrintIcon, PlusIcon, RectangleStackIcon, @@ -190,6 +191,9 @@ export const TaskRunListSearchFilters = z.object({ `Machine presets to filter by (${machines.join(", ")})` ), errorId: z.string().optional().describe("Error ID to filter runs by (e.g. error_abc123)"), + sources: StringOrStringArray.describe( + "Task trigger sources to filter by (STANDARD, SCHEDULED, AGENT)" + ), }); export type TaskRunListSearchFilters = z.infer; @@ -231,6 +235,8 @@ export function filterTitle(filterKey: string) { return "Version"; case "errorId": return "Error ID"; + case "sources": + return "Source"; default: return filterKey; } @@ -271,6 +277,8 @@ export function filterIcon(filterKey: string): ReactNode | undefined { return ; case "errorId": return ; + case "sources": + return ; default: return undefined; } @@ -318,6 +326,10 @@ export function getRunFiltersFromSearchParams( ? searchParams.getAll("versions") : undefined, errorId: searchParams.get("errorId") ?? 
undefined, + sources: + searchParams.getAll("sources").filter((v) => v.length > 0).length > 0 + ? searchParams.getAll("sources") + : undefined, }; const parsed = TaskRunListSearchFilters.safeParse(params); @@ -359,7 +371,8 @@ export function RunsFilters(props: RunFiltersProps) { searchParams.has("queues") || searchParams.has("machines") || searchParams.has("versions") || - searchParams.has("errorId"); + searchParams.has("errorId") || + searchParams.has("sources"); return (
@@ -395,6 +408,7 @@ const filterTypes = [ { name: "schedule", title: "Schedule ID", icon: }, { name: "bulk", title: "Bulk action", icon: }, { name: "error", title: "Error ID", icon: }, + { name: "source", title: "Source", icon: }, ] as const; type FilterType = (typeof filterTypes)[number]["name"]; @@ -448,6 +462,7 @@ function AppliedFilters({ bulkActions }: RunFiltersProps) { + ); } @@ -482,6 +497,8 @@ function Menu(props: MenuProps) { return props.setFilterType(undefined)} {...props} />; case "error": return props.setFilterType(undefined)} {...props} />; + case "source": + return props.setFilterType(undefined)} {...props} />; } } @@ -1739,3 +1756,101 @@ function AppliedErrorIdFilter() { ); } + +const sourceOptions: { value: TaskTriggerSource; title: string }[] = [ + { value: "STANDARD", title: "Standard" }, + { value: "SCHEDULED", title: "Scheduled" }, + { value: "AGENT", title: "Agent" }, +]; + +function SourceDropdown({ + trigger, + clearSearchValue, + searchValue, + onClose, +}: { + trigger: ReactNode; + clearSearchValue: () => void; + searchValue: string; + onClose?: () => void; +}) { + const { values, replace } = useSearchParams(); + + const handleChange = (values: string[]) => { + clearSearchValue(); + replace({ sources: values, cursor: undefined, direction: undefined }); + }; + + const filtered = useMemo(() => { + return sourceOptions.filter((item) => + item.title.toLowerCase().includes(searchValue.toLowerCase()) + ); + }, [searchValue]); + + return ( + + {trigger} + { + if (onClose) { + onClose(); + return false; + } + return true; + }} + > + + + {filtered.map((item, index) => ( + + } + shortcut={shortcutFromIndex(index, { shortcutsEnabled: true })} + > + {item.title} + + ))} + + + + ); +} + +function AppliedSourceFilter() { + const { values, del } = useSearchParams(); + const sources = values("sources"); + + if (sources.length === 0 || sources.every((v) => v === "")) { + return null; + } + + return ( + + {(search, setSearch) => ( + }> + } + 
value={appliedSummary( + sources.map( + (v) => sourceOptions.find((o) => o.value === v)?.title ?? v + ) + )} + onRemove={() => del(["sources", "cursor", "direction"])} + variant="secondary/small" + /> + + } + searchValue={search} + clearSearchValue={() => setSearch("")} + /> + )} + + ); +} diff --git a/apps/webapp/app/components/runs/v3/TaskRunsTable.tsx b/apps/webapp/app/components/runs/v3/TaskRunsTable.tsx index 346fd25eee2..bf8337baa10 100644 --- a/apps/webapp/app/components/runs/v3/TaskRunsTable.tsx +++ b/apps/webapp/app/components/runs/v3/TaskRunsTable.tsx @@ -55,8 +55,10 @@ import { filterableTaskRunStatuses, TaskRunStatusCombo, } from "./TaskRunStatus"; +import { TaskTriggerSourceIcon } from "./TaskTriggerSource"; import { useOptimisticLocation } from "~/hooks/useOptimisticLocation"; import { useSearchParams } from "~/hooks/useSearchParam"; +import type { TaskTriggerSource } from "@trigger.dev/database"; type RunsTableProps = { total: number; @@ -352,6 +354,10 @@ export function TaskRunsTable({ + {run.taskIdentifier} {run.rootTaskRunId === null ? 
Root : null} diff --git a/apps/webapp/app/components/runs/v3/TaskTriggerSource.tsx b/apps/webapp/app/components/runs/v3/TaskTriggerSource.tsx index 8d81e2f36c3..dc61644e14c 100644 --- a/apps/webapp/app/components/runs/v3/TaskTriggerSource.tsx +++ b/apps/webapp/app/components/runs/v3/TaskTriggerSource.tsx @@ -1,4 +1,4 @@ -import { ClockIcon } from "@heroicons/react/20/solid"; +import { ClockIcon, CpuChipIcon } from "@heroicons/react/20/solid"; import type { TaskTriggerSource } from "@trigger.dev/database"; import { TaskIconSmall } from "~/assets/icons/TaskIcon"; import { cn } from "~/utils/cn"; @@ -19,6 +19,11 @@ export function TaskTriggerSourceIcon({ ); } + case "AGENT": { + return ( + + ); + } } } @@ -30,5 +35,8 @@ export function taskTriggerSourceDescription(source: TaskTriggerSource) { case "SCHEDULED": { return "Scheduled task"; } + case "AGENT": { + return "Agent"; + } } } diff --git a/apps/webapp/app/components/sessions/v1/CloseSessionDialog.tsx b/apps/webapp/app/components/sessions/v1/CloseSessionDialog.tsx new file mode 100644 index 00000000000..7feba8e6db5 --- /dev/null +++ b/apps/webapp/app/components/sessions/v1/CloseSessionDialog.tsx @@ -0,0 +1,72 @@ +import { XCircleIcon } from "@heroicons/react/24/solid"; +import { DialogClose } from "@radix-ui/react-dialog"; +import { Form, useNavigation } from "@remix-run/react"; +import { Button } from "~/components/primitives/Buttons"; +import { DialogContent, DialogHeader } from "~/components/primitives/Dialog"; +import { FormButtons } from "~/components/primitives/FormButtons"; +import { Input } from "~/components/primitives/Input"; +import { Label } from "~/components/primitives/Label"; +import { Paragraph } from "~/components/primitives/Paragraph"; +import { SpinnerWhite } from "~/components/primitives/Spinner"; + +type CloseSessionDialogProps = { + sessionParam: string; + environmentId: string; + redirectPath: string; +}; + +export function CloseSessionDialog({ + sessionParam, + environmentId, + 
redirectPath, +}: CloseSessionDialogProps) { + const navigation = useNavigation(); + + const formAction = `/resources/sessions/${encodeURIComponent(sessionParam)}/close`; + const isLoading = navigation.formAction === formAction; + + return ( + + Close this session? +
+ + Closing a session is permanent. The session will no longer accept new input or trigger + new runs. Any in-flight run continues until it finishes on its own. + +
+ + +
+ + +
+ + {isLoading ? "Closing..." : "Close session"} + + } + cancelButton={ + + + + } + /> + +
+
+ ); +} diff --git a/apps/webapp/app/components/sessions/v1/SessionFilters.tsx b/apps/webapp/app/components/sessions/v1/SessionFilters.tsx new file mode 100644 index 00000000000..9c13b7b4b3f --- /dev/null +++ b/apps/webapp/app/components/sessions/v1/SessionFilters.tsx @@ -0,0 +1,764 @@ +import * as Ariakit from "@ariakit/react"; +import { + CpuChipIcon, + FingerPrintIcon, + TagIcon, + XMarkIcon, +} from "@heroicons/react/20/solid"; +import { Form } from "@remix-run/react"; +import { ListFilterIcon } from "lucide-react"; +import { type ReactNode, useCallback, useMemo, useState } from "react"; +import { z } from "zod"; +import { StatusIcon } from "~/assets/icons/StatusIcon"; +import { TaskIcon } from "~/assets/icons/TaskIcon"; +import { AppliedFilter } from "~/components/primitives/AppliedFilter"; +import { Input } from "~/components/primitives/Input"; +import { Label } from "~/components/primitives/Label"; +import { Paragraph } from "~/components/primitives/Paragraph"; +import { + ComboBox, + SelectButtonItem, + SelectItem, + SelectList, + SelectPopover, + SelectProvider, + SelectTrigger, + shortcutFromIndex, +} from "~/components/primitives/Select"; +import { + Tooltip, + TooltipContent, + TooltipProvider, + TooltipTrigger, +} from "~/components/primitives/Tooltip"; +import { useOptimisticLocation } from "~/hooks/useOptimisticLocation"; +import { useSearchParams } from "~/hooks/useSearchParam"; +import { Button } from "../../primitives/Buttons"; +import { + appliedSummary, + FilterMenuProvider, + TimeFilter, +} from "../../runs/v3/SharedFilters"; +import { + allSessionStatuses, + descriptionForSessionStatus, + SessionStatusCombo, + sessionStatusTitle, +} from "./SessionStatus"; + +const StringOrStringArray = z.preprocess( + (value) => (typeof value === "string" ? 
[value] : value), + z.array(z.string()).optional() +); + +export const SessionStatus = z.enum(allSessionStatuses); + +export const SessionListSearchFilters = z.object({ + cursor: z.string().optional(), + direction: z.enum(["forward", "backward"]).optional(), + statuses: z.preprocess( + (value) => (typeof value === "string" ? [value] : value), + SessionStatus.array().optional() + ), + types: StringOrStringArray, + taskIdentifiers: StringOrStringArray, + externalId: z.string().optional(), + tags: StringOrStringArray, + period: z.preprocess((value) => (value === "all" ? undefined : value), z.string().optional()), + from: z.coerce.number().optional(), + to: z.coerce.number().optional(), +}); + +export type SessionListSearchFilters = z.infer; +export type SessionListSearchFilterKey = keyof SessionListSearchFilters; + +export function getSessionFiltersFromSearchParams( + searchParams: URLSearchParams +): SessionListSearchFilters { + function listOrUndefined(key: string) { + const values = searchParams.getAll(key).filter((v) => v.length > 0); + return values.length > 0 ? values : undefined; + } + + const params = { + cursor: searchParams.get("cursor") ?? undefined, + direction: searchParams.get("direction") ?? undefined, + statuses: listOrUndefined("statuses"), + types: listOrUndefined("types"), + taskIdentifiers: listOrUndefined("taskIdentifiers"), + externalId: searchParams.get("externalId") ?? undefined, + tags: listOrUndefined("tags"), + period: searchParams.get("period") ?? undefined, + from: searchParams.get("from") ?? undefined, + to: searchParams.get("to") ?? 
undefined, + }; + + const parsed = SessionListSearchFilters.safeParse(params); + if (!parsed.success) { + return {}; + } + return parsed.data; +} + +type SessionFiltersProps = { + hasFilters: boolean; + possibleTypes?: string[]; +}; + +export function SessionFilters(props: SessionFiltersProps) { + const location = useOptimisticLocation(); + const searchParams = new URLSearchParams(location.search); + const hasFilters = + searchParams.has("statuses") || + searchParams.has("types") || + searchParams.has("taskIdentifiers") || + searchParams.has("externalId") || + searchParams.has("tags"); + + return ( +
+ + + + {hasFilters && ( +
+
+ ); +} + +const filterTypes = [ + { + name: "statuses", + title: "Status", + icon: , + }, + { name: "types", title: "Type", icon: }, + { + name: "taskIdentifiers", + title: "Task", + icon: , + }, + { + name: "externalId", + title: "External ID", + icon: , + }, + { name: "tags", title: "Tags", icon: }, +] as const; + +type FilterType = (typeof filterTypes)[number]["name"]; + +const shortcut = { key: "f" }; + +function FilterMenu(props: SessionFiltersProps) { + const [filterType, setFilterType] = useState(); + + const filterTrigger = ( + + +
+ } + variant={"secondary/small"} + shortcut={shortcut} + tooltipTitle={"Filter sessions"} + > + Filter + + ); + + return ( + setFilterType(undefined)}> + {(search, setSearch) => ( + setSearch("")} + trigger={filterTrigger} + filterType={filterType} + setFilterType={setFilterType} + {...props} + /> + )} + + ); +} + +function AppliedFilters() { + return ( + <> + + + + + + + ); +} + +type MenuProps = { + searchValue: string; + clearSearchValue: () => void; + trigger: React.ReactNode; + filterType: FilterType | undefined; + setFilterType: (filterType: FilterType | undefined) => void; +} & SessionFiltersProps; + +function Menu(props: MenuProps) { + switch (props.filterType) { + case undefined: + return ; + case "statuses": + return props.setFilterType(undefined)} {...props} />; + case "types": + return props.setFilterType(undefined)} {...props} />; + case "taskIdentifiers": + return ( + props.setFilterType(undefined)} {...props} /> + ); + case "externalId": + return props.setFilterType(undefined)} {...props} />; + case "tags": + return props.setFilterType(undefined)} {...props} />; + } +} + +function MainMenu({ searchValue, trigger, clearSearchValue, setFilterType }: MenuProps) { + const filtered = useMemo(() => { + return filterTypes.filter((item) => + item.title.toLowerCase().includes(searchValue.toLowerCase()) + ); + }, [searchValue]); + + return ( + + {trigger} + + + + {filtered.map((type, index) => ( + { + clearSearchValue(); + setFilterType(type.name); + }} + icon={type.icon} + shortcut={shortcutFromIndex(index, { shortcutsEnabled: true })} + > + {type.title} + + ))} + + + + ); +} + +const statusItems = allSessionStatuses.map((status) => ({ + title: sessionStatusTitle(status), + value: status, +})); + +function StatusDropdown({ + trigger, + clearSearchValue, + searchValue, + onClose, +}: { + trigger: ReactNode; + clearSearchValue: () => void; + searchValue: string; + onClose?: () => void; +}) { + const { values, replace } = useSearchParams(); + + const 
handleChange = (next: string[]) => { + clearSearchValue(); + replace({ statuses: next, cursor: undefined, direction: undefined }); + }; + + const filtered = useMemo(() => { + return statusItems.filter((item) => + item.title.toLowerCase().includes(searchValue.toLowerCase()) + ); + }, [searchValue]); + + return ( + + {trigger} + { + if (onClose) { + onClose(); + return false; + } + return true; + }} + > + + + {filtered.map((item, index) => ( + + + + + + + + + {descriptionForSessionStatus(item.value)} + + + + + + ))} + + + + ); +} + +function AppliedStatusFilter() { + const { values, del } = useSearchParams(); + const statuses = values("statuses"); + + if (statuses.length === 0) return null; + + return ( + + {(search, setSearch) => ( + }> + } + value={appliedSummary( + statuses.map((v) => sessionStatusTitle(v as (typeof allSessionStatuses)[number])) + )} + onRemove={() => del(["statuses", "cursor", "direction"])} + variant="secondary/small" + /> + + } + searchValue={search} + clearSearchValue={() => setSearch("")} + /> + )} + + ); +} + +function TypeDropdown({ + trigger, + searchValue, + clearSearchValue, + possibleTypes, + onClose, +}: { + trigger: ReactNode; + searchValue: string; + clearSearchValue: () => void; + possibleTypes?: string[]; + onClose?: () => void; +}) { + const { values, replace } = useSearchParams(); + + const handleChange = (next: string[]) => { + clearSearchValue(); + replace({ types: next, cursor: undefined, direction: undefined }); + }; + + const items = useMemo(() => { + const all = possibleTypes && possibleTypes.length > 0 ? 
possibleTypes : ["chat"]; + const seen = new Set(all); + for (const v of values("types")) { + if (!seen.has(v)) { + all.push(v); + seen.add(v); + } + } + return all.filter((t) => t.toLowerCase().includes(searchValue.toLowerCase())); + }, [possibleTypes, searchValue, values]); + + return ( + + {trigger} + { + if (onClose) { + onClose(); + return false; + } + return true; + }} + > + + + {items.map((value, index) => ( + + {value} + + ))} + + + + ); +} + +function AppliedTypeFilter() { + const { values, del } = useSearchParams(); + const types = values("types"); + if (types.length === 0) return null; + + return ( + + {(search, setSearch) => ( + }> + } + value={appliedSummary(types)} + onRemove={() => del(["types", "cursor", "direction"])} + variant="secondary/small" + /> + + } + searchValue={search} + clearSearchValue={() => setSearch("")} + /> + )} + + ); +} + +function TaskIdentifierDropdown({ + trigger, + searchValue, + clearSearchValue, + onClose, +}: { + trigger: ReactNode; + searchValue: string; + clearSearchValue: () => void; + onClose?: () => void; +}) { + const [open, setOpen] = useState(); + const { value, replace } = useSearchParams(); + const current = value("taskIdentifiers"); + const [draft, setDraft] = useState(current ?? ""); + + const apply = useCallback(() => { + clearSearchValue(); + replace({ + taskIdentifiers: draft.trim() === "" ? undefined : [draft.trim()], + cursor: undefined, + direction: undefined, + }); + setOpen(false); + }, [clearSearchValue, draft, replace]); + + return ( + + {trigger} + { + if (onClose) { + onClose(); + return false; + } + return true; + }} + className="max-w-[min(32ch,var(--popover-available-width))]" + > +
+
+ + setDraft(e.target.value)} + variant="small" + className="w-[29ch] font-mono" + spellCheck={false} + /> +
+
+ + +
+
+
+
+ ); +} + +function AppliedTaskIdentifierFilter() { + const { values, del } = useSearchParams(); + const taskIdentifiers = values("taskIdentifiers"); + if (taskIdentifiers.length === 0) return null; + + return ( + + {(search, setSearch) => ( + }> + } + value={appliedSummary(taskIdentifiers)} + onRemove={() => del(["taskIdentifiers", "cursor", "direction"])} + variant="secondary/small" + /> + + } + searchValue={search} + clearSearchValue={() => setSearch("")} + /> + )} + + ); +} + +function ExternalIdDropdown({ + trigger, + searchValue, + clearSearchValue, + onClose, +}: { + trigger: ReactNode; + searchValue: string; + clearSearchValue: () => void; + onClose?: () => void; +}) { + const [open, setOpen] = useState(); + const { value, replace } = useSearchParams(); + const current = value("externalId"); + const [draft, setDraft] = useState(current ?? ""); + + const apply = useCallback(() => { + clearSearchValue(); + replace({ + externalId: draft.trim() === "" ? undefined : draft.trim(), + cursor: undefined, + direction: undefined, + }); + setOpen(false); + }, [clearSearchValue, draft, replace]); + + return ( + + {trigger} + { + if (onClose) { + onClose(); + return false; + } + return true; + }} + className="max-w-[min(36ch,var(--popover-available-width))]" + > +
+
+ + setDraft(e.target.value)} + variant="small" + className="w-[33ch] font-mono" + spellCheck={false} + /> +
+
+ + +
+
+
+
+ ); +} + +function AppliedExternalIdFilter() { + const { value, del } = useSearchParams(); + const externalId = value("externalId"); + if (!externalId) return null; + + return ( + + {(search, setSearch) => ( + }> + } + value={externalId} + onRemove={() => del(["externalId", "cursor", "direction"])} + variant="secondary/small" + /> + + } + searchValue={search} + clearSearchValue={() => setSearch("")} + /> + )} + + ); +} + +function TagsDropdown({ + trigger, + searchValue, + clearSearchValue, + onClose, +}: { + trigger: ReactNode; + searchValue: string; + clearSearchValue: () => void; + onClose?: () => void; +}) { + const [open, setOpen] = useState(); + const { values, replace } = useSearchParams(); + const current = values("tags"); + const [draft, setDraft] = useState(current.join(", ")); + + const apply = useCallback(() => { + clearSearchValue(); + const next = draft + .split(/[,\n]/) + .map((t) => t.trim()) + .filter((t) => t.length > 0); + replace({ + tags: next.length === 0 ? undefined : next, + cursor: undefined, + direction: undefined, + }); + setOpen(false); + }, [clearSearchValue, draft, replace]); + + return ( + + {trigger} + { + if (onClose) { + onClose(); + return false; + } + return true; + }} + className="max-w-[min(40ch,var(--popover-available-width))]" + > +
+
+ + setDraft(e.target.value)} + variant="small" + className="w-[37ch] font-mono" + spellCheck={false} + /> + + Comma-separated. Matches sessions with any of these tags. + +
+
+ + +
+
+
+
+ ); +} + +function AppliedTagsFilter() { + const { values, del } = useSearchParams(); + const tags = values("tags"); + if (tags.length === 0) return null; + + return ( + + {(search, setSearch) => ( + }> + } + value={appliedSummary(tags)} + onRemove={() => del(["tags", "cursor", "direction"])} + variant="secondary/small" + /> + + } + searchValue={search} + clearSearchValue={() => setSearch("")} + /> + )} + + ); +} + diff --git a/apps/webapp/app/components/sessions/v1/SessionStatus.tsx b/apps/webapp/app/components/sessions/v1/SessionStatus.tsx new file mode 100644 index 00000000000..a4e17affd83 --- /dev/null +++ b/apps/webapp/app/components/sessions/v1/SessionStatus.tsx @@ -0,0 +1,89 @@ +import { CheckCircleIcon, ClockIcon } from "@heroicons/react/20/solid"; +import assertNever from "assert-never"; +import { type SessionStatus } from "~/services/sessionsRepository/sessionsRepository.server"; +import { cn } from "~/utils/cn"; + +export const allSessionStatuses = ["ACTIVE", "CLOSED", "EXPIRED"] as const satisfies Readonly< + Array +>; + +const descriptions: Record = { + ACTIVE: "The session is open and can receive input or schedule new runs.", + CLOSED: "The session was closed; no further input or runs can be triggered against it.", + EXPIRED: "The session passed its expiry time without being closed explicitly.", +}; + +export function descriptionForSessionStatus(status: SessionStatus): string { + return descriptions[status]; +} + +export function sessionStatusTitle(status: SessionStatus): string { + switch (status) { + case "ACTIVE": + return "Active"; + case "CLOSED": + return "Closed"; + case "EXPIRED": + return "Expired"; + default: + assertNever(status); + } +} + +export function sessionStatusColor(status: SessionStatus): string { + switch (status) { + case "ACTIVE": + return "text-pending"; + case "CLOSED": + return "text-success"; + case "EXPIRED": + return "text-text-dimmed"; + default: + assertNever(status); + } +} + +export function SessionStatusIcon({ + 
status, + className, +}: { + status: SessionStatus; + className: string; +}) { + switch (status) { + case "ACTIVE": + return ( + + + + ); + case "CLOSED": + return ; + case "EXPIRED": + return ; + default: + assertNever(status); + } +} + +export function SessionStatusLabel({ status }: { status: SessionStatus }) { + return {sessionStatusTitle(status)}; +} + +export function SessionStatusCombo({ + status, + className, + iconClassName, +}: { + status: SessionStatus; + className?: string; + iconClassName?: string; +}) { + return ( + + + + + ); +} + diff --git a/apps/webapp/app/components/sessions/v1/SessionsTable.tsx b/apps/webapp/app/components/sessions/v1/SessionsTable.tsx new file mode 100644 index 00000000000..fb83f2d03eb --- /dev/null +++ b/apps/webapp/app/components/sessions/v1/SessionsTable.tsx @@ -0,0 +1,224 @@ +import { ArrowRightIcon } from "@heroicons/react/20/solid"; +import { useLocation, useNavigation } from "@remix-run/react"; +import { formatDuration } from "@trigger.dev/core/v3/utils/durations"; +import { ListBulletIcon } from "~/assets/icons/ListBulletIcon"; +import { MiddleTruncate } from "~/components/primitives/MiddleTruncate"; +import { DateTime } from "~/components/primitives/DateTime"; +import { Paragraph } from "~/components/primitives/Paragraph"; +import { PopoverMenuItem } from "~/components/primitives/Popover"; +import { Spinner } from "~/components/primitives/Spinner"; +import { + Table, + TableBlankRow, + TableBody, + TableCell, + TableCellMenu, + TableHeader, + TableHeaderCell, + TableRow, +} from "~/components/primitives/Table"; +import { SimpleTooltip } from "~/components/primitives/Tooltip"; +import { LiveTimer } from "~/components/runs/v3/LiveTimer"; +import { RunTag } from "~/components/runs/v3/RunTag"; +import { useEnvironment } from "~/hooks/useEnvironment"; +import { useOrganization } from "~/hooks/useOrganizations"; +import { useProject } from "~/hooks/useProject"; +import { + type SessionListItem, + type SessionList, +} from 
"~/presenters/v3/SessionListPresenter.server"; +import { v3RunPath, v3RunsPath, v3SessionPath } from "~/utils/pathBuilder"; +import { + descriptionForSessionStatus, + SessionStatusCombo, + allSessionStatuses, +} from "./SessionStatus"; + +type SessionsTableProps = Pick; + +export function SessionsTable({ sessions, hasFilters }: SessionsTableProps) { + const navigation = useNavigation(); + const location = useLocation(); + const isLoading = + navigation.state !== "idle" && navigation.location?.pathname === location.pathname; + + const organization = useOrganization(); + const project = useProject(); + const environment = useEnvironment(); + + return ( + + + + ID + + {allSessionStatuses.map((status) => ( +
+
+ +
+ + {descriptionForSessionStatus(status)} + +
+ ))} + + } + > + Status +
+ Type + Task + Tags + Created + Duration + + Actions + +
+
+ + {sessions.length === 0 ? ( + +
+ + {hasFilters + ? "No sessions match these filters" + : "No sessions in this environment yet"} + +
+
+ ) : ( + sessions.map((session) => { + const runPath = session.currentRunFriendlyId + ? v3RunPath(organization, project, environment, { + friendlyId: session.currentRunFriendlyId, + }) + : undefined; + + const displayId = session.externalId ?? session.friendlyId; + const sessionPath = v3SessionPath(organization, project, environment, { + friendlyId: session.friendlyId, + }); + const allRunsPath = v3RunsPath(organization, project, environment, { + tags: [`chat:${displayId}`], + }); + + return ( + + +
+ +
+
+ + } + /> + + + {session.type} + + +
+ +
+
+ + {session.tags.length > 0 ? ( +
+ {session.tags.map((tag) => ( + + ))} +
+ ) : ( + + )} +
+ + + + + + + +
+ ); + }) + )} + {isLoading && ( + + Loading… + + )} +
+
+ ); +} + +function SessionDuration({ session }: { session: SessionListItem }) { + // Active sessions tick live; closed/expired sessions freeze at the + // moment they ended (closedAt for explicit closes, expiresAt when the + // TTL ran out without a close call). + const endedAt = + session.status === "CLOSED" + ? session.closedAt + : session.status === "EXPIRED" + ? session.expiresAt + : undefined; + + if (endedAt) { + return <>{formatDuration(new Date(session.createdAt), new Date(endedAt), { style: "short" })}; + } + + return ; +} + +function SessionActionsCell({ + runPath, + allRunsPath, +}: { + runPath?: string; + allRunsPath: string; +}) { + return ( + + {runPath && ( + + )} + + + } + /> + ); +} diff --git a/apps/webapp/app/presenters/RunFilters.server.ts b/apps/webapp/app/presenters/RunFilters.server.ts index ff9f53429eb..44bb4c01f50 100644 --- a/apps/webapp/app/presenters/RunFilters.server.ts +++ b/apps/webapp/app/presenters/RunFilters.server.ts @@ -36,6 +36,7 @@ export async function getRunFiltersFromRequest(request: Request): Promise 0; const hasFilters = + (sources !== undefined && sources.length > 0) || (tasks !== undefined && tasks.length > 0) || (versions !== undefined && versions.length > 0) || hasStatusFilters || @@ -186,6 +190,7 @@ export class NextRunListPresenter { queues, machines, errorId, + taskKinds: sources, page: { size: pageSize, cursor, @@ -250,6 +255,7 @@ export class NextRunListPresenter { name: run.queue.replace("task/", ""), type: run.queue.startsWith("task/") ? "task" : "custom", }, + taskKind: RunAnnotations.safeParse(run.annotations).data?.taskKind ?? 
"STANDARD", }; }), pagination: { diff --git a/apps/webapp/app/presenters/v3/SessionListPresenter.server.ts b/apps/webapp/app/presenters/v3/SessionListPresenter.server.ts new file mode 100644 index 00000000000..684d5d6dab5 --- /dev/null +++ b/apps/webapp/app/presenters/v3/SessionListPresenter.server.ts @@ -0,0 +1,219 @@ +import { type Span } from "@opentelemetry/api"; +import { type ClickHouse } from "@internal/clickhouse"; +import { type PrismaClient, type PrismaClientOrTransaction } from "@trigger.dev/database"; +import { type Direction } from "~/components/ListPagination"; +import { timeFilters } from "~/components/runs/v3/SharedFilters"; +import { findDisplayableEnvironment } from "~/models/runtimeEnvironment.server"; +import { + type SessionStatus, + SessionsRepository, +} from "~/services/sessionsRepository/sessionsRepository.server"; +import { ServiceValidationError } from "~/v3/services/baseService.server"; +import { startActiveSpan } from "~/v3/tracer.server"; + +export type SessionListOptions = { + userId?: string; + projectId: string; + // filters + types?: string[]; + taskIdentifiers?: string[]; + externalId?: string; + tags?: string[]; + statuses?: SessionStatus[]; + period?: string; + from?: number; + to?: number; + // pagination + direction?: Direction; + cursor?: string; + pageSize?: number; +}; + +const DEFAULT_PAGE_SIZE = 25; + +export type SessionList = Awaited>; +export type SessionListItem = SessionList["sessions"][0]; +export type SessionListAppliedFilters = SessionList["filters"]; + +export class SessionListPresenter { + constructor( + private readonly replica: PrismaClientOrTransaction, + private readonly clickhouse: ClickHouse + ) {} + + public async call( + organizationId: string, + environmentId: string, + options: SessionListOptions + ) { + return startActiveSpan( + "SessionListPresenter.call", + (span) => this.#call(organizationId, environmentId, options, span), + { + attributes: { + organizationId, + environmentId, + projectId: 
options.projectId, + }, + } + ); + } + + async #call( + organizationId: string, + environmentId: string, + { + userId, + projectId, + types, + taskIdentifiers, + externalId, + tags, + statuses, + period, + from, + to, + direction = "forward", + cursor, + pageSize = DEFAULT_PAGE_SIZE, + }: SessionListOptions, + rootSpan: Span + ) { + const time = timeFilters({ period, from, to }); + + const hasFilters = + (types !== undefined && types.length > 0) || + (taskIdentifiers !== undefined && taskIdentifiers.length > 0) || + (externalId !== undefined && externalId !== "") || + (tags !== undefined && tags.length > 0) || + (statuses !== undefined && statuses.length > 0) || + !time.isDefault; + + rootSpan.setAttribute("filters.hasFilters", hasFilters); + rootSpan.setAttribute("page.size", pageSize); + if (cursor) rootSpan.setAttribute("page.cursor", cursor); + + const displayableEnvironment = await startActiveSpan( + "SessionListPresenter.findDisplayableEnvironment", + () => findDisplayableEnvironment(environmentId, userId) + ); + if (!displayableEnvironment) { + throw new ServiceValidationError("No environment found"); + } + + const sessionsRepository = new SessionsRepository({ + clickhouse: this.clickhouse, + prisma: this.replica as PrismaClient, + }); + + function clampToNow(date: Date): Date { + const now = new Date(); + return date > now ? now : date; + } + + const { sessions, pagination } = await sessionsRepository.listSessions({ + organizationId, + projectId, + environmentId, + types, + taskIdentifiers, + externalId, + tags, + statuses, + period, + from: time.from ? time.from.getTime() : undefined, + to: time.to ? 
clampToNow(time.to).getTime() : undefined, + page: { + size: pageSize, + cursor, + direction, + }, + }); + + rootSpan.setAttribute("page.count", sessions.length); + + let hasAnySessions = sessions.length > 0; + if (!hasAnySessions) { + const firstSession = await startActiveSpan( + "SessionListPresenter.hasAnySessions", + () => + this.replica.session.findFirst({ + where: { runtimeEnvironmentId: environmentId }, + select: { id: true }, + }) + ); + if (firstSession) { + hasAnySessions = true; + } + } + + // Resolve current-run friendlyIds in one query so each row can link to + // its live run. Status is intentionally not joined yet — that lives in + // ClickHouse and would mean a second query per page; the link itself + // is the value most viewers want first. + const currentRunIds = sessions + .map((s) => s.currentRunId) + .filter((id): id is string => Boolean(id)); + + const currentRuns = await startActiveSpan( + "SessionListPresenter.findCurrentRuns", + async (span) => { + span.setAttribute("currentRunIds.count", currentRunIds.length); + return currentRunIds.length > 0 + ? this.replica.taskRun.findMany({ + where: { id: { in: currentRunIds } }, + select: { id: true, friendlyId: true }, + }) + : []; + } + ); + const runById = new Map(currentRuns.map((r) => [r.id, r] as const)); + + const now = Date.now(); + + return { + sessions: sessions.map((session) => { + const status: SessionStatus = + session.closedAt != null + ? "CLOSED" + : session.expiresAt != null && session.expiresAt.getTime() < now + ? "EXPIRED" + : "ACTIVE"; + + const currentRun = session.currentRunId ? runById.get(session.currentRunId) : undefined; + + return { + id: session.id, + friendlyId: session.friendlyId, + externalId: session.externalId, + type: session.type, + taskIdentifier: session.taskIdentifier, + tags: session.tags ? [...session.tags].sort((a, b) => a.localeCompare(b)) : [], + status, + closedAt: session.closedAt ? 
session.closedAt.toISOString() : undefined, + closedReason: session.closedReason ?? undefined, + expiresAt: session.expiresAt ? session.expiresAt.toISOString() : undefined, + createdAt: session.createdAt.toISOString(), + updatedAt: session.updatedAt.toISOString(), + environment: displayableEnvironment, + currentRunFriendlyId: currentRun?.friendlyId, + }; + }), + pagination: { + next: pagination.nextCursor ?? undefined, + previous: pagination.previousCursor ?? undefined, + }, + filters: { + types: types ?? [], + taskIdentifiers: taskIdentifiers ?? [], + externalId, + tags: tags ?? [], + statuses: statuses ?? [], + from: time.from, + to: time.to, + }, + hasFilters, + hasAnySessions, + }; + } +} diff --git a/apps/webapp/app/presenters/v3/TaskListPresenter.server.ts b/apps/webapp/app/presenters/v3/TaskListPresenter.server.ts index f1635f23375..fc29f5510e8 100644 --- a/apps/webapp/app/presenters/v3/TaskListPresenter.server.ts +++ b/apps/webapp/app/presenters/v3/TaskListPresenter.server.ts @@ -61,6 +61,7 @@ export class TaskListPresenter { const tasks = await this._replica.backgroundWorkerTask.findMany({ where: { workerId: currentWorker.id, + triggerSource: { not: "AGENT" }, }, select: { id: true, diff --git a/apps/webapp/app/presenters/v3/TestPresenter.server.ts b/apps/webapp/app/presenters/v3/TestPresenter.server.ts index af5bb93a7e7..b817bbf155e 100644 --- a/apps/webapp/app/presenters/v3/TestPresenter.server.ts +++ b/apps/webapp/app/presenters/v3/TestPresenter.server.ts @@ -19,15 +19,13 @@ export class TestPresenter extends BasePresenter { const tasks = await this.#getTasks(environmentId, isDev); return { - tasks: tasks.map((task) => { - return { - id: task.id, - taskIdentifier: task.slug, - filePath: task.filePath, - friendlyId: task.friendlyId, - triggerSource: task.triggerSource, - }; - }), + tasks: tasks.map((task) => ({ + id: task.id, + taskIdentifier: task.slug, + filePath: task.filePath, + friendlyId: task.friendlyId, + triggerSource: task.triggerSource, + })), 
}; } @@ -54,10 +52,13 @@ export class TestPresenter extends BasePresenter { SELECT bwt.id, version, slug, "filePath", bwt."friendlyId", bwt."triggerSource" FROM latest_workers JOIN ${sqlDatabaseSchema}."BackgroundWorkerTask" bwt ON bwt."workerId" = latest_workers.id + WHERE bwt."triggerSource" != 'AGENT' ORDER BY slug ASC;`; } else { const currentDeployment = await findCurrentWorkerDeployment({ environmentId: envId }); - return currentDeployment?.worker?.tasks ?? []; + return (currentDeployment?.worker?.tasks ?? []).filter( + (t) => t.triggerSource !== "AGENT" + ); } } } diff --git a/apps/webapp/app/presenters/v3/TestTaskPresenter.server.ts b/apps/webapp/app/presenters/v3/TestTaskPresenter.server.ts index 09abb22639e..d5360cd004a 100644 --- a/apps/webapp/app/presenters/v3/TestTaskPresenter.server.ts +++ b/apps/webapp/app/presenters/v3/TestTaskPresenter.server.ts @@ -373,6 +373,10 @@ export class TestTaskPresenter { ), }; } + case "AGENT": { + // AGENT tasks are filtered out by TestPresenter and shouldn't reach here + return { foundTask: false }; + } default: { return task.triggerSource satisfies never; } diff --git a/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.sessions._index/route.tsx b/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.sessions._index/route.tsx new file mode 100644 index 00000000000..99b0a96b5d1 --- /dev/null +++ b/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.sessions._index/route.tsx @@ -0,0 +1,107 @@ +import { BookOpenIcon } from "@heroicons/react/24/solid"; +import { type MetaFunction } from "@remix-run/react"; +import { type LoaderFunctionArgs } from "@remix-run/server-runtime"; +import { typedjson, useTypedLoaderData } from "remix-typedjson"; +import { ListPagination } from "~/components/ListPagination"; +import { AdminDebugTooltip } from "~/components/admin/debugTooltip"; +import { MainCenteredContainer, PageBody } 
from "~/components/layout/AppLayout"; +import { LinkButton } from "~/components/primitives/Buttons"; +import { NavBar, PageAccessories, PageTitle } from "~/components/primitives/PageHeader"; +import { SessionFilters } from "~/components/sessions/v1/SessionFilters"; +import { SessionsTable } from "~/components/sessions/v1/SessionsTable"; +import { SessionsNone } from "~/components/BlankStatePanels"; +import { $replica } from "~/db.server"; +import { redirectWithErrorMessage } from "~/models/message.server"; +import { findProjectBySlug } from "~/models/project.server"; +import { findEnvironmentBySlug } from "~/models/runtimeEnvironment.server"; +import { getSessionFiltersFromRequest } from "~/presenters/SessionFilters.server"; +import { SessionListPresenter } from "~/presenters/v3/SessionListPresenter.server"; +import { clickhouseClient } from "~/services/clickhouseInstance.server"; +import { requireUserId } from "~/services/session.server"; +import { docsPath, EnvironmentParamSchema } from "~/utils/pathBuilder"; + +export const meta: MetaFunction = () => { + return [ + { + title: `Sessions | Trigger.dev`, + }, + ]; +}; + +export const loader = async ({ request, params }: LoaderFunctionArgs) => { + const userId = await requireUserId(request); + const { projectParam, organizationSlug, envParam } = EnvironmentParamSchema.parse(params); + + const project = await findProjectBySlug(organizationSlug, projectParam, userId); + if (!project) { + return redirectWithErrorMessage("/", request, "Project not found"); + } + + const environment = await findEnvironmentBySlug(project.id, envParam, userId); + if (!environment) { + throw new Error("Environment not found"); + } + + const filters = getSessionFiltersFromRequest(request); + + const presenter = new SessionListPresenter($replica, clickhouseClient); + const list = await presenter.call(project.organizationId, environment.id, { + userId, + projectId: project.id, + statuses: filters.statuses, + types: filters.types, + 
taskIdentifiers: filters.taskIdentifiers, + externalId: filters.externalId, + tags: filters.tags, + period: filters.period, + from: filters.from, + to: filters.to, + cursor: filters.cursor, + direction: filters.direction, + }); + + return typedjson(list); +}; + +export default function Page() { + const list = useTypedLoaderData(); + + return ( + <> + + + + + + Sessions docs + + + + + {!list.hasAnySessions ? ( + + + + ) : ( +
+
+ +
+ +
+
+ +
+ )} +
+ + ); +} diff --git a/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.sessions/route.tsx b/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.sessions/route.tsx new file mode 100644 index 00000000000..f6723ddebaa --- /dev/null +++ b/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.sessions/route.tsx @@ -0,0 +1,10 @@ +import { Outlet } from "@remix-run/react"; +import { PageContainer } from "~/components/layout/AppLayout"; + +export default function Page() { + return ( + + + + ); +} diff --git a/apps/webapp/app/routes/api.v1.deployments.current.ts b/apps/webapp/app/routes/api.v1.deployments.current.ts new file mode 100644 index 00000000000..ed185f41b97 --- /dev/null +++ b/apps/webapp/app/routes/api.v1.deployments.current.ts @@ -0,0 +1,55 @@ +import { json } from "@remix-run/server-runtime"; +import { $replica } from "~/db.server"; +import { createLoaderApiRoute } from "~/services/routeBuilders/apiBuilder.server"; + +export const loader = createLoaderApiRoute( + { + allowJWT: true, + corsStrategy: "none", + authorization: { + action: "read", + resource: () => ({ deployments: "current" }), + superScopes: ["read:deployments", "read:all", "admin"], + }, + findResource: async (_params, auth) => { + const promotion = await $replica.workerDeploymentPromotion.findFirst({ + where: { + environmentId: auth.environment.id, + label: "current", + }, + select: { + deployment: { + select: { + friendlyId: true, + createdAt: true, + shortCode: true, + version: true, + runtime: true, + runtimeVersion: true, + status: true, + deployedAt: true, + git: true, + errorData: true, + }, + }, + }, + }); + + return promotion?.deployment ?? 
null; + }, + }, + async ({ resource: deployment }) => { + return json({ + id: deployment.friendlyId, + createdAt: deployment.createdAt, + shortCode: deployment.shortCode, + version: deployment.version, + runtime: deployment.runtime, + runtimeVersion: deployment.runtimeVersion, + status: deployment.status, + deployedAt: deployment.deployedAt ?? undefined, + git: deployment.git ?? undefined, + error: deployment.errorData ?? undefined, + }); + } +); diff --git a/apps/webapp/app/routes/realtime.v1.sessions.$session.$io.records.ts b/apps/webapp/app/routes/realtime.v1.sessions.$session.$io.records.ts new file mode 100644 index 00000000000..579f6daf8d7 --- /dev/null +++ b/apps/webapp/app/routes/realtime.v1.sessions.$session.$io.records.ts @@ -0,0 +1,97 @@ +import { json } from "@remix-run/server-runtime"; +import { z } from "zod"; +import { $replica } from "~/db.server"; +import { S2RealtimeStreams } from "~/services/realtime/s2realtimeStreams.server"; +import { + canonicalSessionAddressingKey, + isSessionFriendlyIdForm, + resolveSessionByIdOrExternalId, +} from "~/services/realtime/sessions.server"; +import { getRealtimeStreamInstance } from "~/services/realtime/v1StreamsGlobal.server"; +import { createLoaderApiRoute } from "~/services/routeBuilders/apiBuilder.server"; + +const ParamsSchema = z.object({ + session: z.string(), + io: z.enum(["out", "in"]), +}); + +const SearchSchema = z.object({ + // S2 sequence number — same cursor format as the SSE Last-Event-ID + // (the SSE `id:` field on session-channel events is the seq_num, + // stringified). Records returned have `seqNum > afterEventId`. + afterEventId: z.string().regex(/^\d+$/).optional(), +}); + +// GET: non-SSE, `wait=0` drain of a session channel. Returns a JSON body +// `{ records: StreamRecord[] }` with whatever records exist after +// `afterEventId` (or from the head if absent) and closes immediately. 
+// +// Used by the SDK's `replaySessionOutTail` at run boot — the SSE long-poll +// path costs ~1s per fresh chat (the timeout duration) regardless of stream +// content, which is unacceptable on the first-message TTFC budget. This +// route gives the agent a cheap "what's there right now" peek instead. +// +// Same row-optional addressing as the SSE GET route in `…$io.ts`: we +// resolve via `resolveSessionByIdOrExternalId` and only 404 for opaque +// `session_*` friendlyIds (which must reference a real row). External-id +// form falls through with `row: null` so the boot path doesn't 404 on a +// fresh chat that hasn't written its first chunk yet. +const loader = createLoaderApiRoute( + { + params: ParamsSchema, + searchParams: SearchSchema, + allowJWT: true, + corsStrategy: "all", + findResource: async (params, auth) => { + const row = await resolveSessionByIdOrExternalId( + $replica, + auth.environment.id, + params.session + ); + if (!row && isSessionFriendlyIdForm(params.session)) { + return undefined; + } + return { + row, + addressingKey: canonicalSessionAddressingKey(row, params.session), + }; + }, + authorization: { + action: "read", + resource: ({ row, addressingKey }) => { + const ids = new Set([addressingKey]); + if (row) { + ids.add(row.friendlyId); + if (row.externalId) ids.add(row.externalId); + } + return { sessions: [...ids] }; + }, + superScopes: ["read:sessions", "read:all", "admin"], + }, + }, + async ({ params, authentication, resource, searchParams }) => { + const realtimeStream = getRealtimeStreamInstance(authentication.environment, "v2", { + session: resource.row, + organization: resource.row ? null : authentication.environment.organization, + }); + + if (!(realtimeStream instanceof S2RealtimeStreams)) { + return new Response("Session channels require the S2 realtime backend", { + status: 501, + }); + } + + const afterSeqNum = + searchParams.afterEventId !== undefined ? 
Number(searchParams.afterEventId) : undefined; + + const records = await realtimeStream.readSessionStreamRecords( + resource.addressingKey, + params.io, + afterSeqNum + ); + + return json({ records }); + } +); + +export { loader }; diff --git a/apps/webapp/app/routes/realtime.v1.streams.$runId.$streamId.ts b/apps/webapp/app/routes/realtime.v1.streams.$runId.$streamId.ts index 39935b9de1f..d6470794a73 100644 --- a/apps/webapp/app/routes/realtime.v1.streams.$runId.$streamId.ts +++ b/apps/webapp/app/routes/realtime.v1.streams.$runId.$streamId.ts @@ -87,7 +87,7 @@ export const loader = createLoaderApiRoute( allowJWT: true, corsStrategy: "all", findResource: async (params, auth) => { - return $replica.taskRun.findFirst({ + const run = await $replica.taskRun.findFirst({ where: { friendlyId: params.runId, runtimeEnvironmentId: auth.environment.id, @@ -106,6 +106,7 @@ export const loader = createLoaderApiRoute( }, }, }); + return run; }, authorization: { action: "read", diff --git a/apps/webapp/app/routes/resources.sessions.$sessionParam.close.ts b/apps/webapp/app/routes/resources.sessions.$sessionParam.close.ts new file mode 100644 index 00000000000..27ffec56716 --- /dev/null +++ b/apps/webapp/app/routes/resources.sessions.$sessionParam.close.ts @@ -0,0 +1,98 @@ +import { parse } from "@conform-to/zod"; +import { type ActionFunction, json } from "@remix-run/node"; +import { z } from "zod"; +import { $replica, prisma } from "~/db.server"; +import { redirectWithErrorMessage, redirectWithSuccessMessage } from "~/models/message.server"; +import { resolveSessionByIdOrExternalId } from "~/services/realtime/sessions.server"; +import { logger } from "~/services/logger.server"; +import { requireUserId } from "~/services/session.server"; + +export const closeSessionSchema = z.object({ + redirectUrl: z.string(), + environmentId: z.string(), + reason: z.string().optional(), +}); + +const ParamSchema = z.object({ + sessionParam: z.string(), +}); + +export const action: ActionFunction 
= async ({ request, params }) => { + const userId = await requireUserId(request); + const { sessionParam } = ParamSchema.parse(params); + + const formData = await request.formData(); + const submission = parse(formData, { schema: closeSessionSchema }); + + if (!submission.value) { + return json(submission); + } + + const { redirectUrl, environmentId, reason } = submission.value; + const trimmedReason = reason?.trim(); + const closedReason = + trimmedReason && trimmedReason.length > 0 ? trimmedReason : "closed-from-dashboard"; + + try { + // Confirm the user belongs to the org that owns this environment, then + // resolve the session by friendlyId or externalId scoped to that env. + const environment = await $replica.runtimeEnvironment.findFirst({ + where: { + id: environmentId, + organization: { members: { some: { userId } } }, + }, + select: { id: true }, + }); + + if (!environment) { + submission.error = { environmentId: ["Environment not found"] }; + return json(submission); + } + + const session = await resolveSessionByIdOrExternalId( + $replica, + environment.id, + sessionParam + ); + + if (!session) { + submission.error = { sessionParam: ["Session not found"] }; + return json(submission); + } + + if (session.closedAt) { + // Already closed — no-op, but redirect with a friendly message so the + // UI doesn't look like it did nothing. + return redirectWithSuccessMessage(redirectUrl, request, `Session already closed`); + } + + // Conditional update mirrors the public API: two concurrent closes race + // through the read but only one wins this update. 
+ await prisma.session.updateMany({ + where: { id: session.id, closedAt: null }, + data: { + closedAt: new Date(), + closedReason, + }, + }); + + return redirectWithSuccessMessage(redirectUrl, request, `Closed session`); + } catch (error) { + if (error instanceof Error) { + logger.error("Failed to close session", { + error: { name: error.name, message: error.message, stack: error.stack }, + }); + return redirectWithErrorMessage( + redirectUrl, + request, + `Failed to close session, ${error.message}` + ); + } + logger.error("Failed to close session", { error }); + return redirectWithErrorMessage( + redirectUrl, + request, + `Failed to close session, ${JSON.stringify(error)}` + ); + } +}; diff --git a/apps/webapp/app/routes/runs.$runParam.ts b/apps/webapp/app/routes/runs.$runParam.ts index 4a8d7a12d32..b472d7ae8f4 100644 --- a/apps/webapp/app/routes/runs.$runParam.ts +++ b/apps/webapp/app/routes/runs.$runParam.ts @@ -28,6 +28,7 @@ export async function loader({ params, request }: LoaderFunctionArgs) { }, }, select: { + spanId: true, runtimeEnvironment: { select: { slug: true, @@ -57,11 +58,20 @@ export async function loader({ params, request }: LoaderFunctionArgs) { ); } + // Preserve existing search params from the request, add span if not already set + const url = new URL(request.url); + const searchParams = url.searchParams; + + if (!searchParams.has("span") && run.spanId) { + searchParams.set("span", run.spanId); + } + const path = v3RunPath( { slug: run.project.organization.slug }, { slug: run.project.slug }, { slug: run.runtimeEnvironment.slug }, - { friendlyId: runParam } + { friendlyId: runParam }, + searchParams ); return redirect(path); diff --git a/apps/webapp/app/runEngine/concerns/queues.server.ts b/apps/webapp/app/runEngine/concerns/queues.server.ts index 136c3da3b9c..ce25696d1b8 100644 --- a/apps/webapp/app/runEngine/concerns/queues.server.ts +++ b/apps/webapp/app/runEngine/concerns/queues.server.ts @@ -79,6 +79,7 @@ export class DefaultQueueManager 
implements QueueManager { let queueName: string; let lockedQueueId: string | undefined; let taskTtl: string | null | undefined; + let taskKind: string | undefined; // Determine queue name based on lockToVersion and provided options if (lockedBackgroundWorker) { @@ -106,19 +107,26 @@ export class DefaultQueueManager implements QueueManager { queueName = specifiedQueue.name; lockedQueueId = specifiedQueue.id; - // Only fetch task for TTL if caller didn't provide a per-trigger TTL - if (request.body.options?.ttl === undefined) { - const lockedTask = await this.replicaPrisma.backgroundWorkerTask.findFirst({ - where: { - workerId: lockedBackgroundWorker.id, - runtimeEnvironmentId: request.environment.id, - slug: request.taskId, - }, - select: { ttl: true }, - }); + // Always fetch the task so we can resolve `triggerSource` (which + // becomes `taskKind` on annotations and replicates to ClickHouse). + // Without this, AGENT/SCHEDULED runs triggered with + // `lockToVersion` + a queue override would be annotated as + // STANDARD and disappear from the run-list "Source" filter. + // `ttl` is read from the same row but only used when the caller + // didn't specify a per-trigger TTL. 
+ const lockedTask = await this.replicaPrisma.backgroundWorkerTask.findFirst({ + where: { + workerId: lockedBackgroundWorker.id, + runtimeEnvironmentId: request.environment.id, + slug: request.taskId, + }, + select: { ttl: true, triggerSource: true }, + }); + if (request.body.options?.ttl === undefined) { taskTtl = lockedTask?.ttl; } + taskKind = lockedTask?.triggerSource; } else { // No queue override - fetch task with queue to get both default queue and TTL const lockedTask = await this.replicaPrisma.backgroundWorkerTask.findFirst({ @@ -158,6 +166,7 @@ export class DefaultQueueManager implements QueueManager { // Use the task's default queue name queueName = lockedTask.queue.name; lockedQueueId = lockedTask.queue.id; + taskKind = lockedTask.triggerSource; } } else { // Task is not locked to a specific version, use regular logic @@ -172,6 +181,7 @@ export class DefaultQueueManager implements QueueManager { const taskInfo = await this.getTaskQueueInfo(request); queueName = taskInfo.queueName; taskTtl = taskInfo.taskTtl; + taskKind = taskInfo.taskKind; } // Sanitize the final determined queue name once @@ -188,12 +198,13 @@ export class DefaultQueueManager implements QueueManager { queueName, lockedQueueId, taskTtl, + taskKind, }; } private async getTaskQueueInfo( request: TriggerTaskRequest - ): Promise<{ queueName: string; taskTtl?: string | null }> { + ): Promise<{ queueName: string; taskTtl?: string | null; taskKind?: string | undefined }> { const { taskId, environment, body } = request; const { queue } = body.options ?? 
{}; @@ -228,10 +239,10 @@ export class DefaultQueueManager implements QueueManager { runtimeEnvironmentId: environment.id, slug: taskId, }, - select: { ttl: true }, + select: { ttl: true, triggerSource: true }, }); - return { queueName: overriddenQueueName, taskTtl: task?.ttl }; + return { queueName: overriddenQueueName, taskTtl: task?.ttl, taskKind: task?.triggerSource }; } const task = await this.replicaPrisma.backgroundWorkerTask.findFirst({ @@ -261,10 +272,10 @@ export class DefaultQueueManager implements QueueManager { queueConfig: task.queueConfig, }); - return { queueName: defaultQueueName, taskTtl: task.ttl }; + return { queueName: defaultQueueName, taskTtl: task.ttl, taskKind: task.triggerSource }; } - return { queueName: task.queue.name ?? defaultQueueName, taskTtl: task.ttl }; + return { queueName: task.queue.name ?? defaultQueueName, taskTtl: task.ttl, taskKind: task.triggerSource }; } async validateQueueLimits( diff --git a/apps/webapp/app/runEngine/services/triggerTask.server.ts b/apps/webapp/app/runEngine/services/triggerTask.server.ts index 445e0eb155a..bbfdc3956c2 100644 --- a/apps/webapp/app/runEngine/services/triggerTask.server.ts +++ b/apps/webapp/app/runEngine/services/triggerTask.server.ts @@ -185,7 +185,7 @@ export class RunEngineTriggerTaskService { if (debounceDelayError || !debounceDelayUntil) { throw new ServiceValidationError( `Invalid debounce delay: ${body.options.debounce.delay}. ` + - `Supported formats: {number}s, {number}m, {number}h, {number}d, {number}w` + `Supported formats: {number}s, {number}m, {number}h, {number}d, {number}w` ); } } @@ -193,11 +193,11 @@ export class RunEngineTriggerTaskService { // Get parent run if specified const parentRun = body.options?.parentRunId ? 
await this.prisma.taskRun.findFirst({ - where: { - id: RunId.fromFriendlyId(body.options.parentRunId), - runtimeEnvironmentId: environment.id, - }, - }) + where: { + id: RunId.fromFriendlyId(body.options.parentRunId), + runtimeEnvironmentId: environment.id, + }, + }) : undefined; // Validate parent run @@ -231,21 +231,21 @@ export class RunEngineTriggerTaskService { const lockedToBackgroundWorker = body.options?.lockToVersion ? await this.prisma.backgroundWorker.findFirst({ - where: { - projectId: environment.projectId, - runtimeEnvironmentId: environment.id, - version: body.options?.lockToVersion, - }, - select: { - id: true, - version: true, - sdkVersion: true, - cliVersion: true, - }, - }) + where: { + projectId: environment.projectId, + runtimeEnvironmentId: environment.id, + version: body.options?.lockToVersion, + }, + select: { + id: true, + version: true, + sdkVersion: true, + cliVersion: true, + }, + }) : undefined; - const { queueName, lockedQueueId, taskTtl } = + const { queueName, lockedQueueId, taskTtl, taskKind } = await this.queueConcern.resolveQueueProperties( triggerRequest, lockedToBackgroundWorker ?? undefined @@ -281,10 +281,10 @@ export class RunEngineTriggerTaskService { const metadataPacket = body.options?.metadata ? handleMetadataPacket( - body.options?.metadata, - body.options?.metadataType ?? "application/json", - this.metadataMaximumSize - ) + body.options?.metadata, + body.options?.metadataType ?? "application/json", + this.metadataMaximumSize + ) : undefined; const tags = ( @@ -313,6 +313,7 @@ export class RunEngineTriggerTaskService { triggerAction, rootTriggerSource: parentAnnotations?.rootTriggerSource ?? triggerSource, rootScheduleId: parentAnnotations?.rootScheduleId || options.scheduleId || undefined, + taskKind: taskKind ?? "STANDARD", }; try { @@ -369,9 +370,9 @@ export class RunEngineTriggerTaskService { rootTaskRunId: parentRun?.rootTaskRunId ?? parentRun?.id, batch: options?.batchId ? 
{ - id: options.batchId, - index: options.batchIndex ?? 0, - } + id: options.batchId, + index: options.batchIndex ?? 0, + } : undefined, resumeParentOnCompletion: body.options?.resumeParentOnCompletion, depth, @@ -402,26 +403,26 @@ export class RunEngineTriggerTaskService { onDebounced: body.options?.debounce && body.options?.resumeParentOnCompletion ? async ({ existingRun, waitpoint, debounceKey }) => { - return await this.traceEventConcern.traceDebouncedRun( - triggerRequest, - parentRun?.taskEventStore, - { - existingRun, - debounceKey, - incomplete: waitpoint.status === "PENDING", - isError: waitpoint.outputIsError, - }, - async (spanEvent) => { - const spanId = - options?.parentAsLinkType === "replay" - ? spanEvent.spanId - : spanEvent.traceparent?.spanId + return await this.traceEventConcern.traceDebouncedRun( + triggerRequest, + parentRun?.taskEventStore, + { + existingRun, + debounceKey, + incomplete: waitpoint.status === "PENDING", + isError: waitpoint.outputIsError, + }, + async (spanEvent) => { + const spanId = + options?.parentAsLinkType === "replay" + ? spanEvent.spanId + : spanEvent.traceparent?.spanId ? 
`${spanEvent.traceparent.spanId}:${spanEvent.spanId}` : spanEvent.spanId; - return spanId; - } - ); - } + return spanId; + } + ); + } : undefined, }, this.prisma diff --git a/apps/webapp/app/runEngine/types.ts b/apps/webapp/app/runEngine/types.ts index d5e61d01889..c0c5de1d2fd 100644 --- a/apps/webapp/app/runEngine/types.ts +++ b/apps/webapp/app/runEngine/types.ts @@ -37,18 +37,19 @@ export type TriggerTaskResult = { export type QueueValidationResult = | { - ok: true; - } + ok: true; + } | { - ok: false; - maximumSize: number; - queueSize: number; - }; + ok: false; + maximumSize: number; + queueSize: number; + }; export type QueueProperties = { queueName: string; lockedQueueId?: string; taskTtl?: string | null; + taskKind?: string; }; export type LockedBackgroundWorker = Pick< @@ -98,22 +99,22 @@ export interface ParentRunValidationParams { export type ValidationResult = | { - ok: true; - } + ok: true; + } | { - ok: false; - error: Error; - }; + ok: false; + error: Error; + }; export type EntitlementValidationResult = | { - ok: true; - plan?: ReportUsagePlan; - } + ok: true; + plan?: ReportUsagePlan; + } | { - ok: false; - error: Error; - }; + ok: false; + error: Error; + }; export interface TriggerTaskValidator { validateTags(params: TagValidationParams): ValidationResult; diff --git a/apps/webapp/app/services/apiRateLimit.server.ts b/apps/webapp/app/services/apiRateLimit.server.ts index 8f40da009a4..3618806fce7 100644 --- a/apps/webapp/app/services/apiRateLimit.server.ts +++ b/apps/webapp/app/services/apiRateLimit.server.ts @@ -63,6 +63,13 @@ export const apiRateLimiter = authorizationRateLimitMiddleware({ /^\/api\/v1\/runs\/[^\/]+\/attempts$/, // /api/v1/runs/$runFriendlyId/attempts /^\/api\/v1\/waitpoints\/tokens\/[^\/]+\/callback\/[^\/]+$/, // /api/v1/waitpoints/tokens/$waitpointFriendlyId/callback/$hash /^\/api\/v\d+\/deployments/, // /api/v{1,2,3,n}/deployments/* + // Internal SDK plumbing — packets are presigned-URL handshakes for + // payload uploads (v2 
PUT) and downloads (v1 GET), authenticated via + // run-scoped JWT, called once per task/turn boundary by the runtime. + // Same shape as `/api/v1/runs/$runFriendlyId/attempts` above; not a + // customer-facing surface so customer rate limits shouldn't apply. + /^\/api\/v1\/packets\//, + /^\/api\/v2\/packets\//, ], log: { rejections: env.API_RATE_LIMIT_REJECTION_LOGS_ENABLED === "1", diff --git a/apps/webapp/app/services/realtime/mintRunToken.server.ts b/apps/webapp/app/services/realtime/mintRunToken.server.ts new file mode 100644 index 00000000000..2cdc4316e66 --- /dev/null +++ b/apps/webapp/app/services/realtime/mintRunToken.server.ts @@ -0,0 +1,41 @@ +import { generateJWT as internal_generateJWT } from "@trigger.dev/core/v3"; +import { extractJwtSigningSecretKey } from "./jwtAuth.server"; + +type Environment = Parameters<typeof extractJwtSigningSecretKey>[0]; + +export type MintRunTokenOptions = { + /** Include the input-stream write scope (needed for steering messages from the playground). */ + includeInputStreamWrite?: boolean; + /** Token expiration. Defaults to "1h". */ + expirationTime?: string; +}; + +/** + * Mint a run-scoped public access token (JWT) for browser subscription to a + * run's realtime streams. + * + * Used by: + * - The playground action to give a freshly triggered chat session a token. + * - The run details page to let the agent view subscribe to the chat stream + * of an existing run (read-only). + */ +export async function mintRunToken( + environment: Environment, + runFriendlyId: string, + options: MintRunTokenOptions = {} +): Promise<string> { + const scopes = [`read:runs:${runFriendlyId}`]; + if (options.includeInputStreamWrite) { + scopes.push(`write:inputStreams:${runFriendlyId}`); + } + + return internal_generateJWT({ + secretKey: extractJwtSigningSecretKey(environment), + payload: { + sub: environment.id, + pub: true, + scopes, + }, + expirationTime: options.expirationTime ?? 
"1h", + }); +} diff --git a/apps/webapp/app/services/realtime/s2realtimeStreams.server.ts b/apps/webapp/app/services/realtime/s2realtimeStreams.server.ts index 0295d5a58b6..4c735d21d46 100644 --- a/apps/webapp/app/services/realtime/s2realtimeStreams.server.ts +++ b/apps/webapp/app/services/realtime/s2realtimeStreams.server.ts @@ -441,8 +441,16 @@ export class S2RealtimeStreams implements StreamResponder, StreamIngestor { // ---------- Internals: S2 REST ---------- private async s2Append(stream: string, body: S2AppendInput): Promise<S2AppendAck> { - // POST /v1/streams/{stream}/records (JSON) - const res = await fetch(`${this.baseUrl}/streams/${encodeURIComponent(stream)}/records`, { + // POST /v1/streams/{stream}/records (JSON). + // + // Retries transient failures (network errors and 5xx) up to 3 times with + // exponential backoff. Undici's "fetch failed" errors observed locally + // are pre-connection (DNS/TCP) so the request never reaches S2, making + // retry safe — the alternative is a 500 surfacing to the SDK transport, + // which then retries the whole `/in/append` round-trip and pollutes + // logs. 4xx are not retried (genuine client errors). + const url = `${this.baseUrl}/streams/${encodeURIComponent(stream)}/records`; + const init: RequestInit = { method: "POST", headers: { Authorization: `Bearer ${this.token}`, @@ -451,12 +459,60 @@ export class S2RealtimeStreams implements StreamResponder, StreamIngestor { "S2-Basin": this.basin, }, body: JSON.stringify(body), - }); - if (!res.ok) { - const text = await res.text().catch(() => ""); - throw new Error(`S2 append failed: ${res.status} ${res.statusText} ${text}`); + }; + + const maxAttempts = 3; + const backoffsMs = [100, 250, 600]; + let lastError: unknown; + + for (let attempt = 0; attempt < maxAttempts; attempt++) { + // The `try` only wraps `fetch` — once we have a Response we handle status + // outside the catch, so a 4xx throw can't be swallowed and retried. 
+ let res: Response | undefined; + try { + res = await fetch(url, init); + } catch (err) { + lastError = err; + } + + if (res) { + if (res.ok) { + return (await res.json()) as S2AppendAck; + } + const text = await res.text().catch(() => ""); + const httpError = new Error( + `S2 append failed: ${res.status} ${res.statusText} ${text}` + ); + if (res.status >= 400 && res.status < 500) { + // 4xx — caller-side problem (auth, malformed body, closed stream). + // Retrying won't help. + throw httpError; + } + // 5xx — retryable. + lastError = httpError; + } + + const isLastAttempt = attempt === maxAttempts - 1; + const diagnostics = describeFetchError(lastError); + if (isLastAttempt) { + this.logger.error("S2 append failed after retries", { + stream, + attempts: maxAttempts, + ...diagnostics, + }); + break; + } + + this.logger.warn("S2 append transient failure, retrying", { + stream, + attempt: attempt + 1, + nextDelayMs: backoffsMs[attempt], + ...diagnostics, + }); + await new Promise((resolve) => setTimeout(resolve, backoffsMs[attempt])); } - return (await res.json()) as S2AppendAck; + + throw lastError instanceof Error ? lastError : new Error(String(lastError)); } private async getS2AccessToken(id: string): Promise<string> { @@ -560,3 +616,40 @@ export class S2RealtimeStreams implements StreamResponder, StreamIngestor { return Number.isFinite(n) && n >= 0 ? n + 1 : undefined; } } + +// Pulls the underlying network error out of undici's generic "fetch failed". +// undici sets `error.cause` to either a SystemError-shaped object with `code` +// (e.g. `ECONNRESET`, `UND_ERR_SOCKET`, `ETIMEDOUT`), `errno`, and `syscall`, +// or — for happy-eyeballs / multi-address connect attempts — an +// `AggregateError` whose `errors[]` each carry their own code. Surfacing +// those tells us whether failures are pre-connection (DNS / TCP), mid-stream +// socket resets, or genuine S2 server errors. 
+function describeFetchError(err: unknown): Record<string, unknown> { + if (!(err instanceof Error)) { + return { error: String(err) }; + } + const out: Record<string, unknown> = { + error: err.message, + name: err.name, + }; + const cause = (err as { cause?: unknown }).cause; + if (cause && typeof cause === "object") { + const c = cause as Record<string, unknown>; + if (typeof c.code === "string") out.causeCode = c.code; + if (typeof c.errno === "number" || typeof c.errno === "string") out.causeErrno = c.errno; + if (typeof c.syscall === "string") out.causeSyscall = c.syscall; + if (typeof c.message === "string") out.causeMessage = c.message; + if (Array.isArray(c.errors)) { + out.causeErrors = c.errors + .filter((e: unknown): e is Error => e instanceof Error) + .map((e) => ({ + message: e.message, + code: (e as { code?: unknown }).code, + syscall: (e as { syscall?: unknown }).syscall, + address: (e as { address?: unknown }).address, + port: (e as { port?: unknown }).port, + })); + } + } + return out; +} diff --git a/apps/webapp/app/services/realtime/sessionRunManager.server.ts b/apps/webapp/app/services/realtime/sessionRunManager.server.ts index 58513460b14..0c2e5765455 100644 --- a/apps/webapp/app/services/realtime/sessionRunManager.server.ts +++ b/apps/webapp/app/services/realtime/sessionRunManager.server.ts @@ -40,10 +52,22 @@ type EnsureRunForSessionParams = { /** * Session row to operate on. Caller is responsible for the env match — * we don't re-check `runtimeEnvironmentId` against `environment.id`. + * + * `friendlyId` is used to pre-populate `payload.sessionId` on the new + * run so the agent's `chat.agent` boot path can attach to `session.in/.out` + * without a control-plane round-trip. `currentRunId` is also forwarded + * as `payload.previousRunId` (with `continuation: true`) when the prior + * run is dead, so the agent's boot gate triggers snapshot.read + replay + * instead of treating the run as a fresh chat. 
*/ session: Pick< Session, - "id" | "taskIdentifier" | "triggerConfig" | "currentRunId" | "currentRunVersion" + | "id" + | "friendlyId" + | "taskIdentifier" + | "triggerConfig" + | "currentRunId" + | "currentRunVersion" >; environment: AuthenticatedEnvironment; reason: EnsureRunReason; @@ -97,20 +109,50 @@ export async function ensureRunForSession( } // 1. Probe currentRunId. + let priorDeadRunFriendlyId: string | undefined; if (session.currentRunId) { - const status = await getRunStatus(session.currentRunId); - if (status && !isFinalRunStatus(status)) { + const probe = await getRunStatusAndFriendlyId(session.currentRunId); + if (probe && !isFinalRunStatus(probe.status)) { return { runId: session.currentRunId, triggered: false }; } + // Either the row vanished (probe null) or its status is final. Either + // way the prior run isn't going to consume new appends — but the + // session may still hold conversation state on `session.out` and an + // S3 snapshot keyed on `session.friendlyId`. Forward the prior run's + // public-form id (friendlyId — same shape as `ctx.run.id`) to the + // agent as `previousRunId` so its boot gate flips + // `couldHavePriorState` and replays the persisted state instead of + // treating this as a fresh chat. See `chat.agent`'s boot orchestration + // in `packages/trigger-sdk/src/v3/ai.ts`. Falls back to the cuid on + // probe miss (rare — replica miss on a row we just read) so the + // continuation flag still propagates with degraded id fidelity. + priorDeadRunFriendlyId = probe?.friendlyId ?? session.currentRunId; } - // 2. Validate config + trigger upfront. + // 2. Validate config + trigger upfront. Continuation overrides + // (`continuation`, `previousRunId`) are derived from session state above + // and merged AFTER caller-supplied overrides — caller can't accidentally + // unset them on a session that has had a prior run, but can still + // override `trigger`/`metadata` etc. 
`sessionId` is always set so the + // agent doesn't need a control-plane round-trip to look up the session + // friendlyId from `payload.chatId`. + const continuationOverrides: Record<string, unknown> = { + sessionId: session.friendlyId, + ...(priorDeadRunFriendlyId !== undefined + ? { continuation: true, previousRunId: priorDeadRunFriendlyId } + : {}), + }; + const mergedPayloadOverrides: Record<string, unknown> = { + ...(payloadOverrides ?? {}), + ...continuationOverrides, + }; + const config = SessionTriggerConfigSchema.parse(session.triggerConfig); const triggered = await triggerSessionRun({ session, config, environment, - payloadOverrides, + payloadOverrides: mergedPayloadOverrides, }); // 3. Try to claim the slot atomically. @@ -161,6 +203,7 @@ where: { id: session.id }, select: { id: true, + friendlyId: true, taskIdentifier: true, triggerConfig: true, currentRunId: true, @@ -175,8 +218,8 @@ } if (fresh.currentRunId) { - const status = await getRunStatus(fresh.currentRunId); - if (status && !isFinalRunStatus(status)) { + const probe = await getRunStatusAndFriendlyId(fresh.currentRunId); + if (probe && !isFinalRunStatus(probe.status)) { return { runId: fresh.currentRunId, triggered: false }; } } @@ -223,6 +266,9 @@ async function triggerSessionRun(params: { ...(config.queue ? { queue: { name: config.queue } } : {}), ...(config.tags ? { tags: config.tags } : {}), ...(config.maxAttempts !== undefined ? { maxAttempts: config.maxAttempts } : {}), + ...(config.maxDuration !== undefined ? { maxDuration: config.maxDuration } : {}), + ...(config.lockToVersion ? { lockToVersion: config.lockToVersion } : {}), + ...(config.region ? { region: config.region } : {}), }, }; @@ -242,15 +288,32 @@ } type SwapSessionRunParams = { + /** + * Session row to swap. 
`friendlyId` is forwarded as `payload.sessionId` + * on the new run so the agent attaches to `session.in/.out` without a + * control-plane round-trip (same convention as + * {@link EnsureRunForSessionParams}). + */ session: Pick< Session, - "id" | "taskIdentifier" | "triggerConfig" | "currentRunId" | "currentRunVersion" + | "id" + | "friendlyId" + | "taskIdentifier" + | "triggerConfig" + | "currentRunId" + | "currentRunVersion" >; /** * The run requesting the swap. Optimistic claim requires * `Session.currentRunId === callingRunId` so the swap can't clobber * a run triggered out-of-band (e.g. a parallel `.in/append` probe * that already replaced the dead run). + * + * Also forwarded as `payload.previousRunId` on the new run alongside + * `continuation: true` — every swap is a continuation by construction + * (`chat.requestUpgrade` / `chat.endRun` deliberately hand off prior + * conversation state to a new run), so the agent's boot gate flips + * `couldHavePriorState` and replays the snapshot + session.out tail. */ callingRunId: string; environment: AuthenticatedEnvironment; @@ -285,12 +348,31 @@ export async function swapSessionRun( ): Promise { const { session, callingRunId, environment, reason, payloadOverrides } = params; + // `callingRunId` is the internal cuid (`Session.currentRunId` stores + // cuid; the route handler resolves the wire's friendlyId before passing + // it here). The agent's `previousRunId` is customer-visible and must + // match the public `run_*` form exposed via `ctx.run.id` — resolve + // before forwarding. + const callingRunFriendlyId = await resolveRunFriendlyId(callingRunId); + + // Continuation overrides — unconditionally set on swap. Unlike + // `ensureRunForSession`, there's no dead-run-detection branch here: + // every swap is a deliberate handoff from `callingRunId` (which owned + // prior conversation state) to a fresh run. Merged AFTER caller-supplied + // overrides so a caller can't accidentally unset them. 
+ const mergedPayloadOverrides: Record<string, unknown> = { + ...(payloadOverrides ?? {}), + sessionId: session.friendlyId, + continuation: true, + previousRunId: callingRunFriendlyId, + }; + const config = SessionTriggerConfigSchema.parse(session.triggerConfig); const triggered = await triggerSessionRun({ session, config, environment, - payloadOverrides, + payloadOverrides: mergedPayloadOverrides, }); const claim = await prisma.session.updateMany({ @@ -347,14 +429,38 @@ }; } -async function getRunStatus(runId: string): Promise<TaskRunStatus | null> { +async function getRunStatusAndFriendlyId( + runId: string +): Promise<{ status: TaskRunStatus; friendlyId: string } | null> { // Use the read replica — this is a hot-path probe and stale-by-ms is // fine. The append handler re-checks if it ends up reusing the runId. + // `friendlyId` is fetched alongside `status` so the dead-run-detection + // branch in `ensureRunForSession` can forward the public-form id as + // `payload.previousRunId` without a second read. `Session.currentRunId` + // stores the internal cuid; the agent's wire / customer hooks expose + // the friendlyId via `ctx.run.id`, so consistency matters. + const row = await $replica.taskRun.findFirst({ + where: { id: runId }, + select: { status: true, friendlyId: true }, + }); + return row ?? null; +} + +/** + * Resolve a TaskRun cuid to its friendlyId. Used by `swapSessionRun` to + * forward the calling run's public-form id as `payload.previousRunId` on + * the new run. Falls back to the cuid on lookup miss so the swap doesn't + * fail just because the read replica hasn't caught up — the agent only + * uses `previousRunId` for customer-visible bookkeeping (e.g. + * `runs.retrieve(previousRunId)`), so a stale-but-non-null value is + * acceptable degraded behavior. 
+ */ +async function resolveRunFriendlyId(runId: string): Promise<string> { const row = await $replica.taskRun.findFirst({ where: { id: runId }, - select: { status: true }, + select: { friendlyId: true }, }); - return row?.status ?? null; + return row?.friendlyId ?? runId; } async function cancelLostRaceRun( diff --git a/apps/webapp/app/services/runsReplicationService.server.ts b/apps/webapp/app/services/runsReplicationService.server.ts index 7930c05481f..167564572eb 100644 --- a/apps/webapp/app/services/runsReplicationService.server.ts +++ b/apps/webapp/app/services/runsReplicationService.server.ts @@ -921,6 +921,7 @@ export class RunsReplicationService { run.maxDurationInSeconds ?? null, // max_duration_in_seconds annotations?.triggerSource ?? "", // trigger_source annotations?.rootTriggerSource ?? "", // root_trigger_source + annotations?.taskKind ?? "", // task_kind run.isWarmStart ?? null, // is_warm_start ]; } } diff --git a/apps/webapp/app/services/runsRepository/clickhouseRunsRepository.server.ts b/apps/webapp/app/services/runsRepository/clickhouseRunsRepository.server.ts index 1368279e63d..49725d2cefb 100644 --- a/apps/webapp/app/services/runsRepository/clickhouseRunsRepository.server.ts +++ b/apps/webapp/app/services/runsRepository/clickhouseRunsRepository.server.ts @@ -151,6 +151,7 @@ metadataType: true, machinePreset: true, queue: true, + annotations: true, }, }); @@ -334,4 +335,22 @@ function applyRunFiltersToQueryBuilder( errorFingerprint: ErrorId.toId(options.errorId), }); } + + if (options.taskKinds && options.taskKinds.length > 0) { + const includesStandard = options.taskKinds.includes("STANDARD"); + // Include empty string when filtering for STANDARD (default value for pre-existing runs) + const effectiveKinds = includesStandard + ? 
[...options.taskKinds, ""] + : options.taskKinds; + + if (effectiveKinds.length === 1) { + queryBuilder.where("task_kind = {taskKind: String}", { + taskKind: effectiveKinds[0]!, + }); + } else { + queryBuilder.where("task_kind IN {taskKinds: Array(String)}", { + taskKinds: effectiveKinds, + }); + } + } } diff --git a/apps/webapp/app/services/runsRepository/runsRepository.server.ts b/apps/webapp/app/services/runsRepository/runsRepository.server.ts index c8bb6264b4e..68c9da63098 100644 --- a/apps/webapp/app/services/runsRepository/runsRepository.server.ts +++ b/apps/webapp/app/services/runsRepository/runsRepository.server.ts @@ -42,6 +42,7 @@ const RunListInputOptionsSchema = z.object({ queues: z.array(z.string()).optional(), machines: MachinePresetName.array().optional(), errorId: z.string().optional(), + taskKinds: z.array(z.string()).optional(), }); export type RunListInputOptions = z.infer; @@ -53,6 +54,7 @@ export type RunListInputFilters = Omit< export type ParsedRunFilters = RunListInputFilters & { cursor?: string; direction?: "forward" | "backward"; + sources?: string[]; }; export type FilterRunsOptions = Omit & { @@ -102,6 +104,7 @@ export type ListedRun = Prisma.TaskRunGetPayload<{ metadataType: true; machinePreset: true; queue: true; + annotations: true; }; }>; diff --git a/apps/webapp/app/services/sessionsRepository/clickhouseSessionsRepository.server.ts b/apps/webapp/app/services/sessionsRepository/clickhouseSessionsRepository.server.ts index c810a0dfa1e..aebf61628fa 100644 --- a/apps/webapp/app/services/sessionsRepository/clickhouseSessionsRepository.server.ts +++ b/apps/webapp/app/services/sessionsRepository/clickhouseSessionsRepository.server.ts @@ -101,6 +101,7 @@ export class ClickHouseSessionsRepository implements ISessionsRepository { createdAt: true, updatedAt: true, runtimeEnvironmentId: true, + currentRunId: true, }, }); diff --git a/apps/webapp/app/services/sessionsRepository/sessionsRepository.server.ts 
b/apps/webapp/app/services/sessionsRepository/sessionsRepository.server.ts index 15566295e33..245f1df2295 100644 --- a/apps/webapp/app/services/sessionsRepository/sessionsRepository.server.ts +++ b/apps/webapp/app/services/sessionsRepository/sessionsRepository.server.ts @@ -95,6 +95,7 @@ export type ListedSession = Prisma.SessionGetPayload<{ createdAt: true; updatedAt: true; runtimeEnvironmentId: true; + currentRunId: true; }; }>; diff --git a/apps/webapp/app/utils/pathBuilder.ts b/apps/webapp/app/utils/pathBuilder.ts index 8f94b302ef7..0ebae151d0e 100644 --- a/apps/webapp/app/utils/pathBuilder.ts +++ b/apps/webapp/app/utils/pathBuilder.ts @@ -318,6 +318,31 @@ export function v3TestTaskPath( )}`; } +export function v3PlaygroundPath( + organization: OrgForPath, + project: ProjectForPath, + environment: EnvironmentForPath +) { + return `${v3EnvironmentPath(organization, project, environment)}/playground`; +} + +export function v3PlaygroundAgentPath( + organization: OrgForPath, + project: ProjectForPath, + environment: EnvironmentForPath, + agentSlug: string +) { + return `${v3PlaygroundPath(organization, project, environment)}/${encodeURIComponent(agentSlug)}`; +} + +export function v3AgentsPath( + organization: OrgForPath, + project: ProjectForPath, + environment: EnvironmentForPath +) { + return `${v3EnvironmentPath(organization, project, environment)}/agents`; +} + export function v3RunsPath( organization: OrgForPath, project: ProjectForPath, @@ -486,6 +511,23 @@ export function v3BatchesPath( return `${v3EnvironmentPath(organization, project, environment)}/batches`; } +export function v3SessionsPath( + organization: OrgForPath, + project: ProjectForPath, + environment: EnvironmentForPath +) { + return `${v3EnvironmentPath(organization, project, environment)}/sessions`; +} + +export function v3SessionPath( + organization: OrgForPath, + project: ProjectForPath, + environment: EnvironmentForPath, + session: { friendlyId: string } +) { + return 
`${v3SessionsPath(organization, project, environment)}/${session.friendlyId}`; +} + export function v3BatchPath( organization: OrgForPath, project: ProjectForPath, diff --git a/apps/webapp/app/v3/services/createBackgroundWorker.server.ts b/apps/webapp/app/v3/services/createBackgroundWorker.server.ts index f1bcb8e3699..da79e386afb 100644 --- a/apps/webapp/app/v3/services/createBackgroundWorker.server.ts +++ b/apps/webapp/app/v3/services/createBackgroundWorker.server.ts @@ -324,6 +324,13 @@ async function createWorkerTask( ); } + const resolvedTriggerSource = + task.triggerSource === "schedule" + ? ("SCHEDULED" as const) + : task.triggerSource === "agent" + ? ("AGENT" as const) + : ("STANDARD" as const); + await prisma.backgroundWorkerTask.create({ data: { friendlyId: generateFriendlyId("task"), @@ -337,7 +344,8 @@ async function createWorkerTask( retryConfig: task.retry, queueConfig: task.queue, machineConfig: task.machine, - triggerSource: task.triggerSource === "schedule" ? "SCHEDULED" : "STANDARD", + triggerSource: resolvedTriggerSource, + config: task.agentConfig ? (task.agentConfig as any) : undefined, fileId: tasksToBackgroundFiles?.get(task.id) ?? null, maxDurationInSeconds: task.maxDuration ? 
clampMaxDuration(task.maxDuration) : null, ttl: diff --git a/internal-packages/clickhouse/schema/029_add_task_kind_to_task_runs_v2.sql b/internal-packages/clickhouse/schema/029_add_task_kind_to_task_runs_v2.sql new file mode 100644 index 00000000000..a88a7a46cef --- /dev/null +++ b/internal-packages/clickhouse/schema/029_add_task_kind_to_task_runs_v2.sql @@ -0,0 +1,7 @@ +-- +goose Up +ALTER TABLE trigger_dev.task_runs_v2 + ADD COLUMN task_kind LowCardinality(String) DEFAULT ''; + +-- +goose Down +ALTER TABLE trigger_dev.task_runs_v2 + DROP COLUMN task_kind; diff --git a/internal-packages/clickhouse/src/taskRuns.test.ts b/internal-packages/clickhouse/src/taskRuns.test.ts index 8bd403f14f0..2d35ab0d420 100644 --- a/internal-packages/clickhouse/src/taskRuns.test.ts +++ b/internal-packages/clickhouse/src/taskRuns.test.ts @@ -84,6 +84,7 @@ describe("Task Runs V2", () => { null, // max_duration_in_seconds "", // trigger_source "", // root_trigger_source + "", // task_kind null, // is_warm_start ]; @@ -215,6 +216,7 @@ describe("Task Runs V2", () => { null, // max_duration_in_seconds "", // trigger_source "", // root_trigger_source + "", // task_kind null, // is_warm_start ]; @@ -269,6 +271,7 @@ describe("Task Runs V2", () => { null, // max_duration_in_seconds "", // trigger_source "", // root_trigger_source + "", // task_kind null, // is_warm_start ]; @@ -370,6 +373,7 @@ describe("Task Runs V2", () => { null, // max_duration_in_seconds "", // trigger_source "", // root_trigger_source + "", // task_kind null, // is_warm_start ]; diff --git a/internal-packages/clickhouse/src/taskRuns.ts b/internal-packages/clickhouse/src/taskRuns.ts index 6a9f66d7844..f6427359772 100644 --- a/internal-packages/clickhouse/src/taskRuns.ts +++ b/internal-packages/clickhouse/src/taskRuns.ts @@ -51,6 +51,7 @@ export const TaskRunV2 = z.object({ max_duration_in_seconds: z.number().int().nullish(), trigger_source: z.string().default(""), root_trigger_source: z.string().default(""), + task_kind: 
z.string().default(""), is_warm_start: z.boolean().nullish(), _version: z.string(), _is_deleted: z.number().int().default(0), @@ -110,6 +111,7 @@ export const TASK_RUN_COLUMNS = [ "max_duration_in_seconds", "trigger_source", "root_trigger_source", + "task_kind", "is_warm_start", ] as const; @@ -176,6 +178,7 @@ export type TaskRunFieldTypes = { max_duration_in_seconds: number | null; trigger_source: string; root_trigger_source: string; + task_kind: string; is_warm_start: boolean | null; }; @@ -313,6 +316,7 @@ export type TaskRunInsertArray = [ max_duration_in_seconds: number | null, trigger_source: string, root_trigger_source: string, + task_kind: string, is_warm_start: boolean | null, ]; diff --git a/internal-packages/database/prisma/migrations/20260329100903_add_agent_trigger_source_and_task_config/migration.sql b/internal-packages/database/prisma/migrations/20260329100903_add_agent_trigger_source_and_task_config/migration.sql new file mode 100644 index 00000000000..29233ab2740 --- /dev/null +++ b/internal-packages/database/prisma/migrations/20260329100903_add_agent_trigger_source_and_task_config/migration.sql @@ -0,0 +1,5 @@ +-- AlterEnum +ALTER TYPE "public"."TaskTriggerSource" ADD VALUE 'AGENT'; + +-- AlterTable +ALTER TABLE "public"."BackgroundWorkerTask" ADD COLUMN "config" JSONB; diff --git a/internal-packages/database/prisma/migrations/20260330113734_add_playground_conversation/migration.sql b/internal-packages/database/prisma/migrations/20260330113734_add_playground_conversation/migration.sql new file mode 100644 index 00000000000..7d061a51395 --- /dev/null +++ b/internal-packages/database/prisma/migrations/20260330113734_add_playground_conversation/migration.sql @@ -0,0 +1,34 @@ +-- CreateTable +CREATE TABLE "public"."PlaygroundConversation" ( + "id" TEXT NOT NULL, + "chatId" TEXT NOT NULL, + "title" TEXT NOT NULL DEFAULT 'New conversation', + "agentSlug" TEXT NOT NULL, + "runId" TEXT, + "clientData" JSONB, + "projectId" TEXT NOT NULL, + 
"runtimeEnvironmentId" TEXT NOT NULL, + "userId" TEXT NOT NULL, + "createdAt" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP, + "updatedAt" TIMESTAMP(3) NOT NULL, + + CONSTRAINT "PlaygroundConversation_pkey" PRIMARY KEY ("id") +); + +-- CreateIndex +CREATE INDEX "PlaygroundConversation_runtimeEnvironmentId_agentSlug_updat_idx" ON "public"."PlaygroundConversation"("runtimeEnvironmentId", "agentSlug", "updatedAt" DESC); + +-- CreateIndex +CREATE INDEX "PlaygroundConversation_userId_runtimeEnvironmentId_idx" ON "public"."PlaygroundConversation"("userId", "runtimeEnvironmentId"); + +-- CreateIndex +CREATE UNIQUE INDEX "PlaygroundConversation_chatId_runtimeEnvironmentId_key" ON "public"."PlaygroundConversation"("chatId", "runtimeEnvironmentId"); + +-- AddForeignKey +ALTER TABLE "public"."PlaygroundConversation" ADD CONSTRAINT "PlaygroundConversation_runId_fkey" FOREIGN KEY ("runId") REFERENCES "public"."TaskRun"("id") ON DELETE SET NULL ON UPDATE CASCADE; + +-- AddForeignKey +ALTER TABLE "public"."PlaygroundConversation" ADD CONSTRAINT "PlaygroundConversation_projectId_fkey" FOREIGN KEY ("projectId") REFERENCES "public"."Project"("id") ON DELETE CASCADE ON UPDATE CASCADE; + +-- AddForeignKey +ALTER TABLE "public"."PlaygroundConversation" ADD CONSTRAINT "PlaygroundConversation_runtimeEnvironmentId_fkey" FOREIGN KEY ("runtimeEnvironmentId") REFERENCES "public"."RuntimeEnvironment"("id") ON DELETE CASCADE ON UPDATE CASCADE; diff --git a/internal-packages/database/prisma/migrations/20260330135232_add_messages_and_last_event_id_to_playground/migration.sql b/internal-packages/database/prisma/migrations/20260330135232_add_messages_and_last_event_id_to_playground/migration.sql new file mode 100644 index 00000000000..0793d411c38 --- /dev/null +++ b/internal-packages/database/prisma/migrations/20260330135232_add_messages_and_last_event_id_to_playground/migration.sql @@ -0,0 +1,3 @@ +-- AlterTable +ALTER TABLE "public"."PlaygroundConversation" ADD COLUMN "lastEventId" TEXT, +ADD 
COLUMN "messages" JSONB; diff --git a/internal-packages/database/prisma/schema.prisma b/internal-packages/database/prisma/schema.prisma index f588bdfc453..7e32a96d805 100644 --- a/internal-packages/database/prisma/schema.prisma +++ b/internal-packages/database/prisma/schema.prisma @@ -386,7 +386,8 @@ model RuntimeEnvironment { waitpointTags WaitpointTag[] BulkActionGroup BulkActionGroup[] customerQueries CustomerQuery[] - prompts Prompt[] + prompts Prompt[] + playgroundConversations PlaygroundConversation[] errorGroupStates ErrorGroupState[] taskIdentifiers TaskIdentifier[] revokedApiKeys RevokedApiKey[] @@ -470,6 +471,7 @@ model Project { connectedGithubRepository ConnectedGithubRepository? organizationProjectIntegration OrganizationProjectIntegration[] customerQueries CustomerQuery[] + playgroundConversations PlaygroundConversation[] buildSettings Json? onboardingData Json? @@ -706,6 +708,10 @@ model BackgroundWorkerTask { triggerSource TaskTriggerSource @default(STANDARD) + /// Extra task configuration JSON. Shape depends on triggerSource. + /// AGENT: { type: "ai-sdk-chat" } + config Json? + payloadSchema Json? @@unique([workerId, slug]) @@ -718,6 +724,49 @@ model BackgroundWorkerTask { enum TaskTriggerSource { STANDARD SCHEDULED + AGENT +} + +model PlaygroundConversation { + id String @id @default(cuid()) + + /// The chat session ID used by the transport + chatId String + + /// User-editable conversation title (auto-generated from first message) + title String @default("New conversation") + + /// Which agent this conversation is with + agentSlug String + + /// The current active run backing this conversation (null if no run yet) + runId String? + run TaskRun? @relation(fields: [runId], references: [id], onDelete: SetNull, onUpdate: Cascade) + + /// The client data JSON used for this conversation + clientData Json? + + /// Accumulated UIMessages from completed turns (for resume without stream replay) + messages Json? 
+ + /// Last SSE event ID — resume from this position to avoid replaying old turns + lastEventId String? + + project Project @relation(fields: [projectId], references: [id], onDelete: Cascade, onUpdate: Cascade) + projectId String + + runtimeEnvironment RuntimeEnvironment @relation(fields: [runtimeEnvironmentId], references: [id], onDelete: Cascade, onUpdate: Cascade) + runtimeEnvironmentId String + + /// The user who started this conversation + userId String + + createdAt DateTime @default(now()) + updatedAt DateTime @updatedAt + + @@unique([chatId, runtimeEnvironmentId]) + @@index([runtimeEnvironmentId, agentSlug, updatedAt(sort: Desc)]) + @@index([userId, runtimeEnvironmentId]) } /// Durable, typed, bidirectional I/O primitive. Owns two S2 streams (.out / .in). @@ -1021,6 +1070,8 @@ model TaskRun { /// (OSS, or pre-backfill); reads fall back to the global basin. streamBasinName String? + playgroundConversations PlaygroundConversation[] + @@unique([oneTimeUseToken]) @@unique([runtimeEnvironmentId, taskIdentifier, idempotencyKey]) // Finding child runs diff --git a/packages/core/src/v3/apiClient/errors.ts b/packages/core/src/v3/apiClient/errors.ts index 14f69b31302..5f38a4947b8 100644 --- a/packages/core/src/v3/apiClient/errors.ts +++ b/packages/core/src/v3/apiClient/errors.ts @@ -128,6 +128,18 @@ export class PermissionDeniedError extends ApiError { override readonly status: 403 = 403; } +/** + * True when `error` is a 401/403 from the Trigger API (e.g. expired run-scoped PAT on realtime streams). + * Uses structural checks so it works even if multiple copies of `@trigger.dev/core` are bundled (subclass `instanceof` can fail). 
+ */ +export function isTriggerRealtimeAuthError(error: unknown): boolean { + if (error === null || typeof error !== "object") { + return false; + } + const e = error as ApiError; + return e.name === "TriggerApiError" && (e.status === 401 || e.status === 403); +} + export class NotFoundError extends ApiError { override readonly status: 404 = 404; } diff --git a/packages/core/src/v3/apiClient/index.ts b/packages/core/src/v3/apiClient/index.ts index 89a551954f0..04a9009e356 100644 --- a/packages/core/src/v3/apiClient/index.ts +++ b/packages/core/src/v3/apiClient/index.ts @@ -6,19 +6,32 @@ import { ApiDeploymentListOptions, ApiDeploymentListResponseItem, ApiDeploymentListSearchParams, + RetrieveCurrentDeploymentResponseBody, AppendToStreamResponseBody, BatchItemNDJSON, BatchTaskRunExecutionResult, BatchTriggerTaskV3RequestBody, BatchTriggerTaskV3Response, CanceledRunResponse, + CloseSessionRequestBody, CompleteWaitpointTokenRequestBody, CompleteWaitpointTokenResponseBody, + CreatedSessionResponseBody, + CreateSessionRequestBody, + EndAndContinueSessionRequestBody, + EndAndContinueSessionResponseBody, + ListSessionsOptions, + ListSessionsResponseBody, + ListedSessionItem, + RetrieveSessionResponseBody, + UpdateSessionRequestBody, CreateBatchRequestBody, CreateBatchResponse, CreateEnvironmentVariableRequestBody, CreateInputStreamWaitpointRequestBody, CreateInputStreamWaitpointResponseBody, + CreateSessionStreamWaitpointRequestBody, + CreateSessionStreamWaitpointResponseBody, CreateScheduleOptions, CreateStreamResponseBody, CreateUploadPayloadUrlResponseBody, @@ -59,6 +72,7 @@ import { SendInputStreamResponseBody, StreamBatchItemsResponse, TaskRunExecutionResult, + ReadSessionStreamRecordsResponseBody, TriggerTaskRequestBody, TriggerTaskResponse, UpdateEnvironmentVariableRequestBody, @@ -1094,6 +1108,233 @@ export class ApiClient { ); } + // ======================================================================== + // Sessions + // 
======================================================================== + + createSession(body: CreateSessionRequestBody, requestOptions?: ZodFetchOptions) { + return zodfetch( + CreatedSessionResponseBody, + `${this.baseUrl}/api/v1/sessions`, + { + method: "POST", + headers: this.#getHeaders(false), + body: JSON.stringify(body), + }, + mergeRequestOptions(this.defaultRequestOptions, requestOptions) + ); + } + + retrieveSession(sessionIdOrExternalId: string, requestOptions?: ZodFetchOptions) { + return zodfetch( + RetrieveSessionResponseBody, + `${this.baseUrl}/api/v1/sessions/${encodeURIComponent(sessionIdOrExternalId)}`, + { + method: "GET", + headers: this.#getHeaders(false), + }, + mergeRequestOptions(this.defaultRequestOptions, requestOptions) + ); + } + + updateSession( + sessionIdOrExternalId: string, + body: UpdateSessionRequestBody, + requestOptions?: ZodFetchOptions + ) { + return zodfetch( + RetrieveSessionResponseBody, + `${this.baseUrl}/api/v1/sessions/${encodeURIComponent(sessionIdOrExternalId)}`, + { + method: "PATCH", + headers: this.#getHeaders(false), + body: JSON.stringify(body), + }, + mergeRequestOptions(this.defaultRequestOptions, requestOptions) + ); + } + + closeSession( + sessionIdOrExternalId: string, + body?: CloseSessionRequestBody, + requestOptions?: ZodFetchOptions + ) { + return zodfetch( + RetrieveSessionResponseBody, + `${this.baseUrl}/api/v1/sessions/${encodeURIComponent(sessionIdOrExternalId)}/close`, + { + method: "POST", + headers: this.#getHeaders(false), + body: JSON.stringify(body ?? 
{}), + }, + mergeRequestOptions(this.defaultRequestOptions, requestOptions) + ); + } + + endAndContinueSession( + sessionIdOrExternalId: string, + body: EndAndContinueSessionRequestBody, + requestOptions?: ZodFetchOptions + ) { + return zodfetch( + EndAndContinueSessionResponseBody, + `${this.baseUrl}/api/v1/sessions/${encodeURIComponent(sessionIdOrExternalId)}/end-and-continue`, + { + method: "POST", + headers: this.#getHeaders(false), + body: JSON.stringify(body), + }, + mergeRequestOptions(this.defaultRequestOptions, requestOptions) + ); + } + + listSessions( + options?: ListSessionsOptions, + requestOptions?: ZodFetchOptions + ): CursorPagePromise { + const searchParams = createSearchQueryForListSessions(options); + + return zodfetchCursorPage( + ListedSessionItem, + `${this.baseUrl}/api/v1/sessions`, + { + query: searchParams, + limit: options?.limit, + after: options?.after, + before: options?.before, + }, + { + method: "GET", + headers: this.#getHeaders(false), + }, + mergeRequestOptions(this.defaultRequestOptions, requestOptions) + ); + } + + // ======================================================================== + // Session realtime channels + // ======================================================================== + + async initializeSessionStream( + sessionIdOrExternalId: string, + io: "out" | "in", + requestOptions?: ZodFetchOptions + ) { + // The server returns S2 credentials in response headers alongside a tiny + // JSON body with the realtime version. Follow the same shape as + // `createStream` so downstream clients can feed them into + // `StreamsWriterV2`. 
+ return zodfetch( + CreateStreamResponseBody, + `${this.baseUrl}/realtime/v1/sessions/${encodeURIComponent(sessionIdOrExternalId)}/${io}`, + { + method: "PUT", + headers: this.#getHeaders(false), + }, + mergeRequestOptions(this.defaultRequestOptions, requestOptions) + ) + .withResponse() + .then(({ data, response }) => ({ + ...data, + headers: Object.fromEntries(response.headers.entries()), + })); + } + + async appendToSessionStream( + sessionIdOrExternalId: string, + io: "out" | "in", + part: TBody, + requestOptions?: ZodFetchOptions + ) { + return zodfetch( + AppendToStreamResponseBody, + `${this.baseUrl}/realtime/v1/sessions/${encodeURIComponent(sessionIdOrExternalId)}/${io}/append`, + { + method: "POST", + headers: this.#getHeaders(false), + body: part, + }, + mergeRequestOptions(this.defaultRequestOptions, requestOptions) + ); + } + + /** + * Non-SSE drain of a Session channel's tail. Returns whatever records + * exist after `afterEventId` (or from the head of the stream) and closes + * — `wait=0` semantics, no long-poll. Used by `replaySessionOutTail` at + * run boot, where the SSE long-poll's ~1s tax on empty streams is the + * dominant cost on every fresh chat. + * + * `afterEventId` is the same cursor format as the SSE Last-Event-ID + * (the S2 sequence number, stringified) — pass `lastOutEventId` from a + * persisted snapshot to resume. + */ + async readSessionStreamRecords( + sessionIdOrExternalId: string, + io: "out" | "in", + options?: { afterEventId?: string; baseUrl?: string } + ) { + const qs = new URLSearchParams(); + if (options?.afterEventId !== undefined) { + qs.set("afterEventId", options.afterEventId); + } + const url = `${options?.baseUrl ?? this.baseUrl}/realtime/v1/sessions/${encodeURIComponent( + sessionIdOrExternalId + )}/${io}/records${qs.toString() ? 
`?${qs.toString()}` : ""}`; + return zodfetch( + ReadSessionStreamRecordsResponseBody, + url, + { + method: "GET", + headers: this.#getHeaders(false), + }, + mergeRequestOptions(this.defaultRequestOptions, undefined) + ); + } + + /** + * Subscribe to SSE records on a Session channel. Reuses the same + * {@link SSEStreamSubscription} plumbing as `readStream` for run-scoped + * realtime streams — auto-retry, Last-Event-ID resume, abort-on-cancel. + */ + async subscribeToSessionStream( + sessionIdOrExternalId: string, + io: "out" | "in", + options?: { + signal?: AbortSignal; + baseUrl?: string; + timeoutInSeconds?: number; + onComplete?: () => void; + onError?: (error: Error) => void; + lastEventId?: string; + onPart?: (part: SSEStreamPart) => void; + } + ): Promise> { + const url = `${options?.baseUrl ?? this.baseUrl}/realtime/v1/sessions/${encodeURIComponent(sessionIdOrExternalId)}/${io}`; + + const subscription = new SSEStreamSubscription(url, { + headers: this.getHeaders(), + signal: options?.signal, + onComplete: options?.onComplete, + onError: options?.onError, + timeoutInSeconds: options?.timeoutInSeconds, + lastEventId: options?.lastEventId, + }); + + const stream = await subscription.subscribe(); + const onPart = options?.onPart; + + return stream.pipeThrough( + new TransformStream({ + transform(chunk, controller) { + const data = chunk.chunk as T; + onPart?.(chunk as SSEStreamPart); + controller.enqueue(data); + }, + }) + ); + } + async waitForDuration( runId: string, body: WaitForDurationRequestBody, @@ -1340,6 +1581,18 @@ export class ApiClient { ); } + retrieveCurrentDeployment(requestOptions?: ZodFetchOptions) { + return zodfetch( + RetrieveCurrentDeploymentResponseBody, + `${this.baseUrl}/api/v1/deployments/current`, + { + method: "GET", + headers: this.#getHeaders(false), + }, + mergeRequestOptions(this.defaultRequestOptions, requestOptions) + ); + } + async fetchStream( runId: string, streamKey: string, @@ -1459,6 +1712,23 @@ export class ApiClient { 
); } + async createSessionStreamWaitpoint( + runFriendlyId: string, + body: CreateSessionStreamWaitpointRequestBody, + requestOptions?: ZodFetchOptions + ) { + return zodfetch( + CreateSessionStreamWaitpointResponseBody, + `${this.baseUrl}/api/v1/runs/${runFriendlyId}/session-streams/wait`, + { + method: "POST", + headers: this.#getHeaders(false), + body: JSON.stringify(body), + }, + mergeRequestOptions(this.defaultRequestOptions, requestOptions) + ); + } + async generateJWTClaims(requestOptions?: ZodFetchOptions): Promise> { return zodfetch( z.record(z.any()), @@ -1823,6 +2093,47 @@ function queueNameFromQueueTypeName(queue: QueueTypeName): string { return queue.name; } +function createSearchQueryForListSessions(options?: ListSessionsOptions): URLSearchParams { + const searchParams = new URLSearchParams(); + + if (!options) return searchParams; + + const appendMany = (name: string, value: string | string[] | undefined) => { + if (value === undefined) return; + searchParams.append(name, Array.isArray(value) ? value.join(",") : value); + }; + + appendMany("filter[type]", options.type); + appendMany("filter[tags]", options.tag); + appendMany("filter[taskIdentifier]", options.taskIdentifier); + + if (options.externalId) { + searchParams.append("filter[externalId]", options.externalId); + } + + appendMany("filter[status]", options.status as string | string[] | undefined); + + if (options.period) { + searchParams.append("filter[createdAt][period]", options.period); + } + + if (options.from !== undefined) { + searchParams.append( + "filter[createdAt][from]", + options.from instanceof Date ? options.from.getTime().toString() : options.from.toString() + ); + } + + if (options.to !== undefined) { + searchParams.append( + "filter[createdAt][to]", + options.to instanceof Date ? 
options.to.getTime().toString() : options.to.toString() + ); + } + + return searchParams; +} + function createSearchQueryForListWaitpointTokens( query?: ListWaitpointTokensQueryParams ): URLSearchParams { diff --git a/packages/core/src/v3/apiClient/runStream.test.ts b/packages/core/src/v3/apiClient/runStream.test.ts new file mode 100644 index 00000000000..a91e70c6e56 --- /dev/null +++ b/packages/core/src/v3/apiClient/runStream.test.ts @@ -0,0 +1,444 @@ +import { afterEach, describe, expect, it, vi } from "vitest"; +import { SSEStreamSubscription } from "./runStream.js"; + +vi.setConfig({ testTimeout: 10_000 }); + +describe("SSEStreamSubscription retry behavior", () => { + const originalFetch = globalThis.fetch; + + afterEach(() => { + globalThis.fetch = originalFetch; + vi.restoreAllMocks(); + }); + + // A response.body that emits one SSE event then closes, so each + // successful subscribe() exits cleanly via reader.read() done=true + // and the test doesn't hang reading from a long-lived stream. + function makeSSEResponse() { + const body = new ReadableStream({ + start(controller) { + controller.enqueue(new TextEncoder().encode(`id: 1\ndata: {"hello":1}\n\n`)); + controller.close(); + }, + }); + return new Response(body, { + status: 200, + headers: { "Content-Type": "text/event-stream", "X-Stream-Version": "v1" }, + }); + } + + // Drain a ReadableStream until it closes or errors. + // Returns received chunks plus terminal state. 
+ async function drain(stream: ReadableStream<{ id: string; chunk: unknown }>) { + const reader = stream.getReader(); + const chunks: Array<{ id: string; chunk: unknown }> = []; + try { + while (true) { + const { done, value } = await reader.read(); + if (done) return { chunks, error: undefined as Error | undefined }; + chunks.push(value); + } + } catch (e) { + return { chunks, error: e as Error }; + } finally { + try { + reader.releaseLock(); + } catch { + /* already released */ + } + } + } + + it("retries past the legacy 5-attempt cap", async () => { + let attempts = 0; + globalThis.fetch = vi.fn().mockImplementation(async () => { + attempts++; + if (attempts < 8) { + throw new TypeError("fetch failed (simulated network drop)"); + } + return makeSSEResponse(); + }); + + const sub = new SSEStreamSubscription("http://example.test/sse", { + // Compress the timing for the test — defaults are 100ms initial, + // 5s cap, retry forever; here we want fast iteration. + retryDelayMs: 1, + maxRetryDelayMs: 5, + }); + + const stream = await sub.subscribe(); + const result = await drain(stream); + + expect(attempts).toBe(8); + expect(result.error).toBeUndefined(); + expect(result.chunks).toHaveLength(1); + }); + + it("caps the exponential backoff at maxRetryDelayMs", async () => { + let attempts = 0; + const callTimes: number[] = []; + globalThis.fetch = vi.fn().mockImplementation(async () => { + callTimes.push(Date.now()); + attempts++; + if (attempts < 6) { + throw new TypeError("fetch failed"); + } + return makeSSEResponse(); + }); + + const sub = new SSEStreamSubscription("http://example.test/sse", { + retryDelayMs: 10, + maxRetryDelayMs: 30, + }); + + const stream = await sub.subscribe(); + await drain(stream); + + expect(attempts).toBe(6); + + // Without the cap, backoff would be 10, 20, 40, 80, 160 (= 310ms total). + // With cap=30, it's 10, 20, 30, 30, 30 (= 120ms total). Allow generous + // slack for setTimeout jitter; the assertion is "well under uncapped". 
+ const totalElapsed = callTimes.at(-1)! - callTimes[0]!; + expect(totalElapsed).toBeLessThan(250); + }); + + it("retryNow() wakes an in-flight backoff and reconnects immediately", async () => { + let attempts = 0; + globalThis.fetch = vi.fn().mockImplementation(async () => { + attempts++; + if (attempts === 1) { + throw new TypeError("fetch failed"); + } + return makeSSEResponse(); + }); + + const sub = new SSEStreamSubscription("http://example.test/sse", { + // Backoff is intentionally long. retryNow() should short-circuit it. + retryDelayMs: 5_000, + maxRetryDelayMs: 5_000, + }); + + const subscribePromise = sub.subscribe().then(drain); + + // Wait for the first attempt to fail and the backoff to start. + await new Promise((r) => setTimeout(r, 50)); + sub.retryNow(); + + const start = Date.now(); + const result = await subscribePromise; + const elapsed = Date.now() - start; + + expect(attempts).toBe(2); + expect(result.error).toBeUndefined(); + // Without retryNow this would have waited ~5000ms; with it, the + // second attempt fires nearly immediately after the first failure. + expect(elapsed).toBeLessThan(500); + }); + + it("respects abort signal during retry backoff", async () => { + let attempts = 0; + globalThis.fetch = vi.fn().mockImplementation(async () => { + attempts++; + throw new TypeError("fetch failed"); + }); + + const ac = new AbortController(); + const sub = new SSEStreamSubscription("http://example.test/sse", { + signal: ac.signal, + retryDelayMs: 1_000, + maxRetryDelayMs: 1_000, + }); + + const subscribePromise = sub.subscribe().then(drain); + + // Let the first attempt fail and enter backoff, then abort. + await new Promise((r) => setTimeout(r, 50)); + ac.abort(); + + const result = await subscribePromise; + expect(result.error).toBeUndefined(); + // Abort should stop retries; we should have made at most a couple + // of attempts before the abort took effect. 
+ expect(attempts).toBeLessThanOrEqual(2); + }); + + it("forceReconnect mid-read drops the stream and resumes with Last-Event-ID", async () => { + let attempts = 0; + const seenLastEventIds: Array = []; + globalThis.fetch = vi.fn().mockImplementation(async (_url: string, init?: RequestInit) => { + attempts++; + const lastEventIdHeader = (init?.headers as Record | undefined)?.[ + "Last-Event-ID" + ]; + seenLastEventIds.push(lastEventIdHeader ?? null); + + if (attempts === 1) { + // Headers arrive immediately, body emits one chunk then hangs + // until aborted. The test calls forceReconnect after seeing + // the chunk, which should drop this stream and trigger a + // resume request with Last-Event-ID set. + const body = new ReadableStream({ + start(controller) { + controller.enqueue(new TextEncoder().encode(`id: 7\ndata: {"first":true}\n\n`)); + init?.signal?.addEventListener("abort", () => controller.error(new Error("aborted"))); + }, + }); + return new Response(body, { + status: 200, + headers: { "Content-Type": "text/event-stream", "X-Stream-Version": "v1" }, + }); + } + // Second attempt: emit a second chunk and close cleanly. + const body = new ReadableStream({ + start(controller) { + controller.enqueue(new TextEncoder().encode(`id: 8\ndata: {"second":true}\n\n`)); + controller.close(); + }, + }); + return new Response(body, { + status: 200, + headers: { "Content-Type": "text/event-stream", "X-Stream-Version": "v1" }, + }); + }); + + const sub = new SSEStreamSubscription("http://example.test/sse", { + retryDelayMs: 1, + maxRetryDelayMs: 5, + fetchTimeoutMs: 60_000, + }); + + const stream = await sub.subscribe(); + const reader = stream.getReader(); + + // Read the first chunk, then force-reconnect mid-stream. + const first = await reader.read(); + expect(first.done).toBe(false); + expect((first.value!.chunk as { first?: boolean }).first).toBe(true); + + sub.forceReconnect(); + + // Second chunk arrives from the resumed connection. 
+ const second = await reader.read(); + expect(second.done).toBe(false); + expect((second.value!.chunk as { second?: boolean }).second).toBe(true); + + const tail = await reader.read(); + expect(tail.done).toBe(true); + + expect(attempts).toBe(2); + expect(seenLastEventIds[0]).toBeNull(); + // Resumed request includes the Last-Event-ID from the first chunk. + expect(seenLastEventIds[1]).toBe("7"); + }); + + it("forceReconnect aborts the in-flight fetch and retries", async () => { + let attempts = 0; + let firstResolve: (() => void) | undefined; + globalThis.fetch = vi.fn().mockImplementation(async (_url: string, init?: RequestInit) => { + attempts++; + if (attempts === 1) { + // Hang the first attempt forever (or until signal aborts). + // forceReconnect should make this attempt's signal abort and + // throw, taking us into the retry path. + return new Promise((resolve, reject) => { + firstResolve = () => resolve(makeSSEResponse()); + init?.signal?.addEventListener("abort", () => { + reject(new DOMException("aborted", "AbortError")); + }); + }); + } + return makeSSEResponse(); + }); + + const sub = new SSEStreamSubscription("http://example.test/sse", { + retryDelayMs: 1, + maxRetryDelayMs: 5, + // Long fetch timeout so it doesn't fire instead of forceReconnect. + fetchTimeoutMs: 60_000, + }); + + const subscribePromise = sub.subscribe().then(drain); + + // Let the first fetch hang, then force reconnect. + await new Promise((r) => setTimeout(r, 50)); + sub.forceReconnect(); + + const result = await subscribePromise; + expect(attempts).toBe(2); + expect(result.error).toBeUndefined(); + expect(result.chunks).toHaveLength(1); + // Sanity: the hung first fetch was abandoned, never resolved. 
+ expect(firstResolve).toBeDefined(); + }); + + it("aborts a slow fetch via fetchTimeoutMs and retries", async () => { + let attempts = 0; + globalThis.fetch = vi.fn().mockImplementation(async (_url: string, init?: RequestInit) => { + attempts++; + if (attempts === 1) { + // Hang until aborted. + return new Promise((_resolve, reject) => { + init?.signal?.addEventListener("abort", () => { + reject(new DOMException("aborted", "AbortError")); + }); + }); + } + return makeSSEResponse(); + }); + + const sub = new SSEStreamSubscription("http://example.test/sse", { + retryDelayMs: 1, + maxRetryDelayMs: 5, + fetchTimeoutMs: 100, + }); + + const result = await sub.subscribe().then(drain); + expect(attempts).toBe(2); + expect(result.error).toBeUndefined(); + expect(result.chunks).toHaveLength(1); + }); + + it("aborts a silent reader via stallTimeoutMs and retries", async () => { + let attempts = 0; + globalThis.fetch = vi.fn().mockImplementation(async (_url: string, init?: RequestInit) => { + attempts++; + if (attempts === 1) { + // Headers arrive immediately, but the body stream emits no + // chunks until aborted. The stall timer should fire and + // force a reconnect. 
+ const body = new ReadableStream({ + start(controller) { + init?.signal?.addEventListener("abort", () => controller.error(new Error("aborted"))); + }, + }); + return new Response(body, { + status: 200, + headers: { "Content-Type": "text/event-stream", "X-Stream-Version": "v1" }, + }); + } + return makeSSEResponse(); + }); + + const sub = new SSEStreamSubscription("http://example.test/sse", { + retryDelayMs: 1, + maxRetryDelayMs: 5, + stallTimeoutMs: 100, + }); + + const result = await sub.subscribe().then(drain); + expect(attempts).toBe(2); + expect(result.error).toBeUndefined(); + expect(result.chunks).toHaveLength(1); + }); + + it("does not retry on 404 (stream gone)", async () => { + let attempts = 0; + globalThis.fetch = vi.fn().mockImplementation(async () => { + attempts++; + return new Response("not found", { status: 404 }); + }); + + const errors: Error[] = []; + const sub = new SSEStreamSubscription("http://example.test/sse", { + retryDelayMs: 1, + maxRetryDelayMs: 5, + onError: (e) => errors.push(e), + }); + + const result = await sub.subscribe().then(drain); + expect(attempts).toBe(1); + expect(result.error).toBeDefined(); + expect(errors).toHaveLength(1); + }); + + it("does not retry on 410 (session closed)", async () => { + let attempts = 0; + globalThis.fetch = vi.fn().mockImplementation(async () => { + attempts++; + return new Response("gone", { status: 410 }); + }); + + const sub = new SSEStreamSubscription("http://example.test/sse", { + retryDelayMs: 1, + maxRetryDelayMs: 5, + }); + + const result = await sub.subscribe().then(drain); + expect(attempts).toBe(1); + expect(result.error).toBeDefined(); + }); + + it("respects custom nonRetryableStatuses", async () => { + let attempts = 0; + globalThis.fetch = vi.fn().mockImplementation(async () => { + attempts++; + return new Response("forbidden", { status: 403 }); + }); + + const sub = new SSEStreamSubscription("http://example.test/sse", { + retryDelayMs: 1, + maxRetryDelayMs: 5, + 
nonRetryableStatuses: [403], + }); + + const result = await sub.subscribe().then(drain); + expect(attempts).toBe(1); + expect(result.error).toBeDefined(); + }); + + it("retries on 503 (caller-tunable nonRetryableStatuses)", async () => { + let attempts = 0; + globalThis.fetch = vi.fn().mockImplementation(async () => { + attempts++; + if (attempts < 3) return new Response("unavailable", { status: 503 }); + return makeSSEResponse(); + }); + + const sub = new SSEStreamSubscription("http://example.test/sse", { + retryDelayMs: 1, + maxRetryDelayMs: 5, + // 503 is NOT in the default non-retryable set; it should retry. + }); + + const result = await sub.subscribe().then(drain); + expect(attempts).toBe(3); + expect(result.error).toBeUndefined(); + expect(result.chunks).toHaveLength(1); + }); + + it("applies jitter to backoff (delays vary across attempts)", async () => { + const callTimes: number[] = []; + globalThis.fetch = vi.fn().mockImplementation(async () => { + callTimes.push(performance.now()); + throw new TypeError("fetch failed"); + }); + + const ac = new AbortController(); + const sub = new SSEStreamSubscription("http://example.test/sse", { + signal: ac.signal, + retryDelayMs: 50, + maxRetryDelayMs: 50, + retryJitter: 0.5, // 50% — final delay in [25ms, 50ms] + }); + + const promise = sub.subscribe().then(drain); + await new Promise((r) => setTimeout(r, 600)); // allow ~10 attempts + ac.abort(); + await promise; + + expect(callTimes.length).toBeGreaterThanOrEqual(5); + + // Compute inter-attempt gaps (skip the first since it has no prior). + const gaps = callTimes.slice(1).map((t, i) => t - callTimes[i]!); + // Without jitter all gaps would be ~50ms. With 50% jitter they + // should land in [~25ms, ~50ms] and not all be identical. 
+ const min = Math.min(...gaps); + const max = Math.max(...gaps); + expect(min).toBeGreaterThanOrEqual(20); // a little slack for timer scheduling + expect(max).toBeLessThanOrEqual(80); + // Variance check — at least one gap should differ from another by + // a measurable amount (rules out a deterministic-delay regression). + expect(max - min).toBeGreaterThan(2); + }); +}); diff --git a/packages/core/src/v3/apiClient/runStream.ts b/packages/core/src/v3/apiClient/runStream.ts index 520ecd8dc2b..2152c6c69ca 100644 --- a/packages/core/src/v3/apiClient/runStream.ts +++ b/packages/core/src/v3/apiClient/runStream.ts @@ -14,7 +14,7 @@ import { IOPacket, parsePacket, } from "../utils/ioSerialization.js"; -import { ApiError } from "./errors.js"; +import { ApiError, isTriggerRealtimeAuthError } from "./errors.js"; import { ApiClient } from "./index.js"; import { zodShapeStream } from "./stream.js"; @@ -182,8 +182,15 @@ export type SSEStreamPart = { export class SSEStreamSubscription implements StreamSubscription { private lastEventId: string | undefined; private retryCount = 0; - private maxRetries = 5; - private retryDelayMs = 1000; + private maxRetries: number; + private retryDelayMs: number; + private maxRetryDelayMs: number; + private retryJitter: number; + private fetchTimeoutMs: number; + private stallTimeoutMs: number; + private nonRetryableStatuses: ReadonlySet; + private retryNowController: AbortController | null = null; + private internalAbort: AbortController | null = null; constructor( private url: string, @@ -194,9 +201,69 @@ export class SSEStreamSubscription implements StreamSubscription { onError?: (error: Error) => void; timeoutInSeconds?: number; lastEventId?: string; + // Retry knobs. Defaults: retry forever, 100ms initial backoff, + // capped at 5s with 50% jitter. 
Keeps mobile clients reconnecting + // through transient drops without giving up after a fixed window + // and prevents thundering-herd when many clients reconnect after + // a brief server blip. + maxRetries?: number; + retryDelayMs?: number; + maxRetryDelayMs?: number; + retryJitter?: number; + // Per-attempt fetch timeout — aborts the connect attempt if + // response headers don't arrive in time. Catches stuck TCP + // sockets where `fetch()` blocks forever waiting on a dead + // server. Cleared once headers arrive; long-lived chunk reads + // are governed by `stallTimeoutMs` instead. + fetchTimeoutMs?: number; + // Stall detector — if no chunks arrive within this window after + // the connection is established, force a reconnect. Catches + // silent-dead-socket cases (mobile OS killed the TCP socket but + // the read just blocks). Disabled (`0`) by default; opt in + // explicitly. Servers that emit periodic keepalive comments + // reset the timer naturally. + stallTimeoutMs?: number; + // HTTP statuses that should NOT be retried — fail the stream + // permanently. `404` (stream gone) and `410` (session closed) + // are sensible defaults; tune per-caller for other 4xx. + nonRetryableStatuses?: readonly number[]; } ) { this.lastEventId = options.lastEventId; + this.maxRetries = options.maxRetries ?? Infinity; + this.retryDelayMs = options.retryDelayMs ?? 100; + this.maxRetryDelayMs = options.maxRetryDelayMs ?? 5000; + this.retryJitter = options.retryJitter ?? 0.5; + this.fetchTimeoutMs = options.fetchTimeoutMs ?? 30_000; + this.stallTimeoutMs = options.stallTimeoutMs ?? 0; + this.nonRetryableStatuses = new Set(options.nonRetryableStatuses ?? [404, 410]); + } + + /** + * Wake an in-flight retry backoff and reconnect immediately. + * + * No-op if no retry is currently waiting (i.e. we're already + * connected and reading). 
Use this for cheap "hint" wakeups like + * the `online` event or a short-hidden visibility return — + * `forceReconnect()` is the heavier hammer. + */ + retryNow(): void { + this.retryNowController?.abort(); + } + + /** + * Drop the current connection (or wake a pending backoff) and + * reconnect. + * + * Use when the existing TCP socket is suspected dead but the reader + * hasn't noticed yet — common after a mobile tab background-kill or + * a Safari bfcache restore. Aborts the in-flight fetch / read so + * the catch path takes us through `retryConnection` and re-fetches + * with `Last-Event-ID`. + */ + forceReconnect(): void { + this.internalAbort?.abort(); + this.retryNowController?.abort(); } async subscribe(): Promise> { @@ -206,7 +273,7 @@ export class SSEStreamSubscription implements StreamSubscription { async start(controller) { await self.connectStream(controller); }, - cancel(reason) { + cancel() { self.options.onComplete?.(); }, }); @@ -215,25 +282,51 @@ export class SSEStreamSubscription implements StreamSubscription { private async connectStream( controller: ReadableStreamDefaultController ): Promise { + // Two abort sources flow through `internalAbort.signal`: + // - this.options.signal: caller cancel — bypass retry, exit cleanly. + // - this.internalAbort: per-attempt force-reconnect / fetch-timeout + // / stall-timeout — treated as a transient error, retry path runs. + // Use `this.options.signal?.aborted` in the catch to distinguish. + this.internalAbort = new AbortController(); + const unlinkUserAbort = linkAbort(this.options.signal, this.internalAbort); + + // Per-attempt fetch timeout. Cleared once response headers arrive; + // chunk-read latency is governed by `stallTimeoutMs` instead. 
+ const fetchTimer = setTimeout(() => this.internalAbort?.abort(), this.fetchTimeoutMs); + + let stallTimer: ReturnType | undefined; + const armStall = () => { + if (this.stallTimeoutMs <= 0) return; + clearTimeout(stallTimer); + stallTimer = setTimeout(() => this.internalAbort?.abort(), this.stallTimeoutMs); + }; + + // Idempotent — both the catch (before recursion) and the finally + // call this. Without the catch-side call, every retry leaks an + // abort listener on `this.options.signal` because the finally + // doesn't run until the entire recursion unwinds. + const cleanupAttempt = () => { + clearTimeout(fetchTimer); + clearTimeout(stallTimer); + unlinkUserAbort(); + this.internalAbort = null; + }; + try { const headers: Record = { Accept: "text/event-stream", ...this.options.headers, }; - - // Include Last-Event-ID header if we're resuming - if (this.lastEventId) { - headers["Last-Event-ID"] = this.lastEventId; - } - + if (this.lastEventId) headers["Last-Event-ID"] = this.lastEventId; if (this.options.timeoutInSeconds) { headers["Timeout-Seconds"] = this.options.timeoutInSeconds.toString(); } const response = await fetch(this.url, { headers, - signal: this.options.signal, + signal: this.internalAbort.signal, }); + clearTimeout(fetchTimer); if (!response.ok) { const error = ApiError.generate( @@ -242,22 +335,23 @@ export class SSEStreamSubscription implements StreamSubscription { "Could not subscribe to stream", Object.fromEntries(response.headers) ); - this.options.onError?.(error); + if (this.nonRetryableStatuses.has(response.status)) { + controller.error(error); + return; + } throw error; } if (!response.body) { const error = new Error("No response body"); - this.options.onError?.(error); throw error; } const streamVersion = response.headers.get("X-Stream-Version") ?? 
"v1"; - - // Reset retry count on successful connection - this.retryCount = 0; + this.retryCount = 0; // reset on success + armStall(); const seenIds = new Set(); @@ -268,13 +362,10 @@ export class SSEStreamSubscription implements StreamSubscription { new TransformStream({ transform: (chunk, chunkController) => { if (streamVersion === "v1") { - // Track the last event ID for resume support if (chunk.id) { this.lastEventId = chunk.id; } - const timestamp = parseRedisStreamIdTimestamp(chunk.id); - chunkController.enqueue({ id: chunk.id ?? "unknown", chunk: safeParseJSON(chunk.data), @@ -288,13 +379,9 @@ export class SSEStreamSubscription implements StreamSubscription { for (const record of data.records) { this.lastEventId = record.seq_num.toString(); - const parsedBody = safeParseJSON(record.body) as { data: unknown; id: string }; - if (seenIds.has(parsedBody.id)) { - continue; - } + if (seenIds.has(parsedBody.id)) continue; seenIds.add(parsedBody.id); - chunkController.enqueue({ id: record.seq_num.toString(), chunk: parsedBody.data, @@ -310,7 +397,6 @@ export class SSEStreamSubscription implements StreamSubscription { const reader = stream.getReader(); try { - let chunkCount = 0; while (true) { const { done, value } = await reader.read(); @@ -329,7 +415,7 @@ export class SSEStreamSubscription implements StreamSubscription { return; } - chunkCount++; + armStall(); // any chunk (including server keepalives) resets the silence timer controller.enqueue(value); } } catch (error) { @@ -338,14 +424,24 @@ export class SSEStreamSubscription implements StreamSubscription { } } catch (error) { if (this.options.signal?.aborted) { - // Don't retry if aborted + // User cancel — exit cleanly, don't retry. controller.close(); this.options.onComplete?.(); return; } - // Retry on error + if (isTriggerRealtimeAuthError(error)) { + // `onError` was already invoked in the `!response.ok` branch above + // (where the auth ApiError was originally constructed and thrown). 
+ // Auth errors are non-retryable: terminate the stream cleanly. + controller.error(error as Error); + return; + } + + cleanupAttempt(); await this.retryConnection(controller, error as Error); + } finally { + cleanupAttempt(); } } @@ -367,10 +463,33 @@ export class SSEStreamSubscription implements StreamSubscription { } this.retryCount++; - const delay = this.retryDelayMs * Math.pow(2, this.retryCount - 1); - - // Wait before retrying - await new Promise((resolve) => setTimeout(resolve, delay)); + const baseDelay = Math.min( + this.retryDelayMs * Math.pow(2, this.retryCount - 1), + this.maxRetryDelayMs + ); + // Jitter scales the delay into [(1 - retryJitter) * base, base]. + // E.g. retryJitter=0.5 → final delay is in [50%, 100%] of base. + // Spreads simultaneous reconnect attempts so many clients don't + // dogpile on the server right after a brief outage. + const delay = baseDelay * (1 - this.retryJitter * Math.random()); + + // Wait before retrying. The wait is wakeable: `retryNow()` aborts + // `retryNowController` so the timer resolves immediately and the + // next connect attempt starts now (e.g. on tab focus / `online` + // event from the browser layer). + this.retryNowController = new AbortController(); + await new Promise((resolve) => { + const timer = setTimeout(() => { + this.retryNowController?.signal.removeEventListener("abort", onAbort); + resolve(); + }, delay); + const onAbort = () => { + clearTimeout(timer); + resolve(); + }; + this.retryNowController!.signal.addEventListener("abort", onAbort, { once: true }); + }); + this.retryNowController = null; if (this.options.signal?.aborted) { controller.close(); @@ -383,6 +502,22 @@ export class SSEStreamSubscription implements StreamSubscription { } } +/** + * One-way abort link: when `parent` aborts, abort `child` too. Returns + * a cleanup that removes the listener so `parent` doesn't accumulate + * subscriptions across many connect attempts. 
+ */ +function linkAbort(parent: AbortSignal | undefined, child: AbortController): () => void { + if (!parent) return () => {}; + if (parent.aborted) { + child.abort(); + return () => {}; + } + const onAbort = () => child.abort(); + parent.addEventListener("abort", onAbort, { once: true }); + return () => parent.removeEventListener("abort", onAbort); +} + export class SSEStreamSubscriptionFactory implements StreamSubscriptionFactory { constructor( private baseUrl: string, diff --git a/packages/core/src/v3/errors.ts b/packages/core/src/v3/errors.ts index 90650bbd18f..d32e32f91c9 100644 --- a/packages/core/src/v3/errors.ts +++ b/packages/core/src/v3/errors.ts @@ -631,6 +631,26 @@ export class GracefulExitTimeoutError extends Error { } } +export class ChatChunkTooLargeError extends Error { + constructor( + public readonly chunkSize: number, + public readonly maxSize: number, + public readonly chunkType?: string + ) { + super( + `chat.agent chunk${chunkType ? ` of type "${chunkType}"` : ""} is ${chunkSize} bytes, ` + + `over the realtime stream's per-record cap of ${maxSize} bytes. ` + + `For oversized payloads (e.g. 
large tool outputs), write the value to your own store and ` + + `emit only an id/url through the chat stream — see https://trigger.dev/docs/ai-chat/patterns/large-payloads.` + ); + this.name = "ChatChunkTooLargeError"; + } +} + +export function isChatChunkTooLargeError(error: unknown): error is ChatChunkTooLargeError { + return error instanceof Error && error.name === "ChatChunkTooLargeError"; +} + export class MaxDurationExceededError extends Error { constructor( public readonly maxDurationInSeconds: number, diff --git a/packages/core/src/v3/index.ts b/packages/core/src/v3/index.ts index 2757363f4be..72b91c46071 100644 --- a/packages/core/src/v3/index.ts +++ b/packages/core/src/v3/index.ts @@ -21,6 +21,7 @@ export * from "./locals-api.js"; export * from "./heartbeats-api.js"; export * from "./realtime-streams-api.js"; export * from "./input-streams-api.js"; +export * from "./session-streams-api.js"; export * from "./waitpoints/index.js"; export * from "./schemas/index.js"; export { SemanticInternalAttributes } from "./semanticInternalAttributes.js"; @@ -80,6 +81,7 @@ export { getSchemaParseFn, type AnySchemaParseFn, type SchemaParseFn, + type inferSchemaOut, isSchemaZodEsque, isSchemaValibotEsque, isSchemaArkTypeEsque, diff --git a/packages/core/src/v3/inputStreams/index.ts b/packages/core/src/v3/inputStreams/index.ts index 4a871d6bfcc..0b3c7af063f 100644 --- a/packages/core/src/v3/inputStreams/index.ts +++ b/packages/core/src/v3/inputStreams/index.ts @@ -51,6 +51,18 @@ export class InputStreamsAPI implements InputStreamManager { return this.#getManager().lastSeqNum(streamId); } + public setLastSeqNum(streamId: string, seqNum: number): void { + this.#getManager().setLastSeqNum(streamId, seqNum); + } + + public shiftBuffer(streamId: string): boolean { + return this.#getManager().shiftBuffer(streamId); + } + + public disconnectStream(streamId: string): void { + this.#getManager().disconnectStream(streamId); + } + public clearHandlers(): void { 
this.#getManager().clearHandlers(); } diff --git a/packages/core/src/v3/inputStreams/manager.ts b/packages/core/src/v3/inputStreams/manager.ts index f393f4a169a..09212fb6a84 100644 --- a/packages/core/src/v3/inputStreams/manager.ts +++ b/packages/core/src/v3/inputStreams/manager.ts @@ -40,6 +40,26 @@ export class StandardInputStreamManager implements InputStreamManager { return this.seqNums.get(streamId); } + setLastSeqNum(streamId: string, seqNum: number): void { + const current = this.seqNums.get(streamId); + // Only advance forward, never backward + if (current === undefined || seqNum > current) { + this.seqNums.set(streamId, seqNum); + } + } + + shiftBuffer(streamId: string): boolean { + const buffered = this.buffer.get(streamId); + if (buffered && buffered.length > 0) { + buffered.shift(); + if (buffered.length === 0) { + this.buffer.delete(streamId); + } + return true; + } + return false; + } + setRunId(runId: string, streamsVersion?: string): void { this.currentRunId = runId; this.streamsVersion = streamsVersion; @@ -158,6 +178,15 @@ export class StandardInputStreamManager implements InputStreamManager { } } + disconnectStream(streamId: string): void { + const tail = this.tails.get(streamId); + if (tail) { + tail.abortController.abort(); + this.tails.delete(streamId); + } + this.buffer.delete(streamId); + } + connectTail(runId: string, _fromSeq?: number): void { // No-op: tails are now created per-stream lazily } diff --git a/packages/core/src/v3/inputStreams/noopManager.ts b/packages/core/src/v3/inputStreams/noopManager.ts index 6d72d9e2f76..612da832d7e 100644 --- a/packages/core/src/v3/inputStreams/noopManager.ts +++ b/packages/core/src/v3/inputStreams/noopManager.ts @@ -22,6 +22,12 @@ export class NoopInputStreamManager implements InputStreamManager { return undefined; } + setLastSeqNum(_streamId: string, _seqNum: number): void {} + + shiftBuffer(_streamId: string): boolean { return false; } + + disconnectStream(_streamId: string): void {} + 
clearHandlers(): void {} reset(): void {} disconnect(): void {} diff --git a/packages/core/src/v3/inputStreams/types.ts b/packages/core/src/v3/inputStreams/types.ts index 0816c06493f..c456bb61216 100644 --- a/packages/core/src/v3/inputStreams/types.ts +++ b/packages/core/src/v3/inputStreams/types.ts @@ -70,6 +70,28 @@ export interface InputStreamManager { */ lastSeqNum(streamId: string): number | undefined; + /** + * Advance the last-seen S2 sequence number for the given input stream. + * Used after `.wait()` resumes to prevent the SSE tail from replaying + * the record that was consumed via the waitpoint path. + */ + setLastSeqNum(streamId: string, seqNum: number): void; + + /** + * Remove and discard the first buffered item for the given input stream. + * Used after `.wait()` resumes to remove the duplicate that the SSE tail + * buffered while the waitpoint was being completed via a separate path. + * Returns true if an item was removed, false if the buffer was empty. + */ + shiftBuffer(streamId: string): boolean; + + /** + * Disconnect the SSE tail and clear the buffer for a specific input stream. + * Used before suspending via `.wait()` so the tail doesn't buffer duplicates + * of data that will be delivered through the waitpoint path. + */ + disconnectStream(streamId: string): void; + /** * Clear all persistent `.on()` handlers and abort tails that have no remaining once waiters. * Called automatically when a task run completes. 
diff --git a/packages/core/src/v3/realtime-streams-api.ts b/packages/core/src/v3/realtime-streams-api.ts index 0bc0665c052..e873413e2c3 100644 --- a/packages/core/src/v3/realtime-streams-api.ts +++ b/packages/core/src/v3/realtime-streams-api.ts @@ -5,3 +5,5 @@ import { RealtimeStreamsAPI } from "./realtimeStreams/index.js"; export const realtimeStreams = RealtimeStreamsAPI.getInstance(); export * from "./realtimeStreams/types.js"; +export { SessionStreamInstance } from "./realtimeStreams/sessionStreamInstance.js"; +export type { SessionStreamInstanceOptions } from "./realtimeStreams/sessionStreamInstance.js"; diff --git a/packages/core/src/v3/realtimeStreams/index.ts b/packages/core/src/v3/realtimeStreams/index.ts index 2a35b38befd..80c44f5a3db 100644 --- a/packages/core/src/v3/realtimeStreams/index.ts +++ b/packages/core/src/v3/realtimeStreams/index.ts @@ -6,6 +6,12 @@ import { RealtimeStreamsManager, } from "./types.js"; +// Re-export the session-scoped stream instance so the SDK's +// `SessionOutputChannel.pipe` / `.writer` can construct it without reaching +// into the core package's internals. 
+export { SessionStreamInstance } from "./sessionStreamInstance.js"; +export type { SessionStreamInstanceOptions } from "./sessionStreamInstance.js"; + const API_NAME = "realtime-streams"; const NOOP_MANAGER = new NoopRealtimeStreamsManager(); diff --git a/packages/core/src/v3/realtimeStreams/manager.ts b/packages/core/src/v3/realtimeStreams/manager.ts index 323735df106..beda3535fb4 100644 --- a/packages/core/src/v3/realtimeStreams/manager.ts +++ b/packages/core/src/v3/realtimeStreams/manager.ts @@ -6,6 +6,7 @@ import { RealtimeStreamInstance, RealtimeStreamOperationOptions, RealtimeStreamsManager, + StreamWriteResult, } from "./types.js"; export class StandardRealtimeStreamsManager implements RealtimeStreamsManager { @@ -16,7 +17,7 @@ export class StandardRealtimeStreamsManager implements RealtimeStreamsManager { ) {} // Track active streams - using a Set allows multiple streams for the same key to coexist private activeStreams = new Set<{ - wait: () => Promise; + wait: () => Promise; abortController: AbortController; }>(); diff --git a/packages/core/src/v3/realtimeStreams/noopManager.ts b/packages/core/src/v3/realtimeStreams/noopManager.ts index 542e66fd53a..881a82294e2 100644 --- a/packages/core/src/v3/realtimeStreams/noopManager.ts +++ b/packages/core/src/v3/realtimeStreams/noopManager.ts @@ -15,7 +15,7 @@ export class NoopRealtimeStreamsManager implements RealtimeStreamsManager { options?: RealtimeStreamOperationOptions ): RealtimeStreamInstance { return { - wait: () => Promise.resolve(), + wait: () => Promise.resolve({}), get stream(): AsyncIterableStream { return createAsyncIterableStreamFromAsyncIterable(source); }, diff --git a/packages/core/src/v3/realtimeStreams/sessionStreamInstance.ts b/packages/core/src/v3/realtimeStreams/sessionStreamInstance.ts new file mode 100644 index 00000000000..11eb7290edc --- /dev/null +++ b/packages/core/src/v3/realtimeStreams/sessionStreamInstance.ts @@ -0,0 +1,103 @@ +import { ApiClient } from "../apiClient/index.js"; 
+import { AsyncIterableStream } from "../streams/asyncIterableStream.js"; +import { AnyZodFetchOptions } from "../zodfetch.js"; +import { StreamsWriterV2 } from "./streamsWriterV2.js"; +import { StreamsWriter, StreamWriteResult } from "./types.js"; + +export type SessionStreamInstanceOptions = { + apiClient: ApiClient; + baseUrl: string; + sessionId: string; + io: "out" | "in"; + source: ReadableStream; + signal?: AbortSignal; + requestOptions?: AnyZodFetchOptions; + debug?: boolean; +}; + +/** + * Session-scoped parallel to {@link StreamInstance}. Calls + * `initializeSessionStream` to fetch S2 credentials for the session's + * channel, then pipes `source` directly to S2 via {@link StreamsWriterV2}. + * + * Sessions are S2-only — there's no v1 (Redis) fallback — so this + * skips the version-detection dance `StreamInstance` does. + */ +export class SessionStreamInstance implements StreamsWriter { + private streamPromise: Promise>; + + constructor(private options: SessionStreamInstanceOptions) { + this.streamPromise = this.initializeWriter(); + } + + private async initializeWriter(): Promise> { + const response = await this.options.apiClient.initializeSessionStream( + this.options.sessionId, + this.options.io, + this.options?.requestOptions + ); + + const headers = response.headers ?? {}; + const accessToken = headers["x-s2-access-token"]; + const basin = headers["x-s2-basin"]; + const streamName = headers["x-s2-stream-name"]; + const endpoint = headers["x-s2-endpoint"]; + const flushIntervalMs = headers["x-s2-flush-interval-ms"] + ? parseInt(headers["x-s2-flush-interval-ms"]) + : undefined; + const maxRetries = headers["x-s2-max-retries"] + ? parseInt(headers["x-s2-max-retries"]) + : undefined; + + if (!accessToken || !basin || !streamName) { + throw new Error( + "Session stream initialize did not return S2 credentials — server may be configured for v1 realtime streams, which sessions do not support." 
+ ); + } + + return new StreamsWriterV2({ + basin, + stream: streamName, + accessToken, + endpoint, + source: this.options.source, + signal: this.options.signal, + debug: this.options.debug, + flushIntervalMs, + maxRetries, + }); + } + + public async wait(): Promise { + const writer = await this.streamPromise; + return writer.wait(); + } + + public get stream(): AsyncIterableStream { + const self = this; + + return new ReadableStream({ + async start(controller) { + const streamWriter = await self.streamPromise; + + const iterator = streamWriter[Symbol.asyncIterator](); + + while (true) { + if (self.options.signal?.aborted) { + controller.close(); + break; + } + + const { done, value } = await iterator.next(); + + if (done) { + controller.close(); + break; + } + + controller.enqueue(value); + } + }, + }); + } +} diff --git a/packages/core/src/v3/realtimeStreams/streamInstance.ts b/packages/core/src/v3/realtimeStreams/streamInstance.ts index 6d8106ffe6c..07ee0158bfb 100644 --- a/packages/core/src/v3/realtimeStreams/streamInstance.ts +++ b/packages/core/src/v3/realtimeStreams/streamInstance.ts @@ -3,7 +3,7 @@ import { AsyncIterableStream } from "../streams/asyncIterableStream.js"; import { AnyZodFetchOptions } from "../zodfetch.js"; import { StreamsWriterV1 } from "./streamsWriterV1.js"; import { StreamsWriterV2 } from "./streamsWriterV2.js"; -import { StreamsWriter } from "./types.js"; +import { StreamsWriter, StreamWriteResult } from "./types.js"; export type StreamInstanceOptions = { apiClient: ApiClient; @@ -63,8 +63,9 @@ export class StreamInstance implements StreamsWriter { return streamWriter; } - public async wait(): Promise { - return this.streamPromise.then((writer) => writer.wait()); + public async wait(): Promise { + const writer = await this.streamPromise; + return writer.wait(); } public get stream(): AsyncIterableStream { diff --git a/packages/core/src/v3/realtimeStreams/streamsWriterV1.ts b/packages/core/src/v3/realtimeStreams/streamsWriterV1.ts index 
2f2b4af1682..c19faf6c2f8 100644 --- a/packages/core/src/v3/realtimeStreams/streamsWriterV1.ts +++ b/packages/core/src/v3/realtimeStreams/streamsWriterV1.ts @@ -2,7 +2,7 @@ import { request as httpsRequest } from "node:https"; import { request as httpRequest } from "node:http"; import { URL } from "node:url"; import { randomBytes } from "node:crypto"; -import { StreamsWriter } from "./types.js"; +import { StreamsWriter, StreamWriteResult } from "./types.js"; export type StreamsWriterV1Options = { baseUrl: string; @@ -258,8 +258,9 @@ export class StreamsWriterV1 implements StreamsWriter { await this.makeRequest(0); } - public async wait(): Promise { - return this.streamPromise; + public async wait(): Promise { + await this.streamPromise; + return {}; } public [Symbol.asyncIterator]() { diff --git a/packages/core/src/v3/realtimeStreams/streamsWriterV2.test.ts b/packages/core/src/v3/realtimeStreams/streamsWriterV2.test.ts new file mode 100644 index 00000000000..0d73b0d48f3 --- /dev/null +++ b/packages/core/src/v3/realtimeStreams/streamsWriterV2.test.ts @@ -0,0 +1,150 @@ +import { afterEach, describe, expect, it, vi } from "vitest"; + +import { ChatChunkTooLargeError, isChatChunkTooLargeError } from "../errors.js"; + +const lastAckedPosition = vi.fn(() => undefined); + +const appendSession = vi.fn(async () => { + // A WritableStream that just consumes records — we never reach S2 because + // the size check fires upstream of this for the oversize case, but we still + // need a valid writable for the small-chunk path. 
+ const writable = new WritableStream({}); + return { + writable, + lastAckedPosition, + }; +}); + +vi.mock("@s2-dev/streamstore", async (importOriginal) => { + const actual = await importOriginal(); + return { + ...actual, + S2: class FakeS2 { + basin() { + return { + stream: () => ({ + appendSession, + }), + }; + } + }, + }; +}); + +import { StreamsWriterV2 } from "./streamsWriterV2.js"; + +afterEach(() => { + vi.clearAllMocks(); +}); + +describe("StreamsWriterV2", () => { + it("rejects with ChatChunkTooLargeError when a single chunk exceeds the per-record cap", async () => { + const oversized = { + type: "tool-output-available", + output: { text: "x".repeat(2_000_000) }, + }; + const source = new ReadableStream({ + start(controller) { + controller.enqueue(oversized); + controller.close(); + }, + }); + + const writer = new StreamsWriterV2({ + basin: "test", + stream: "test", + accessToken: "test", + source, + }); + + await expect(writer.wait()).rejects.toBeInstanceOf(ChatChunkTooLargeError); + + let captured: unknown; + try { + await writer.wait(); + } catch (err) { + captured = err; + } + expect(isChatChunkTooLargeError(captured)).toBe(true); + const e = captured as ChatChunkTooLargeError; + expect(e.chunkType).toBe("tool-output-available"); + expect(e.chunkSize).toBeGreaterThan(1_000_000); + expect(e.maxSize).toBe(1024 * 1024 - 1024); + expect(e.message).toMatch(/tool-output-available/); + expect(e.message).toMatch(/chat\.agent chunk/); + }); + + it("uses chunk.kind when chunk.type is missing (ChatInputChunk-style)", async () => { + const oversized = { + kind: "action", + payload: "x".repeat(2_000_000), + }; + const source = new ReadableStream({ + start(controller) { + controller.enqueue(oversized); + controller.close(); + }, + }); + + const writer = new StreamsWriterV2({ + basin: "test", + stream: "test", + accessToken: "test", + source, + }); + + let captured: unknown; + try { + await writer.wait(); + } catch (err) { + captured = err; + } + 
expect(isChatChunkTooLargeError(captured)).toBe(true); + expect((captured as ChatChunkTooLargeError).chunkType).toBe("action"); + }); + + it("omits chunkType when chunk has no discriminant", async () => { + const oversized = "x".repeat(2_000_000); + const source = new ReadableStream({ + start(controller) { + controller.enqueue(oversized); + controller.close(); + }, + }); + + const writer = new StreamsWriterV2({ + basin: "test", + stream: "test", + accessToken: "test", + source, + }); + + let captured: unknown; + try { + await writer.wait(); + } catch (err) { + captured = err; + } + expect(isChatChunkTooLargeError(captured)).toBe(true); + expect((captured as ChatChunkTooLargeError).chunkType).toBeUndefined(); + }); + + it("does not reject for chunks under the cap", async () => { + const small = { type: "text-delta", delta: "hello" }; + const source = new ReadableStream({ + start(controller) { + controller.enqueue(small); + controller.close(); + }, + }); + + const writer = new StreamsWriterV2({ + basin: "test", + stream: "test", + accessToken: "test", + source, + }); + + await expect(writer.wait()).resolves.toBeDefined(); + }); +}); diff --git a/packages/core/src/v3/realtimeStreams/streamsWriterV2.ts b/packages/core/src/v3/realtimeStreams/streamsWriterV2.ts index 91713630dbe..ffd6fc92702 100644 --- a/packages/core/src/v3/realtimeStreams/streamsWriterV2.ts +++ b/packages/core/src/v3/realtimeStreams/streamsWriterV2.ts @@ -1,7 +1,16 @@ import { S2, AppendRecord, BatchTransform } from "@s2-dev/streamstore"; -import { StreamsWriter } from "./types.js"; +import { ChatChunkTooLargeError } from "../errors.js"; +import { StreamsWriter, StreamWriteResult } from "./types.js"; import { nanoid } from "nanoid"; +// S2 caps a single record at 1 MiB of metered bytes (body + headers + 8 byte +// overhead). 
We give ourselves ~1 KiB of headroom for the JSON envelope and +// metering bytes so the check fires before the SDK's internal `BatchTransform` +// rejects the record with an opaque `S2Error`. +const RECORD_BODY_MAX_BYTES = 1024 * 1024 - 1024; + +const utf8Encoder = new TextEncoder(); + export type StreamsWriterV2Options = { basin: string; stream: string; @@ -54,6 +63,7 @@ export class StreamsWriterV2 implements StreamsWriter { private readonly maxInflightBytes: number; private aborted = false; private sessionWritable: WritableStream | null = null; + private lastSeqNum: number | undefined; constructor(private options: StreamsWriterV2Options) { this.debug = options.debug ?? false; @@ -151,8 +161,16 @@ export class StreamsWriterV2 implements StreamsWriter { controller.error(new Error("Stream aborted")); return; } - // Convert each chunk to JSON string and wrap in AppendRecord - controller.enqueue(AppendRecord.string({ body: JSON.stringify({ data: chunk, id: nanoid(7) }) })); + const body = JSON.stringify({ data: chunk, id: nanoid(7) }); + const size = utf8Encoder.encode(body).length; + if (size > RECORD_BODY_MAX_BYTES) { + const chunkType = extractChunkType(chunk); + controller.error( + new ChatChunkTooLargeError(size, RECORD_BODY_MAX_BYTES, chunkType) + ); + return; + } + controller.enqueue(AppendRecord.string({ body })); }, }) ) @@ -169,9 +187,9 @@ export class StreamsWriterV2 implements StreamsWriter { const lastAcked = session.lastAckedPosition(); if (lastAcked?.end) { - const recordsWritten = lastAcked.end.seqNum; + this.lastSeqNum = lastAcked.end.seqNum; this.log( - `[S2MetadataStream] Written ${recordsWritten} records, ending at seqNum=${lastAcked.end.seqNum}` + `[S2MetadataStream] Written ${this.lastSeqNum} records, ending at seqNum=${this.lastSeqNum}` ); } } catch (error) { @@ -184,8 +202,9 @@ export class StreamsWriterV2 implements StreamsWriter { } } - public async wait(): Promise { + public async wait(): Promise { await this.streamPromise; + return { 
lastEventId: this.lastSeqNum?.toString() }; } public [Symbol.asyncIterator]() { @@ -225,3 +244,17 @@ function safeReleaseLock(reader: ReadableStreamDefaultReader) { reader.releaseLock(); } catch (error) {} } + +// chat.agent emits two chunk shapes through this writer: +// - UIMessageChunks + custom data parts: `{ type: "tool-output-available" | "data-..." | ... }` +// - ChatInputChunks (mostly seen on `.in`, but reused as the discriminant +// elsewhere): `{ kind: "message" | "stop" | "action" }` +// Surfacing whichever discriminant exists turns "chunk too large" into +// "tool-output-available chunk too large", which is what users actually need. +function extractChunkType(chunk: unknown): string | undefined { + if (!chunk || typeof chunk !== "object") return undefined; + const c = chunk as { type?: unknown; kind?: unknown }; + if (typeof c.type === "string") return c.type; + if (typeof c.kind === "string") return c.kind; + return undefined; +} diff --git a/packages/core/src/v3/realtimeStreams/types.ts b/packages/core/src/v3/realtimeStreams/types.ts index 174970c2830..5e537d991ff 100644 --- a/packages/core/src/v3/realtimeStreams/types.ts +++ b/packages/core/src/v3/realtimeStreams/types.ts @@ -26,13 +26,17 @@ export interface RealtimeStreamsManager { ): Promise; } +export type StreamWriteResult = { + lastEventId?: string; +}; + export interface RealtimeStreamInstance { - wait(): Promise; + wait(): Promise; get stream(): AsyncIterableStream; } export interface StreamsWriter { - wait(): Promise; + wait(): Promise; } export type RealtimeDefinedStream = { @@ -71,6 +75,10 @@ export type PipeStreamOptions = { * Additional request options for the API call. */ requestOptions?: ApiRequestOptions; + /** Override the default span name for this operation. */ + spanName?: string; + /** When true, the span will be collapsed in the dashboard. */ + collapsed?: boolean; }; /** @@ -89,7 +97,7 @@ export type PipeStreamResult = { * to the realtime stream. 
Use this to wait for the stream to complete before * finishing your task. */ - waitUntilComplete: () => Promise; + waitUntilComplete: () => Promise; }; /** @@ -185,6 +193,14 @@ export type RealtimeDefinedInputStream = { * Uses a waitpoint token internally. Can only be called inside a task.run(). */ wait: (options?: InputStreamWaitOptions) => ManualWaitpointPromise; + /** + * Wait for data with an idle phase before suspending. + * + * Keeps the task active (using compute) for `idleTimeoutInSeconds`, + * then suspends via `.wait()` if no data arrives. If data arrives during + * the idle phase the task responds instantly without suspending. + */ + waitWithIdleTimeout: (options: InputStreamWaitWithIdleTimeoutOptions) => Promise<{ ok: true; output: TData } | { ok: false; error?: any }>; /** * Send data to this input stream on a specific run. * This is used from outside the task (e.g., from your backend or another task). @@ -199,6 +215,8 @@ export type InputStreamSubscription = { export type InputStreamOnceOptions = { signal?: AbortSignal; timeoutMs?: number; + /** Override the default span name for this operation. */ + spanName?: string; }; export type SendInputStreamOptions = { @@ -234,6 +252,24 @@ export type InputStreamWaitOptions = { * and filtering waitpoints via `wait.listTokens()`. */ tags?: string[]; + + /** Override the default span name for this operation. */ + spanName?: string; +}; + +export type InputStreamWaitWithIdleTimeoutOptions = { + /** Seconds to keep the task idle (active, using compute) before suspending. */ + idleTimeoutInSeconds: number; + /** Maximum time to wait after suspending (duration string, e.g. "1h"). */ + timeout?: string; + /** Override the default span name for the outer operation. */ + spanName?: string; + /** Called right before suspending (after idle phase times out). Not called if data arrives during idle. */ + onSuspend?: () => void | Promise; + /** Called right after resuming from suspension with data. 
Not called if data arrived during idle or on timeout. */ + onResume?: () => void | Promise; + /** When true, skip the suspend phase entirely. If idle times out, return `{ ok: false }` immediately. */ + skipSuspend?: boolean; }; export type InferInputStreamType = T extends RealtimeDefinedInputStream diff --git a/packages/core/src/v3/schemas/api.ts b/packages/core/src/v3/schemas/api.ts index 0db92a67c64..42dc1826977 100644 --- a/packages/core/src/v3/schemas/api.ts +++ b/packages/core/src/v3/schemas/api.ts @@ -1115,6 +1115,7 @@ const CommonRunFields = { baseCostInCents: z.number(), durationMs: z.number(), metadata: z.record(z.any()).optional(), + taskKind: z.string().optional(), }; const RetrieveRunCommandFields = { @@ -1494,6 +1495,12 @@ export const SessionTriggerConfig = z.object({ queue: z.string().max(128).optional(), tags: z.array(z.string().max(128)).max(5).optional(), maxAttempts: z.number().int().positive().max(10).optional(), + /** Per-run wall-clock cap (seconds). Forwarded to `TaskRunOptions.maxDuration`. */ + maxDuration: z.number().int().positive().optional(), + /** Pin every run to a specific worker version. Forwarded to `TaskRunOptions.lockToVersion`. */ + lockToVersion: z.string().optional(), + /** Region to schedule runs in. Forwarded to `TaskRunOptions.region`. */ + region: z.string().optional(), /** Convenience field surfaced to chat.agent via the wire payload. 
*/ idleTimeoutInSeconds: z.number().int().positive().max(3600).optional(), }); @@ -1818,6 +1825,9 @@ export const ApiDeploymentListResponseItem = z.object({ export type ApiDeploymentListResponseItem = z.infer<typeof ApiDeploymentListResponseItem>; +export const RetrieveCurrentDeploymentResponseBody = ApiDeploymentListResponseItem; +export type RetrieveCurrentDeploymentResponseBody = ApiDeploymentListResponseItem; + export const ApiBranchListResponseBody = z.object({ branches: z.array( z.object({ @@ -1938,6 +1948,27 @@ export const SendInputStreamResponseBody = z.object({ }); export type SendInputStreamResponseBody = z.infer<typeof SendInputStreamResponseBody>; +/** + * Response body for `GET /realtime/v1/sessions/:id/:io/records`. A non-SSE, + * `wait=0` drain of a session channel — used at run boot for snapshot + * replay where the SSE long-poll tax (~1s on empty streams) was the + * dominant cost. The shape mirrors the webapp's internal `StreamRecord` + * type (`apps/webapp/app/services/realtime/types.ts`); each record's + * `data` is a JSON-encoded chunk body that callers parse client-side. 
+ */ +export const ReadSessionStreamRecordsResponseBody = z.object({ + records: z.array( + z.object({ + data: z.string(), + id: z.string(), + seqNum: z.number(), + }) + ), +}); +export type ReadSessionStreamRecordsResponseBody = z.infer< + typeof ReadSessionStreamRecordsResponseBody +>; + export const ResolvePromptRequestBody = z.object({ variables: z.record(z.unknown()).default({}), label: z.string().optional(), diff --git a/packages/core/src/v3/schemas/build.ts b/packages/core/src/v3/schemas/build.ts index bda5efb0ad9..e1543529a48 100644 --- a/packages/core/src/v3/schemas/build.ts +++ b/packages/core/src/v3/schemas/build.ts @@ -1,6 +1,12 @@ import { z } from "zod"; import { ConfigManifest } from "./config.js"; -import { PromptManifest, QueueManifest, TaskFile, TaskManifest } from "./schemas.js"; +import { + PromptManifest, + QueueManifest, + SkillManifest, + TaskFile, + TaskManifest, +} from "./schemas.js"; export const BuildExternal = z.object({ name: z.string(), @@ -70,6 +76,8 @@ export const BuildManifest = z.object({ .optional(), /** Maps output file paths to their content hashes for deduplication during dev */ outputHashes: z.record(z.string()).optional(), + /** Skills discovered and bundled into `.trigger/skills/{id}/` under `outputPath`. 
*/ + skills: SkillManifest.array().optional(), }); export type BuildManifest = z.infer; @@ -87,6 +95,7 @@ export const WorkerManifest = z.object({ configPath: z.string(), tasks: TaskManifest.array(), prompts: PromptManifest.array().optional(), + skills: SkillManifest.array().optional(), queues: QueueManifest.array().optional(), workerEntryPoint: z.string(), controllerEntryPoint: z.string().optional(), diff --git a/packages/core/src/v3/schemas/resources.ts b/packages/core/src/v3/schemas/resources.ts index e681c728416..753324d1257 100644 --- a/packages/core/src/v3/schemas/resources.ts +++ b/packages/core/src/v3/schemas/resources.ts @@ -2,6 +2,12 @@ import { z } from "zod"; import { QueueManifest, RetryOptions, ScheduleMetadata } from "./schemas.js"; import { MachineConfig } from "./common.js"; +export const AgentConfig = z.object({ + type: z.string(), // "ai-sdk-chat" initially, extensible for future agent types +}); + +export type AgentConfig = z.infer; + export const TaskResource = z.object({ id: z.string(), description: z.string().optional(), @@ -11,6 +17,7 @@ export const TaskResource = z.object({ retry: RetryOptions.optional(), machine: MachineConfig.optional(), triggerSource: z.string().optional(), + agentConfig: AgentConfig.optional(), schedule: ScheduleMetadata.optional(), maxDuration: z.number().optional(), ttl: z.string().or(z.number().nonnegative().int()).optional(), diff --git a/packages/core/src/v3/schemas/runEngine.ts b/packages/core/src/v3/schemas/runEngine.ts index b9e41c9a8d7..5ea22960bf2 100644 --- a/packages/core/src/v3/schemas/runEngine.ts +++ b/packages/core/src/v3/schemas/runEngine.ts @@ -15,11 +15,15 @@ export const TriggerAction = z.enum(["trigger", "replay", "test"]).or(anyString) export type TriggerAction = z.infer; +export const TaskKind = z.enum(["STANDARD", "SCHEDULED", "AGENT"]).or(anyString); +export type TaskKind = z.infer; + export const RunAnnotations = z.object({ triggerSource: TriggerSource, triggerAction: TriggerAction, 
rootTriggerSource: TriggerSource, rootScheduleId: z.string().optional(), + taskKind: TaskKind.optional(), }); export type RunAnnotations = z.infer; diff --git a/packages/core/src/v3/schemas/schemas.ts b/packages/core/src/v3/schemas/schemas.ts index 5fb85f80ae8..95564cb1efc 100644 --- a/packages/core/src/v3/schemas/schemas.ts +++ b/packages/core/src/v3/schemas/schemas.ts @@ -180,6 +180,10 @@ export const ScheduleMetadata = z.object({ environments: z.array(EnvironmentType).optional(), }); +const AgentConfig = z.object({ + type: z.string(), +}); + const taskMetadata = { id: z.string(), description: z.string().optional(), @@ -187,6 +191,7 @@ const taskMetadata = { retry: RetryOptions.optional(), machine: MachineConfig.optional(), triggerSource: z.string().optional(), + agentConfig: AgentConfig.optional(), schedule: ScheduleMetadata.optional(), maxDuration: z.number().optional(), ttl: z.string().or(z.number().nonnegative().int()).optional(), @@ -241,6 +246,28 @@ export const PromptManifest = z.object({ export type PromptManifest = z.infer; +// ── Skills ──────────────────────────────────────────────────────────────── +// +// A skill is a developer-authored folder (SKILL.md + scripts/references/assets) +// bundled into the deploy image. SkillMetadata is registered at module load +// by `ai.defineSkill({ id, path })`; the CLI's built-in bundler picks it up +// during deploy and copies the folder into the deploy image. + +const skillMetadata = { + id: z.string(), + /** Path to the skill's source folder, relative to the project root. 
*/ + sourcePath: z.string(), +}; + +export const SkillMetadata = z.object(skillMetadata); +export type SkillMetadata = z.infer<typeof SkillMetadata>; + +export const SkillManifest = z.object({ + ...skillMetadata, + ...taskFileMetadata, +}); +export type SkillManifest = z.infer<typeof SkillManifest>; + export const PostStartCauses = z.enum(["index", "create", "restore"]); export type PostStartCauses = z.infer<typeof PostStartCauses>; diff --git a/packages/core/src/v3/semanticInternalAttributes.ts b/packages/core/src/v3/semanticInternalAttributes.ts index 2c715a03ea1..e6e0160663d 100644 --- a/packages/core/src/v3/semanticInternalAttributes.ts +++ b/packages/core/src/v3/semanticInternalAttributes.ts @@ -13,6 +13,7 @@ export const SemanticInternalAttributes = { RUN_ID: "ctx.run.id", RUN_IS_TEST: "ctx.run.isTest", RUN_IS_REPLAY: "ctx.run.isReplay", + GEN_AI_CONVERSATION_ID: "gen_ai.conversation.id", ORIGINAL_RUN_ID: "$original_run_id", BATCH_ID: "ctx.batch.id", TASK_SLUG: "ctx.task.id", diff --git a/packages/core/src/v3/session-streams-api.ts b/packages/core/src/v3/session-streams-api.ts new file mode 100644 index 00000000000..afa417a6418 --- /dev/null +++ b/packages/core/src/v3/session-streams-api.ts @@ -0,0 +1,7 @@ +// Split module-level variable definition into separate files to allow +// tree-shaking on each api instance. 
+import { SessionStreamsAPI } from "./sessionStreams/index.js"; + +export const sessionStreams = SessionStreamsAPI.getInstance(); + +export * from "./sessionStreams/types.js"; diff --git a/packages/core/src/v3/sessionStreams/index.ts b/packages/core/src/v3/sessionStreams/index.ts new file mode 100644 index 00000000000..75b372c8314 --- /dev/null +++ b/packages/core/src/v3/sessionStreams/index.ts @@ -0,0 +1,89 @@ +import { getGlobal, registerGlobal } from "../utils/globals.js"; +import { NoopSessionStreamManager } from "./noopManager.js"; +import { + InputStreamOncePromise, + SessionChannelIO, + SessionStreamManager, +} from "./types.js"; +import { InputStreamOnceOptions } from "../realtimeStreams/types.js"; + +const API_NAME = "session-streams"; + +const NOOP_MANAGER = new NoopSessionStreamManager(); + +export class SessionStreamsAPI implements SessionStreamManager { + private static _instance?: SessionStreamsAPI; + + private constructor() {} + + public static getInstance(): SessionStreamsAPI { + if (!this._instance) { + this._instance = new SessionStreamsAPI(); + } + return this._instance; + } + + setGlobalManager(manager: SessionStreamManager): boolean { + return registerGlobal(API_NAME, manager); + } + + #getManager(): SessionStreamManager { + return getGlobal(API_NAME) ?? 
NOOP_MANAGER; + } + + public on( + sessionId: string, + io: SessionChannelIO, + handler: (data: unknown) => void | Promise + ): { off: () => void } { + return this.#getManager().on(sessionId, io, handler); + } + + public once( + sessionId: string, + io: SessionChannelIO, + options?: InputStreamOnceOptions + ): InputStreamOncePromise { + return this.#getManager().once(sessionId, io, options); + } + + public peek(sessionId: string, io: SessionChannelIO): unknown | undefined { + return this.#getManager().peek(sessionId, io); + } + + public lastSeqNum(sessionId: string, io: SessionChannelIO): number | undefined { + return this.#getManager().lastSeqNum(sessionId, io); + } + + public setLastSeqNum(sessionId: string, io: SessionChannelIO, seqNum: number): void { + this.#getManager().setLastSeqNum(sessionId, io, seqNum); + } + + public setMinTimestamp( + sessionId: string, + io: SessionChannelIO, + minTimestamp: number | undefined + ): void { + this.#getManager().setMinTimestamp(sessionId, io, minTimestamp); + } + + public shiftBuffer(sessionId: string, io: SessionChannelIO): boolean { + return this.#getManager().shiftBuffer(sessionId, io); + } + + public disconnectStream(sessionId: string, io: SessionChannelIO): void { + this.#getManager().disconnectStream(sessionId, io); + } + + public clearHandlers(): void { + this.#getManager().clearHandlers(); + } + + public reset(): void { + this.#getManager().reset(); + } + + public disconnect(): void { + this.#getManager().disconnect(); + } +} diff --git a/packages/core/src/v3/sessionStreams/manager.test.ts b/packages/core/src/v3/sessionStreams/manager.test.ts new file mode 100644 index 00000000000..6089d705783 --- /dev/null +++ b/packages/core/src/v3/sessionStreams/manager.test.ts @@ -0,0 +1,151 @@ +import { describe, expect, it } from "vitest"; +import { StandardSessionStreamManager } from "./manager.js"; +import type { ApiClient } from "../apiClient/index.js"; +import type { SSEStreamPart } from "../apiClient/runStream.js"; + 
+// Single-shot mock that mimics S2's long-poll: delivers `records` once via +// `onPart` on the first subscribe call, then keeps the returned async +// iterable OPEN until the abort signal fires. Real S2 keeps the SSE +// connection alive on a long-poll; the manager's `runTail` finally / +// reconnect path only fires when the connection actually closes. Returning +// an empty stream synchronously triggers a tight reconnect loop, so the +// mock parks indefinitely instead. +function singleShotApiClient( + records: Array<{ id: string; chunk: unknown; timestamp: number }> +): ApiClient { + let delivered = false; + return { + async subscribeToSessionStream( + _sessionIdOrExternalId: string, + _io: "out" | "in", + options?: { onPart?: (part: SSEStreamPart) => void; signal?: AbortSignal } + ) { + if (!delivered) { + delivered = true; + for (const record of records) { + options?.onPart?.(record as SSEStreamPart); + } + } + const signal = options?.signal; + return (async function* () { + if (signal?.aborted) return; + await new Promise((resolve) => { + if (!signal) { + // No signal — block the stream forever; tests must + // explicitly call `disconnectStream` / `disconnect` to + // unblock. 
+ return; + } + signal.addEventListener("abort", () => resolve(), { once: true }); + }); + })() as unknown as Awaited>; + }, + } as unknown as ApiClient; +} + +describe("StandardSessionStreamManager — minTimestamp filter", () => { + const sessionId = "session-1"; + const io = "in" as const; + + it("dispatches records when no filter is set", async () => { + const records = [ + { id: "0", chunk: { kind: "message", payload: { id: "u1" } }, timestamp: 1000 }, + { id: "1", chunk: { kind: "message", payload: { id: "u2" } }, timestamp: 2000 }, + ]; + const manager = new StandardSessionStreamManager(singleShotApiClient(records), "http://localhost"); + + const first = await manager.once(sessionId, io); + expect(first).toEqual({ ok: true, output: { kind: "message", payload: { id: "u1" } } }); + + const second = await manager.once(sessionId, io); + expect(second).toEqual({ ok: true, output: { kind: "message", payload: { id: "u2" } } }); + + manager.disconnectStream(sessionId, io); // stop reconnect loop + manager.disconnect(); + }); + + it("drops records whose timestamp is <= minTimestamp", async () => { + const records = [ + { id: "0", chunk: { kind: "message", payload: { id: "u1" } }, timestamp: 1000 }, + { id: "1", chunk: { kind: "message", payload: { id: "u2" } }, timestamp: 2000 }, + { id: "2", chunk: { kind: "message", payload: { id: "u3" } }, timestamp: 3000 }, + ]; + const manager = new StandardSessionStreamManager(singleShotApiClient(records), "http://localhost"); + + // Cutoff at 2000 (inclusive: `<=` is dropped). Only u3 should pass. 
+ manager.setMinTimestamp(sessionId, io, 2000); + + const passed = await manager.once(sessionId, io, { timeoutMs: 200 }); + expect(passed).toEqual({ ok: true, output: { kind: "message", payload: { id: "u3" } } }); + + manager.disconnectStream(sessionId, io); + manager.disconnect(); + }); + + it("clears the filter when set to undefined", async () => { + const records = [ + { id: "0", chunk: { kind: "message", payload: { id: "u1" } }, timestamp: 1000 }, + ]; + const manager = new StandardSessionStreamManager(singleShotApiClient(records), "http://localhost"); + + manager.setMinTimestamp(sessionId, io, 5000); + manager.setMinTimestamp(sessionId, io, undefined); + + const passed = await manager.once(sessionId, io, { timeoutMs: 200 }); + expect(passed).toEqual({ ok: true, output: { kind: "message", payload: { id: "u1" } } }); + + manager.disconnectStream(sessionId, io); + manager.disconnect(); + }); + + it("filter is per-(sessionId, io) and doesn't bleed across streams", async () => { + const inApi = singleShotApiClient([ + { id: "0", chunk: { kind: "in-record" }, timestamp: 1000 }, + ]); + const manager = new StandardSessionStreamManager(inApi, "http://localhost"); + + manager.setMinTimestamp(sessionId, "in", 5000); + + // The "out" stream uses the same singleShotApiClient instance — its + // single-shot delivers the same fixture, but the filter doesn't apply + // to "out" so the record passes. + const outResult = await manager.once(sessionId, "out", { timeoutMs: 200 }); + expect(outResult).toEqual({ ok: true, output: { kind: "in-record" } }); + + // The "in" stream is filtered (minTimestamp=5000, record ts=1000): the + // once() call should idle-timeout instead of resolving with the record. + // But the singleShot instance has already delivered to the "out" tail, + // so the "in" tail will get nothing on first connect anyway. Use a + // separate manager+api to keep the assertion crisp. 
+ const inApi2 = singleShotApiClient([ + { id: "0", chunk: { kind: "in-record-2" }, timestamp: 1000 }, + ]); + const manager2 = new StandardSessionStreamManager(inApi2, "http://localhost"); + manager2.setMinTimestamp(sessionId, "in", 5000); + + const inResult = await manager2.once(sessionId, "in", { timeoutMs: 100 }); + expect(inResult.ok).toBe(false); // filter-dropped → idle timeout + + manager.disconnectStream(sessionId, "in"); + manager.disconnectStream(sessionId, "out"); + manager.disconnect(); + manager2.disconnectStream(sessionId, "in"); + manager2.disconnect(); + }); + + it("reset() clears all per-stream timestamp filters", async () => { + const records = [ + { id: "0", chunk: { kind: "message", payload: { id: "u1" } }, timestamp: 1000 }, + ]; + const manager = new StandardSessionStreamManager(singleShotApiClient(records), "http://localhost"); + + manager.setMinTimestamp(sessionId, io, 5000); + manager.reset(); + + const passed = await manager.once(sessionId, io, { timeoutMs: 200 }); + expect(passed).toEqual({ ok: true, output: { kind: "message", payload: { id: "u1" } } }); + + manager.disconnectStream(sessionId, io); + manager.disconnect(); + }); +}); diff --git a/packages/core/src/v3/sessionStreams/manager.ts b/packages/core/src/v3/sessionStreams/manager.ts new file mode 100644 index 00000000000..d40af9e7b2d --- /dev/null +++ b/packages/core/src/v3/sessionStreams/manager.ts @@ -0,0 +1,412 @@ +import { ApiClient } from "../apiClient/index.js"; +import { + InputStreamOncePromise, + InputStreamOnceResult, + InputStreamTimeoutError, +} from "../inputStreams/types.js"; +import { InputStreamOnceOptions } from "../realtimeStreams/types.js"; +import { SessionChannelIO, SessionStreamManager } from "./types.js"; + +type SessionStreamHandler = (data: unknown) => void | Promise; + +type OnceWaiter = { + resolve: (result: InputStreamOnceResult) => void; + reject: (error: Error) => void; + timeoutHandle?: ReturnType; +}; + +type TailState = { + abortController: 
AbortController; + promise: Promise; +}; + +function keyFor(sessionId: string, io: SessionChannelIO): string { + return `${sessionId}:${io}`; +} + +/** + * Session-scoped parallel to {@link StandardInputStreamManager}. Keeps the + * same buffer / once-waiter / tail lifecycle, but keyed on + * `(sessionId, io)` and subscribing via + * {@link ApiClient.subscribeToSessionStream} instead of the run input + * stream SSE. + */ +export class StandardSessionStreamManager implements SessionStreamManager { + private handlers = new Map>(); + private onceWaiters = new Map(); + private buffer = new Map(); + private tails = new Map(); + // Per-stream lower-bound timestamp filter. When set, records whose + // SSE timestamp is <= the bound are dropped before dispatch — used by + // chat.agent on OOM-retry boot to skip session.in records belonging + // to turns that already completed on the prior attempt. The filter + // is consulted in `runTail`'s `onPart` so the buffer never sees the + // dropped records. + private minTimestamps = new Map(); + // Keys that were explicitly torn down by `disconnectStream`. The tail's + // `.finally` reconnect path checks this so a long-lived persistent handler + // (e.g. `chat.agent`'s run-level `stopInput.on(...)`) doesn't silently + // resurrect the tail mid-`session.in.wait()` and re-deliver the record + // that's already being delivered out-of-band via the waitpoint. 
+ private explicitlyDisconnected = new Set(); + private seqNums = new Map(); + + constructor( + private apiClient: ApiClient, + private baseUrl: string, + private debug: boolean = false + ) {} + + on( + sessionId: string, + io: SessionChannelIO, + handler: SessionStreamHandler + ): { off: () => void } { + const key = keyFor(sessionId, io); + + let handlerSet = this.handlers.get(key); + if (!handlerSet) { + handlerSet = new Set(); + this.handlers.set(key, handlerSet); + } + handlerSet.add(handler); + + // Explicit re-attach clears the "explicitly disconnected" suppression + // so the tail can subscribe again now that callers want delivery back. + this.explicitlyDisconnected.delete(key); + this.#ensureTailConnected(sessionId, io); + + const buffered = this.buffer.get(key); + if (buffered && buffered.length > 0) { + for (const data of buffered) { + this.#invokeHandler(handler, data); + } + this.buffer.delete(key); + } + + return { + off: () => { + handlerSet?.delete(handler); + if (handlerSet?.size === 0) { + this.handlers.delete(key); + } + }, + }; + } + + once( + sessionId: string, + io: SessionChannelIO, + options?: InputStreamOnceOptions + ): InputStreamOncePromise { + const key = keyFor(sessionId, io); + + this.explicitlyDisconnected.delete(key); + this.#ensureTailConnected(sessionId, io); + + const buffered = this.buffer.get(key); + if (buffered && buffered.length > 0) { + const data = buffered.shift()!; + if (buffered.length === 0) { + this.buffer.delete(key); + } + return new InputStreamOncePromise((resolve) => { + resolve({ ok: true, output: data }); + }); + } + + return new InputStreamOncePromise((resolve, reject) => { + const waiter: OnceWaiter = { resolve, reject }; + + if (options?.signal) { + if (options.signal.aborted) { + reject(new Error("Aborted")); + return; + } + options.signal.addEventListener( + "abort", + () => { + if (waiter.timeoutHandle) clearTimeout(waiter.timeoutHandle); + this.#removeOnceWaiter(key, waiter); + reject(new Error("Aborted")); 
+ }, + { once: true } + ); + } + + if (options?.timeoutMs) { + waiter.timeoutHandle = setTimeout(() => { + this.#removeOnceWaiter(key, waiter); + resolve({ + ok: false, + error: new InputStreamTimeoutError(key, options.timeoutMs!), + }); + }, options.timeoutMs); + } + + let waiters = this.onceWaiters.get(key); + if (!waiters) { + waiters = []; + this.onceWaiters.set(key, waiters); + } + waiters.push(waiter); + }); + } + + peek(sessionId: string, io: SessionChannelIO): unknown | undefined { + const buffered = this.buffer.get(keyFor(sessionId, io)); + if (buffered && buffered.length > 0) return buffered[0]; + return undefined; + } + + lastSeqNum(sessionId: string, io: SessionChannelIO): number | undefined { + return this.seqNums.get(keyFor(sessionId, io)); + } + + setLastSeqNum(sessionId: string, io: SessionChannelIO, seqNum: number): void { + const key = keyFor(sessionId, io); + const current = this.seqNums.get(key); + if (current === undefined || seqNum > current) { + this.seqNums.set(key, seqNum); + } + } + + setMinTimestamp( + sessionId: string, + io: SessionChannelIO, + minTimestamp: number | undefined + ): void { + const key = keyFor(sessionId, io); + if (minTimestamp === undefined) { + this.minTimestamps.delete(key); + } else { + this.minTimestamps.set(key, minTimestamp); + } + } + + shiftBuffer(sessionId: string, io: SessionChannelIO): boolean { + const key = keyFor(sessionId, io); + const buffered = this.buffer.get(key); + if (buffered && buffered.length > 0) { + buffered.shift(); + if (buffered.length === 0) this.buffer.delete(key); + return true; + } + return false; + } + + disconnectStream(sessionId: string, io: SessionChannelIO): void { + const key = keyFor(sessionId, io); + const tail = this.tails.get(key); + const bufferedSize = this.buffer.get(key)?.length ?? 0; + // Mark as explicitly disconnected BEFORE we abort, so the tail's + // `.finally` reconnect path sees the flag when it runs (which can be + // synchronous in the AbortError catch). 
Cleared on the next explicit + // `on()`/`once()`. + this.explicitlyDisconnected.add(key); + if (tail) { + tail.abortController.abort(); + this.tails.delete(key); + } + this.buffer.delete(key); + } + + clearHandlers(): void { + this.handlers.clear(); + + for (const [key, tail] of this.tails) { + const hasWaiters = this.onceWaiters.has(key) && this.onceWaiters.get(key)!.length > 0; + if (!hasWaiters) { + tail.abortController.abort(); + this.tails.delete(key); + } + } + } + + disconnect(): void { + for (const [, tail] of this.tails) { + tail.abortController.abort(); + } + this.tails.clear(); + } + + reset(): void { + this.disconnect(); + this.seqNums.clear(); + this.minTimestamps.clear(); + this.handlers.clear(); + + for (const [, waiters] of this.onceWaiters) { + for (const waiter of waiters) { + if (waiter.timeoutHandle) clearTimeout(waiter.timeoutHandle); + waiter.reject(new Error("Session stream manager reset")); + } + } + this.onceWaiters.clear(); + this.buffer.clear(); + } + + #ensureTailConnected(sessionId: string, io: SessionChannelIO): void { + const key = keyFor(sessionId, io); + if (this.tails.has(key)) return; + + const abortController = new AbortController(); + const promise = this.#runTail(sessionId, io, abortController.signal) + .catch((error) => { + if (this.debug) { + console.error(`[SessionStreamManager] Tail error for "${key}":`, error); + } + }) + .finally(() => { + this.tails.delete(key); + + // If the tail was torn down explicitly via `disconnectStream`, + // honor that — the caller (typically `session.in.wait()`) is + // suspending the run and expects no records to be buffered or + // delivered until a fresh `on()` / `once()` re-attaches. Without + // this guard a run-level persistent handler (e.g. 
`chat.agent`'s + // `stopInput.on(...)`) would auto-reconnect during the suspend + // window, the resurrected tail would receive the same record the + // waitpoint just delivered, and that record would land in the + // buffer where the next turn's `messagesInput.on(...)` drains it + // and runs a duplicate turn. + if (this.explicitlyDisconnected.has(key)) { + return; + } + + const hasHandlers = this.handlers.has(key) && this.handlers.get(key)!.size > 0; + const hasWaiters = + this.onceWaiters.has(key) && this.onceWaiters.get(key)!.length > 0; + if (hasHandlers || hasWaiters) { + this.#ensureTailConnected(sessionId, io); + } + }); + this.tails.set(key, { abortController, promise }); + } + + async #runTail( + sessionId: string, + io: SessionChannelIO, + signal: AbortSignal + ): Promise { + const key = keyFor(sessionId, io); + try { + const lastSeq = this.seqNums.get(key); + // Dispatch is driven from `onPart` (not the for-await loop) so each + // record reaches dispatch with its full SSE metadata in scope — + // specifically the timestamp, which we need for the per-stream + // min-timestamp filter. The for-await loop below just drains the + // pipeThrough output to keep the source flowing. + const stream = await this.apiClient.subscribeToSessionStream(sessionId, io, { + signal, + baseUrl: this.baseUrl, + timeoutInSeconds: 600, + lastEventId: lastSeq !== undefined ? String(lastSeq) : undefined, + onPart: (part) => { + if (signal.aborted) return; + const seqNum = parseInt(part.id, 10); + if (Number.isFinite(seqNum)) { + this.seqNums.set(key, seqNum); + } + + // Min-timestamp filter: drop records older than (or at) the + // bound. Used to skip already-processed records on OOM-retry + // boot. 
+ const minTs = this.minTimestamps.get(key); + if (minTs !== undefined && part.timestamp <= minTs) { + return; + } + + let data: unknown = part.chunk; + if (typeof data === "string") { + try { + data = JSON.parse(data); + } catch { + // keep as string + } + } + this.#dispatch(key, data); + }, + onComplete: () => { + if (this.debug) { + console.log(`[SessionStreamManager] Tail completed for "${key}"`); + } + }, + onError: (error) => { + if (this.debug) { + console.error(`[SessionStreamManager] Tail error for "${key}":`, error); + } + }, + }); + + // Drain to keep the pipeThrough flowing. Records were already + // dispatched in `onPart`, so the body here is a no-op. + for await (const _record of stream) { + if (signal.aborted) break; + } + } catch (error) { + if (error instanceof Error && error.name === "AbortError") return; + throw error; + } + } + + #dispatch(key: string, data: unknown): void { + const waiters = this.onceWaiters.get(key); + if (waiters && waiters.length > 0) { + const waiter = waiters.shift()!; + if (waiters.length === 0) this.onceWaiters.delete(key); + if (waiter.timeoutHandle) clearTimeout(waiter.timeoutHandle); + waiter.resolve({ ok: true, output: data }); + this.#invokeHandlers(key, data); + return; + } + + // Persistent handlers (e.g. `stopInput.on(...)`) get a copy of the chunk, + // but they don't "consume" it — handlers usually filter by `kind` and + // ignore chunks they don't care about. Buffer the chunk regardless so a + // subsequent `once()` (e.g. `messagesInput.waitWithIdleTimeout` in + // chat.agent's preload) can still pick up the same chunk that arrived + // before its waiter was registered. 
+ this.#invokeHandlers(key, data); + + let buffered = this.buffer.get(key); + if (!buffered) { + buffered = []; + this.buffer.set(key, buffered); + } + buffered.push(data); + } + + #invokeHandlers(key: string, data: unknown): void { + const handlers = this.handlers.get(key); + if (!handlers) return; + for (const handler of handlers) { + this.#invokeHandler(handler, data); + } + } + + #invokeHandler(handler: SessionStreamHandler, data: unknown): void { + try { + const result = handler(data); + if (result && typeof result === "object" && "catch" in result) { + (result as Promise).catch((error) => { + if (this.debug) { + console.error("[SessionStreamManager] Handler error:", error); + } + }); + } + } catch (error) { + if (this.debug) { + console.error("[SessionStreamManager] Handler error:", error); + } + } + } + + #removeOnceWaiter(key: string, waiter: OnceWaiter): void { + const waiters = this.onceWaiters.get(key); + if (!waiters) return; + const index = waiters.indexOf(waiter); + if (index !== -1) waiters.splice(index, 1); + if (waiters.length === 0) this.onceWaiters.delete(key); + } +} diff --git a/packages/core/src/v3/sessionStreams/noopManager.ts b/packages/core/src/v3/sessionStreams/noopManager.ts new file mode 100644 index 00000000000..c1c3c38dcdf --- /dev/null +++ b/packages/core/src/v3/sessionStreams/noopManager.ts @@ -0,0 +1,51 @@ +import { InputStreamOnceOptions } from "../realtimeStreams/types.js"; +import { InputStreamOncePromise } from "../inputStreams/types.js"; +import { SessionChannelIO, SessionStreamManager } from "./types.js"; + +export class NoopSessionStreamManager implements SessionStreamManager { + on( + _sessionId: string, + _io: SessionChannelIO, + _handler: (data: unknown) => void | Promise + ): { off: () => void } { + return { off: () => {} }; + } + + once( + _sessionId: string, + _io: SessionChannelIO, + _options?: InputStreamOnceOptions + ): InputStreamOncePromise { + return new InputStreamOncePromise(() => { + // Never resolves in noop 
mode. + }); + } + + peek(_sessionId: string, _io: SessionChannelIO): unknown | undefined { + return undefined; + } + + lastSeqNum(_sessionId: string, _io: SessionChannelIO): number | undefined { + return undefined; + } + + setLastSeqNum(_sessionId: string, _io: SessionChannelIO, _seqNum: number): void {} + + setMinTimestamp( + _sessionId: string, + _io: SessionChannelIO, + _minTimestamp: number | undefined + ): void {} + + shiftBuffer(_sessionId: string, _io: SessionChannelIO): boolean { + return false; + } + + disconnectStream(_sessionId: string, _io: SessionChannelIO): void {} + + clearHandlers(): void {} + + reset(): void {} + + disconnect(): void {} +} diff --git a/packages/core/src/v3/sessionStreams/types.ts b/packages/core/src/v3/sessionStreams/types.ts new file mode 100644 index 00000000000..2310fabae25 --- /dev/null +++ b/packages/core/src/v3/sessionStreams/types.ts @@ -0,0 +1,76 @@ +import { InputStreamOnceOptions } from "../realtimeStreams/types.js"; +import { + InputStreamOncePromise, + InputStreamOnceResult, + InputStreamTimeoutError, +} from "../inputStreams/types.js"; + +/** + * Re-export the run-scoped input stream once-promise machinery so callers + * depending on sessionStreams don't also need to import from inputStreams. + * Both APIs return the same shape. + */ +export { InputStreamOncePromise, InputStreamTimeoutError }; +export type { InputStreamOnceResult }; + +export type SessionChannelIO = "out" | "in"; + +/** + * Manager for Session channel reads: a session-scoped parallel to + * {@link InputStreamManager} keyed on `(sessionId, io)` instead of + * `(runId, streamId)`. Used by {@link SessionChannel} to implement + * `.on` / `.once` / `.peek` / `.wait` / `.waitWithIdleTimeout`. + */ +export interface SessionStreamManager { + /** Register a handler that fires every time data arrives on the given channel. 
*/ + on( + sessionId: string, + io: SessionChannelIO, + handler: (data: unknown) => void | Promise + ): { off: () => void }; + + /** Wait for the next record on the given channel (buffered or live). */ + once( + sessionId: string, + io: SessionChannelIO, + options?: InputStreamOnceOptions + ): InputStreamOncePromise; + + /** Non-blocking peek at the head of the channel buffer. */ + peek(sessionId: string, io: SessionChannelIO): unknown | undefined; + + /** Last S2 sequence number seen on the given channel. */ + lastSeqNum(sessionId: string, io: SessionChannelIO): number | undefined; + + /** Advance the last-seen sequence number (prevents SSE replay after `.wait` resume). */ + setLastSeqNum(sessionId: string, io: SessionChannelIO, seqNum: number): void; + + /** + * Set a per-stream lower-bound SSE timestamp. Records whose timestamp + * is `<= minTimestamp` are dropped before dispatch. Used by chat.agent + * on OOM-retry boot to skip session.in records belonging to turns + * that already completed on the prior attempt. + * + * Pass `undefined` to clear the filter. + */ + setMinTimestamp( + sessionId: string, + io: SessionChannelIO, + minTimestamp: number | undefined + ): void; + + /** Remove and discard the first buffered record. Returns true if one was removed. */ + shiftBuffer(sessionId: string, io: SessionChannelIO): boolean; + + /** Abort the SSE tail and clear the buffer. Called before `.wait` suspends. */ + disconnectStream(sessionId: string, io: SessionChannelIO): void; + + /** Clear all `.on` handlers; abort tails without pending once-waiters. */ + clearHandlers(): void; + + /** Reset state between task executions. */ + reset(): void; + + /** Disconnect every tail. 
*/ + disconnect(): void; +} diff --git a/packages/core/src/v3/test/test-input-stream-manager.ts b/packages/core/src/v3/test/test-input-stream-manager.ts new file mode 100644 index 00000000000..933b92d07c6 --- /dev/null +++ b/packages/core/src/v3/test/test-input-stream-manager.ts @@ -0,0 +1,219 @@ +import type { InputStreamManager, InputStreamOnceResult } from "../inputStreams/types.js"; +import { InputStreamOncePromise, InputStreamTimeoutError } from "../inputStreams/types.js"; +import type { InputStreamOnceOptions } from "../realtimeStreams/types.js"; + +type OnceWaiter = { + resolve: (value: InputStreamOnceResult) => void; + timer?: ReturnType; + signal?: AbortSignal; + abortHandler?: () => void; +}; + +type Handler = (data: unknown) => void | Promise; + +/** + * In-memory implementation of `InputStreamManager` for unit tests. + * + * Tests push data via the driver's `.send(streamId, data)` method. Any + * pending `.once()` waiters resolve immediately, and all `.on()` handlers + * fire synchronously (awaited if they return a promise). + * + * Use this alongside {@link runInMockTaskContext} — not directly. + */ +export class TestInputStreamManager implements InputStreamManager { + private handlers = new Map>(); + private onceWaiters = new Map(); + private latest = new Map(); + private lastSeqNums = new Map(); + // Buffered sends that arrived before a `.once()` waiter was registered. + // `.once()` semantically means "wait for NEXT value" but tests often + // send data before the task has had a chance to reach the wait point. + // Buffering closes that race so the waiter picks up the pending send. 
+ private pendingSends = new Map(); + + setRunId(_runId: string, _streamsVersion?: string): void { + // No-op — the test driver tracks nothing about runs + } + + on(streamId: string, handler: Handler): { off: () => void } { + if (!this.handlers.has(streamId)) { + this.handlers.set(streamId, new Set()); + } + this.handlers.get(streamId)!.add(handler); + + return { + off: () => { + this.handlers.get(streamId)?.delete(handler); + }, + }; + } + + once(streamId: string, options?: InputStreamOnceOptions): InputStreamOncePromise { + return new InputStreamOncePromise((resolve) => { + if (options?.signal?.aborted) { + resolve({ + ok: false, + error: new InputStreamTimeoutError(streamId, options.timeoutMs ?? 0), + }); + return; + } + + // Pick up any buffered send that arrived before this waiter. + const buffered = this.pendingSends.get(streamId); + if (buffered && buffered.length > 0) { + const next = buffered.shift(); + if (buffered.length === 0) this.pendingSends.delete(streamId); + resolve({ ok: true, output: next }); + return; + } + + const waiter: OnceWaiter = { + resolve, + signal: options?.signal, + }; + + if (options?.timeoutMs !== undefined) { + waiter.timer = setTimeout(() => { + this.removeWaiter(streamId, waiter); + resolve({ + ok: false, + error: new InputStreamTimeoutError(streamId, options.timeoutMs!), + }); + }, options.timeoutMs); + } + + if (options?.signal) { + const abortHandler = () => { + this.removeWaiter(streamId, waiter); + if (waiter.timer) clearTimeout(waiter.timer); + resolve({ + ok: false, + error: new InputStreamTimeoutError(streamId, options.timeoutMs ?? 
0), + }); + }; + waiter.abortHandler = abortHandler; + options.signal.addEventListener("abort", abortHandler, { once: true }); + } + + if (!this.onceWaiters.has(streamId)) { + this.onceWaiters.set(streamId, []); + } + this.onceWaiters.get(streamId)!.push(waiter); + }); + } + + peek(streamId: string): unknown | undefined { + return this.latest.get(streamId); + } + + lastSeqNum(streamId: string): number | undefined { + return this.lastSeqNums.get(streamId); + } + + setLastSeqNum(streamId: string, seqNum: number): void { + this.lastSeqNums.set(streamId, seqNum); + } + + shiftBuffer(_streamId: string): boolean { + return false; + } + + disconnectStream(_streamId: string): void {} + + clearHandlers(): void { + this.handlers.clear(); + } + + reset(): void { + // Cancel any pending waiters to avoid dangling promises leaking between tests + for (const waiters of this.onceWaiters.values()) { + for (const w of waiters) { + if (w.timer) clearTimeout(w.timer); + if (w.signal && w.abortHandler) { + w.signal.removeEventListener("abort", w.abortHandler); + } + } + } + this.onceWaiters.clear(); + this.handlers.clear(); + this.latest.clear(); + this.lastSeqNums.clear(); + this.pendingSends.clear(); + } + + disconnect(): void { + this.reset(); + } + + connectTail(_runId: string, _fromSeq?: number): void {} + + // ── Test driver API (not part of InputStreamManager interface) ────────── + + /** + * Push data onto an input stream. Resolves pending `once()` waiters + * and fires all `on()` handlers (awaiting async handlers). + */ + async __sendFromTest(streamId: string, data: unknown): Promise { + this.latest.set(streamId, data); + + const waiters = this.onceWaiters.get(streamId); + const handlers = this.handlers.get(streamId); + const hasWaiters = waiters && waiters.length > 0; + const hasHandlers = handlers && handlers.size > 0; + + // If nothing is listening yet, buffer so the next `.once()` call picks it up. 
+ if (!hasWaiters && !hasHandlers) { + if (!this.pendingSends.has(streamId)) { + this.pendingSends.set(streamId, []); + } + this.pendingSends.get(streamId)!.push(data); + return; + } + + if (hasWaiters) { + // Drain every pending once() waiter — this mirrors the real manager's + // behavior where the stream tail delivers the same record to all listeners. + const pending = waiters!.splice(0); + for (const w of pending) { + if (w.timer) clearTimeout(w.timer); + if (w.signal && w.abortHandler) { + w.signal.removeEventListener("abort", w.abortHandler); + } + w.resolve({ ok: true, output: data }); + } + } + + if (hasHandlers) { + await Promise.all( + Array.from(handlers!).map((h) => Promise.resolve().then(() => h(data))) + ); + } + } + + /** + * Immediately resolve every pending `once()` waiter for a stream with a + * timeout error. Used to simulate closed streams (e.g. `exitAfterPreloadIdle`). + */ + __closeFromTest(streamId: string): void { + const waiters = this.onceWaiters.get(streamId); + if (!waiters) return; + const pending = waiters.splice(0); + for (const w of pending) { + if (w.timer) clearTimeout(w.timer); + if (w.signal && w.abortHandler) { + w.signal.removeEventListener("abort", w.abortHandler); + } + w.resolve({ + ok: false, + error: new InputStreamTimeoutError(streamId, 0), + }); + } + } + + private removeWaiter(streamId: string, waiter: OnceWaiter): void { + const waiters = this.onceWaiters.get(streamId); + if (!waiters) return; + const idx = waiters.indexOf(waiter); + if (idx >= 0) waiters.splice(idx, 1); + } +} diff --git a/packages/core/src/v3/test/test-realtime-streams-manager.ts b/packages/core/src/v3/test/test-realtime-streams-manager.ts new file mode 100644 index 00000000000..b53a2630d9c --- /dev/null +++ b/packages/core/src/v3/test/test-realtime-streams-manager.ts @@ -0,0 +1,169 @@ +import { + AsyncIterableStream, + createAsyncIterableStreamFromAsyncIterable, +} from "../streams/asyncIterableStream.js"; +import type { + RealtimeStreamInstance, + 
RealtimeStreamOperationOptions, + RealtimeStreamsManager, +} from "../realtimeStreams/types.js"; + +/** + * In-memory implementation of `RealtimeStreamsManager` for unit tests. + * Collects every chunk that tasks write via `pipe()` or `append()` into + * per-stream buffers that tests can inspect. + * + * Use this alongside {@link runInMockTaskContext} — not directly. + */ +type WriteListener = (key: string, chunk: unknown) => void; + +export class TestRealtimeStreamsManager implements RealtimeStreamsManager { + private buffers = new Map(); + private pipeWaits = new Map[]>(); + private writeListeners = new Set(); + + pipe( + key: string, + source: AsyncIterable | ReadableStream, + _options?: RealtimeStreamOperationOptions + ): RealtimeStreamInstance { + const buffer = this.getBuffer(key); + const self = this; + + // Eagerly drain the source in the background so chunks land in the + // buffer + notify listeners even when the caller never consumes the + // returned stream. This mirrors the real SDK behavior: `streams.writer` + // awaits `instance.wait()`, it doesn't read the returned stream. + // + // The source is read ONCE (into a chunks array) and replayed into a + // ReadableStream so the caller can still consume it if they want. + const readChunks: T[] = []; + let resolveDone!: () => void; + const done = new Promise((resolve) => { + resolveDone = resolve; + }); + + (async () => { + try { + const iter = + source instanceof ReadableStream + ? 
(async function* () { + const reader = source.getReader(); + try { + while (true) { + const { done: d, value } = await reader.read(); + if (d) return; + yield value as T; + } + } finally { + reader.releaseLock(); + } + })() + : source; + + for await (const chunk of iter) { + readChunks.push(chunk); + buffer.push(chunk); + self.notify(key, chunk); + } + } catch { + // Swallow — tests can inspect what made it into the buffer + } finally { + resolveDone(); + } + })(); + + const replayStream = (async function* () { + // Wait for all chunks to be drained, then replay from our snapshot + await done; + for (const chunk of readChunks) yield chunk; + })(); + const wrappedStream = createAsyncIterableStreamFromAsyncIterable(replayStream); + + if (!this.pipeWaits.has(key)) this.pipeWaits.set(key, []); + this.pipeWaits.get(key)!.push(done); + + return { + wait: () => done.then(() => ({})), + get stream(): AsyncIterableStream { + return wrappedStream; + }, + }; + } + + async append( + key: string, + part: TPart, + _options?: RealtimeStreamOperationOptions + ): Promise { + this.getBuffer(key).push(part); + this.notify(key, part); + } + + /** + * Register a listener fired for every chunk written to any stream. + * Returns an unsubscribe function. + * + * Intended for test harnesses that need to react to writes synchronously + * (e.g. resolving a "turn complete" latch). + */ + onWrite(listener: WriteListener): () => void { + this.writeListeners.add(listener); + return () => { + this.writeListeners.delete(listener); + }; + } + + private notify(key: string, chunk: unknown): void { + for (const listener of this.writeListeners) { + try { + listener(key, chunk); + } catch { + // Never let a listener error break stream writes + } + } + } + + // ── Test driver API (not part of RealtimeStreamsManager interface) ────── + + /** + * Return all chunks written to the given stream key in order of write. + */ + __chunksFromTest(key: string): T[] { + return (this.buffers.get(key) ?? 
[]).slice() as T[]; + } + + /** + * Return all chunks across every stream, keyed by stream id. + */ + __allChunksFromTest(): Record { + const result: Record = {}; + for (const [key, chunks] of this.buffers.entries()) { + result[key] = chunks.slice(); + } + return result; + } + + /** + * Clear the buffer for a specific stream or all streams. + */ + __clearFromTest(key?: string): void { + if (key === undefined) { + this.buffers.clear(); + } else { + this.buffers.delete(key); + } + } + + reset(): void { + this.buffers.clear(); + this.pipeWaits.clear(); + } + + private getBuffer(key: string): unknown[] { + if (!this.buffers.has(key)) { + this.buffers.set(key, []); + } + return this.buffers.get(key)!; + } +} diff --git a/packages/core/src/v3/test/test-run-metadata-manager.ts b/packages/core/src/v3/test/test-run-metadata-manager.ts new file mode 100644 index 00000000000..9d806f17a03 --- /dev/null +++ b/packages/core/src/v3/test/test-run-metadata-manager.ts @@ -0,0 +1,103 @@ +import type { DeserializedJson } from "../../schemas/json.js"; +import type { AsyncIterableStream } from "../streams/asyncIterableStream.js"; +import type { RunMetadataManager, RunMetadataUpdater } from "../runMetadata/types.js"; + +/** + * In-memory implementation of `RunMetadataManager` for unit tests. + * + * Just stores metadata in a Map — no API calls, no queue. Good enough + * for tests that read/write metadata via `runMetadata.getKey()` / + * `runMetadata.set()`, including the IDLE_TIMEOUT and TURN_TIMEOUT + * checks inside `chat.agent()`. 
+ */ +export class TestRunMetadataManager implements RunMetadataManager { + private store: Record = {}; + + enterWithMetadata(metadata: Record): void { + this.store = { ...metadata }; + } + + current(): Record | undefined { + return { ...this.store }; + } + + getKey(key: string): DeserializedJson | undefined { + return this.store[key]; + } + + set(key: string, value: DeserializedJson): this { + this.store[key] = value; + return this; + } + + del(key: string): this { + delete this.store[key]; + return this; + } + + append(key: string, value: DeserializedJson): this { + const existing = this.store[key]; + if (Array.isArray(existing)) { + existing.push(value); + } else { + this.store[key] = [value]; + } + return this; + } + + remove(key: string, value: DeserializedJson): this { + const existing = this.store[key]; + if (Array.isArray(existing)) { + this.store[key] = existing.filter((v) => v !== value) as DeserializedJson; + } + return this; + } + + increment(key: string, value: number): this { + const existing = this.store[key]; + const current = typeof existing === "number" ? 
existing : 0; + this.store[key] = current + value; + return this; + } + + decrement(key: string, value: number): this { + return this.increment(key, -value); + } + + update(metadata: Record): this { + this.store = { ...metadata }; + return this; + } + + async flush(): Promise {} + async refresh(): Promise {} + + async stream( + _key: string, + value: AsyncIterable | ReadableStream + ): Promise> { + return value as AsyncIterable; + } + + async fetchStream(_key: string): Promise> { + // Return an empty async iterable — tests can override if needed + const empty = { + [Symbol.asyncIterator]: () => ({ + next: () => Promise.resolve({ done: true as const, value: undefined as T }), + }), + }; + return empty as unknown as AsyncIterableStream; + } + + get parent(): RunMetadataUpdater { + return this; + } + + get root(): RunMetadataUpdater { + return this; + } + + reset(): void { + this.store = {}; + } +} diff --git a/packages/core/src/v3/test/test-session-stream-manager.ts b/packages/core/src/v3/test/test-session-stream-manager.ts new file mode 100644 index 00000000000..1bd7499a0a7 --- /dev/null +++ b/packages/core/src/v3/test/test-session-stream-manager.ts @@ -0,0 +1,287 @@ +import { + InputStreamOncePromise, + InputStreamOnceResult, + InputStreamTimeoutError, +} from "../inputStreams/types.js"; +import type { InputStreamOnceOptions } from "../realtimeStreams/types.js"; +import type { + SessionChannelIO, + SessionStreamManager, +} from "../sessionStreams/types.js"; + +type OnceWaiter = { + resolve: (value: InputStreamOnceResult) => void; + timer?: ReturnType; + signal?: AbortSignal; + abortHandler?: () => void; +}; + +type Handler = (data: unknown) => void | Promise; + +function keyFor(sessionId: string, io: SessionChannelIO): string { + return `${sessionId}:${io}`; +} + +/** + * In-memory implementation of `SessionStreamManager` for unit tests. Same + * shape as {@link TestInputStreamManager} but keyed on `(sessionId, io)`. 
+ * + * Tests push data via `__sendFromTest(sessionId, io, data)` — any pending + * `once()` waiters resolve immediately, and all `on()` handlers fire (awaited + * if they return a promise). Records that arrive before a listener is + * registered are buffered so the first `once()` picks them up. + */ +export class TestSessionStreamManager implements SessionStreamManager { + private handlers = new Map>(); + private onceWaiters = new Map(); + private buffer = new Map(); + private seqNums = new Map(); + + on( + sessionId: string, + io: SessionChannelIO, + handler: Handler + ): { off: () => void } { + const key = keyFor(sessionId, io); + + let set = this.handlers.get(key); + if (!set) { + set = new Set(); + this.handlers.set(key, set); + } + set.add(handler); + + // Note: we intentionally do NOT replay buffered records into the + // newly-registered handler, and we do NOT drain the buffer. The + // buffer is owned by `once()` — registering a passive observer + // (`on`) must not consume records destined for a future `once` + // waiter. This matches production SSE semantics where handlers + // observe records as they arrive, not retroactively. + // + // Earlier versions drained the buffer here, which caused user + // messages buffered during the runtime's `runFn` boot phase to be + // silently swallowed by the `stopInput.on()` handler registered at + // ai.ts:4806 (the stop handler ignores `kind: "message"` chunks). + // The next `messagesInput.waitWithIdleTimeout` then waited 30s for + // a record that had already been "delivered" to a handler that + // didn't want it. 
+ + return { + off: () => { + this.handlers.get(key)?.delete(handler); + }, + }; + } + + once( + sessionId: string, + io: SessionChannelIO, + options?: InputStreamOnceOptions + ): InputStreamOncePromise { + const key = keyFor(sessionId, io); + + return new InputStreamOncePromise((resolve) => { + if (options?.signal?.aborted) { + resolve({ + ok: false, + error: new InputStreamTimeoutError(key, options.timeoutMs ?? 0), + }); + return; + } + + const buffered = this.buffer.get(key); + if (buffered && buffered.length > 0) { + const next = buffered.shift(); + if (buffered.length === 0) this.buffer.delete(key); + resolve({ ok: true, output: next }); + return; + } + + const waiter: OnceWaiter = { resolve, signal: options?.signal }; + + if (options?.timeoutMs !== undefined) { + waiter.timer = setTimeout(() => { + this.removeWaiter(key, waiter); + resolve({ + ok: false, + error: new InputStreamTimeoutError(key, options.timeoutMs!), + }); + }, options.timeoutMs); + } + + if (options?.signal) { + const abortHandler = () => { + this.removeWaiter(key, waiter); + if (waiter.timer) clearTimeout(waiter.timer); + resolve({ + ok: false, + error: new InputStreamTimeoutError(key, options.timeoutMs ?? 
0), + }); + }; + waiter.abortHandler = abortHandler; + options.signal.addEventListener("abort", abortHandler, { once: true }); + } + + let waiters = this.onceWaiters.get(key); + if (!waiters) { + waiters = []; + this.onceWaiters.set(key, waiters); + } + waiters.push(waiter); + }); + } + + peek(sessionId: string, io: SessionChannelIO): unknown | undefined { + const buffered = this.buffer.get(keyFor(sessionId, io)); + if (buffered && buffered.length > 0) return buffered[0]; + return undefined; + } + + lastSeqNum(sessionId: string, io: SessionChannelIO): number | undefined { + return this.seqNums.get(keyFor(sessionId, io)); + } + + setLastSeqNum(sessionId: string, io: SessionChannelIO, seqNum: number): void { + this.seqNums.set(keyFor(sessionId, io), seqNum); + } + + setMinTimestamp( + _sessionId: string, + _io: SessionChannelIO, + _minTimestamp: number | undefined + ): void { + // No filter applied in tests; the test harness drives records directly + // and the chat.agent retry path is exercised separately. 
+ } + + shiftBuffer(sessionId: string, io: SessionChannelIO): boolean { + const key = keyFor(sessionId, io); + const buffered = this.buffer.get(key); + if (buffered && buffered.length > 0) { + buffered.shift(); + if (buffered.length === 0) this.buffer.delete(key); + return true; + } + return false; + } + + disconnectStream(_sessionId: string, _io: SessionChannelIO): void { + // no-op — no real SSE tail in tests + } + + clearHandlers(): void { + this.handlers.clear(); + } + + reset(): void { + for (const waiters of this.onceWaiters.values()) { + for (const w of waiters) { + if (w.timer) clearTimeout(w.timer); + if (w.signal && w.abortHandler) { + w.signal.removeEventListener("abort", w.abortHandler); + } + } + } + this.onceWaiters.clear(); + this.handlers.clear(); + this.buffer.clear(); + this.seqNums.clear(); + } + + disconnect(): void { + this.reset(); + } + + // ── Test driver API (not part of SessionStreamManager interface) ────── + + /** + * Push a record onto the given channel. + * + * Dispatch rules — similar to the production manager, but with a tweak + * that makes unit tests deterministic: + * + * 1. **Handlers always observe** (like production). A session-level `.on` + * is a filter-observer — it fires every time a record arrives, + * regardless of whether a `.once` waiter is also active. + * 2. **First waiter consumes** the record if present (like production). + * 3. **If no waiter, the record is buffered for the next `.once` call.** + * Production discards records that only match handlers — but in + * production the SSE tail introduces enough latency that the next + * `.once` is usually registered before the next record arrives. Tests + * send synchronously right after `turn-complete`, so without this + * buffer the next `waitWithIdleTimeout` would race and lose the + * message. The buffer is the only deviation from production semantics. 
+ */ + async __sendFromTest( + sessionId: string, + io: SessionChannelIO, + data: unknown + ): Promise { + const key = keyFor(sessionId, io); + + const handlers = this.handlers.get(key); + if (handlers && handlers.size > 0) { + await Promise.all( + Array.from(handlers).map((h) => Promise.resolve().then(() => h(data))) + ); + } + + const waiters = this.onceWaiters.get(key); + if (waiters && waiters.length > 0) { + const w = waiters.shift()!; + if (waiters.length === 0) this.onceWaiters.delete(key); + if (w.timer) clearTimeout(w.timer); + if (w.signal && w.abortHandler) { + w.signal.removeEventListener("abort", w.abortHandler); + } + w.resolve({ ok: true, output: data }); + return; + } + + let buffered = this.buffer.get(key); + if (!buffered) { + buffered = []; + this.buffer.set(key, buffered); + } + buffered.push(data); + } + + /** + * Immediately resolve every pending `once()` waiter for the given channel + * with a timeout error. Simulates a closed stream (e.g. session closed). + */ + __closeFromTest(sessionId: string, io: SessionChannelIO): void { + const key = keyFor(sessionId, io); + const waiters = this.onceWaiters.get(key); + if (!waiters) return; + const pending = waiters.splice(0); + for (const w of pending) { + if (w.timer) clearTimeout(w.timer); + if (w.signal && w.abortHandler) { + w.signal.removeEventListener("abort", w.abortHandler); + } + w.resolve({ + ok: false, + error: new InputStreamTimeoutError(key, 0), + }); + } + } + + private invoke(handler: Handler, data: unknown): void { + try { + const result = handler(data); + if (result && typeof result === "object" && "catch" in result) { + (result as Promise).catch(() => {}); + } + } catch { + // Never let a handler error break test state + } + } + + private removeWaiter(key: string, waiter: OnceWaiter): void { + const waiters = this.onceWaiters.get(key); + if (!waiters) return; + const idx = waiters.indexOf(waiter); + if (idx >= 0) waiters.splice(idx, 1); + } +} diff --git 
a/packages/core/src/v3/types/tasks.ts b/packages/core/src/v3/types/tasks.ts index d04d088ef1a..978a6e5bd0a 100644 --- a/packages/core/src/v3/types/tasks.ts +++ b/packages/core/src/v3/types/tasks.ts @@ -387,6 +387,12 @@ type CommonTaskOptions< * Should be a valid JSON Schema Draft 7 object. */ jsonSchema?: JSONSchema; + + /** @internal Set by SDK internals (e.g. `chat.agent()`, `schedules.task()`). */ + triggerSource?: string; + + /** @internal Agent configuration, only set when `triggerSource` is `"agent"`. */ + agentConfig?: { type: string }; }; export type TaskOptions< @@ -641,6 +647,30 @@ export interface Task requestOptions?: TriggerApiRequestOptions ) => TaskRunPromise; + /** + * Trigger a task and subscribe to its updates via realtime. Unlike `triggerAndWait`, + * this does NOT suspend the parent run — the parent stays alive and polls for updates. + * This enables parallel tool calls and proper abort signal handling. + * + * @param payload + * @param options - Options for the task run, including an optional `signal` to cancel the subscription and child run + * @returns TaskRunPromise + * @example + * ``` + * const result = await task.triggerAndSubscribe({ foo: "bar" }, { signal: abortSignal }); + * + * if (result.ok) { + * console.log(result.output); + * } else { + * console.error(result.error); + * } + * ``` + */ + triggerAndSubscribe: ( + payload: TInput, + options?: TriggerAndSubscribeOptions, + ) => TaskRunPromise; + /** * Batch trigger multiple task runs with the given payloads, and wait for the results. Returns the results of the task runs. * @param items - Array, AsyncIterable, or ReadableStream of batch items @@ -989,6 +1019,16 @@ export type TriggerOptions = { }; export type TriggerAndWaitOptions = Omit; + +export type TriggerAndSubscribeOptions = Omit & { + /** An AbortSignal to cancel the subscription. When fired, the subscription closes and the promise rejects. 
*/ + signal?: AbortSignal; + /** + * Whether to cancel the child run when the abort signal fires. + * @default true + */ + cancelOnAbort?: boolean; +}; export type BatchTriggerOptions = { /** * If no idempotencyKey is set on an individual item in the batch, it will use this key on each item + the array index. diff --git a/packages/core/src/v3/utils/globals.ts b/packages/core/src/v3/utils/globals.ts index 08b62d379b2..fa5b8176f6e 100644 --- a/packages/core/src/v3/utils/globals.ts +++ b/packages/core/src/v3/utils/globals.ts @@ -3,6 +3,7 @@ import { Clock } from "../clock/clock.js"; import { HeartbeatsManager } from "../heartbeats/types.js"; import type { IdempotencyKeyCatalog } from "../idempotency-key-catalog/catalog.js"; import { InputStreamManager } from "../inputStreams/types.js"; +import { SessionStreamManager } from "../sessionStreams/types.js"; import { LifecycleHooksManager } from "../lifecycleHooks/types.js"; import { LocalsManager } from "../locals/types.js"; import { RealtimeStreamsManager } from "../realtimeStreams/types.js"; @@ -76,4 +77,5 @@ type TriggerDotDevGlobalAPI = { ["heartbeats"]?: HeartbeatsManager; ["realtime-streams"]?: RealtimeStreamsManager; ["input-streams"]?: InputStreamManager; + ["session-streams"]?: SessionStreamManager; }; diff --git a/packages/core/src/v3/workers/index.ts b/packages/core/src/v3/workers/index.ts index e5f8eecff98..8ac06930328 100644 --- a/packages/core/src/v3/workers/index.ts +++ b/packages/core/src/v3/workers/index.ts @@ -33,3 +33,4 @@ export { StandardTraceContextManager } from "../traceContext/manager.js"; export { StandardHeartbeatsManager } from "../heartbeats/manager.js"; export { StandardRealtimeStreamsManager } from "../realtimeStreams/manager.js"; export { StandardInputStreamManager } from "../inputStreams/manager.js"; +export { StandardSessionStreamManager } from "../sessionStreams/manager.js"; diff --git a/packages/core/src/v3/workers/taskExecutor.ts b/packages/core/src/v3/workers/taskExecutor.ts index 
2b9ffecf151..838ef3c6e77 100644 --- a/packages/core/src/v3/workers/taskExecutor.ts +++ b/packages/core/src/v3/workers/taskExecutor.ts @@ -17,6 +17,7 @@ import { lifecycleHooks, OTEL_SPAN_ATTRIBUTE_COUNT_LIMIT, runMetadata, + sessionStreams, traceContext, waitUntil, } from "../index.js"; @@ -1048,6 +1049,7 @@ export class TaskExecutor { ) { await this.#callCleanupFunctions(payload, ctx, initOutput, signal); inputStreams.clearHandlers(); + sessionStreams.clearHandlers(); await this.#blockForWaitUntil(); } diff --git a/packages/core/test/runStream.test.ts b/packages/core/test/runStream.test.ts index 0bf7f17432c..a953b7b694b 100644 --- a/packages/core/test/runStream.test.ts +++ b/packages/core/test/runStream.test.ts @@ -1,7 +1,8 @@ -import { describe, expect, it } from "vitest"; +import { afterEach, beforeEach, describe, expect, it, vi } from "vitest"; import { RunSubscription, SSEStreamPart, + SSEStreamSubscription, StreamSubscription, StreamSubscriptionFactory, } from "../src/v3/apiClient/runStream.js"; @@ -470,6 +471,47 @@ describe("RunSubscription", () => { }); }); +describe("SSEStreamSubscription", () => { + let originalFetch: typeof global.fetch; + + beforeEach(() => { + originalFetch = global.fetch; + }); + + afterEach(() => { + global.fetch = originalFetch; + vi.restoreAllMocks(); + }); + + it("does not retry the initial fetch on 401", async () => { + const fetchMock = vi.fn().mockResolvedValue(new Response(null, { status: 401 })); + global.fetch = fetchMock; + + const sub = new SSEStreamSubscription("https://api.test/realtime/v1/streams/run_x/chat", { + headers: { Authorization: "Bearer expired" }, + }); + + const stream = await sub.subscribe(); + const reader = stream.getReader(); + await expect(reader.read()).rejects.toMatchObject({ status: 401 }); + expect(fetchMock).toHaveBeenCalledTimes(1); + }); + + it("does not retry the initial fetch on 403", async () => { + const fetchMock = vi.fn().mockResolvedValue(new Response(null, { status: 403 })); + 
global.fetch = fetchMock; + + const sub = new SSEStreamSubscription("https://api.test/realtime/v1/streams/run_x/chat", { + headers: { Authorization: "Bearer denied" }, + }); + + const stream = await sub.subscribe(); + const reader = stream.getReader(); + await expect(reader.read()).rejects.toMatchObject({ status: 403 }); + expect(fetchMock).toHaveBeenCalledTimes(1); + }); +}); + export async function convertAsyncIterableToArray(iterable: AsyncIterable): Promise { const result: T[] = []; for await (const item of iterable) { diff --git a/packages/trigger-sdk/src/v3/shared.ts b/packages/trigger-sdk/src/v3/shared.ts index c69bceeb535..cd51b35e32d 100644 --- a/packages/trigger-sdk/src/v3/shared.ts +++ b/packages/trigger-sdk/src/v3/shared.ts @@ -90,6 +90,7 @@ import type { TaskWithToolOptions, ToolTask, ToolTaskParameters, + TriggerAndSubscribeOptions, TriggerAndWaitOptions, TriggerApiRequestOptions, TriggerOptions, @@ -214,6 +215,26 @@ export function createTask< }); }, params.id); }, + triggerAndSubscribe: (payload, options) => { + return new TaskRunPromise((resolve, reject) => { + triggerAndSubscribe_internal( + "triggerAndSubscribe()", + params.id, + payload, + undefined, + { + queue: params.queue?.name, + ...options, + } + ) + .then((result) => { + resolve(result); + }) + .catch((error) => { + reject(error); + }); + }, params.id); + }, batchTriggerAndWait: async (items, options) => { return await batchTriggerAndWait_internal( "batchTriggerAndWait()", @@ -235,6 +256,8 @@ export function createTask< queue: params.queue, retry: params.retry ? { ...defaultRetryOptions, ...params.retry } : undefined, machine: typeof params.machine === "string" ? 
{ preset: params.machine } : params.machine, + triggerSource: params.triggerSource, + agentConfig: params.agentConfig, maxDuration: params.maxDuration, ttl: params.ttl, payloadSchema: params.jsonSchema, @@ -259,7 +282,7 @@ export function createTask< } /** - * @deprecated use ai.tool() instead + * @deprecated Use `schemaTask` plus AI SDK `tool()` with `execute: ai.toolExecute(task)` instead. */ export function createToolTask< TIdentifier extends string, @@ -346,6 +369,26 @@ export function createSchemaTask< }); }, params.id); }, + triggerAndSubscribe: (payload, options) => { + return new TaskRunPromise((resolve, reject) => { + triggerAndSubscribe_internal, TOutput>( + "triggerAndSubscribe()", + params.id, + payload, + parsePayload, + { + queue: params.queue?.name, + ...options, + } + ) + .then((result) => { + resolve(result); + }) + .catch((error) => { + reject(error); + }); + }, params.id); + }, batchTriggerAndWait: async (items, options) => { return await batchTriggerAndWait_internal, TOutput>( "batchTriggerAndWait()", @@ -367,6 +410,8 @@ export function createSchemaTask< queue: params.queue, retry: params.retry ? { ...defaultRetryOptions, ...params.retry } : undefined, machine: typeof params.machine === "string" ? { preset: params.machine } : params.machine, + triggerSource: params.triggerSource, + agentConfig: params.agentConfig, maxDuration: params.maxDuration, ttl: params.ttl, fns: { @@ -465,6 +510,49 @@ export function triggerAndWait( }, id); } +/** + * Trigger a task and subscribe to its updates via realtime. Unlike `triggerAndWait`, + * this does NOT suspend the parent run — the parent stays alive and subscribes to updates. + * This enables parallel execution and proper abort signal handling. 
+ * + * @param id - The id of the task to trigger + * @param payload + * @param options - Options for the task run, including an optional `signal` to cancel the subscription and child run + * @returns TaskRunPromise + * @example + * ```ts + * import { tasks } from "@trigger.dev/sdk/v3"; + * const result = await tasks.triggerAndSubscribe("my-task", { foo: "bar" }); + * + * if (result.ok) { + * console.log(result.output); + * } else { + * console.error(result.error); + * } + * ``` + */ +export function triggerAndSubscribe( + id: TaskIdentifier, + payload: TaskPayload, + options?: TriggerAndSubscribeOptions +): TaskRunPromise, TaskOutput> { + return new TaskRunPromise, TaskOutput>((resolve, reject) => { + triggerAndSubscribe_internal, TaskPayload, TaskOutput>( + "tasks.triggerAndSubscribe()", + id, + payload, + undefined, + options + ) + .then((result) => { + resolve(result); + }) + .catch((error) => { + reject(error); + }); + }, id); +} + /** * Batch trigger multiple task runs with the given payloads, and wait for the results. Returns the results of the task runs. * @param id - The id of the task to trigger @@ -2441,6 +2529,128 @@ async function triggerAndWait_internal( + name: string, + id: TIdentifier, + payload: TPayload, + parsePayload?: SchemaParseFn, + options?: TriggerAndSubscribeOptions +): Promise> { + const ctx = taskContext.ctx; + + if (!ctx) { + throw new Error("triggerAndSubscribe can only be used from inside a task.run()"); + } + + const apiClient = apiClientManager.clientOrThrow(); + + const parsedPayload = parsePayload ? await parsePayload(payload) : payload; + const payloadPacket = await stringifyIO(parsedPayload); + + const processedIdempotencyKey = await makeIdempotencyKey(options?.idempotencyKey); + const idempotencyKeyOptions = processedIdempotencyKey + ? 
getIdempotencyKeyOptions(processedIdempotencyKey) + : undefined; + + return await tracer.startActiveSpan( + name, + async (span) => { + const response = await apiClient.triggerTask( + id, + { + payload: payloadPacket.data, + options: { + lockToVersion: taskContext.worker?.version, + queue: options?.queue ? { name: options.queue } : undefined, + concurrencyKey: options?.concurrencyKey, + test: taskContext.ctx?.run.isTest, + payloadType: payloadPacket.dataType, + delay: options?.delay, + ttl: options?.ttl, + tags: options?.tags, + maxAttempts: options?.maxAttempts, + metadata: options?.metadata, + maxDuration: options?.maxDuration, + parentRunId: ctx.run.id, + // NOTE: no resumeParentOnCompletion — parent stays alive and subscribes + idempotencyKey: processedIdempotencyKey?.toString(), + idempotencyKeyTTL: options?.idempotencyKeyTTL, + idempotencyKeyOptions, + machine: options?.machine, + priority: options?.priority, + region: options?.region, + debounce: options?.debounce, + }, + }, + {} + ); + + // Set attributes after trigger so the dashboard can link to the child run + span.setAttribute("messaging.message.id", response.id); + span.setAttribute("runId", response.id); + span.setAttribute(SemanticInternalAttributes.ENTITY_TYPE, "run"); + span.setAttribute(SemanticInternalAttributes.ENTITY_ID, response.id); + + // Optionally cancel the child run when the abort signal fires (default: true) + const cancelOnAbort = options?.cancelOnAbort !== false; + if (options?.signal && cancelOnAbort) { + const onAbort = () => { + apiClient.cancelRun(response.id).catch(() => {}); + }; + if (options.signal.aborted) { + await apiClient.cancelRun(response.id).catch(() => {}); + throw new Error("Aborted"); + } + options.signal.addEventListener("abort", onAbort, { once: true }); + } + + for await (const run of apiClient.subscribeToRun(response.id, { + closeOnComplete: true, + signal: options?.signal, + skipColumns: ["payload"], + })) { + if (run.isSuccess) { + // run.output from 
subscribeToRun is already deserialized + return { + ok: true as const, + id: response.id, + taskIdentifier: id as TIdentifier, + output: run.output as TOutput, + }; + } + if (run.isFailed || run.isCancelled) { + const error = new Error(run.error?.message ?? `Task ${id} failed (${run.status})`); + if (run.error?.name) error.name = run.error.name; + + return { + ok: false as const, + id: response.id, + taskIdentifier: id as TIdentifier, + error, + }; + } + } + + throw new Error(`Task ${id}: subscription ended without completion`); + }, + { + kind: SpanKind.PRODUCER, + attributes: { + [SemanticInternalAttributes.STYLE_ICON]: "trigger", + ...accessoryAttributes({ + items: [ + { + text: id, + variant: "normal", + }, + ], + style: "codepath", + }), + }, + } + ); +} + async function batchTriggerAndWait_internal( name: string, id: TIdentifier, diff --git a/packages/trigger-sdk/src/v3/streams.ts b/packages/trigger-sdk/src/v3/streams.ts index 68edc2a64ab..fdc30e3ec22 100644 --- a/packages/trigger-sdk/src/v3/streams.ts +++ b/packages/trigger-sdk/src/v3/streams.ts @@ -25,8 +25,10 @@ import { InputStreamOncePromise, type InputStreamOnceResult, type InputStreamWaitOptions, + type InputStreamWaitWithIdleTimeoutOptions, type SendInputStreamOptions, type InferInputStreamType, + type StreamWriteResult, } from "@trigger.dev/core/v3"; import { conditionallyImportAndParsePacket } from "@trigger.dev/core/v3/utils/ioSerialization"; import { tracer } from "./tracer.js"; @@ -139,7 +141,7 @@ function pipe( opts = valueOrOptions as PipeStreamOptions | undefined; } - return pipeInternal(key, value, opts, "streams.pipe()"); + return pipeInternal(key, value, opts, opts?.spanName ?? "streams.pipe()"); } /** @@ -167,6 +169,7 @@ function pipeInternal( [SemanticInternalAttributes.ENTITY_TYPE]: "realtime-stream", [SemanticInternalAttributes.ENTITY_ID]: `${runId}:${key}`, [SemanticInternalAttributes.STYLE_ICON]: "streams", + ...(opts?.collapsed ? 
{ [SemanticInternalAttributes.COLLAPSED]: true } : {}), ...accessoryAttributes({ items: [ { @@ -194,7 +197,9 @@ function pipeInternal( return { stream: instance.stream, - waitUntilComplete: () => instance.wait(), + waitUntilComplete: async () => { + return instance.wait(); + }, }; } catch (error) { // if the error is a signal abort error, we need to end the span but not record an exception @@ -640,7 +645,7 @@ function writerInternal(key: string, options: WriterStreamOptions) } }); - return pipeInternal(key, stream, options, "streams.writer()"); + return pipeInternal(key, stream, options, options.spanName ?? "streams.writer()"); } export type RealtimeDefineStreamOptions = { @@ -656,8 +661,18 @@ function define(opts: RealtimeDefineStreamOptions): RealtimeDefinedStream read(runId, options) { return read(runId, opts.id, options); }, - append(value, options) { - return append(opts.id, value as BodyInit, options); + async append(value, options) { + // Use a single-write writer so objects are serialized the same way + // as stream.writer() — the raw append API sends BodyInit which + // doesn't serialize objects correctly for SSE consumers. + const { waitUntilComplete } = writer(opts.id, { + ...options, + spanName: "streams.append()", + execute: ({ write }) => { + write(value); + }, + }); + await waitUntilComplete(); }, writer(options) { return writer(opts.id, options); @@ -713,7 +728,7 @@ function input(opts: { id: string }): RealtimeDefinedInputStream { return new InputStreamOncePromise((resolve, reject) => { tracer .startActiveSpan( - `inputStream.once()`, + options?.spanName ?? 
`inputStream.once()`, async () => { const result = await innerPromise; resolve(result as InputStreamOnceResult); @@ -750,23 +765,21 @@ function input(opts: { id: string }): RealtimeDefinedInputStream { const apiClient = apiClientManager.clientOrThrow(); + // Create the waitpoint before the span so we have the entity ID upfront + const response = await apiClient.createInputStreamWaitpoint(ctx.run.id, { + streamId: opts.id, + timeout: options?.timeout, + idempotencyKey: options?.idempotencyKey, + idempotencyKeyTTL: options?.idempotencyKeyTTL, + tags: options?.tags, + lastSeqNum: inputStreams.lastSeqNum(opts.id), + }); + const result = await tracer.startActiveSpan( - `inputStream.wait()`, + options?.spanName ?? `inputStream.wait()`, async (span) => { - // 1. Create a waitpoint linked to this input stream - const response = await apiClient.createInputStreamWaitpoint(ctx.run.id, { - streamId: opts.id, - timeout: options?.timeout, - idempotencyKey: options?.idempotencyKey, - idempotencyKeyTTL: options?.idempotencyKeyTTL, - tags: options?.tags, - lastSeqNum: inputStreams.lastSeqNum(opts.id), - }); - // Set the entity ID now that we have the waitpoint ID - span.setAttribute(SemanticInternalAttributes.ENTITY_ID, response.waitpointId); - - // 2. Block the run on the waitpoint + // 1. Block the run on the waitpoint const waitResponse = await apiClient.waitForWaitpointToken({ runFriendlyId: ctx.run.id, waitpointFriendlyId: response.waitpointId, @@ -776,6 +789,12 @@ function input(opts: { id: string }): RealtimeDefinedInputStream { throw new Error("Failed to block on input stream waitpoint"); } + // 2. Disconnect the SSE tail and clear the buffer before suspending. + // Without this, the tail stays alive during the suspension window and + // may buffer a copy of the same message that will be delivered via the + // waitpoint, causing a duplicate on resume. + inputStreams.disconnectStream(opts.id); + // 3. 
Suspend the task const waitResult = await runtime.waitUntil(response.waitpointId); @@ -792,6 +811,12 @@ function input(opts: { id: string }): RealtimeDefinedInputStream { : undefined; if (waitResult.ok) { + // Advance the seq counter so the SSE tail doesn't replay + // the record that was consumed via the waitpoint path when + // it lazily reconnects on the next on()/once() call. + const prevSeq = inputStreams.lastSeqNum(opts.id); + inputStreams.setLastSeqNum(opts.id, (prevSeq ?? -1) + 1); + return { ok: true as const, output: data as TData }; } else { const error = new WaitpointTimeoutError(data?.message ?? "Timed out"); @@ -806,6 +831,7 @@ function input(opts: { id: string }): RealtimeDefinedInputStream { attributes: { [SemanticInternalAttributes.STYLE_ICON]: "wait", [SemanticInternalAttributes.ENTITY_TYPE]: "waitpoint", + [SemanticInternalAttributes.ENTITY_ID]: response.waitpointId, streamId: opts.id, ...accessoryAttributes({ items: [ @@ -826,6 +852,61 @@ function input(opts: { id: string }): RealtimeDefinedInputStream { } }); }, + async waitWithIdleTimeout(options) { + const self = this; + const spanName = options.spanName ?? 
`inputStream.waitWithIdleTimeout()`; + + return tracer.startActiveSpan( + spanName, + async (span) => { + // Idle phase: keep compute alive + if (options.idleTimeoutInSeconds > 0) { + const warm = await inputStreams.once(opts.id, { + timeoutMs: options.idleTimeoutInSeconds * 1000, + }); + if (warm.ok) { + span.setAttribute("wait.resolved", "idle"); + return { ok: true as const, output: warm.output as TData }; + } + } + + // Skip suspend if requested — return as if timed out + if (options.skipSuspend) { + span.setAttribute("wait.resolved", "skipped"); + return { ok: false as const, error: undefined }; + } + + // Fire onSuspend callback before entering cold phase + if (options.onSuspend) { + await options.onSuspend(); + } + + // Cold phase: suspend via .wait() — creates a child span + span.setAttribute("wait.resolved", "suspended"); + const waitResult = await self.wait({ + timeout: options.timeout, + spanName: "suspended", + }); + + // Fire onResume callback after successful resume + if (waitResult.ok && options.onResume) { + await options.onResume(); + } + + return waitResult; + }, + { + attributes: { + [SemanticInternalAttributes.STYLE_ICON]: "streams", + streamId: opts.id, + ...accessoryAttributes({ + items: [{ text: opts.id, variant: "normal" }], + style: "codepath", + }), + }, + } + ); + }, async send(runId, data, options) { return tracer.startActiveSpan( `inputStream.send()`, diff --git a/packages/trigger-sdk/src/v3/tasks.ts b/packages/trigger-sdk/src/v3/tasks.ts index 75b7e85e625..5781a104229 100644 --- a/packages/trigger-sdk/src/v3/tasks.ts +++ b/packages/trigger-sdk/src/v3/tasks.ts @@ -20,6 +20,7 @@ import { SubtaskUnwrapError, trigger, triggerAndWait, + triggerAndSubscribe, } from "./shared.js"; export { SubtaskUnwrapError }; @@ -96,6 +97,7 @@ export const tasks = { trigger, batchTrigger, triggerAndWait, + triggerAndSubscribe, batchTriggerAndWait, /** @deprecated Use onStartAttempt instead */ onStart, From 10200608fa0a32d8ea0a23d7e256c4c640303eb3 Mon Sep 17 
00:00:00 2001 From: Eric Allam Date: Tue, 12 May 2026 18:42:15 +0100 Subject: [PATCH 2/9] fix(webapp): strip sticky boot-payload fields on continuation runs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Session.triggerConfig.basePayload is the boot payload for every Run triggered by ensureRunForSession / swapSessionRun. When a customer's first-run payload includes `message` / `messages` / `trigger: "submit-message"` (the chat.createStartSessionAction({ basePayload: { message } }) pattern designed to boot the FIRST run straight into a turn), those fields are sticky — every continuation run replays them and produces a phantom turn re-processing the original first message. Continuation overrides now clear those three fields so the new run boots clean. ensureRunForSession strips on continuation (prior run dead); swapSessionRun strips unconditionally (every swap is a deliberate handoff). Pairs with the SDK's new continuation-wait boot branch — together, the new run waits silently on session.in for the next user message instead of replaying stale basePayload. --- .../realtime/sessionRunManager.server.ts | 29 ++++++++++++++++++- 1 file changed, 28 insertions(+), 1 deletion(-) diff --git a/apps/webapp/app/services/realtime/sessionRunManager.server.ts b/apps/webapp/app/services/realtime/sessionRunManager.server.ts index 0c2e5765455..6b3ac805c0b 100644 --- a/apps/webapp/app/services/realtime/sessionRunManager.server.ts +++ b/apps/webapp/app/services/realtime/sessionRunManager.server.ts @@ -136,10 +136,29 @@ export async function ensureRunForSession( // override `trigger`/`metadata` etc. `sessionId` is always set so the // agent doesn't need a control-plane round-trip to look up the session // friendlyId from `payload.chatId`. + // Continuation overrides strip the basePayload's first-run-only fields + // so a continuation run doesn't inherit a stale boot payload. 
The Session + // row's `triggerConfig.basePayload` is captured at create-time and used + // verbatim for every Run we trigger; if the customer included `message` + // / `messages` / `trigger: "submit-message"` to make the FIRST run boot + // straight into a first turn (via `chat.createStartSessionAction`), those + // values stick around and get replayed on every continuation. With + // `continuation: true` and `message`/`messages` cleared, the SDK boot + // path enters its continuation-wait branch and waits for the next + // session.in record before running a turn. const continuationOverrides: Record = { sessionId: session.friendlyId, ...(priorDeadRunFriendlyId !== undefined - ? { continuation: true, previousRunId: priorDeadRunFriendlyId } + ? { + continuation: true, + previousRunId: priorDeadRunFriendlyId, + // Clear sticky boot-payload fields so the new run waits for the + // next session.in record instead of re-processing whatever was + // in the original `createStartSessionAction({ basePayload })`. + message: undefined, + messages: undefined, + trigger: undefined, + } : {}), }; const mergedPayloadOverrides: Record = { @@ -360,11 +379,19 @@ export async function swapSessionRun( // every swap is a deliberate handoff from `callingRunId` (which owned // prior conversation state) to a fresh run. Merged AFTER caller-supplied // overrides so a caller can't accidentally unset them. + // + // Sticky boot-payload fields (`message` / `messages` / `trigger`) are + // cleared here for the same reason as in `ensureRunForSession`: the + // Session's basePayload is captured at create-time and replays on every + // continuation if not stripped. See the comment in `ensureRunForSession`. const mergedPayloadOverrides: Record = { ...(payloadOverrides ?? 
{}), sessionId: session.friendlyId, continuation: true, previousRunId: callingRunFriendlyId, + message: undefined, + messages: undefined, + trigger: undefined, }; const config = SessionTriggerConfigSchema.parse(session.triggerConfig); From 84c717cf128da220668faeadb72e9df72d5ee839 Mon Sep 17 00:00:00 2001 From: Eric Allam Date: Tue, 12 May 2026 20:07:43 +0100 Subject: [PATCH 3/9] fix(webapp): migrate Sessions routes to new createLoaderApiRoute auth shape MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The auth-consolidation commit on main rewrote createLoaderApiRoute's authorization contract: - `resource` returns a typed RbacResource ({ type, id }) instead of an untyped record ({ deployments: 'current' } / { sessions: [...] }) - multi-resource auth uses `anyResource(...)` instead of an array literal - `superScopes` is no longer a field — super-scopes are resolved at the JWT ability layer - the `findResource` resolver must return `T | undefined`, not `T | null` Our Sessions PR added two routes (api.v1.deployments.current.ts and realtime.v1.sessions.$session.$io.records.ts) using the pre-consolidation shape. The rebase had no textual conflict because the files didn't exist on main, but typecheck fails because the new contract doesn't accept the old shape. Migrate both routes to match. 
--- apps/webapp/app/routes/api.v1.deployments.current.ts | 5 ++--- .../routes/realtime.v1.sessions.$session.$io.records.ts | 9 ++++++--- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/apps/webapp/app/routes/api.v1.deployments.current.ts b/apps/webapp/app/routes/api.v1.deployments.current.ts index ed185f41b97..48170414ec2 100644 --- a/apps/webapp/app/routes/api.v1.deployments.current.ts +++ b/apps/webapp/app/routes/api.v1.deployments.current.ts @@ -8,8 +8,7 @@ export const loader = createLoaderApiRoute( corsStrategy: "none", authorization: { action: "read", - resource: () => ({ deployments: "current" }), - superScopes: ["read:deployments", "read:all", "admin"], + resource: () => ({ type: "deployments", id: "current" }), }, findResource: async (_params, auth) => { const promotion = await $replica.workerDeploymentPromotion.findFirst({ @@ -35,7 +34,7 @@ export const loader = createLoaderApiRoute( }, }); - return promotion?.deployment ?? null; + return promotion?.deployment ?? 
undefined; }, }, async ({ resource: deployment }) => { diff --git a/apps/webapp/app/routes/realtime.v1.sessions.$session.$io.records.ts b/apps/webapp/app/routes/realtime.v1.sessions.$session.$io.records.ts index 579f6daf8d7..7a27538c5a2 100644 --- a/apps/webapp/app/routes/realtime.v1.sessions.$session.$io.records.ts +++ b/apps/webapp/app/routes/realtime.v1.sessions.$session.$io.records.ts @@ -8,7 +8,7 @@ import { resolveSessionByIdOrExternalId, } from "~/services/realtime/sessions.server"; import { getRealtimeStreamInstance } from "~/services/realtime/v1StreamsGlobal.server"; -import { createLoaderApiRoute } from "~/services/routeBuilders/apiBuilder.server"; +import { anyResource, createLoaderApiRoute } from "~/services/routeBuilders/apiBuilder.server"; const ParamsSchema = z.object({ session: z.string(), @@ -58,15 +58,18 @@ const loader = createLoaderApiRoute( }, authorization: { action: "read", + // Multi-key: the channel is addressable by the URL key, the row's + // friendlyId, and (if set) externalId. Type-level `read:sessions` + // matches any of them; `read:all` / `admin` bypass via the JWT + // ability's wildcard branches. resource: ({ row, addressingKey }) => { const ids = new Set([addressingKey]); if (row) { ids.add(row.friendlyId); if (row.externalId) ids.add(row.externalId); } - return { sessions: [...ids] }; + return anyResource([...ids].map((id) => ({ type: "sessions", id }))); }, - superScopes: ["read:sessions", "read:all", "admin"], }, }, async ({ params, authentication, resource, searchParams }) => { From f714aa9597a7dee5123f394424e612343e56dd56 Mon Sep 17 00:00:00 2001 From: Eric Allam Date: Tue, 12 May 2026 21:45:19 +0100 Subject: [PATCH 4/9] fix(webapp,sdk): address PR review findings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Four fixes from the #3542 review pass. 
webapp/runEngine/queues.server.ts — non-locked-worker path of getTaskQueueInfo skipped the task lookup when the caller provided both a queue override and a per-trigger TTL, leaving `taskKind` undefined. AGENT/SCHEDULED runs hitting this path got stamped as STANDARD in ClickHouse and disappeared from the dashboard's Source filter. Mirrors the locked-worker fix above (always fetch triggerSource). webapp/presenters/v3/SessionListPresenter.server.ts — current-run lookup wasn't scoped to projectId + runtimeEnvironmentId. Session .currentRunId has no FK, so a stale or corrupted pointer could surface another tenant's run. The list query is env-scoped; this adds the same fence to the run lookup. webapp/services/realtime/sessionRunManager.server.ts — after a lost claim race, the post-reload probe of fresh.currentRunId went through getRunStatusAndFriendlyId which reads from $replica. The replica can lag behind the writer the winner just wrote to, miss the live run, and force another trigger+recurse up to ENSURE_RUN_FOR_SESSION_MAX_ATTEMPTS. Probe the writer for the same read-after-write reason the fresh reload already used. trigger-sdk/v3/shared.ts — triggerAndSubscribe leaked the abort listener on normal completion. `{ once: true }` only auto-removes after firing; long-lived signals shared across many calls accumulated dead listeners pinning apiClient + response.id until GC. Wrap the subscribe loop in try/finally and removeEventListener on every exit path. Also switched the synchronous-pre-aborted throw to a DOMException with name AbortError so callers can detect the abort with the standard err.name === 'AbortError' check. 
--- .../v3/SessionListPresenter.server.ts | 10 ++- .../app/runEngine/concerns/queues.server.ts | 23 +++--- .../realtime/sessionRunManager.server.ts | 9 ++- packages/trigger-sdk/src/v3/shared.ts | 70 +++++++++++-------- 4 files changed, 73 insertions(+), 39 deletions(-) diff --git a/apps/webapp/app/presenters/v3/SessionListPresenter.server.ts b/apps/webapp/app/presenters/v3/SessionListPresenter.server.ts index 684d5d6dab5..df68569c85d 100644 --- a/apps/webapp/app/presenters/v3/SessionListPresenter.server.ts +++ b/apps/webapp/app/presenters/v3/SessionListPresenter.server.ts @@ -159,9 +159,17 @@ export class SessionListPresenter { "SessionListPresenter.findCurrentRuns", async (span) => { span.setAttribute("currentRunIds.count", currentRunIds.length); + // Scope by projectId + runtimeEnvironmentId — Session.currentRunId + // is a plain string column without an FK, so a stale or corrupted + // pointer could surface another tenant's run. The list query above + // is already env-scoped; the run lookup needs the same fence. return currentRunIds.length > 0 ? 
this.replica.taskRun.findMany({ - where: { id: { in: currentRunIds } }, + where: { + id: { in: currentRunIds }, + projectId, + runtimeEnvironmentId: environmentId, + }, select: { id: true, friendlyId: true }, }) : []; diff --git a/apps/webapp/app/runEngine/concerns/queues.server.ts b/apps/webapp/app/runEngine/concerns/queues.server.ts index ce25696d1b8..4b4298bc935 100644 --- a/apps/webapp/app/runEngine/concerns/queues.server.ts +++ b/apps/webapp/app/runEngine/concerns/queues.server.ts @@ -213,14 +213,21 @@ export class DefaultQueueManager implements QueueManager { const defaultQueueName = `task/${taskId}`; - // When caller provides both a queue override and a per-trigger TTL, - // we don't need any DB queries - the per-trigger TTL takes precedence - if (overriddenQueueName && body.options?.ttl !== undefined) { - return { queueName: overriddenQueueName, taskTtl: undefined }; - } - - // Find the current worker for the environment - const worker = await findCurrentWorkerFromEnvironment(environment, this.prisma); + // Even when the caller provides both a queue override and a + // per-trigger TTL, we still need to fetch the task so `triggerSource` + // (which becomes `taskKind` on annotations and replicates to + // ClickHouse) is populated. Without it, AGENT/SCHEDULED runs hitting + // this path get stamped as STANDARD and disappear from the + // dashboard's `Source` filter. Mirrors the locked-worker fix above + // — `taskTtl` is harmless in the returned value because the call + // site coalesces `body.options.ttl ?? taskTtl`. + + // Find the current worker for the environment. Replica is fine here — + // the adjacent `backgroundWorkerTask` lookups below already use + // `replicaPrisma` (replica lag for "just deployed" is bounded the same + // way for both queries; reading the worker from the writer and the + // task from the replica would only widen the inconsistency window). 
+ const worker = await findCurrentWorkerFromEnvironment(environment, this.replicaPrisma); if (!worker) { logger.debug("Failed to get queue name: No worker found", { diff --git a/apps/webapp/app/services/realtime/sessionRunManager.server.ts b/apps/webapp/app/services/realtime/sessionRunManager.server.ts index 6b3ac805c0b..8a7cd4df967 100644 --- a/apps/webapp/app/services/realtime/sessionRunManager.server.ts +++ b/apps/webapp/app/services/realtime/sessionRunManager.server.ts @@ -237,7 +237,14 @@ export async function ensureRunForSession( } if (fresh.currentRunId) { - const probe = await getRunStatusAndFriendlyId(fresh.currentRunId); + // Same read-after-write reason as the `fresh` reload above: the winner + // just wrote `currentRunId` on the writer, so probe the writer too — + // the replica may not have the run row yet, and a missed probe forces + // another trigger+recurse until `ENSURE_RUN_FOR_SESSION_MAX_ATTEMPTS`. + const probe = await prisma.taskRun.findFirst({ + where: { id: fresh.currentRunId }, + select: { status: true, friendlyId: true }, + }); if (probe && !isFinalRunStatus(probe.status)) { return { runId: fresh.currentRunId, triggered: false }; } diff --git a/packages/trigger-sdk/src/v3/shared.ts b/packages/trigger-sdk/src/v3/shared.ts index cd51b35e32d..dfcdcf8add9 100644 --- a/packages/trigger-sdk/src/v3/shared.ts +++ b/packages/trigger-sdk/src/v3/shared.ts @@ -2593,45 +2593,57 @@ async function triggerAndSubscribe_internal void) | undefined; if (options?.signal && cancelOnAbort) { - const onAbort = () => { - apiClient.cancelRun(response.id).catch(() => {}); - }; if (options.signal.aborted) { await apiClient.cancelRun(response.id).catch(() => {}); - throw new Error("Aborted"); + throw new DOMException("Aborted", "AbortError"); } + onAbort = () => { + apiClient.cancelRun(response.id).catch(() => {}); + }; + // `{ once: true }` auto-removes the listener on abort, but if the + // run completes normally the listener stays attached and pins + // `apiClient` 
+ `response.id` until the signal is GC'd. Long-lived + // signals shared across many calls accumulate dead listeners; the + // `finally` below removes the listener on every exit path. options.signal.addEventListener("abort", onAbort, { once: true }); } - for await (const run of apiClient.subscribeToRun(response.id, { - closeOnComplete: true, - signal: options?.signal, - skipColumns: ["payload"], - })) { - if (run.isSuccess) { - // run.output from subscribeToRun is already deserialized - return { - ok: true as const, - id: response.id, - taskIdentifier: id as TIdentifier, - output: run.output as TOutput, - }; + try { + for await (const run of apiClient.subscribeToRun(response.id, { + closeOnComplete: true, + signal: options?.signal, + skipColumns: ["payload"], + })) { + if (run.isSuccess) { + // run.output from subscribeToRun is already deserialized + return { + ok: true as const, + id: response.id, + taskIdentifier: id as TIdentifier, + output: run.output as TOutput, + }; + } + if (run.isFailed || run.isCancelled) { + const error = new Error(run.error?.message ?? `Task ${id} failed (${run.status})`); + if (run.error?.name) error.name = run.error.name; + + return { + ok: false as const, + id: response.id, + taskIdentifier: id as TIdentifier, + error, + }; + } } - if (run.isFailed || run.isCancelled) { - const error = new Error(run.error?.message ?? 
`Task ${id} failed (${run.status})`); - if (run.error?.name) error.name = run.error.name; - - return { - ok: false as const, - id: response.id, - taskIdentifier: id as TIdentifier, - error, - }; + + throw new Error(`Task ${id}: subscription ended without completion`); + } finally { + if (onAbort && options?.signal) { + options.signal.removeEventListener("abort", onAbort); } } - - throw new Error(`Task ${id}: subscription ended without completion`); }, { kind: SpanKind.PRODUCER, From 127530cdff3b3c79f123874140a548d386c2f828 Mon Sep 17 00:00:00 2001 From: Eric Allam Date: Tue, 12 May 2026 22:00:09 +0100 Subject: [PATCH 5/9] fix(sdk): more PR review findings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit shared.ts — triggerAndSubscribe was missing the requestOptions parameter that every sibling task-trigger API (trigger, triggerAndWait, batchTrigger) accepts. Without it callers couldn't pass clientConfig, forcing the internal to hardcode apiClientManager.clientOrThrow() with no override. Added requestOptions?: TriggerApiRequestOptions to both the public and internal signatures and threaded clientConfig through to clientOrThrow. streams.ts — the skipSuspend branch of waitWithIdleTimeout returned { ok: false, error: undefined } instead of a WaitpointTimeoutError, so the result shape didn't match the cold-phase wait() path below. Callers that did `throw result.error` would have thrown undefined. Now returns a real WaitpointTimeoutError with a descriptive message. changeset — adds .changeset/sessions-primitive.md for the new public SDK surface (Sessions primitive + tasks.triggerAndSubscribe + chat.agent on top). Marked as minor on @trigger.dev/sdk. 
--- .changeset/sessions-primitive.md | 9 +++++++++ packages/trigger-sdk/src/v3/shared.ts | 11 +++++++---- packages/trigger-sdk/src/v3/streams.ts | 13 +++++++++++-- 3 files changed, 27 insertions(+), 6 deletions(-) create mode 100644 .changeset/sessions-primitive.md diff --git a/.changeset/sessions-primitive.md b/.changeset/sessions-primitive.md new file mode 100644 index 00000000000..20690235c9a --- /dev/null +++ b/.changeset/sessions-primitive.md @@ -0,0 +1,9 @@ +--- +"@trigger.dev/sdk": minor +"@trigger.dev/core": patch +--- + +Adds the Sessions primitive — a durable, run-aware stream channel keyed +on a stable `externalId`. Public SDK additions: `tasks.triggerAndSubscribe()` +and the `chat.agent` runtime built on top of Sessions. See +https://trigger.dev/docs/ai-chat/overview for the full feature surface. diff --git a/packages/trigger-sdk/src/v3/shared.ts b/packages/trigger-sdk/src/v3/shared.ts index dfcdcf8add9..bccf7334380 100644 --- a/packages/trigger-sdk/src/v3/shared.ts +++ b/packages/trigger-sdk/src/v3/shared.ts @@ -534,7 +534,8 @@ export function triggerAndWait( export function triggerAndSubscribe( id: TaskIdentifier, payload: TaskPayload, - options?: TriggerAndSubscribeOptions + options?: TriggerAndSubscribeOptions, + requestOptions?: TriggerApiRequestOptions ): TaskRunPromise, TaskOutput> { return new TaskRunPromise, TaskOutput>((resolve, reject) => { triggerAndSubscribe_internal, TaskPayload, TaskOutput>( @@ -542,7 +543,8 @@ export function triggerAndSubscribe( id, payload, undefined, - options + options, + requestOptions ) .then((result) => { resolve(result); @@ -2534,7 +2536,8 @@ async function triggerAndSubscribe_internal, - options?: TriggerAndSubscribeOptions + options?: TriggerAndSubscribeOptions, + requestOptions?: TriggerApiRequestOptions ): Promise> { const ctx = taskContext.ctx; @@ -2542,7 +2545,7 @@ async function triggerAndSubscribe_internal(opts: { id: string }): RealtimeDefinedInputStream { } } - // Skip suspend if requested — return as 
if timed out + // Skip suspend if requested — return a real WaitpointTimeoutError + // so the result shape matches the cold-phase `self.wait()` path + // below. Callers that check `if (!result.ok)` work the same as + // before; callers that do `throw result.error` get a useful error + // instead of `undefined`. if (options.skipSuspend) { span.setAttribute("wait.resolved", "skipped"); - return { ok: false as const, error: undefined }; + return { + ok: false as const, + error: new WaitpointTimeoutError( + "Idle timeout elapsed and skipSuspend is set" + ), + }; } // Fire onSuspend callback before entering cold phase From 5359edac2576de93b55cf48edc67ffd0d65a87ef Mon Sep 17 00:00:00 2001 From: Eric Allam Date: Wed, 13 May 2026 07:14:10 +0100 Subject: [PATCH 6/9] =?UTF-8?q?fix(sdk):=20more=20PR=20review=20findings?= =?UTF-8?q?=20(round=203)=20=E2=80=94=20reconnect=20backoff=20+=20design-n?= =?UTF-8?q?ote=20divergence?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Three more #3542 review fixes addressing the design-question bucket. sessionStreams/manager.ts + inputStreams/manager.ts — both #runTail loops swallowed errors and the .finally reconnected immediately whenever hasHandlers || hasWaiters. A persistent backend failure (auth rejection, 5xx, DNS) would reconnect in a tight loop with no rate limiting. Both managers now exponentially back off: 1s base, doubling per attempt, capped at 30s, plus 0–1s jitter. A reconnectAttempts counter resets to 0 on every successful #dispatch (any record flowing through = healthy connection), so transient blips don't accumulate. Per-waiter timeouts still bound how long any once() waits regardless. realtimeStreams/streamsWriterV2.ts + .test.ts — extracted the size-check + discriminant-extraction logic into encodeChunkOrError, a pure helper. Tests now exercise it directly, no `vi.mock("@s2-dev/ streamstore")` shim. 
The original vi.mock conflicted with the codebase rule of using testcontainers / not mocking; the new tests are framework-pure and faster. trigger-sdk/v3/shared.ts — added an in-code comment in triggerAndSubscribe explaining the error-shape divergence from triggerAndWait. The SerializedError surfaced by subscribeToRun strips the TaskRunError type discriminator at the server boundary (createJsonErrorObject in errors.ts:274), so the SDK can't reconstruct the discriminator on the receive side. Callers needing exact error-type matching should use triggerAndWait. --- packages/core/src/v3/inputStreams/manager.ts | 32 ++- .../realtimeStreams/streamsWriterV2.test.ts | 191 ++++++------------ .../src/v3/realtimeStreams/streamsWriterV2.ts | 38 +++- .../core/src/v3/sessionStreams/manager.ts | 36 +++- .../src/v3/utils/reconnectBackoff.test.ts | 82 ++++++++ .../core/src/v3/utils/reconnectBackoff.ts | 25 +++ packages/trigger-sdk/src/v3/shared.ts | 12 ++ 7 files changed, 273 insertions(+), 143 deletions(-) create mode 100644 packages/core/src/v3/utils/reconnectBackoff.test.ts create mode 100644 packages/core/src/v3/utils/reconnectBackoff.ts diff --git a/packages/core/src/v3/inputStreams/manager.ts b/packages/core/src/v3/inputStreams/manager.ts index 09212fb6a84..a20d69be7e5 100644 --- a/packages/core/src/v3/inputStreams/manager.ts +++ b/packages/core/src/v3/inputStreams/manager.ts @@ -6,6 +6,7 @@ import { InputStreamTimeoutError, } from "./types.js"; import { InputStreamOnceOptions } from "../realtimeStreams/types.js"; +import { computeReconnectDelayMs } from "../utils/reconnectBackoff.js"; type InputStreamHandler = (data: unknown) => void | Promise; @@ -29,6 +30,12 @@ export class StandardInputStreamManager implements InputStreamManager { private seqNums = new Map(); private currentRunId: string | null = null; private streamsVersion: string | undefined; + // Reconnect attempt counter per streamId. 
Drives the exponential + // backoff applied by `#ensureStreamTailConnected`'s `.finally` so a + // persistent backend failure (auth rejection, 5xx, DNS, etc.) doesn't + // reconnect in a tight loop. Reset to 0 by `#dispatch` whenever a + // record flows through. + private reconnectAttempts = new Map(); constructor( private apiClient: ApiClient, @@ -204,6 +211,7 @@ export class StandardInputStreamManager implements InputStreamManager { this.streamsVersion = undefined; this.seqNums.clear(); this.handlers.clear(); + this.reconnectAttempts.clear(); // Reject all pending once waiters for (const [, waiters] of this.onceWaiters) { @@ -238,13 +246,29 @@ export class StandardInputStreamManager implements InputStreamManager { .finally(() => { this.tails.delete(streamId); - // Auto-reconnect if there are still active handlers or waiters + // Auto-reconnect with exponential backoff if there are still + // active handlers or waiters. Without backoff a persistent + // failure (auth rejected, 5xx, DNS) would reconnect in a tight + // loop because `#runTail`'s error path only logs. `#dispatch` + // resets the counter on every successful record. const hasHandlers = this.handlers.has(streamId) && this.handlers.get(streamId)!.size > 0; const hasWaiters = this.onceWaiters.has(streamId) && this.onceWaiters.get(streamId)!.length > 0; if (hasHandlers || hasWaiters) { - this.#ensureStreamTailConnected(streamId); + const attempt = this.reconnectAttempts.get(streamId) ?? 
0; + this.reconnectAttempts.set(streamId, attempt + 1); + const delayMs = computeReconnectDelayMs(attempt); + setTimeout(() => { + if (this.tails.has(streamId)) return; + const stillHasHandlers = + this.handlers.has(streamId) && this.handlers.get(streamId)!.size > 0; + const stillHasWaiters = + this.onceWaiters.has(streamId) && + this.onceWaiters.get(streamId)!.length > 0; + if (!stillHasHandlers && !stillHasWaiters) return; + this.#ensureStreamTailConnected(streamId); + }, delayMs); } }); this.tails.set(streamId, { abortController, promise }); @@ -310,6 +334,10 @@ export class StandardInputStreamManager implements InputStreamManager { } #dispatch(streamId: string, data: unknown): void { + // Any record flowing through = healthy connection; reset the backoff + // counter so the next disconnect starts fresh. + this.reconnectAttempts.delete(streamId); + // First try to resolve a once waiter const waiters = this.onceWaiters.get(streamId); if (waiters && waiters.length > 0) { diff --git a/packages/core/src/v3/realtimeStreams/streamsWriterV2.test.ts b/packages/core/src/v3/realtimeStreams/streamsWriterV2.test.ts index 0d73b0d48f3..85a03973708 100644 --- a/packages/core/src/v3/realtimeStreams/streamsWriterV2.test.ts +++ b/packages/core/src/v3/realtimeStreams/streamsWriterV2.test.ts @@ -1,150 +1,77 @@ -import { afterEach, describe, expect, it, vi } from "vitest"; +import { describe, expect, it } from "vitest"; import { ChatChunkTooLargeError, isChatChunkTooLargeError } from "../errors.js"; +import { encodeChunkOrError } from "./streamsWriterV2.js"; -const lastAckedPosition = vi.fn(() => undefined); - -const appendSession = vi.fn(async () => { - // A WritableStream that just consumes records — we never reach S2 because - // the size check fires upstream of this for the oversize case, but we still - // need a valid writable for the small-chunk path. 
- const writable = new WritableStream({}); - return { - writable, - lastAckedPosition, - }; -}); - -vi.mock("@s2-dev/streamstore", async (importOriginal) => { - const actual = await importOriginal(); - return { - ...actual, - S2: class FakeS2 { - basin() { - return { - stream: () => ({ - appendSession, - }), - }; - } - }, - }; -}); - -import { StreamsWriterV2 } from "./streamsWriterV2.js"; - -afterEach(() => { - vi.clearAllMocks(); -}); +// The size cap and discriminant extraction are the only S2-independent bits +// of `StreamsWriterV2` that benefit from unit coverage. Both live in the +// `encodeChunkOrError` pure helper, so the tests exercise it directly — no +// `vi.mock("@s2-dev/streamstore", ...)` shim needed. -describe("StreamsWriterV2", () => { - it("rejects with ChatChunkTooLargeError when a single chunk exceeds the per-record cap", async () => { +describe("encodeChunkOrError", () => { + it("flags oversize chunks and carries the chunk's `type` discriminant", () => { const oversized = { type: "tool-output-available", output: { text: "x".repeat(2_000_000) }, }; - const source = new ReadableStream({ - start(controller) { - controller.enqueue(oversized); - controller.close(); - }, - }); - - const writer = new StreamsWriterV2({ - basin: "test", - stream: "test", - accessToken: "test", - source, - }); - - await expect(writer.wait()).rejects.toBeInstanceOf(ChatChunkTooLargeError); - - let captured: unknown; - try { - await writer.wait(); - } catch (err) { - captured = err; - } - expect(isChatChunkTooLargeError(captured)).toBe(true); - const e = captured as ChatChunkTooLargeError; - expect(e.chunkType).toBe("tool-output-available"); - expect(e.chunkSize).toBeGreaterThan(1_000_000); - expect(e.maxSize).toBe(1024 * 1024 - 1024); - expect(e.message).toMatch(/tool-output-available/); - expect(e.message).toMatch(/chat\.agent chunk/); + + const result = encodeChunkOrError(oversized); + + expect(result.ok).toBe(false); + if (result.ok) return; // type guard + 
expect(isChatChunkTooLargeError(result.error)).toBe(true); + expect(result.error.chunkType).toBe("tool-output-available"); + expect(result.error.chunkSize).toBeGreaterThan(1_000_000); + expect(result.error.maxSize).toBe(1024 * 1024 - 1024); + expect(result.error.message).toMatch(/tool-output-available/); + expect(result.error.message).toMatch(/chat\.agent chunk/); }); - it("uses chunk.kind when chunk.type is missing (ChatInputChunk-style)", async () => { - const oversized = { - kind: "action", - payload: "x".repeat(2_000_000), - }; - const source = new ReadableStream({ - start(controller) { - controller.enqueue(oversized); - controller.close(); - }, - }); - - const writer = new StreamsWriterV2({ - basin: "test", - stream: "test", - accessToken: "test", - source, - }); - - let captured: unknown; - try { - await writer.wait(); - } catch (err) { - captured = err; - } - expect(isChatChunkTooLargeError(captured)).toBe(true); - expect((captured as ChatChunkTooLargeError).chunkType).toBe("action"); + it("falls back to chunk.kind when chunk.type is missing (ChatInputChunk-style)", () => { + const oversized = { kind: "action", payload: "x".repeat(2_000_000) }; + + const result = encodeChunkOrError(oversized); + + expect(result.ok).toBe(false); + if (result.ok) return; + expect(result.error.chunkType).toBe("action"); }); - it("omits chunkType when chunk has no discriminant", async () => { + it("omits chunkType when the chunk has no discriminant", () => { const oversized = "x".repeat(2_000_000); - const source = new ReadableStream({ - start(controller) { - controller.enqueue(oversized); - controller.close(); - }, - }); - - const writer = new StreamsWriterV2({ - basin: "test", - stream: "test", - accessToken: "test", - source, - }); - - let captured: unknown; - try { - await writer.wait(); - } catch (err) { - captured = err; - } - expect(isChatChunkTooLargeError(captured)).toBe(true); - expect((captured as ChatChunkTooLargeError).chunkType).toBeUndefined(); + + const result = 
encodeChunkOrError(oversized); + + expect(result.ok).toBe(false); + if (result.ok) return; + expect(result.error.chunkType).toBeUndefined(); }); - it("does not reject for chunks under the cap", async () => { + it("returns the encoded body for chunks under the cap", () => { const small = { type: "text-delta", delta: "hello" }; - const source = new ReadableStream({ - start(controller) { - controller.enqueue(small); - controller.close(); - }, - }); - - const writer = new StreamsWriterV2({ - basin: "test", - stream: "test", - accessToken: "test", - source, - }); - - await expect(writer.wait()).resolves.toBeDefined(); + + const result = encodeChunkOrError(small); + + expect(result.ok).toBe(true); + if (!result.ok) return; + const parsed = JSON.parse(result.body) as { data: unknown; id: string }; + expect(parsed.data).toEqual(small); + expect(parsed.id).toMatch(/^[A-Za-z0-9_-]{7}$/); // nanoid(7) + }); +}); + +// Cross-check the ChatChunkTooLargeError type-guard helper itself. Trivial, +// but keeps the test surface here exercising the public error helpers a +// consumer would import from the same module. 
+describe("isChatChunkTooLargeError", () => { + it("recognizes its own error class", () => { + const err = new ChatChunkTooLargeError(2_000_000, 1024 * 1024 - 1024, "x"); + expect(isChatChunkTooLargeError(err)).toBe(true); + }); + + it("rejects unrelated errors", () => { + expect(isChatChunkTooLargeError(new Error("nope"))).toBe(false); + expect(isChatChunkTooLargeError("string")).toBe(false); + expect(isChatChunkTooLargeError(undefined)).toBe(false); }); }); diff --git a/packages/core/src/v3/realtimeStreams/streamsWriterV2.ts b/packages/core/src/v3/realtimeStreams/streamsWriterV2.ts index ffd6fc92702..223fd8d894e 100644 --- a/packages/core/src/v3/realtimeStreams/streamsWriterV2.ts +++ b/packages/core/src/v3/realtimeStreams/streamsWriterV2.ts @@ -161,16 +161,12 @@ export class StreamsWriterV2 implements StreamsWriter { controller.error(new Error("Stream aborted")); return; } - const body = JSON.stringify({ data: chunk, id: nanoid(7) }); - const size = utf8Encoder.encode(body).length; - if (size > RECORD_BODY_MAX_BYTES) { - const chunkType = extractChunkType(chunk); - controller.error( - new ChatChunkTooLargeError(size, RECORD_BODY_MAX_BYTES, chunkType) - ); + const encoded = encodeChunkOrError(chunk); + if (!encoded.ok) { + controller.error(encoded.error); return; } - controller.enqueue(AppendRecord.string({ body })); + controller.enqueue(AppendRecord.string({ body: encoded.body })); }, }) ) @@ -258,3 +254,29 @@ function extractChunkType(chunk: unknown): string | undefined { if (typeof c.kind === "string") return c.kind; return undefined; } + +/** + * Encode a chunk as a JSON record body for S2, enforcing the per-record + * size cap. Exported so the size/discriminant logic can be unit-tested + * directly without spinning up an S2 client or mocking `@s2-dev/streamstore`. 
+ * + * Returns `{ ok: true, body }` when the encoded chunk fits within + * `RECORD_BODY_MAX_BYTES`, or `{ ok: false, error }` carrying a + * `ChatChunkTooLargeError` annotated with the chunk's discriminant + * (`type` or `kind`, whichever is present) so the surfaced error is + * useful — "tool-output-available chunk too large" beats a bare + * "chunk too large" by a lot. + */ +export function encodeChunkOrError( + chunk: unknown +): { ok: true; body: string } | { ok: false; error: ChatChunkTooLargeError } { + const body = JSON.stringify({ data: chunk, id: nanoid(7) }); + const size = utf8Encoder.encode(body).length; + if (size > RECORD_BODY_MAX_BYTES) { + return { + ok: false, + error: new ChatChunkTooLargeError(size, RECORD_BODY_MAX_BYTES, extractChunkType(chunk)), + }; + } + return { ok: true, body }; +} diff --git a/packages/core/src/v3/sessionStreams/manager.ts b/packages/core/src/v3/sessionStreams/manager.ts index d40af9e7b2d..9253bbf619c 100644 --- a/packages/core/src/v3/sessionStreams/manager.ts +++ b/packages/core/src/v3/sessionStreams/manager.ts @@ -5,6 +5,7 @@ import { InputStreamTimeoutError, } from "../inputStreams/types.js"; import { InputStreamOnceOptions } from "../realtimeStreams/types.js"; +import { computeReconnectDelayMs } from "../utils/reconnectBackoff.js"; import { SessionChannelIO, SessionStreamManager } from "./types.js"; type SessionStreamHandler = (data: unknown) => void | Promise; @@ -50,6 +51,13 @@ export class StandardSessionStreamManager implements SessionStreamManager { // that's already being delivered out-of-band via the waitpoint. private explicitlyDisconnected = new Set(); private seqNums = new Map(); + // Reconnect attempt counter per key. Drives the exponential backoff + // applied by `#ensureTailConnected`'s `.finally` so a persistent + // backend failure (auth rejection, 5xx, DNS, etc.) doesn't reconnect + // in a tight loop. 
Reset to 0 by `#dispatch` whenever a real record + // flows through — any successful traffic is taken as a healthy + // connection. + private reconnectAttempts = new Map(); constructor( private apiClient: ApiClient, @@ -235,6 +243,7 @@ export class StandardSessionStreamManager implements SessionStreamManager { this.seqNums.clear(); this.minTimestamps.clear(); this.handlers.clear(); + this.reconnectAttempts.clear(); for (const [, waiters] of this.onceWaiters) { for (const waiter of waiters) { @@ -278,7 +287,28 @@ export class StandardSessionStreamManager implements SessionStreamManager { const hasWaiters = this.onceWaiters.has(key) && this.onceWaiters.get(key)!.length > 0; if (hasHandlers || hasWaiters) { - this.#ensureTailConnected(sessionId, io); + // Exponential backoff with jitter. 1s base, doubling each + // attempt, capped at 30s. Without this, a persistent backend + // failure (auth rejected, 5xx, DNS) reconnects in a tight loop + // because `#runTail`'s error path only logs. `#dispatch` resets + // the counter on every successful record, so transient blips + // don't accumulate. + const attempt = this.reconnectAttempts.get(key) ?? 0; + this.reconnectAttempts.set(key, attempt + 1); + const delayMs = computeReconnectDelayMs(attempt); + setTimeout(() => { + // Guards: a fresh `on()` during the wait may already have + // re-attached the tail; explicit disconnect or absence of + // handlers/waiters means we should stay quiet. 
+ if (this.tails.has(key)) return; + if (this.explicitlyDisconnected.has(key)) return; + const stillHasHandlers = + this.handlers.has(key) && this.handlers.get(key)!.size > 0; + const stillHasWaiters = + this.onceWaiters.has(key) && this.onceWaiters.get(key)!.length > 0; + if (!stillHasHandlers && !stillHasWaiters) return; + this.#ensureTailConnected(sessionId, io); + }, delayMs); } }); this.tails.set(key, { abortController, promise }); @@ -351,6 +381,10 @@ export class StandardSessionStreamManager implements SessionStreamManager { } #dispatch(key: string, data: unknown): void { + // Any record flowing through = healthy connection; reset the backoff + // counter so the next disconnect starts fresh. + this.reconnectAttempts.delete(key); + const waiters = this.onceWaiters.get(key); if (waiters && waiters.length > 0) { const waiter = waiters.shift()!; diff --git a/packages/core/src/v3/utils/reconnectBackoff.test.ts b/packages/core/src/v3/utils/reconnectBackoff.test.ts new file mode 100644 index 00000000000..5de5a2db8e8 --- /dev/null +++ b/packages/core/src/v3/utils/reconnectBackoff.test.ts @@ -0,0 +1,82 @@ +import { afterEach, describe, expect, it, vi } from "vitest"; + +import { RECONNECT_BACKOFF_MAX_MS, computeReconnectDelayMs } from "./reconnectBackoff.js"; + +afterEach(() => { + vi.restoreAllMocks(); +}); + +describe("computeReconnectDelayMs", () => { + // Hold Math.random steady so we can assert on the deterministic base. The + // jitter is added separately in the "jitter" test below. 
+ function withFixedRandom(value: number, fn: () => void) { + const spy = vi.spyOn(Math, "random").mockReturnValue(value); + try { + fn(); + } finally { + spy.mockRestore(); + } + } + + it("base case — attempt 0 lands in [1000, 2000)", () => { + withFixedRandom(0, () => { + expect(computeReconnectDelayMs(0)).toBe(1000); + }); + withFixedRandom(0.999, () => { + expect(computeReconnectDelayMs(0)).toBeGreaterThanOrEqual(1000); + expect(computeReconnectDelayMs(0)).toBeLessThan(2000); + }); + }); + + it("doubles per attempt up to the 30s cap", () => { + withFixedRandom(0, () => { + // 1s, 2s, 4s, 8s, 16s, then capped at 30s + expect(computeReconnectDelayMs(0)).toBe(1_000); + expect(computeReconnectDelayMs(1)).toBe(2_000); + expect(computeReconnectDelayMs(2)).toBe(4_000); + expect(computeReconnectDelayMs(3)).toBe(8_000); + expect(computeReconnectDelayMs(4)).toBe(16_000); + // 32s would exceed the cap — should clamp to 30s. + expect(computeReconnectDelayMs(5)).toBe(RECONNECT_BACKOFF_MAX_MS); + // High attempt counts stay capped — protects against integer + // overflow on 2 ** N for large N. + expect(computeReconnectDelayMs(50)).toBe(RECONNECT_BACKOFF_MAX_MS); + expect(computeReconnectDelayMs(1_000)).toBe(RECONNECT_BACKOFF_MAX_MS); + }); + }); + + it("never exceeds RECONNECT_BACKOFF_MAX_MS + 1000ms (cap + jitter ceiling)", () => { + withFixedRandom(0.999, () => { + for (let attempt = 0; attempt < 100; attempt++) { + expect(computeReconnectDelayMs(attempt)).toBeLessThan( + RECONNECT_BACKOFF_MAX_MS + 1000 + ); + } + }); + }); + + it("adds 0–1000ms of jitter on top of the base", () => { + // Compare same attempt with random=0 vs random=0.5 — the difference is + // exactly the jitter. 
+ withFixedRandom(0, () => { + expect(computeReconnectDelayMs(2)).toBe(4_000); + }); + withFixedRandom(0.5, () => { + expect(computeReconnectDelayMs(2)).toBe(4_500); + }); + withFixedRandom(0.999, () => { + const v = computeReconnectDelayMs(2); + expect(v).toBeGreaterThan(4_000); + expect(v).toBeLessThan(5_000); + }); + }); + + it("clamps negative / non-integer attempts to 0 (no NaN, no negative delay)", () => { + withFixedRandom(0, () => { + expect(computeReconnectDelayMs(-1)).toBe(1_000); + expect(computeReconnectDelayMs(-100)).toBe(1_000); + expect(computeReconnectDelayMs(0.7)).toBe(1_000); // floored to 0 + expect(computeReconnectDelayMs(2.9)).toBe(4_000); // floored to 2 + }); + }); +}); diff --git a/packages/core/src/v3/utils/reconnectBackoff.ts b/packages/core/src/v3/utils/reconnectBackoff.ts new file mode 100644 index 00000000000..51525591002 --- /dev/null +++ b/packages/core/src/v3/utils/reconnectBackoff.ts @@ -0,0 +1,25 @@ +/** + * Exponential backoff with full jitter for stream-tail reconnect loops. + * + * Shared between `SessionStreamManager` and `StandardInputStreamManager` + * — both reconnect a long-lived SSE tail when handlers/waiters are still + * registered, and both need to back off on persistent backend failures + * (auth rejection, 5xx, DNS) instead of reconnecting in a tight loop. + * + * - Base 1s, doubles per attempt (1s, 2s, 4s, 8s, 16s, 30s, 30s, ...) + * - Capped at 30s + * - Plus 0–1000ms jitter to avoid thundering herd when many clients + * share the same failure mode + * - Negative or non-integer attempts are clamped to 0 + * + * Callers track the per-key attempt count and reset to 0 on every + * successful record (any traffic flowing = healthy connection). 
+ */ +export function computeReconnectDelayMs(attempt: number): number { + const safeAttempt = Math.max(0, Math.floor(attempt)); + const base = Math.min(1000 * 2 ** safeAttempt, 30_000); + return base + Math.random() * 1000; +} + +/** Maximum backoff floor without jitter — exposed for tests / asserts. */ +export const RECONNECT_BACKOFF_MAX_MS = 30_000; diff --git a/packages/trigger-sdk/src/v3/shared.ts b/packages/trigger-sdk/src/v3/shared.ts index bccf7334380..0e6389a053c 100644 --- a/packages/trigger-sdk/src/v3/shared.ts +++ b/packages/trigger-sdk/src/v3/shared.ts @@ -2629,6 +2629,18 @@ async function triggerAndSubscribe_internal Date: Wed, 13 May 2026 10:05:12 +0100 Subject: [PATCH 7/9] fix(webapp,sdk): address PR review nits MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Four follow-up nits from the second-pass review on #3542. .server-changes/sessions-dashboard-and-task-source-filter.md — adds the missing high-level entry for the webapp surface (Sessions page + task-source filter on Runs). The two existing changesets only cover @trigger.dev/sdk and @trigger.dev/core, so the dashboard work wouldn't have shown up in a future server changelog. apps/webapp/app/routes/realtime.v1.sessions.$session.$io.records.ts — switched `const loader = ...; export { loader }` to `export const loader = ...` to match the sibling `api.v1.deployments.current.ts` and the rest of the route file convention. Functionally identical. packages/core/src/v3/sessionStreams/manager.ts + packages/core/src/v3/inputStreams/manager.ts — two clarifications: (1) added a JSDoc to `disconnect()` documenting that it intentionally leaves handlers and waiters in place, so any registered listener will trigger an auto-reconnect with backoff. Distinguishes from `reset()` (full clean state, rejects waiters) and `disconnectStream` (single key, stays down until fresh `on()`/`once()`). 
(2) `disconnectStream` now clears `reconnectAttempts` for the key — an explicit teardown is not evidence of a broken backend, and a future re-attach should start the backoff at attempt 0. --- .../sessions-dashboard-and-task-source-filter.md | 6 ++++++ .../realtime.v1.sessions.$session.$io.records.ts | 4 +--- packages/core/src/v3/inputStreams/manager.ts | 13 +++++++++++++ packages/core/src/v3/sessionStreams/manager.ts | 13 +++++++++++++ 4 files changed, 33 insertions(+), 3 deletions(-) create mode 100644 .server-changes/sessions-dashboard-and-task-source-filter.md diff --git a/.server-changes/sessions-dashboard-and-task-source-filter.md b/.server-changes/sessions-dashboard-and-task-source-filter.md new file mode 100644 index 00000000000..c3a727c4325 --- /dev/null +++ b/.server-changes/sessions-dashboard-and-task-source-filter.md @@ -0,0 +1,6 @@ +--- +area: webapp +type: feature +--- + +New Sessions page in the dashboard for inspecting `chat.agent` Session rows alongside their underlying runs, plus a "Task source" filter on the Runs list (Standard / Scheduled / Agent) so agent runs can be sliced out of mixed workloads at a glance. diff --git a/apps/webapp/app/routes/realtime.v1.sessions.$session.$io.records.ts b/apps/webapp/app/routes/realtime.v1.sessions.$session.$io.records.ts index 7a27538c5a2..92d87fca760 100644 --- a/apps/webapp/app/routes/realtime.v1.sessions.$session.$io.records.ts +++ b/apps/webapp/app/routes/realtime.v1.sessions.$session.$io.records.ts @@ -36,7 +36,7 @@ const SearchSchema = z.object({ // `session_*` friendlyIds (which must reference a real row). External-id // form falls through with `row: null` so the boot path doesn't 404 on a // fresh chat that hasn't written its first chunk yet. 
-const loader = createLoaderApiRoute( +export const loader = createLoaderApiRoute( { params: ParamsSchema, searchParams: SearchSchema, @@ -96,5 +96,3 @@ const loader = createLoaderApiRoute( return json({ records }); } ); - -export { loader }; diff --git a/packages/core/src/v3/inputStreams/manager.ts b/packages/core/src/v3/inputStreams/manager.ts index a20d69be7e5..ed36adc3507 100644 --- a/packages/core/src/v3/inputStreams/manager.ts +++ b/packages/core/src/v3/inputStreams/manager.ts @@ -192,12 +192,25 @@ export class StandardInputStreamManager implements InputStreamManager { this.tails.delete(streamId); } this.buffer.delete(streamId); + // Reset the backoff counter so a future re-attach starts fresh — + // an explicit disconnect is a deliberate teardown, not evidence of + // a broken backend. + this.reconnectAttempts.delete(streamId); } connectTail(runId: string, _fromSeq?: number): void { // No-op: tails are now created per-stream lazily } + /** + * Tear down all active tails. Does NOT clear handlers or `onceWaiters`, + * so any registered listener will trigger an auto-reconnect (with + * backoff) the moment it sees no live tail — by design, so a transient + * network blip recovers without the caller re-subscribing. Use + * `reset()` if you want a full clean state with no resurrection, or + * `disconnectStream(streamId)` for a single stream that should stay + * down until a fresh `on()` / `once()` attaches. 
+ */ disconnect(): void { for (const [, tail] of this.tails) { tail.abortController.abort(); diff --git a/packages/core/src/v3/sessionStreams/manager.ts b/packages/core/src/v3/sessionStreams/manager.ts index 9253bbf619c..563d0f17b5e 100644 --- a/packages/core/src/v3/sessionStreams/manager.ts +++ b/packages/core/src/v3/sessionStreams/manager.ts @@ -217,6 +217,10 @@ export class StandardSessionStreamManager implements SessionStreamManager { this.tails.delete(key); } this.buffer.delete(key); + // Reset the backoff counter so a future re-attach starts fresh — + // an explicit disconnect is a deliberate teardown, not evidence of + // a broken backend. + this.reconnectAttempts.delete(key); } clearHandlers(): void { @@ -231,6 +235,15 @@ export class StandardSessionStreamManager implements SessionStreamManager { } } + /** + * Tear down all active tails. Does NOT clear handlers or `onceWaiters`, + * so any registered listener will trigger an auto-reconnect (with + * backoff) the moment it sees no live tail — by design, so a transient + * network blip recovers without the caller re-subscribing. Use + * `reset()` if you want a full clean state with no resurrection, or + * `disconnectStream(sessionId, io)` for a single channel that should + * stay down until a fresh `on()` / `once()` attaches. 
+ */ disconnect(): void { for (const [, tail] of this.tails) { tail.abortController.abort(); From 85e5bb1e74cd97c7550ee56d902a74681442ca08 Mon Sep 17 00:00:00 2001 From: Eric Allam Date: Wed, 13 May 2026 11:44:40 +0100 Subject: [PATCH 8/9] fix(webapp): improve dead-run friendlyId fallback and swap-race error path --- .../realtime/sessionRunManager.server.ts | 30 +++++++++++++++---- 1 file changed, 25 insertions(+), 5 deletions(-) diff --git a/apps/webapp/app/services/realtime/sessionRunManager.server.ts b/apps/webapp/app/services/realtime/sessionRunManager.server.ts index 8a7cd4df967..95fc87e2018 100644 --- a/apps/webapp/app/services/realtime/sessionRunManager.server.ts +++ b/apps/webapp/app/services/realtime/sessionRunManager.server.ts @@ -123,10 +123,19 @@ export async function ensureRunForSession( // agent as `previousRunId` so its boot gate flips // `couldHavePriorState` and replays the persisted state instead of // treating this as a fresh chat. See `chat.agent`'s boot orchestration - // in `packages/trigger-sdk/src/v3/ai.ts`. Falls back to the cuid on - // probe miss (rare — replica miss on a row we just read) so the - // continuation flag still propagates with degraded id fidelity. - priorDeadRunFriendlyId = probe?.friendlyId ?? session.currentRunId; + // in `packages/trigger-sdk/src/v3/ai.ts`. + if (probe?.friendlyId) { + priorDeadRunFriendlyId = probe.friendlyId; + } else { + // Replica miss on a row we just observed via `currentRunId`. Retry + // on the writer so the customer's `runs.retrieve(previousRunId)` + // gets the public `run_*` form rather than the internal cuid. + const writerProbe = await prisma.taskRun.findFirst({ + where: { id: session.currentRunId }, + select: { friendlyId: true }, + }); + priorDeadRunFriendlyId = writerProbe?.friendlyId ?? session.currentRunId; + } } // 2. Validate config + trigger upfront. 
Continuation overrides @@ -457,8 +466,19 @@ export async function swapSessionRun( select: { currentRunId: true }, }); + // Mirror `ensureRunForSession`'s "session vanished" branch: if we + // can't find the row (or it has no current run) on the writer right + // after losing the race, surface as an error rather than handing back + // `callingRunId` with `swapped: false` — that would tell the caller + // it's still the canonical run when in fact we don't know who is. + if (!fresh?.currentRunId) { + throw new SessionRunManagerError( + `Session ${session.id} has no currentRunId after preempted swap` + ); + } + return { - runId: fresh?.currentRunId ?? callingRunId, + runId: fresh.currentRunId, swapped: false, }; } From 37ea3861982faae54ce0746e5cd95bbad291c7a8 Mon Sep 17 00:00:00 2001 From: Eric Allam Date: Wed, 13 May 2026 11:57:42 +0100 Subject: [PATCH 9/9] fix(core): tighten stream-manager lifecycles --- packages/core/src/v3/inputStreams/manager.ts | 73 ++++++++++++++++--- .../core/src/v3/sessionStreams/manager.ts | 37 +++++++--- .../v3/test/test-realtime-streams-manager.ts | 1 + .../v3/test/test-session-stream-manager.ts | 23 +++--- 4 files changed, 102 insertions(+), 32 deletions(-) diff --git a/packages/core/src/v3/inputStreams/manager.ts b/packages/core/src/v3/inputStreams/manager.ts index ed36adc3507..51424df39f7 100644 --- a/packages/core/src/v3/inputStreams/manager.ts +++ b/packages/core/src/v3/inputStreams/manager.ts @@ -14,6 +14,13 @@ type OnceWaiter = { resolve: (result: InputStreamOnceResult) => void; reject: (error: Error) => void; timeoutHandle?: ReturnType; + // The abort signal and its handler are tracked on the waiter so any + // resolution path (dispatch / timeout / explicit removal) can detach + // the listener. Without this, a long-lived `AbortSignal` reused across + // many `once()` calls accumulates listeners — `{ once: true }` only + // self-clears if the signal actually aborts. 
+ signal?: AbortSignal; + abortHandler?: () => void; }; @@ -36,6 +43,13 @@ export class StandardInputStreamManager implements InputStreamManager { // reconnect in a tight loop. Reset to 0 by `#dispatch` whenever a // record flows through. private reconnectAttempts = new Map(); + // Stream IDs that were explicitly torn down by `disconnectStream`. The + // tail's `.finally` reconnect path consults this set so a deliberate + // teardown isn't immediately undone by the auto-reconnect when + // handlers or once-waiters are still registered. Cleared on the next + // explicit `on()` / `once()` (those are the only legitimate reasons to + // bring the tail back up). + private explicitlyDisconnected = new Set(); constructor( private apiClient: ApiClient, @@ -75,6 +89,10 @@ export class StandardInputStreamManager implements InputStreamManager { on(streamId: string, handler: InputStreamHandler): { off: () => void } { this.#requireV2Streams(); + // A fresh attach is a legitimate reason to bring the tail back up; + // clear any prior explicit-disconnect flag. + this.explicitlyDisconnected.delete(streamId); + let handlerSet = this.handlers.get(streamId); if (!handlerSet) { handlerSet = new Set(); @@ -107,6 +125,10 @@ export class StandardInputStreamManager implements InputStreamManager { once(streamId: string, options?: InputStreamOnceOptions): InputStreamOncePromise { this.#requireV2Streams(); + // A fresh waiter is a legitimate reason to bring the tail back up; + // clear any prior explicit-disconnect flag. 
+ this.explicitlyDisconnected.delete(streamId); + // Lazily connect a tail for this stream this.#ensureStreamTailConnected(streamId); @@ -131,17 +153,16 @@ export class StandardInputStreamManager implements InputStreamManager { reject(new Error("Aborted")); return; } - options.signal.addEventListener( - "abort", - () => { - if (waiter.timeoutHandle) { - clearTimeout(waiter.timeoutHandle); - } - this.#removeOnceWaiter(streamId, waiter); - reject(new Error("Aborted")); - }, - { once: true } - ); + const abortHandler = () => { + if (waiter.timeoutHandle) { + clearTimeout(waiter.timeoutHandle); + } + this.#removeOnceWaiter(streamId, waiter); + reject(new Error("Aborted")); + }; + waiter.signal = options.signal; + waiter.abortHandler = abortHandler; + options.signal.addEventListener("abort", abortHandler, { once: true }); } // Handle timeout — resolve with error result instead of rejecting @@ -186,6 +207,14 @@ export class StandardInputStreamManager implements InputStreamManager { } disconnectStream(streamId: string): void { + // Mark as explicitly disconnected BEFORE we abort, so the tail's + // `.finally` reconnect path sees the flag when it runs (which can be + // synchronous in the AbortError catch). Without this, an in-flight + // `.on(...)` or pending `.once()` would immediately resurrect the + // tail and negate the disconnect — defeating the + // "drop-the-duplicate before .wait() suspends" contract. Cleared on + // the next explicit `on()` / `once()`. 
+ this.explicitlyDisconnected.add(streamId); const tail = this.tails.get(streamId); if (tail) { tail.abortController.abort(); @@ -225,6 +254,7 @@ export class StandardInputStreamManager implements InputStreamManager { this.seqNums.clear(); this.handlers.clear(); this.reconnectAttempts.clear(); + this.explicitlyDisconnected.clear(); // Reject all pending once waiters for (const [, waiters] of this.onceWaiters) { @@ -232,6 +262,9 @@ export class StandardInputStreamManager implements InputStreamManager { if (waiter.timeoutHandle) { clearTimeout(waiter.timeoutHandle); } + if (waiter.signal && waiter.abortHandler) { + waiter.signal.removeEventListener("abort", waiter.abortHandler); + } waiter.reject(new Error("Input stream manager reset")); } } @@ -259,6 +292,13 @@ export class StandardInputStreamManager implements InputStreamManager { .finally(() => { this.tails.delete(streamId); + // If the tail was torn down explicitly via `disconnectStream`, + // don't auto-reconnect — that's the whole point of the + // disconnect call. The next `on()` / `once()` clears the flag. + if (this.explicitlyDisconnected.has(streamId)) { + return; + } + // Auto-reconnect with exponential backoff if there are still // active handlers or waiters. 
Without backoff a persistent // failure (auth rejected, 5xx, DNS) would reconnect in a tight @@ -273,6 +313,7 @@ export class StandardInputStreamManager implements InputStreamManager { this.reconnectAttempts.set(streamId, attempt + 1); const delayMs = computeReconnectDelayMs(attempt); setTimeout(() => { + if (this.explicitlyDisconnected.has(streamId)) return; if (this.tails.has(streamId)) return; const stillHasHandlers = this.handlers.has(streamId) && this.handlers.get(streamId)!.size > 0; @@ -361,6 +402,9 @@ export class StandardInputStreamManager implements InputStreamManager { if (waiter.timeoutHandle) { clearTimeout(waiter.timeoutHandle); } + if (waiter.signal && waiter.abortHandler) { + waiter.signal.removeEventListener("abort", waiter.abortHandler); + } waiter.resolve({ ok: true, output: data }); // Also invoke persistent handlers this.#invokeHandlers(streamId, data); @@ -410,6 +454,13 @@ export class StandardInputStreamManager implements InputStreamManager { } #removeOnceWaiter(streamId: string, waiter: OnceWaiter): void { + // Centralized cleanup — both timeout and explicit abort paths funnel + // through here, so detach the abort listener once instead of at every + // callsite. The dispatch path doesn't go through this method (the + // waiter is shifted off inline), so it detaches the listener there. 
+ if (waiter.signal && waiter.abortHandler) { + waiter.signal.removeEventListener("abort", waiter.abortHandler); + } const waiters = this.onceWaiters.get(streamId); if (!waiters) return; const index = waiters.indexOf(waiter); diff --git a/packages/core/src/v3/sessionStreams/manager.ts b/packages/core/src/v3/sessionStreams/manager.ts index 563d0f17b5e..0463cb3fb71 100644 --- a/packages/core/src/v3/sessionStreams/manager.ts +++ b/packages/core/src/v3/sessionStreams/manager.ts @@ -14,6 +14,13 @@ type OnceWaiter = { resolve: (result: InputStreamOnceResult) => void; reject: (error: Error) => void; timeoutHandle?: ReturnType; + // The abort signal and its handler are tracked on the waiter so any + // resolution path (dispatch / timeout / explicit removal) can detach + // the listener. Without this, a long-lived `AbortSignal` reused across + // many `once()` calls accumulates listeners — `{ once: true }` only + // self-clears if the signal actually aborts. + signal?: AbortSignal; + abortHandler?: () => void; }; type TailState = { @@ -131,15 +138,14 @@ export class StandardSessionStreamManager implements SessionStreamManager { reject(new Error("Aborted")); return; } - options.signal.addEventListener( - "abort", - () => { - if (waiter.timeoutHandle) clearTimeout(waiter.timeoutHandle); - this.#removeOnceWaiter(key, waiter); - reject(new Error("Aborted")); - }, - { once: true } - ); + const abortHandler = () => { + if (waiter.timeoutHandle) clearTimeout(waiter.timeoutHandle); + this.#removeOnceWaiter(key, waiter); + reject(new Error("Aborted")); + }; + waiter.signal = options.signal; + waiter.abortHandler = abortHandler; + options.signal.addEventListener("abort", abortHandler, { once: true }); } if (options?.timeoutMs) { @@ -261,6 +267,9 @@ export class StandardSessionStreamManager implements SessionStreamManager { for (const [, waiters] of this.onceWaiters) { for (const waiter of waiters) { if (waiter.timeoutHandle) clearTimeout(waiter.timeoutHandle); + if (waiter.signal && 
waiter.abortHandler) { + waiter.signal.removeEventListener("abort", waiter.abortHandler); + } waiter.reject(new Error("Session stream manager reset")); } } @@ -403,6 +412,9 @@ export class StandardSessionStreamManager implements SessionStreamManager { const waiter = waiters.shift()!; if (waiters.length === 0) this.onceWaiters.delete(key); if (waiter.timeoutHandle) clearTimeout(waiter.timeoutHandle); + if (waiter.signal && waiter.abortHandler) { + waiter.signal.removeEventListener("abort", waiter.abortHandler); + } waiter.resolve({ ok: true, output: data }); this.#invokeHandlers(key, data); return; @@ -450,6 +462,13 @@ export class StandardSessionStreamManager implements SessionStreamManager { } #removeOnceWaiter(key: string, waiter: OnceWaiter): void { + // Centralized cleanup — both timeout and explicit abort paths funnel + // through here, so detach the abort listener once instead of at every + // callsite. The dispatch path doesn't go through this method (the + // waiter is shifted off inline), so it detaches the listener there. 
+ if (waiter.signal && waiter.abortHandler) { + waiter.signal.removeEventListener("abort", waiter.abortHandler); + } const waiters = this.onceWaiters.get(key); if (!waiters) return; const index = waiters.indexOf(waiter); diff --git a/packages/core/src/v3/test/test-realtime-streams-manager.ts b/packages/core/src/v3/test/test-realtime-streams-manager.ts index b53a2630d9c..8f4142b28e7 100644 --- a/packages/core/src/v3/test/test-realtime-streams-manager.ts +++ b/packages/core/src/v3/test/test-realtime-streams-manager.ts @@ -158,6 +158,7 @@ export class TestRealtimeStreamsManager implements RealtimeStreamsManager { reset(): void { this.buffers.clear(); this.pipeWaits.clear(); + this.writeListeners.clear(); } private getBuffer(key: string): unknown[] { diff --git a/packages/core/src/v3/test/test-session-stream-manager.ts b/packages/core/src/v3/test/test-session-stream-manager.ts index 1bd7499a0a7..493093d686f 100644 --- a/packages/core/src/v3/test/test-session-stream-manager.ts +++ b/packages/core/src/v3/test/test-session-stream-manager.ts @@ -221,8 +221,18 @@ export class TestSessionStreamManager implements SessionStreamManager { const handlers = this.handlers.get(key); if (handlers && handlers.size > 0) { + // Awaited so test code can rely on handlers having completed by the + // time `__sendFromTest` resolves. Wrapped per-handler so a + // throwing/rejecting handler doesn't poison Promise.all and break + // unrelated test state. 
await Promise.all( - Array.from(handlers).map((h) => Promise.resolve().then(() => h(data))) + Array.from(handlers).map(async (h) => { + try { + await h(data); + } catch { + // Never let a handler error break test state + } + }) ); } @@ -267,17 +277,6 @@ export class TestSessionStreamManager implements SessionStreamManager { } } - private invoke(handler: Handler, data: unknown): void { - try { - const result = handler(data); - if (result && typeof result === "object" && "catch" in result) { - (result as Promise).catch(() => {}); - } - } catch { - // Never let a handler error break test state - } - } - private removeWaiter(key: string, waiter: OnceWaiter): void { const waiters = this.onceWaiters.get(key); if (!waiters) return;