diff --git a/CHANGELOG.md b/CHANGELOG.md
index 7260dac8..016b11b8 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,28 @@
 # Changelog
 
+## Unreleased
+
+- Add `hasSuccessfulToolCall(toolName)` stop-condition helper. Like the AI
+  SDK's `hasToolCall` but only matches tool calls that produced a
+  `tool-result` content part — failed tool calls (`tool-error` parts under
+  AI SDK v6) do not match. Use when you want the agent to retry on
+  argument-validation or runtime tool failures rather than stopping.
+
+  ```ts
+  import { hasSuccessfulToolCall, stepCountIs } from "@convex-dev/agent";
+
+  await agent.streamText(ctx, { threadId }, {
+    prompt: "...",
+    stopWhen: [hasSuccessfulToolCall("generateImage"), stepCountIs(5)],
+  });
+  ```
+
+- Fix: `willContinue` (the internal helper that decides whether to keep
+  looping after a step that has tool calls) now counts `tool-error`
+  content parts as completed outputs. Without this, AI SDK v6 agents
+  would stop after a step where any tool call errored, even if the model
+  had more work queued.
+
 ## 0.6.1
 
 - Fix bundled package
diff --git a/src/client/utils.test.ts b/src/client/utils.test.ts
new file mode 100644
index 00000000..7f19ab97
--- /dev/null
+++ b/src/client/utils.test.ts
@@ -0,0 +1,97 @@
+import type { StepResult } from "ai";
+import { describe, expect, test } from "vitest";
+import { hasSuccessfulToolCall, willContinue } from "./utils.js";
+
+// Minimal StepResult builder — only the fields willContinue and
+// hasSuccessfulToolCall actually read. Loosely typed on purpose so test
+// fixtures can be terse; cast at the boundary.
+type StepFixture = {
+  finishReason?: string;
+  content?: Array<{ type: string; toolName?: string }>;
+  toolCalls?: Array<{ toolCallId: string; toolName: string }>;
+  toolResults?: Array<{ toolCallId: string; toolName: string }>;
+};
+
+function makeStep(partial: StepFixture): StepResult {
+  return {
+    finishReason: "tool-calls",
+    content: [],
+    toolCalls: [],
+    toolResults: [],
+    ...partial,
+  } as unknown as StepResult;
+}
+
+describe("hasSuccessfulToolCall", () => {
+  test("returns true when last step has a tool-result for the named tool", () => {
+    const step = makeStep({
+      content: [{ type: "tool-result", toolName: "search" }],
+    });
+    expect(hasSuccessfulToolCall("search")({ steps: [step] })).toBe(true);
+  });
+
+  test("returns false when only a tool-error is present for the named tool", () => {
+    const step = makeStep({
+      content: [{ type: "tool-error", toolName: "search" }],
+    });
+    expect(hasSuccessfulToolCall("search")({ steps: [step] })).toBe(false);
+  });
+
+  test("returns false when the matching tool name is missing", () => {
+    const step = makeStep({
+      content: [{ type: "tool-result", toolName: "other" }],
+    });
+    expect(hasSuccessfulToolCall("search")({ steps: [step] })).toBe(false);
+  });
+
+  test("only inspects the last step", () => {
+    const earlier = makeStep({
+      content: [{ type: "tool-result", toolName: "search" }],
+    });
+    const last = makeStep({
+      content: [{ type: "tool-error", toolName: "search" }],
+    });
+    expect(hasSuccessfulToolCall("search")({ steps: [earlier, last] })).toBe(
+      false,
+    );
+  });
+
+  test("returns false when steps is empty", () => {
+    expect(hasSuccessfulToolCall("search")({ steps: [] })).toBe(false);
+  });
+});
+
+describe("willContinue", () => {
+  test("does not stop when a tool-error fills in for a missing tool-result", async () => {
+    // Two tool calls; one returns a result, the other errors.
+    const step = makeStep({
+      toolCalls: [
+        { toolCallId: "1", toolName: "a" },
+        { toolCallId: "2", toolName: "b" },
+      ],
+      toolResults: [{ toolCallId: "1", toolName: "a" }],
+      content: [
+        { type: "tool-result", toolName: "a" },
+        { type: "tool-error", toolName: "b" },
+      ],
+    });
+    // No stopWhen → returns false (no further stop conditions). The point
+    // is the function progresses past the early `toolCalls > completed`
+    // bail; pre-fix it returned early because tool-error wasn't counted.
+    expect(await willContinue([step], undefined)).toBe(false);
+  });
+
+  test("stops when a tool call has neither a result nor an error yet", async () => {
+    const step = makeStep({
+      toolCalls: [{ toolCallId: "1", toolName: "a" }],
+      toolResults: [],
+      content: [],
+    });
+    expect(await willContinue([step], undefined)).toBe(false);
+  });
+
+  test("stops when finishReason is not tool-calls", async () => {
+    const step = makeStep({ finishReason: "stop" });
+    expect(await willContinue([step], undefined)).toBe(false);
+  });
+});
diff --git a/src/client/utils.ts b/src/client/utils.ts
index d5c3b454..605f7aa6 100644
--- a/src/client/utils.ts
+++ b/src/client/utils.ts
@@ -1,16 +1,20 @@
 import type { StepResult, StopCondition } from "ai";
 
 /**
- * A stop condition that only matches tool calls which completed
- * successfully (i.e. produced a `tool-result`, not a `tool-error`).
+ * A stop condition that only matches tool calls of the given name which
+ * completed successfully — i.e. produced a `tool-result` content part.
+ * Failed tool calls (which surface as `tool-error` parts under AI SDK v6)
+ * do not match.
  *
- * Use this instead of the AI SDK's `hasToolCall` when you want the
- * agent to retry on argument validation failures rather than stopping.
+ * Use this instead of the AI SDK's `hasToolCall` when you want the agent
+ * to retry on argument-validation or runtime tool failures rather than
+ * stopping. Evaluated only against the last step (consistent with how
+ * `stopWhen` is applied after each step).
  */
 export function hasSuccessfulToolCall(toolName: string): StopCondition {
   return ({ steps }) =>
-    steps[steps.length - 1]?.toolResults?.some(
-      (result) => result.toolName === toolName,
+    steps[steps.length - 1]?.content?.some(
+      (p) => p.type === "tool-result" && p.toolName === toolName,
     ) ?? false;
 }
 
@@ -26,11 +30,12 @@ export async function willContinue(
   // Count both successful results and errors as completed outputs.
   // In AI SDK v6, failed tool calls produce tool-error content parts
   // instead of tool-result, so only checking toolResults misses them.
+  // The fallback to step.toolResults.length is for non-v6 / mock callers
+  // where step.content may be missing; the optional chain is defensive.
   const completedOutputs =
     step.content?.filter(
       (p) => p.type === "tool-result" || p.type === "tool-error",
     ).length ?? step.toolResults.length;
-  // we don't have a tool result, so we'll wait for more
   if (step.toolCalls.length > completedOutputs) return false;
   if (Array.isArray(stopWhen)) {
     return (await Promise.all(stopWhen.map(async (s) => s({ steps })))).every(