From e7031d8d4687821fadd74cd31eabac9bbf7db195 Mon Sep 17 00:00:00 2001
From: Seth Raphael <seth@convex.dev>
Date: Sat, 9 May 2026 23:11:56 -0600
Subject: [PATCH] polish hasSuccessfulToolCall and willContinue
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Follow-ups to #241 from the post-merge review.

- hasSuccessfulToolCall now reads from `step.content` and matches a
  `tool-result` part by toolName, mirroring the pattern in willContinue.
  Previously it read `step.toolResults` — which already excludes errors
  by type, so behavior is unchanged in practice, but reading from the
  same source as willContinue removes the asymmetry the reviewer flagged
  and is more obviously correct on inspection.
- Expand JSDoc on hasSuccessfulToolCall: explicitly note that tool-error
  parts do not match, and that the helper is evaluated only against the
  last step (consistent with how stopWhen is applied after each step).
- Clarify the comment on the willContinue fallback: under AI SDK v6
  step.content is always defined, so the optional chain and the
  `?? step.toolResults.length` fallback are defensive (for non-v6 /
  mock callers), not load-bearing for the v6 fix.
- Add src/client/utils.test.ts with unit tests covering:
  * hasSuccessfulToolCall returns true on tool-result, false on
    tool-error, false on a different toolName, false on empty steps,
    and only inspects the last step.
  * willContinue does not stop when a tool-error fills in for a missing
    tool-result, stops when an output is genuinely missing, and stops
    when finishReason isn't tool-calls.
- Add CHANGELOG entry under Unreleased noting the new public helper and
  the willContinue fix.
---
 CHANGELOG.md             | 23 ++++++++++
 src/client/utils.test.ts | 97 ++++++++++++++++++++++++++++++++++++++++
 src/client/utils.ts      | 19 +++++---
 3 files changed, 132 insertions(+), 7 deletions(-)
 create mode 100644 src/client/utils.test.ts
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 7260dac8..016b11b8 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,28 @@
 # Changelog
 
+## Unreleased
+
+- Add `hasSuccessfulToolCall(toolName)` stop-condition helper. Like the AI
+  SDK's `hasToolCall` but only matches tool calls that produced a
+  `tool-result` content part — failed tool calls (`tool-error` parts under
+  AI SDK v6) do not match. Use when you want the agent to retry on
+  argument-validation or runtime tool failures rather than stopping.
+
+  ```ts
+  import { hasSuccessfulToolCall, stepCountIs } from "@convex-dev/agent";
+
+  await agent.streamText(ctx, { threadId }, {
+    prompt: "...",
+    stopWhen: [hasSuccessfulToolCall("generateImage"), stepCountIs(5)],
+  });
+  ```
+
+- Fix: `willContinue` (the internal helper that decides whether to keep
+  looping after a step that has tool calls) now counts `tool-error`
+  content parts as completed outputs. Without this, AI SDK v6 agents
+  would stop after a step where any tool call errored, even if the model
+  had more work queued.
+
 ## 0.6.1
 
 - Fix bundled package
diff --git a/src/client/utils.test.ts b/src/client/utils.test.ts
new file mode 100644
index 00000000..7f19ab97
--- /dev/null
+++ b/src/client/utils.test.ts
@@ -0,0 +1,97 @@
+import type { StepResult } from "ai";
+import { describe, expect, test } from "vitest";
+import { hasSuccessfulToolCall, willContinue } from "./utils.js";
+
+// Minimal StepResult builder — only the fields willContinue and
+// hasSuccessfulToolCall actually read. Loosely typed on purpose so test
+// fixtures can be terse; cast at the boundary.
+type StepFixture = {
+  finishReason?: string;
+  content?: Array<{ type: string; toolName?: string }>;
+  toolCalls?: Array<{ toolCallId: string; toolName: string }>;
+  toolResults?: Array<{ toolCallId: string; toolName: string }>;
+};
+
+function makeStep(partial: StepFixture): StepResult<any> {
+  return {
+    finishReason: "tool-calls",
+    content: [],
+    toolCalls: [],
+    toolResults: [],
+    ...partial,
+  } as unknown as StepResult<any>;
+}
+
+describe("hasSuccessfulToolCall", () => {
+  test("returns true when last step has a tool-result for the named tool", () => {
+    const step = makeStep({
+      content: [{ type: "tool-result", toolName: "search" }],
+    });
+    expect(hasSuccessfulToolCall("search")({ steps: [step] })).toBe(true);
+  });
+
+  test("returns false when only a tool-error is present for the named tool", () => {
+    const step = makeStep({
+      content: [{ type: "tool-error", toolName: "search" }],
+    });
+    expect(hasSuccessfulToolCall("search")({ steps: [step] })).toBe(false);
+  });
+
+  test("returns false when the matching tool name is missing", () => {
+    const step = makeStep({
+      content: [{ type: "tool-result", toolName: "other" }],
+    });
+    expect(hasSuccessfulToolCall("search")({ steps: [step] })).toBe(false);
+  });
+
+  test("only inspects the last step", () => {
+    const earlier = makeStep({
+      content: [{ type: "tool-result", toolName: "search" }],
+    });
+    const last = makeStep({
+      content: [{ type: "tool-error", toolName: "search" }],
+    });
+    expect(hasSuccessfulToolCall("search")({ steps: [earlier, last] })).toBe(
+      false,
+    );
+  });
+
+  test("returns false when steps is empty", () => {
+    expect(hasSuccessfulToolCall("search")({ steps: [] })).toBe(false);
+  });
+});
+
+describe("willContinue", () => {
+  test("does not stop when a tool-error fills in for a missing tool-result", async () => {
+    // Two tool calls; one returns a result, the other errors.
+    const step = makeStep({
+      toolCalls: [
+        { toolCallId: "1", toolName: "a" },
+        { toolCallId: "2", toolName: "b" },
+      ],
+      toolResults: [{ toolCallId: "1", toolName: "a" }],
+      content: [
+        { type: "tool-result", toolName: "a" },
+        { type: "tool-error", toolName: "b" },
+      ],
+    });
+    // No stopWhen → returns false (no further stop conditions). The point
+    // is the function progresses past the early `toolCalls > completed`
+    // bail; pre-fix it returned early because tool-error wasn't counted.
+    expect(await willContinue([step], undefined)).toBe(false);
+  });
+
+  test("stops when a tool call has neither a result nor an error yet", async () => {
+    const step = makeStep({
+      toolCalls: [{ toolCallId: "1", toolName: "a" }],
+      toolResults: [],
+      content: [],
+    });
+    expect(await willContinue([step], undefined)).toBe(false);
+  });
+
+  test("stops when finishReason is not tool-calls", async () => {
+    const step = makeStep({ finishReason: "stop" });
+    expect(await willContinue([step], undefined)).toBe(false);
+  });
+});
diff --git a/src/client/utils.ts b/src/client/utils.ts
index d5c3b454..605f7aa6 100644
--- a/src/client/utils.ts
+++ b/src/client/utils.ts
@@ -1,16 +1,20 @@
 import type { StepResult, StopCondition } from "ai";
 
 /**
- * A stop condition that only matches tool calls which completed
- * successfully (i.e. produced a `tool-result`, not a `tool-error`).
+ * A stop condition that only matches tool calls of the given name which
+ * completed successfully — i.e. produced a `tool-result` content part.
+ * Failed tool calls (which surface as `tool-error` parts under AI SDK v6)
+ * do not match.
  *
- * Use this instead of the AI SDK's `hasToolCall` when you want the
- * agent to retry on argument validation failures rather than stopping.
+ * Use this instead of the AI SDK's `hasToolCall` when you want the agent
+ * to retry on argument-validation or runtime tool failures rather than
+ * stopping. Evaluated only against the last step (consistent with how
+ * `stopWhen` is applied after each step).
  */
 export function hasSuccessfulToolCall(toolName: string): StopCondition<any> {
   return ({ steps }) =>
-    steps[steps.length - 1]?.toolResults?.some(
-      (result) => result.toolName === toolName,
+    steps[steps.length - 1]?.content?.some(
+      (p) => p.type === "tool-result" && p.toolName === toolName,
     ) ?? false;
 }
 
@@ -26,11 +30,12 @@ export async function willContinue(
   // Count both successful results and errors as completed outputs.
   // In AI SDK v6, failed tool calls produce tool-error content parts
   // instead of tool-result, so only checking toolResults misses them.
+  // The fallback to step.toolResults.length is for non-v6 / mock callers
+  // where step.content may be missing; the optional chain is defensive.
   const completedOutputs =
     step.content?.filter(
       (p) => p.type === "tool-result" || p.type === "tool-error",
     ).length ?? step.toolResults.length;
-  // we don't have a tool result, so we'll wait for more
   if (step.toolCalls.length > completedOutputs) return false;
   if (Array.isArray(stopWhen)) {
     return (await Promise.all(stopWhen.map(async (s) => s({ steps })))).every(