From a200c1d095a71cbf5a338c0066d576b55f167df4 Mon Sep 17 00:00:00 2001 From: Tom Beckenham <34339192+tombeckenham@users.noreply.github.com> Date: Thu, 21 May 2026 17:56:04 +1000 Subject: [PATCH 01/21] feat(ai): route structured output through native combined mode when supported (closes #605) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When an adapter declares `supportsCombinedToolsAndSchema()`, the engine wires `outputSchema` into the regular `chatStream` call and harvests the schema-constrained JSON from the agent loop's final-turn text — skipping the separate finalization round-trip introduced in #600 (which remains the fallback for adapters that can't combine tools + schema in one call). Opted in: modern OpenAI Chat Completions, OpenAI Responses, Claude 4.5+. Opted out explicitly: Groq (API-rejected), Grok (pending per-model gate). Unchanged (legacy path): Anthropic 4.4-, Gemini, Ollama, OpenRouter. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- .../native-combined-tools-and-schema.md | 27 ++ docs/advanced/middleware.md | 6 +- docs/structured-outputs/overview.md | 12 + .../ai-anthropic/src/adapters/text.ts | 31 ++ .../typescript/ai-anthropic/src/model-meta.ts | 18 ++ .../src/text/text-provider-options.ts | 14 + .../tests/anthropic-adapter.test.ts | 71 ++++ .../typescript/ai-grok/src/adapters/text.ts | 11 + .../ai-grok/tests/grok-adapter.test.ts | 10 + .../typescript/ai-groq/src/adapters/text.ts | 11 + .../ai-groq/tests/groq-adapter.test.ts | 8 + .../ai/src/activities/chat/adapter.ts | 23 ++ .../ai/src/activities/chat/index.ts | 295 ++++++++++++++++- packages/typescript/ai/src/types.ts | 24 +- ...-native-combined-structured-output.test.ts | 303 ++++++++++++++++++ packages/typescript/ai/tests/test-utils.ts | 9 + .../src/adapters/chat-completions-text.ts | 40 +++ .../src/adapters/responses-text.ts | 37 +++ .../tests/chat-completions-text.test.ts | 84 +++++ 
.../openai-base/tests/responses-text.test.ts | 99 ++++++ testing/e2e/src/routes/api.middleware-test.ts | 13 +- testing/e2e/src/routes/middleware-test.tsx | 13 +- .../structured-output-middleware.spec.ts | 60 +++- 23 files changed, 1197 insertions(+), 22 deletions(-) create mode 100644 .changeset/native-combined-tools-and-schema.md create mode 100644 packages/typescript/ai/tests/chat-native-combined-structured-output.test.ts diff --git a/.changeset/native-combined-tools-and-schema.md b/.changeset/native-combined-tools-and-schema.md new file mode 100644 index 000000000..f031997e2 --- /dev/null +++ b/.changeset/native-combined-tools-and-schema.md @@ -0,0 +1,27 @@ +--- +'@tanstack/ai': minor +'@tanstack/openai-base': minor +'@tanstack/ai-anthropic': minor +'@tanstack/ai-groq': patch +'@tanstack/ai-grok': patch +--- + +Route `chat({ outputSchema, tools })` through the provider's native single-pass call where supported (modern OpenAI Chat Completions + Responses, Claude 4.5+). Closes #605. + +Historically, `chat({ outputSchema, tools })` ran the agent loop with `tools` and then issued a separate finalization call against the structured-output adapter for the typed answer — because most providers couldn't combine `tools` with a schema-constrained response in one call. That has changed for most modern providers, making the second round-trip pure overhead. + +**New per-adapter capability:** `TextAdapter.supportsCombinedToolsAndSchema?(modelOptions?)`. Adapters that opt in receive a JSON Schema on `TextOptions.outputSchema` in `chatStream` and wire it into the upstream request alongside `tools`. The engine harvests the final-turn JSON from the agent loop's accumulated text — no separate finalization call, no `'structuredOutput'` middleware phase. + +**Per-adapter status:** + +- **OpenAI (Chat Completions + Responses):** opted in. `response_format: json_schema` / `text.format: json_schema` is attached when `outputSchema` is set. 
+- **Anthropic:** opted in for Claude 4.5+ (Opus 4.5 / 4.6 / 4.6-fast / 4.7 / 4.7-fast, Sonnet 4.5 / 4.6, Haiku 4.5). Wires `output_format: { type: 'json_schema', schema }` on the beta Messages request. Pre-4.5 Claude models keep the forced-tool finalization workaround. +- **Groq:** explicitly opts out — the Groq API rejects `response_format` + `tools` + `stream` with HTTP 400 ("Streaming and tool use are not currently supported with Structured Outputs"). +- **Grok (xAI):** opts out pending per-model gating (Grok 4 supports the combination; Grok 2/3 reject it) — follow-up. +- **OpenRouter, Gemini, Ollama:** unchanged; still take the finalization path. + +**Backward compatibility:** + +- `'structuredOutput'` middleware phase still fires for fallback-path adapters. It does NOT fire for adapters that handle the combination natively — middleware sees the run through `'beforeModel'` / `'modelStream'` as usual. +- `onStructuredOutputConfig` keeps its existing surface but only fires on the fallback path. +- No call-site changes required. diff --git a/docs/advanced/middleware.md b/docs/advanced/middleware.md index d76e95c8f..22b627c93 100644 --- a/docs/advanced/middleware.md +++ b/docs/advanced/middleware.md @@ -102,7 +102,7 @@ The context's `phase` field tracks where you are in the lifecycle: | `modelStream` | While adapter streams chunks | `onChunk`, `onUsage` | | `beforeTools` | Before tool execution | `onBeforeToolCall` | | `afterTools` | After tool execution | `onAfterToolCall` | -| `structuredOutput` | During the final structured-output adapter call (when `outputSchema` is set). Chunks from `adapter.structuredOutputStream` (or the synthesized non-streaming fallback) flow through `onChunk` with this phase, and `onUsage` fires for the final call's tokens. 
| `onStructuredOutputConfig`, `onConfig`, `onChunk`, `onUsage` | +| `structuredOutput` | During the final structured-output adapter call (when `outputSchema` is set **and** the adapter's `supportsCombinedToolsAndSchema()` is missing or returns `false`). Chunks from `adapter.structuredOutputStream` (or the synthesized non-streaming fallback) flow through `onChunk` with this phase, and `onUsage` fires for the final call's tokens. **Does not fire** for adapters that natively combine tools + schema in one streaming call (modern OpenAI Chat Completions, OpenAI Responses, Claude 4.5+ — see issue #605); on that path middleware observes the run through `beforeModel` / `modelStream` as usual. | `onStructuredOutputConfig`, `onConfig`, `onChunk`, `onUsage` | ## Hooks Reference @@ -153,7 +153,9 @@ When multiple middleware define `onConfig`, the config is **piped** through them ### onStructuredOutputConfig -Called once at the start of the final structured-output adapter call — only when `chat()` was invoked with `outputSchema`. Pipes through middleware in order, like `onConfig`, but with access to the **JSON Schema** being sent to the provider. Use this hook when you need to transform the schema (e.g., inject `$defs`, strip vendor-incompatible keywords) or apply structured-output-specific behavior (e.g., suppress system prompts on the final call). +Called once at the start of the final structured-output adapter call — only when `chat()` was invoked with `outputSchema` **and** the adapter takes the legacy finalization path (i.e. `supportsCombinedToolsAndSchema()` is missing or returns `false`). Pipes through middleware in order, like `onConfig`, but with access to the **JSON Schema** being sent to the provider. Use this hook when you need to transform the schema (e.g., inject `$defs`, strip vendor-incompatible keywords) or apply structured-output-specific behavior (e.g., suppress system prompts on the final call). 
+ +> Native-combined adapters (modern OpenAI, Claude 4.5+ — see issue #605) skip the separate finalization call and never invoke this hook. If you need to mutate the schema for a native-combined adapter, do it in `onConfig` (the schema is on `config.modelOptions` / the request — adapter-specific). Return a **partial** `StructuredOutputMiddlewareConfig` with only the fields you want to change — they are shallow-merged with the current config. Return `void` to pass through. diff --git a/docs/structured-outputs/overview.md b/docs/structured-outputs/overview.md index 1f17d992a..5d3a76a66 100644 --- a/docs/structured-outputs/overview.md +++ b/docs/structured-outputs/overview.md @@ -82,6 +82,18 @@ provider call in addition to the agent loop. Chunks from the structured-output adapter are attributed to `ctx.phase === 'structuredOutput'`; `onFinish` fires exactly once at the end of the entire run. +> **Path-dependent:** Adapters that natively combine `tools` + a schema- +> constrained final answer in one streaming call (modern OpenAI Chat +> Completions, OpenAI Responses, Claude 4.5+) do **not** issue a separate +> finalization round-trip. The engine wires `outputSchema` into the regular +> `chatStream` request and harvests the structured result from the agent +> loop's final-turn text. On this path the `'structuredOutput'` middleware +> phase does **not** fire — middleware sees the run through `'beforeModel'` +> / `'modelStream'` as usual, and `onStructuredOutputConfig` is not invoked. +> Adapters without native combined-mode support (pre-4.5 Claude, Groq, +> Grok, Gemini, Ollama, OpenRouter) keep the legacy finalization path and the +> `'structuredOutput'` phase fires as before. 
+ ### Observing structured-output chunks ```ts diff --git a/packages/typescript/ai-anthropic/src/adapters/text.ts b/packages/typescript/ai-anthropic/src/adapters/text.ts index 2feafc32b..a5cdd4a6e 100644 --- a/packages/typescript/ai-anthropic/src/adapters/text.ts +++ b/packages/typescript/ai-anthropic/src/adapters/text.ts @@ -7,6 +7,7 @@ import { generateId, getAnthropicApiKeyFromEnv, } from '../utils' +import { ANTHROPIC_COMBINED_TOOLS_AND_SCHEMA_MODELS } from '../model-meta' import type { ANTHROPIC_MODELS, AnthropicChatModelProviderOptionsByName, @@ -370,6 +371,25 @@ export class AnthropicTextAdapter< }), ) })() + // Native combined mode (issue #605): when the engine threads + // `outputSchema` through TextOptions, the adapter declared + // `supportsCombinedToolsAndSchema` (Claude 4.5+ only). The schema is + // already JSON Schema (pre-converted at the activity boundary). Wire + // it into the beta Messages `output_format` field alongside any + // `tools` โ€” the model emits tool calls during the agent loop and + // a single schema-constrained JSON message on its natural final turn. + const combinedSchema = options.outputSchema as + | Record + | undefined + const outputFormat = combinedSchema + ? { + output_format: { + type: 'json_schema' as const, + schema: combinedSchema, + }, + } + : undefined + // `InternalTextProviderOptions` declares `temperature`, `top_p`, // and `tools` as `T?: ...` (no `| undefined`), so spread them // conditionally rather than passing explicit `undefined` from the @@ -386,11 +406,22 @@ export class AnthropicTextAdapter< ...(systemBlocks !== undefined && { system: systemBlocks }), ...(tools !== undefined && { tools }), ...validProviderOptions, + ...(outputFormat ?? {}), } validateTextProviderOptions(requestParams) return requestParams } + /** + * Anthropic supports `output_format` + `tools` in a single streaming + * Messages request only for Claude 4.5+ (GA 2026-01-29). 
For 4.4 and + * earlier we keep the forced-tool-use workaround in + * {@link structuredOutput} via the engine's finalization path. + */ + supportsCombinedToolsAndSchema(): boolean { + return ANTHROPIC_COMBINED_TOOLS_AND_SCHEMA_MODELS.has(this.model) + } + private convertContentPartToAnthropic( part: ContentPart, ): TextBlockParam | ImageBlockParam | DocumentBlockParam { diff --git a/packages/typescript/ai-anthropic/src/model-meta.ts b/packages/typescript/ai-anthropic/src/model-meta.ts index 604c72b24..44b8be9ab 100644 --- a/packages/typescript/ai-anthropic/src/model-meta.ts +++ b/packages/typescript/ai-anthropic/src/model-meta.ts @@ -652,6 +652,24 @@ export const ANTHROPIC_MODELS = [ CLAUDE_OPUS_4_7_FAST.id, ] as const +/** + * Anthropic models that support combining `tools` + JSON-Schema-constrained + * output in a single streaming Messages request (per issue #605). GA'd + * 2026-01-29 for Claude 4.5+ via `output_format` on the beta messages + * endpoint. Older Claude models still need the forced-tool-use workaround + * in `structuredOutput`. 
+ */ +export const ANTHROPIC_COMBINED_TOOLS_AND_SCHEMA_MODELS = new Set([ + CLAUDE_OPUS_4_5.id, + CLAUDE_OPUS_4_6.id, + CLAUDE_OPUS_4_6_FAST.id, + CLAUDE_OPUS_4_7.id, + CLAUDE_OPUS_4_7_FAST.id, + CLAUDE_SONNET_4_5.id, + CLAUDE_SONNET_4_6.id, + CLAUDE_HAIKU_4_5.id, +]) + // const ANTHROPIC_IMAGE_MODELS = [] as const // const ANTHROPIC_EMBEDDING_MODELS = [] as const // const ANTHROPIC_AUDIO_MODELS = [] as const diff --git a/packages/typescript/ai-anthropic/src/text/text-provider-options.ts b/packages/typescript/ai-anthropic/src/text/text-provider-options.ts index ed4f05746..aba8ca044 100644 --- a/packages/typescript/ai-anthropic/src/text/text-provider-options.ts +++ b/packages/typescript/ai-anthropic/src/text/text-provider-options.ts @@ -220,6 +220,20 @@ export interface InternalTextProviderOptions extends ExternalTextProviderOptions tools?: Array + /** + * Schema-constrained final answer in a single Messages request (issue + * #605). Set by the engine when the adapter declared + * `supportsCombinedToolsAndSchema` and a caller passed `outputSchema` + * to `chat()`. The model emits tool calls during the agent loop and a + * schema-matching JSON message on the natural final turn โ€” no separate + * finalization round-trip needed. Wire-format mirrors + * `BetaJSONOutputFormat` from `@anthropic-ai/sdk/resources/beta/messages`. + */ + output_format?: { + type: 'json_schema' + schema: Record + } + /** * Use nucleus sampling. 
diff --git a/packages/typescript/ai-anthropic/tests/anthropic-adapter.test.ts b/packages/typescript/ai-anthropic/tests/anthropic-adapter.test.ts index f36201f00..af23355ce 100644 --- a/packages/typescript/ai-anthropic/tests/anthropic-adapter.test.ts +++ b/packages/typescript/ai-anthropic/tests/anthropic-adapter.test.ts @@ -369,6 +369,77 @@ describe('Anthropic adapter option mapping', () => { }) }) + it('native combined mode (#605): wires outputSchema into output_format alongside tools on Claude 4.5+', async () => { + // Final-turn JSON the model emits when output_format is in play. + const finalJson = JSON.stringify({ city: 'Berlin', temp: 18 }) + + mocks.betaMessagesCreate.mockResolvedValueOnce( + (async function* () { + yield { + type: 'content_block_start', + index: 0, + content_block: { type: 'text', text: '' }, + } + yield { + type: 'content_block_delta', + index: 0, + delta: { type: 'text_delta', text: finalJson }, + } + yield { type: 'content_block_stop', index: 0 } + yield { + type: 'message_delta', + delta: { stop_reason: 'end_turn' }, + usage: { output_tokens: 8 }, + } + yield { type: 'message_stop' } + })(), + ) + + const adapter = new AnthropicTextAdapter( + { apiKey: 'test-key' }, + 'claude-sonnet-4-5', + ) + expect(adapter.supportsCombinedToolsAndSchema()).toBe(true) + + const ForecastSchema = z.object({ + city: z.string(), + temp: z.number(), + }) + + const result = await chat({ + adapter, + messages: [{ role: 'user', content: 'forecast for Berlin' }], + tools: [weatherTool], + outputSchema: ForecastSchema, + }) + + expect(result).toEqual({ city: 'Berlin', temp: 18 }) + + expect(mocks.betaMessagesCreate).toHaveBeenCalledTimes(1) + const [payload] = mocks.betaMessagesCreate.mock.calls[0]! 
+ expect(payload).toMatchObject({ + model: 'claude-sonnet-4-5', + output_format: { + type: 'json_schema', + schema: expect.objectContaining({ type: 'object' }), + }, + }) + expect(payload.tools?.[0]).toMatchObject({ + name: 'lookup_weather', + }) + // No second beta.messages.create โ€” the engine harvested from the agent + // loop and did NOT issue a separate finalization call. + expect(mocks.messagesCreate).not.toHaveBeenCalled() + }) + + it('native combined mode (#605): pre-4.5 models keep the forced-tool finalization path', async () => { + const adapter = new AnthropicTextAdapter( + { apiKey: 'test-key' }, + 'claude-3-7-sonnet', + ) + expect(adapter.supportsCombinedToolsAndSchema()).toBe(false) + }) + it('merges consecutive user messages when tool results precede a follow-up user message', async () => { // This is the core multi-turn bug: after a tool call + result, the next user message // creates consecutive role:'user' messages (tool_result as user + new user message). diff --git a/packages/typescript/ai-grok/src/adapters/text.ts b/packages/typescript/ai-grok/src/adapters/text.ts index 12498c690..768f05384 100644 --- a/packages/typescript/ai-grok/src/adapters/text.ts +++ b/packages/typescript/ai-grok/src/adapters/text.ts @@ -79,6 +79,17 @@ export class GrokTextAdapter< } return undefined } + + /** + * Grok's combined tools + schema support is gated to the Grok 4 family + * per xAI's structured-output docs; Grok 2/3 reject the combination. + * Pinning to `false` here preserves the legacy finalization path for + * every Grok model until per-model gating lands as a follow-up โ€” see + * issue #605. 
+ */ + override supportsCombinedToolsAndSchema(): boolean { + return false + } } /** diff --git a/packages/typescript/ai-grok/tests/grok-adapter.test.ts b/packages/typescript/ai-grok/tests/grok-adapter.test.ts index 9ff697753..bb5b9bf4d 100644 --- a/packages/typescript/ai-grok/tests/grok-adapter.test.ts +++ b/packages/typescript/ai-grok/tests/grok-adapter.test.ts @@ -106,6 +106,16 @@ describe('Grok adapters', () => { expect(adapter).toBeDefined() }) + + it('opts out of native combined tools+schema mode pending per-model gating (#605)', () => { + // The OpenAI Chat Completions base defaults to `true`. The Grok + // override forces `false` for every model until per-family gating + // (Grok 4.x yes, Grok 2/3 no) lands as a follow-up. + const grok3 = createGrokText('grok-3', 'test-api-key') + const grok4 = createGrokText('grok-4', 'test-api-key') + expect(grok3.supportsCombinedToolsAndSchema()).toBe(false) + expect(grok4.supportsCombinedToolsAndSchema()).toBe(false) + }) }) describe('Image adapter', () => { diff --git a/packages/typescript/ai-groq/src/adapters/text.ts b/packages/typescript/ai-groq/src/adapters/text.ts index 164568da4..0ded518d6 100644 --- a/packages/typescript/ai-groq/src/adapters/text.ts +++ b/packages/typescript/ai-groq/src/adapters/text.ts @@ -104,6 +104,17 @@ export class GroqTextAdapter< } return undefined } + + /** + * Groq's API rejects `response_format: json_schema` together with `tools` + * + `stream` (returns 400 โ€” see Groq Structured Outputs docs: + * "Streaming and tool use are not currently supported with Structured + * Outputs."). Force the engine onto the legacy finalization path even + * though the OpenAI Chat Completions base would otherwise opt in. 
+ */ + override supportsCombinedToolsAndSchema(): boolean { + return false + } } /** diff --git a/packages/typescript/ai-groq/tests/groq-adapter.test.ts b/packages/typescript/ai-groq/tests/groq-adapter.test.ts index 42e8cfb97..e53e85533 100644 --- a/packages/typescript/ai-groq/tests/groq-adapter.test.ts +++ b/packages/typescript/ai-groq/tests/groq-adapter.test.ts @@ -140,6 +140,14 @@ describe('Groq adapters', () => { expect(adapter).toBeDefined() }) + + it('opts out of native combined tools+schema mode (#605) โ€” Groq API rejects the combination', () => { + const adapter = createGroqText('llama-3.3-70b-versatile', 'test-api-key') + // The OpenAI Chat Completions base defaults to `true`; Groq must + // override to `false` because the upstream API returns 400 on + // `response_format` + `tools` + `stream`. + expect(adapter.supportsCombinedToolsAndSchema()).toBe(false) + }) }) }) diff --git a/packages/typescript/ai/src/activities/chat/adapter.ts b/packages/typescript/ai/src/activities/chat/adapter.ts index e4c4dccee..648e7e6bf 100644 --- a/packages/typescript/ai/src/activities/chat/adapter.ts +++ b/packages/typescript/ai/src/activities/chat/adapter.ts @@ -123,6 +123,29 @@ export interface TextAdapter< structuredOutputStream?: ( options: StructuredOutputOptions, ) => AsyncIterable + + /** + * Declares whether the adapter supports combining `tools` and a + * schema-constrained final answer in a single streaming request. + * + * When `true`, the engine wires `outputSchema` into the regular + * `chatStream()` call and skips the separate `runStructuredFinalization` + * round-trip. The model's natural final turn carries the + * schema-constrained JSON text and the engine harvests it from the agent + * loop's accumulated content. 
+ * + * When `false`, `undefined`, or the method is omitted, the engine runs + * the agent loop without `outputSchema` and then issues a separate + * `structuredOutput` / `structuredOutputStream` call against the JSON + * schema for finalization (the legacy path). + * + * The method receives the per-call `modelOptions` so providers whose + * support depends on the resolved upstream model (e.g. OpenRouter) can + * answer per-request. Most adapters can return a constant. + */ + supportsCombinedToolsAndSchema?: ( + modelOptions?: TProviderOptions | undefined, + ) => boolean } /** diff --git a/packages/typescript/ai/src/activities/chat/index.ts b/packages/typescript/ai/src/activities/chat/index.ts index 82c45b2f8..a9df2ae54 100644 --- a/packages/typescript/ai/src/activities/chat/index.ts +++ b/packages/typescript/ai/src/activities/chat/index.ts @@ -312,11 +312,20 @@ interface TextEngineConfig< * as the validated result and retrievable via * `getValidatedStructuredOutput()`. Used by `runAgenticStructuredOutput` * to perform Standard Schema validation inside the engine. + * - nativeCombined: when true, the adapter declared + * `supportsCombinedToolsAndSchema()` and the engine wires `jsonSchema` + * into the regular `chatStream` call instead of running a separate + * finalization round-trip. The agent loop's final-turn text is the + * schema-constrained JSON; the engine parses it from accumulated + * content. The `'structuredOutput'` middleware phase does NOT fire on + * this path โ€” middleware sees the run through `beforeModel` / + * `modelStream` as usual. 
*/ finalStructuredOutput?: { jsonSchema: JSONSchema yieldChunks: boolean validate?: (data: unknown) => unknown + nativeCombined?: boolean } } @@ -379,6 +388,16 @@ class TextEngine< // Structured-output finalization state (populated by runStructuredFinalization) private structuredOutputResult: { data: unknown; rawText: string } | null = null + // Native combined mode: tracks whether we've already emitted the synthetic + // `structured-output.start` event before the schema-constrained final-turn + // text begins streaming. The event must precede the first + // TEXT_MESSAGE_START so the client-side StreamProcessor routes the JSON + // deltas into a StructuredOutputPart instead of a plain TextPart. + private combinedStartEmitted = false + // Native combined mode: messageId we want the synthetic + // `structured-output.start` (and any error emitted before deltas arrive) + // to carry, so the client matches it to the streaming text deltas. + private combinedStructuredMessageId: string | null = null // Holds the validated value when `finalStructuredOutput.validate` is provided // and succeeds. Distinct from `structuredOutputResult.data` (the raw, // unvalidated payload from the structured-output.complete chunk). @@ -393,6 +412,7 @@ class TextEngine< jsonSchema: JSONSchema yieldChunks: boolean validate?: (data: unknown) => unknown + nativeCombined?: boolean } constructor( @@ -560,12 +580,19 @@ class TextEngine< return } - // Skip the agent loop entirely when there are no tools AND a structured- - // output finalization will run. Without tools the model has nothing to - // do in the loop, so executing one iteration would burn an extra - // provider call before the finalization request. + // Skip the agent loop entirely when there are no tools AND a separate + // structured-output finalization will run. Without tools the model has + // nothing to do in the loop, so executing one iteration would burn an + // extra provider call before the finalization request. 
+ // + // Native combined mode does NOT skip โ€” the agent loop itself produces + // the schema-constrained final answer in one pass (model emits the + // schema-constrained text on its natural final turn). Even with zero + // tools, the single chatStream call IS the structured-output call. const skipAgentLoop = - !!this.finalStructuredOutput && this.tools.length === 0 + !!this.finalStructuredOutput && + this.tools.length === 0 && + this.finalStructuredOutput.nativeCombined !== true if (!skipAgentLoop) { do { @@ -607,12 +634,20 @@ class TextEngine< // requested AND the run hasn't already errored/aborted, run it through // the middleware pipeline. The terminal hook fires once at the very // end (after finalization), not after the agent loop. + // + // Native combined mode takes a different path: the agent loop's final- + // turn text IS the schema-constrained JSON, so we harvest it from + // `accumulatedContent` instead of issuing a second provider call. if ( this.finalStructuredOutput && !this.isCancelled() && !this.finalizationError ) { - yield* this.runStructuredFinalization() + if (this.finalStructuredOutput.nativeCombined === true) { + yield* this.harvestCombinedStructuredOutput() + } else { + yield* this.runStructuredFinalization() + } } // Call terminal hook (skip when waiting for client โ€” stream is paused, not finished). @@ -777,6 +812,18 @@ class TextEngine< }, ) + // When the adapter declared `supportsCombinedToolsAndSchema()`, the + // activity layer set `nativeCombined: true` and we forward the + // pre-converted JSON Schema into the regular chatStream call. The + // adapter wires it into the upstream request (e.g. `response_format`, + // `text.format`, `output_format`) so the model's final-turn text is + // schema-constrained and the engine can harvest it from the agent loop + // without a separate finalization round-trip. + const combinedSchema = + this.finalStructuredOutput?.nativeCombined === true + ? 
this.finalStructuredOutput.jsonSchema + : undefined + for await (const chunk of this.adapter.chatStream({ model: this.params.model, messages: this.messages, @@ -792,6 +839,7 @@ class TextEngine< threadId: this.threadId, runId: this.runIdOverride, parentRunId: this.parentRunIdOverride, + ...(combinedSchema ? { outputSchema: combinedSchema } : {}), })) { if (this.isCancelled()) { break @@ -803,6 +851,44 @@ class TextEngine< // BEFORE middleware, so fields like finishReason, delta, etc. are available this.handleStreamChunk(chunk) + // Native combined mode: synthesize `structured-output.start` BEFORE + // the first TEXT_MESSAGE_START so the client-side StreamProcessor + // routes the schema-constrained JSON deltas into a + // StructuredOutputPart. We delay synthesis until we actually see + // text starting โ€” intermediate tool-call iterations don't need it, + // and emitting at run-start would wrap tool-call commentary into a + // structured-output part too. + if ( + this.finalStructuredOutput?.nativeCombined === true && + this.finalStructuredOutput.yieldChunks && + !this.combinedStartEmitted && + chunk.type === EventType.TEXT_MESSAGE_START + ) { + this.combinedStartEmitted = true + const messageId = + typeof chunk.messageId === 'string' && chunk.messageId !== '' + ? chunk.messageId + : generateMessageId() + this.combinedStructuredMessageId = messageId + const synthStart: StreamChunk = { + type: EventType.CUSTOM, + name: 'structured-output.start', + value: { messageId }, + model: this.params.model, + timestamp: Date.now(), + threadId: this.threadId, + ...(this.runIdOverride ? 
{ runId: this.runIdOverride } : {}), + } + const synthOutputs = await this.middlewareRunner.runOnChunk( + this.middlewareCtx, + synthStart, + ) + for (const outputChunk of synthOutputs) { + yield outputChunk + this.middlewareCtx.chunkIndex++ + } + } + // Pipe chunk through middleware (devtools middleware observes; strip-to-spec cleans) const outputChunks = await this.middlewareRunner.runOnChunk( this.middlewareCtx, @@ -812,8 +898,13 @@ class TextEngine< // the agent loop, suppress the agent-loop's RUN_STARTED/RUN_FINISHED // here โ€” the finalization step emits the single outer lifecycle pair // that reaches the consumer. + // + // Native combined mode does NOT issue a second adapter stream โ€” the + // agent loop's lifecycle IS the outer pair the consumer sees. const suppressAgentLifecycle = - !!this.finalStructuredOutput && this.finalStructuredOutput.yieldChunks + !!this.finalStructuredOutput && + this.finalStructuredOutput.yieldChunks && + this.finalStructuredOutput.nativeCombined !== true for (const outputChunk of outputChunks) { if ( suppressAgentLifecycle && @@ -1948,6 +2039,172 @@ class TextEngine< } } + /** + * Native combined mode: harvest the structured output from the agent + * loop's accumulated final-turn text (no separate provider call). + * + * The adapter wired `outputSchema` into the regular `chatStream` request, + * so the model's final-turn text is the schema-constrained JSON. We parse + * `this.accumulatedContent`, populate `this.structuredOutputResult`, emit + * a synthetic `structured-output.complete` (and a `structured-output.start` + * if one wasn't emitted earlier โ€” only happens on the streaming path when + * the model returned no text at all), and run the validate callback when + * present. Failures populate `this.finalizationError` so the engine's + * terminal-hook chooser routes to `onError` (per spec ยง7.3). 
+ * + * The `'structuredOutput'` middleware phase intentionally does NOT fire on + * this path โ€” middleware sees the run through `beforeModel` / `modelStream` + * as usual. See PR #605 / issue #605 for the design rationale. + */ + private async *harvestCombinedStructuredOutput(): AsyncGenerator { + if (!this.finalStructuredOutput) { + throw new Error( + 'harvestCombinedStructuredOutput called without finalStructuredOutput config', + ) + } + + const yieldChunks = this.finalStructuredOutput.yieldChunks + const rawText = this.accumulatedContent + + // Empty final-turn text means the agent loop terminated without the + // model emitting any assistant content (e.g. early termination after + // tool calls). Mirror the fallback path's "missing structured result" + // error rather than silently returning undefined. + if (rawText.length === 0) { + this.finalizationError = { + message: 'missing structured result', + code: 'structured-output-missing-result', + } + } else { + try { + const parsed: unknown = JSON.parse(rawText) + this.structuredOutputResult = { data: parsed, rawText } + } catch (err: unknown) { + const detail = + rawText.slice(0, 200) + (rawText.length > 200 ? '...' : '') + this.finalizationError = { + message: `Failed to parse structured output as JSON. Content: ${detail}`, + code: 'structured-output-parse-failed', + cause: err, + } + } + } + + // Validate against the Standard Schema (when supplied). Validation + // failures route through onError just like the fallback path. + if ( + this.structuredOutputResult && + !this.finalizationError && + this.finalStructuredOutput.validate + ) { + try { + const validated = this.finalStructuredOutput.validate( + this.structuredOutputResult.data, + ) + this.validatedStructuredOutput = validated + this.hasValidatedStructuredOutput = true + } catch (err: unknown) { + const message = err instanceof Error ? 
err.message : String(err) + this.finalizationError = { + message, + code: 'structured-output-validation-failed', + cause: err, + } + } + } + + if (!yieldChunks) { + // Promise path: state is populated, nothing to yield. The + // activity-layer caller pulls `structuredOutputResult` / + // `validatedStructuredOutput` directly. + return + } + + // Streaming path: emit a synthetic `structured-output.start` if the + // model produced no text at all (so the client snaps an errored + // StructuredOutputPart rather than nothing). The normal path already + // emitted start before the first TEXT_MESSAGE_START in + // `streamModelResponse`. + if (!this.combinedStartEmitted) { + this.combinedStartEmitted = true + const messageId = + this.combinedStructuredMessageId ?? generateMessageId() + this.combinedStructuredMessageId = messageId + const synthStart: StreamChunk = { + type: EventType.CUSTOM, + name: 'structured-output.start', + value: { messageId }, + model: this.params.model, + timestamp: Date.now(), + threadId: this.threadId, + ...(this.runIdOverride ? { runId: this.runIdOverride } : {}), + } + const startOutputs = await this.middlewareRunner.runOnChunk( + this.middlewareCtx, + synthStart, + ) + for (const outputChunk of startOutputs) { + yield outputChunk + this.middlewareCtx.chunkIndex++ + } + } + + // On success, emit the synthetic `structured-output.complete` carrying + // the parsed object + raw text. + if (this.structuredOutputResult && !this.finalizationError) { + const completeChunk: StreamChunk = { + type: EventType.CUSTOM, + name: 'structured-output.complete', + value: { + object: this.structuredOutputResult.data, + raw: this.structuredOutputResult.rawText, + }, + model: this.params.model, + timestamp: Date.now(), + threadId: this.threadId, + ...(this.runIdOverride ? 
{ runId: this.runIdOverride } : {}), + } + const completeOutputs = await this.middlewareRunner.runOnChunk( + this.middlewareCtx, + completeChunk, + ) + for (const outputChunk of completeOutputs) { + yield outputChunk + this.middlewareCtx.chunkIndex++ + } + } + + // On failure, emit a synthetic RUN_ERROR so the streaming consumer's + // `for await` doesn't end silently. Mirrors the fallback path. + if (this.finalizationError) { + const errChunk: StreamChunk = { + type: EventType.RUN_ERROR, + runId: this.runIdOverride ?? this.requestId, + model: this.params.model, + timestamp: Date.now(), + threadId: this.threadId, + message: this.finalizationError.message, + ...(this.finalizationError.code + ? { code: this.finalizationError.code } + : {}), + error: { + message: this.finalizationError.message, + ...(this.finalizationError.code + ? { code: this.finalizationError.code } + : {}), + }, + } + const errOutputs = await this.middlewareRunner.runOnChunk( + this.middlewareCtx, + errChunk, + ) + for (const outputChunk of errOutputs) { + yield outputChunk + this.middlewareCtx.chunkIndex++ + } + } + } + private buildMiddlewareConfig(): ChatMiddlewareConfig { return { messages: this.messages, @@ -2243,6 +2500,13 @@ async function runAgenticStructuredOutput( parseWithStandardSchema>(outputSchema, data) : undefined + // Per issue #605: same capability check as the streaming path. When the + // adapter handles tools + schema natively, the engine skips the separate + // structured-output finalization call and harvests the JSON from the + // agent loop's accumulated final-turn text. + const nativeCombined = + adapter.supportsCombinedToolsAndSchema?.(options.modelOptions) === true + const engine = new TextEngine( { adapter, @@ -2256,6 +2520,7 @@ async function runAgenticStructuredOutput( jsonSchema, yieldChunks: false, ...(validate ? { validate } : {}), + ...(nativeCombined ? 
{ nativeCombined: true } : {}), }, }, logger, @@ -2493,6 +2758,16 @@ async function* runStreamingStructuredOutputImpl( const model = adapter.model const logger = resolveDebugOption(debug) + // Per issue #605: adapters that natively combine tools + schema-constrained + // output in one streaming call (modern OpenAI, Anthropic 4.5+, Gemini 3+, + // Grok 4+) opt in via `supportsCombinedToolsAndSchema()`. The engine then + // forwards the schema into the regular `chatStream` call and harvests the + // structured result from the agent loop's accumulated text — no separate + // finalization round-trip, and the `'structuredOutput'` middleware phase + // does not fire. + const nativeCombined = + adapter.supportsCombinedToolsAndSchema?.(options.modelOptions) === true + // Inputs may be UIMessages (from useChat) or ModelMessages (from server-side // callers). TextEngine handles the conversion uniformly. const engine = new TextEngine( @@ -2504,7 +2779,11 @@ async function* runStreamingStructuredOutputImpl( >, middleware, context, - finalStructuredOutput: { jsonSchema, yieldChunks: true }, + finalStructuredOutput: { + jsonSchema, + yieldChunks: true, + ...(nativeCombined ? { nativeCombined: true } : {}), + }, }, logger, ) diff --git a/packages/typescript/ai/src/types.ts b/packages/typescript/ai/src/types.ts index a12964981..ef841c4d1 100644 --- a/packages/typescript/ai/src/types.ts +++ b/packages/typescript/ai/src/types.ts @@ -799,10 +799,26 @@ export interface TextOptions< /** * Schema for structured output. - * When provided, the adapter should use the provider's native structured output API - * to ensure the response conforms to this schema. - * The schema will be converted to JSON Schema format before being sent to the provider. - * Supports any Standard JSON Schema compliant library (Zod, ArkType, Valibot, etc.). + * + * **Two distinct use sites:** + * + * 1.
**User-facing (activity layer):** accepts any + * {@link SchemaInput} — Zod, ArkType, Valibot, or a raw JSON Schema. + * The activity layer converts to JSON Schema before handing off. + * + * 2. **Adapter-facing (`chatStream` call):** the engine populates this with + * a pre-converted JSON Schema **only** when the adapter declared + * `supportsCombinedToolsAndSchema(modelOptions) === true`. The adapter + * should then wire the schema into the upstream request (e.g. + * `response_format: { type: 'json_schema', ... }`, `text.format`, + * `output_format`) alongside any `tools`. The model's natural final + * turn carries the schema-constrained JSON text and the engine + * harvests it from the agent loop without a separate finalization + * round-trip. + * + * Adapters that did NOT declare the capability never see this field + * populated — the engine instead invokes `structuredOutput` / + * `structuredOutputStream` after the agent loop. */ outputSchema?: SchemaInput /** diff --git a/packages/typescript/ai/tests/chat-native-combined-structured-output.test.ts b/packages/typescript/ai/tests/chat-native-combined-structured-output.test.ts new file mode 100644 index 000000000..1ccf8fb89 --- /dev/null +++ b/packages/typescript/ai/tests/chat-native-combined-structured-output.test.ts @@ -0,0 +1,303 @@ +/** + * Unit tests for the native combined tools+schema path added in issue #605. + * + * When an adapter declares `supportsCombinedToolsAndSchema()`, the engine + * threads the converted JSON Schema through to `chatStream` (so the adapter + * can attach `response_format` / `text.format` / `output_format` to the + * upstream request) and SKIPS the separate + * `runStructuredFinalization` round-trip. The agent loop's final-turn text + * IS the schema-constrained JSON; the engine parses it from accumulated + * content, emits synthetic `structured-output.start` / `.complete` events + * for the client, and runs validation for the Promise path.
+ * + * These tests pin the contract so a future engine refactor can't silently + * regress per-PR-#605 routing or accidentally re-introduce the extra + * provider call for native-capable adapters. + */ + +import { describe, expect, it } from 'vitest' +import { z } from 'zod' +import { chat } from '../src/activities/chat/index' +import { EventType } from '../src/types' +import type { StreamChunk } from '../src/types' +import { collectChunks, createMockAdapter } from './test-utils' + +const PersonSchema = z.object({ + name: z.string(), + age: z.number(), +}) +type Person = z.infer<typeof PersonSchema> + +const validPerson: Person = { name: 'Jane Roe', age: 31 } + +function textTurn(json: string): Array<StreamChunk> { + const ts = Date.now() + return [ + { + type: EventType.RUN_STARTED, + runId: 'run-1', + threadId: 'thread-1', + timestamp: ts, + } as StreamChunk, + { + type: EventType.TEXT_MESSAGE_START, + messageId: 'msg-1', + role: 'assistant', + timestamp: ts, + } as StreamChunk, + { + type: EventType.TEXT_MESSAGE_CONTENT, + messageId: 'msg-1', + delta: json, + timestamp: ts, + } as StreamChunk, + { + type: EventType.TEXT_MESSAGE_END, + messageId: 'msg-1', + timestamp: ts, + } as StreamChunk, + { + type: EventType.RUN_FINISHED, + runId: 'run-1', + threadId: 'thread-1', + finishReason: 'stop', + timestamp: ts, + } as StreamChunk, + ] +} + +describe('chat({ outputSchema, stream: true }) — native combined mode (#605)', () => { + it('forwards outputSchema to chatStream and skips the finalization adapter call', async () => { + let structuredCalled = false + let structuredStreamCalled = false + + const { adapter, calls } = createMockAdapter({ + iterations: [textTurn(JSON.stringify(validPerson))], + structuredOutput: async () => { + structuredCalled = true + return { data: {}, rawText: '{}' } + }, + structuredOutputStream: () => { + structuredStreamCalled = true + return (async function* () {})() + }, + supportsCombinedToolsAndSchema: true, + }) + + const stream = chat({ + adapter, + messages: [{
role: 'user', content: 'extract' }], + outputSchema: PersonSchema, + stream: true, + }) + + await collectChunks(stream as unknown as AsyncIterable) + + // The agent loop's single chatStream call IS the structured call. + expect(calls.length).toBe(1) + expect(calls[0]?.outputSchema).toBeDefined() + // No separate finalization round-trip. + expect(structuredCalled).toBe(false) + expect(structuredStreamCalled).toBe(false) + }) + + it('synthesizes structured-output.start before TEXT_MESSAGE_START and structured-output.complete after the loop', async () => { + const json = JSON.stringify(validPerson) + const { adapter } = createMockAdapter({ + iterations: [textTurn(json)], + supportsCombinedToolsAndSchema: true, + }) + + const chunks = await collectChunks( + chat({ + adapter, + messages: [{ role: 'user', content: 'extract' }], + outputSchema: PersonSchema, + stream: true, + }) as unknown as AsyncIterable, + ) + + const startIdx = chunks.findIndex( + (c) => + c.type === EventType.CUSTOM && + (c as { name?: string }).name === 'structured-output.start', + ) + const textStartIdx = chunks.findIndex( + (c) => c.type === EventType.TEXT_MESSAGE_START, + ) + const completeIdx = chunks.findIndex( + (c) => + c.type === EventType.CUSTOM && + (c as { name?: string }).name === 'structured-output.complete', + ) + + expect(startIdx).toBeGreaterThanOrEqual(0) + expect(textStartIdx).toBeGreaterThanOrEqual(0) + expect(completeIdx).toBeGreaterThanOrEqual(0) + + // start before text deltas (so the client routes them to a + // StructuredOutputPart, not a TextPart). + expect(startIdx).toBeLessThan(textStartIdx) + // complete after the text ends, so the parsed object is available + // once the streaming text has fully arrived. 
+ expect(completeIdx).toBeGreaterThan(textStartIdx) + + const complete = chunks[completeIdx] as { value: { object: unknown } } + expect(complete.value.object).toEqual(validPerson) + }) + + it('emits a single outer RUN_STARTED / RUN_FINISHED pair (no double lifecycle)', async () => { + const { adapter } = createMockAdapter({ + iterations: [textTurn(JSON.stringify(validPerson))], + supportsCombinedToolsAndSchema: true, + }) + + const chunks = await collectChunks( + chat({ + adapter, + messages: [{ role: 'user', content: 'extract' }], + outputSchema: PersonSchema, + stream: true, + }) as unknown as AsyncIterable, + ) + + const runStarted = chunks.filter((c) => c.type === EventType.RUN_STARTED) + const runFinished = chunks.filter((c) => c.type === EventType.RUN_FINISHED) + expect(runStarted.length).toBe(1) + expect(runFinished.length).toBe(1) + }) + + it('Promise path skips finalization and returns the validated typed value', async () => { + let structuredCalled = false + const { adapter, calls } = createMockAdapter({ + iterations: [textTurn(JSON.stringify(validPerson))], + structuredOutput: async () => { + structuredCalled = true + return { data: {}, rawText: '{}' } + }, + supportsCombinedToolsAndSchema: true, + }) + + const result = await chat({ + adapter, + messages: [{ role: 'user', content: 'extract' }], + outputSchema: PersonSchema, + }) + + expect(result).toEqual(validPerson) + expect(structuredCalled).toBe(false) + expect(calls.length).toBe(1) + expect(calls[0]?.outputSchema).toBeDefined() + }) + + it('Promise path routes Standard-Schema validation failures through onError', async () => { + const invalid = { name: 123, age: 'not-a-number' } + const { adapter } = createMockAdapter({ + iterations: [textTurn(JSON.stringify(invalid))], + supportsCombinedToolsAndSchema: true, + }) + + await expect( + chat({ + adapter, + messages: [{ role: 'user', content: 'extract' }], + outputSchema: PersonSchema, + }), + ).rejects.toThrow() + }) + + it('emits a RUN_ERROR on the 
streaming path when the final-turn text is not valid JSON', async () => { + const { adapter } = createMockAdapter({ + iterations: [textTurn('not-json-at-all')], + supportsCombinedToolsAndSchema: true, + }) + + const chunks = await collectChunks( + chat({ + adapter, + messages: [{ role: 'user', content: 'extract' }], + outputSchema: PersonSchema, + stream: true, + }) as unknown as AsyncIterable, + ) + + const runError = chunks.find((c) => c.type === EventType.RUN_ERROR) as + | { type: EventType.RUN_ERROR; code?: string } + | undefined + expect(runError).toBeDefined() + expect(runError!.code).toBe('structured-output-parse-failed') + + // No structured-output.complete on the parse-failure path. + const complete = chunks.find( + (c) => + c.type === EventType.CUSTOM && + (c as { name?: string }).name === 'structured-output.complete', + ) + expect(complete).toBeUndefined() + }) + + it('adapters that do not declare the capability still take the finalization path', async () => { + let structuredStreamCalled = false + const { adapter, calls } = createMockAdapter({ + iterations: [textTurn(JSON.stringify(validPerson))], + structuredOutputStream: () => { + structuredStreamCalled = true + const ts = Date.now() + return (async function* () { + yield { + type: EventType.RUN_STARTED, + runId: 'run-2', + threadId: 'thread-1', + timestamp: ts, + } as StreamChunk + yield { + type: EventType.TEXT_MESSAGE_START, + messageId: 'msg-2', + role: 'assistant', + timestamp: ts, + } as StreamChunk + yield { + type: EventType.TEXT_MESSAGE_CONTENT, + messageId: 'msg-2', + delta: JSON.stringify(validPerson), + timestamp: ts, + } as StreamChunk + yield { + type: EventType.TEXT_MESSAGE_END, + messageId: 'msg-2', + timestamp: ts, + } as StreamChunk + yield { + type: EventType.CUSTOM, + name: 'structured-output.complete', + value: { object: validPerson, raw: JSON.stringify(validPerson) }, + timestamp: ts, + } as StreamChunk + yield { + type: EventType.RUN_FINISHED, + runId: 'run-2', + threadId: 
'thread-1', + finishReason: 'stop', + timestamp: ts, + } as StreamChunk + })() + }, + // supportsCombinedToolsAndSchema NOT set + }) + + await collectChunks( + chat({ + adapter, + messages: [{ role: 'user', content: 'extract' }], + outputSchema: PersonSchema, + stream: true, + }) as unknown as AsyncIterable, + ) + + // Engine took the legacy finalization path: separate adapter call. + expect(structuredStreamCalled).toBe(true) + // The agent loop short-circuited (no tools + finalization requested), + // so chatStream was never called. + expect(calls.length).toBe(0) + }) +}) diff --git a/packages/typescript/ai/tests/test-utils.ts b/packages/typescript/ai/tests/test-utils.ts index a597fab59..2c6b71303 100644 --- a/packages/typescript/ai/tests/test-utils.ts +++ b/packages/typescript/ai/tests/test-utils.ts @@ -157,6 +157,11 @@ export function createMockAdapter(options: { * has no `structuredOutputStream` and consumers fall through to the * synthesized fallback in `runStructuredFinalization`. */ structuredOutputStream?: (opts: any) => AsyncIterable + /** When true, the adapter declares it natively combines tools + a + * schema-constrained final answer in one streaming call (issue #605). + * The engine then forwards `outputSchema` into `chatStream` and skips + * the separate finalization round-trip. 
*/ + supportsCombinedToolsAndSchema?: boolean }) { const calls: Array> = [] let callIndex = 0 @@ -204,6 +209,10 @@ export function createMockAdapter(options: { adapter.structuredOutputStream = options.structuredOutputStream } + if (options.supportsCombinedToolsAndSchema) { + adapter.supportsCombinedToolsAndSchema = () => true + } + return { adapter, calls } } diff --git a/packages/typescript/openai-base/src/adapters/chat-completions-text.ts b/packages/typescript/openai-base/src/adapters/chat-completions-text.ts index 8838a0020..064f92b9d 100644 --- a/packages/typescript/openai-base/src/adapters/chat-completions-text.ts +++ b/packages/typescript/openai-base/src/adapters/chat-completions-text.ts @@ -1123,6 +1123,34 @@ export abstract class OpenAIBaseChatCompletionsTextAdapter< const modelOptions = options.modelOptions + // Native combined mode (issue #605): when the engine threads + // `outputSchema` through TextOptions, the adapter declared + // `supportsCombinedToolsAndSchema` and the schema is already JSON Schema + // (pre-converted at the activity boundary). Wire it into + // `response_format` alongside any `tools`. Modern OpenAI-compatible + // Chat Completions accepts both together and emits the schema- + // constrained text on the natural final turn. + const combinedSchema = options.outputSchema as + | Record + | undefined + const responseFormat = combinedSchema + ? { + response_format: { + type: 'json_schema' as const, + json_schema: { + name: 'structured_output', + schema: this.makeStructuredOutputCompatible( + combinedSchema, + Array.isArray(combinedSchema.required) + ? (combinedSchema.required as Array) + : undefined, + ), + strict: true, + }, + }, + } + : undefined + // Build the request so explicit top-level options win over modelOptions // when set, but `undefined` top-level options do NOT clobber values the // caller put in modelOptions. 
Keeping the merge nullish-aware fixes the @@ -1145,10 +1173,22 @@ export abstract class OpenAIBaseChatCompletionsTextAdapter< tools.length > 0 && { tools, }), + ...(responseFormat ?? {}), stream: true, } } + /** + * Modern OpenAI-compatible Chat Completions APIs support `tools` and + * `response_format: json_schema` together in a single streaming request + * (per issue #605). Subclasses can override — Groq, for instance, must + * return `false` because its API rejects schema + tools + stream with a + * 400. + */ + supportsCombinedToolsAndSchema(): boolean { + return true + } + /** * Converts a single ModelMessage to the Chat Completions API message format. * Override this in subclasses to handle provider-specific message formats. diff --git a/packages/typescript/openai-base/src/adapters/responses-text.ts b/packages/typescript/openai-base/src/adapters/responses-text.ts index 96dffb01f..43b3d9138 100644 --- a/packages/typescript/openai-base/src/adapters/responses-text.ts +++ b/packages/typescript/openai-base/src/adapters/responses-text.ts @@ -1608,6 +1608,33 @@ export abstract class OpenAIBaseResponsesTextAdapter< const modelOptions = options.modelOptions + // Native combined mode (issue #605): when the engine threads + // `outputSchema` through TextOptions, the adapter declared + // `supportsCombinedToolsAndSchema` and the schema is already JSON Schema + // (pre-converted at the activity boundary). Wire it into `text.format` + // alongside any `tools` — the Responses API supports both together and + // emits the schema-constrained text on the natural final turn. + const combinedSchema = options.outputSchema as + | Record + | undefined + const textFormat = combinedSchema + ? { + text: { + format: { + type: 'json_schema' as const, + name: 'structured_output', + schema: this.makeStructuredOutputCompatible( + combinedSchema, + Array.isArray(combinedSchema.required) + ?
(combinedSchema.required as Array) + : undefined, + ), + strict: true, + }, + }, + } + : undefined + // Spread modelOptions first, then explicit top-level options when set. // Mirrors the chat-completions base adapter's precedence so callers // tuning either backend get identical behaviour. Leaving `modelOptions` @@ -1635,9 +1662,19 @@ export abstract class OpenAIBaseResponsesTextAdapter< // Conditional spread: `tools: undefined` would clobber any // modelOptions.tools the caller set above. ...(tools && tools.length > 0 && { tools }), + ...(textFormat ?? {}), } } + /** + * The OpenAI Responses API supports `tools` and `text.format: json_schema` + * together in a single streaming request (per issue #605). Subclasses + * that route to providers without this capability should override. + */ + supportsCombinedToolsAndSchema(): boolean { + return true + } + /** * Converts ModelMessage[] to Responses API ResponseInput format. * Override this in subclasses for provider-specific message format quirks. 
diff --git a/packages/typescript/openai-base/tests/chat-completions-text.test.ts b/packages/typescript/openai-base/tests/chat-completions-text.test.ts index 9a302afe1..3345c9f9c 100644 --- a/packages/typescript/openai-base/tests/chat-completions-text.test.ts +++ b/packages/typescript/openai-base/tests/chat-completions-text.test.ts @@ -965,6 +965,90 @@ describe('OpenAIBaseChatCompletionsTextAdapter', () => { expect(callArgs.stream_options).toBeUndefined() }) + it('wires outputSchema into response_format alongside tools for native combined mode (#605)', async () => { + const streamChunks = [ + { + id: 'chatcmpl-1', + model: 'test-model', + choices: [ + { delta: { content: '{"city":"NYC"}' }, finish_reason: null }, + ], + }, + { + id: 'chatcmpl-1', + model: 'test-model', + choices: [{ delta: {}, finish_reason: 'stop' }], + }, + ] + + setupMockSdkClient(streamChunks) + const adapter = new TestChatCompletionsAdapter(testConfig, 'test-model') + // Sanity-check the capability advertisement. + expect(adapter.supportsCombinedToolsAndSchema()).toBe(true) + + for await (const _ of adapter.chatStream({ + logger: testLogger, + model: 'test-model', + messages: [{ role: 'user', content: 'Hello' }], + tools: [weatherTool], + outputSchema: { + type: 'object', + properties: { city: { type: 'string' } }, + required: ['city'], + }, + })) { + // drain + } + + expect(mockCreate).toHaveBeenCalledWith( + expect.objectContaining({ + stream: true, + tools: expect.any(Array), + response_format: expect.objectContaining({ + type: 'json_schema', + json_schema: expect.objectContaining({ + name: 'structured_output', + strict: true, + schema: expect.objectContaining({ type: 'object' }), + }), + }), + }), + expect.anything(), + ) + }) + + it('omits response_format when outputSchema is not set', async () => { + const streamChunks = [ + { + id: 'chatcmpl-1', + model: 'test-model', + choices: [{ delta: { content: 'Hi' }, finish_reason: null }], + }, + { + id: 'chatcmpl-1', + model: 'test-model', + 
choices: [{ delta: {}, finish_reason: 'stop' }], + }, + ] + + setupMockSdkClient(streamChunks) + const adapter = new TestChatCompletionsAdapter(testConfig, 'test-model') + + for await (const _ of adapter.chatStream({ + logger: testLogger, + model: 'test-model', + messages: [{ role: 'user', content: 'Hello' }], + })) { + // drain + } + + const callArgs = mockCreate.mock.calls[0]![0] as unknown as Record< + string, + unknown + > + expect(callArgs.response_format).toBeUndefined() + }) + it('forwards request headers and signal to SDK create calls', async () => { const streamChunks = [ { diff --git a/packages/typescript/openai-base/tests/responses-text.test.ts b/packages/typescript/openai-base/tests/responses-text.test.ts index 0e8a34257..7430f6947 100644 --- a/packages/typescript/openai-base/tests/responses-text.test.ts +++ b/packages/typescript/openai-base/tests/responses-text.test.ts @@ -117,6 +117,105 @@ describe('OpenAIBaseResponsesTextAdapter', () => { }) }) + describe('native combined tools + outputSchema mode (#605)', () => { + it('wires outputSchema into text.format alongside tools on the regular chatStream call', async () => { + const streamChunks = [ + { + type: 'response.created', + response: { + id: 'resp-1', + model: 'test-model', + output: [], + usage: { input_tokens: 1, output_tokens: 1, total_tokens: 2 }, + }, + }, + { + type: 'response.completed', + response: { + id: 'resp-1', + model: 'test-model', + output: [], + usage: { input_tokens: 1, output_tokens: 1, total_tokens: 2 }, + }, + }, + ] + + setupMockResponsesClient(streamChunks) + const adapter = new TestResponsesAdapter(testConfig, 'test-model') + expect(adapter.supportsCombinedToolsAndSchema()).toBe(true) + + for await (const _ of adapter.chatStream({ + logger: testLogger, + model: 'test-model', + messages: [{ role: 'user', content: 'Hello' }], + tools: [weatherTool], + outputSchema: { + type: 'object', + properties: { city: { type: 'string' } }, + required: ['city'], + }, + })) { + // drain + } 
+ + expect(mockResponsesCreate).toHaveBeenCalledWith( + expect.objectContaining({ + stream: true, + tools: expect.any(Array), + text: expect.objectContaining({ + format: expect.objectContaining({ + type: 'json_schema', + name: 'structured_output', + strict: true, + schema: expect.objectContaining({ type: 'object' }), + }), + }), + }), + expect.anything(), + ) + }) + + it('omits text.format when outputSchema is not set', async () => { + const streamChunks = [ + { + type: 'response.created', + response: { + id: 'resp-2', + model: 'test-model', + output: [], + usage: { input_tokens: 1, output_tokens: 1, total_tokens: 2 }, + }, + }, + { + type: 'response.completed', + response: { + id: 'resp-2', + model: 'test-model', + output: [], + usage: { input_tokens: 1, output_tokens: 1, total_tokens: 2 }, + }, + }, + ] + + setupMockResponsesClient(streamChunks) + const adapter = new TestResponsesAdapter(testConfig, 'test-model') + + for await (const _ of adapter.chatStream({ + logger: testLogger, + model: 'test-model', + messages: [{ role: 'user', content: 'Hello' }], + })) { + // drain + } + + const callArgs = mockResponsesCreate.mock.calls[0]![0] as Record< + string, + unknown + > + expect(callArgs.text).toBeUndefined() + }) + }) + describe('streaming event sequence', () => { it('emits RUN_STARTED as the first event', async () => { const streamChunks = [ diff --git a/testing/e2e/src/routes/api.middleware-test.ts b/testing/e2e/src/routes/api.middleware-test.ts index 0d2525a58..9d6f065bd 100644 --- a/testing/e2e/src/routes/api.middleware-test.ts +++ b/testing/e2e/src/routes/api.middleware-test.ts @@ -265,11 +265,20 @@ export const Route = createFileRoute('/api/middleware-test')({ typeof fp.testId === 'string' ? fp.testId : undefined const aimockPort: number | undefined = fp.aimockPort != null ? 
Number(fp.aimockPort) : undefined + // Provider/model overrides let the middleware E2E exercise both the + // native-combined-mode path (modern OpenAI / Claude 4.5+ — no + // `structuredOutput` phase, single combined call) and the legacy + // finalization path (Claude 3.7, etc. — `structuredOutput` phase + // fires). See #605. + const provider = + typeof fp.provider === 'string' ? fp.provider : 'openai' + const modelOverride = + typeof fp.model === 'string' ? fp.model : undefined try { const adapterOptions = createTextAdapter( - 'openai', - undefined, + provider as Parameters[0], + modelOverride, aimockPort, testId, ) diff --git a/testing/e2e/src/routes/middleware-test.tsx b/testing/e2e/src/routes/middleware-test.tsx index 55e704475..d4a3f2a50 100644 --- a/testing/e2e/src/routes/middleware-test.tsx +++ b/testing/e2e/src/routes/middleware-test.tsx @@ -65,12 +65,19 @@ export const Route = createFileRoute('/middleware-test')({ return { testId: typeof search.testId === 'string' ? search.testId : undefined, aimockPort: port != null && !isNaN(port) ? port : undefined, + // `provider` / `model` are forwarded to the server route so the + // structured-output × middleware spec can exercise both the + // native-combined-mode path (modern openai / claude 4.5+) and the + // legacy finalization path (claude 3.7, etc.) — see #605. + provider: + typeof search.provider === 'string' ? search.provider : undefined, + model: typeof search.model === 'string' ?
search.model : undefined, } }, }) function MiddlewareTestPage() { - const { testId, aimockPort } = Route.useSearch() + const { testId, aimockPort, provider, model } = Route.useSearch() const [scenario, setScenario] = useState('basic-text') const [middlewareMode, setMiddlewareMode] = useState('none') const [testComplete, setTestComplete] = useState(false) @@ -78,9 +85,9 @@ function MiddlewareTestPage() { useState(EMPTY_PHASE_CAPTURE) const { messages, sendMessage, isLoading } = useChat({ - id: `mw-test-${scenario}-${middlewareMode}`, + id: `mw-test-${scenario}-${middlewareMode}-${provider ?? 'openai'}-${model ?? 'default'}`, connection: fetchServerSentEvents('/api/middleware-test'), - body: { scenario, middlewareMode, testId, aimockPort }, + body: { scenario, middlewareMode, testId, aimockPort, provider, model }, onFinish: () => { // For phase-recorder mode the spec reads `#mw-phases-json` / // `#mw-onfinish-count` / `#mw-yielded-chunks-json` AFTER diff --git a/testing/e2e/tests/structured-output-middleware.spec.ts b/testing/e2e/tests/structured-output-middleware.spec.ts index c9d6deea5..f2334a25c 100644 --- a/testing/e2e/tests/structured-output-middleware.spec.ts +++ b/testing/e2e/tests/structured-output-middleware.spec.ts @@ -32,10 +32,17 @@ import { test, expect } from './fixtures' * to await `Promise` instead of iterating SSE and is out of scope here. */ -function buildHarnessUrl(testId?: string, aimockPort?: number): string { +function buildHarnessUrl( + testId?: string, + aimockPort?: number, + provider?: string, + model?: string, +): string { const params = new URLSearchParams() if (testId) params.set('testId', testId) if (aimockPort) params.set('aimockPort', String(aimockPort)) + if (provider) params.set('provider', provider) + if (model) params.set('model', model) const qs = params.toString() return `/middleware-test${qs ? '?' 
+ qs : ''}` } @@ -73,12 +80,20 @@ function parseChunkSummaries(raw: string | null): Array<{ type: string }> { } test.describe('Structured Output × Middleware Coverage', () => { - test('structured output with stream:true: middleware observes finalization phase chunks', async ({ + test('legacy finalization path: middleware observes structuredOutput phase chunks (claude-3-7-sonnet)', async ({ page, testId, aimockPort, }) => { - await page.goto(buildHarnessUrl(testId, aimockPort)) + // Pinned to claude-3-7-sonnet because Claude 4.5+ adapters take the + // #605 native-combined-mode path (no separate finalization → no + // `structuredOutput` phase). The 3.7-sonnet adapter still uses the + // forced-tool finalization workaround, which is what this contract + // covers: any non-native-combined adapter must keep firing the + // `structuredOutput` phase so middleware can observe it. + await page.goto( + buildHarnessUrl(testId, aimockPort, 'anthropic', 'claude-3-7-sonnet'), + ) await page.waitForTimeout(2000) await page.locator('#mw-scenario-select').selectOption('structured-output') @@ -104,6 +119,45 @@ test.describe('Structured Output × Middleware Coverage', () => { expect(finishCount).toBe(1) }) + test('native combined mode (#605): structuredOutput phase does NOT fire — single combined call observed via beforeModel only (openai)', async ({ + page, + testId, + aimockPort, + }) => { + // Default openai adapter (gpt-4o) declares supportsCombinedToolsAndSchema, + // so the engine forwards outputSchema into the regular chatStream call + // and harvests the JSON from accumulated content — no second adapter + // request, no `structuredOutput` phase. This pins the new contract + // introduced in #605.
+ await page.goto(buildHarnessUrl(testId, aimockPort, 'openai')) + await page.waitForTimeout(2000) + + await page.locator('#mw-scenario-select').selectOption('structured-output') + await page.locator('#mw-mode-select').selectOption('phase-recorder') + await page.locator('#mw-run-button').click() + + await page.waitForFunction( + () => + document + .querySelector('#mw-metadata') + ?.getAttribute('data-test-complete') === 'true', + { timeout: 15000 }, + ) + + const phasesJson = await page.locator('#mw-phases-json').textContent() + const phases = parseStringArray(phasesJson) + // Combined-mode contract: middleware sees the run through the regular + // chat phases, not `structuredOutput`. + expect(phases).not.toContain('structuredOutput') + expect(phases).toContain('beforeModel') + + const finishCountRaw = await page + .locator('#mw-onfinish-count') + .textContent() + const finishCount = Number(finishCountRaw ?? '0') + expect(finishCount).toBe(1) + }) + test('streaming structured output: consumer sees exactly one RUN_STARTED/RUN_FINISHED pair', async ({ page, testId, From 0fe6b1793f4d99da67be0e8ed1326f1ffbab30fc Mon Sep 17 00:00:00 2001 From: "autofix-ci[bot]" <114827586+autofix-ci[bot]@users.noreply.github.com> Date: Thu, 21 May 2026 07:57:49 +0000 Subject: [PATCH 02/21] ci: apply automated fixes --- packages/typescript/ai/src/activities/chat/index.ts | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/packages/typescript/ai/src/activities/chat/index.ts b/packages/typescript/ai/src/activities/chat/index.ts index a9df2ae54..dee0d7aa1 100644 --- a/packages/typescript/ai/src/activities/chat/index.ts +++ b/packages/typescript/ai/src/activities/chat/index.ts @@ -2127,8 +2127,7 @@ class TextEngine< // `streamModelResponse`. if (!this.combinedStartEmitted) { this.combinedStartEmitted = true - const messageId = - this.combinedStructuredMessageId ?? generateMessageId() + const messageId = this.combinedStructuredMessageId ?? 
generateMessageId() this.combinedStructuredMessageId = messageId const synthStart: StreamChunk = { type: EventType.CUSTOM, From af17a2289da8c32e8adb08cc5f06d5e66962154c Mon Sep 17 00:00:00 2001 From: Tom Beckenham <34339192+tombeckenham@users.noreply.github.com> Date: Thu, 21 May 2026 18:32:48 +1000 Subject: [PATCH 03/21] feat(ts-react-chat): add Anthropic to structured-output streaming example MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds 'anthropic' as a selectable provider in the structured-output generation demo so users can see Claude 4.5+ streaming the schema-constrained JSON natively via the #605 combined-mode path (`output_format` + `tools` in one beta Messages call) alongside the existing OpenAI / Grok / Groq / OpenRouter options. Only Claude 4.5+ models are listed because older Claude models still fall back to the non-streaming forced-tool-use workaround. ๐Ÿค– Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- .../src/routes/api.structured-output.ts | 24 +++++++++++++++++++ .../routes/generations.structured-output.tsx | 15 ++++++++++++ 2 files changed, 39 insertions(+) diff --git a/examples/ts-react-chat/src/routes/api.structured-output.ts b/examples/ts-react-chat/src/routes/api.structured-output.ts index cba72ffdb..ebb38d97e 100644 --- a/examples/ts-react-chat/src/routes/api.structured-output.ts +++ b/examples/ts-react-chat/src/routes/api.structured-output.ts @@ -1,6 +1,7 @@ import { createFileRoute } from '@tanstack/react-router' import { chat, toServerSentEventsResponse } from '@tanstack/ai' import { openaiChatCompletions, openaiText } from '@tanstack/ai-openai' +import { anthropicText } from '@tanstack/ai-anthropic' import { grokText } from '@tanstack/ai-grok' import { groqText } from '@tanstack/ai-groq' import { @@ -78,6 +79,7 @@ const GuitarRecommendationSchema = z.object({ type Provider = | 'openai' | 'openai-chat' + | 'anthropic' | 'grok' | 'groq' | 'openrouter' 
@@ -89,6 +91,7 @@ const StructuredOutputRequestSchema = z.object({ .enum([ 'openai', 'openai-chat', + 'anthropic', 'grok', 'groq', 'openrouter', @@ -108,6 +111,14 @@ function adapterFor(provider: Provider, model?: string): AnyTextAdapter { // `/v1/chat/completions`. Useful for side-by-side comparison of // streaming structured output across the two OpenAI wire formats. return openaiChatCompletions((model || 'gpt-4o') as 'gpt-4o') + case 'anthropic': + // Claude 4.5+ supports native combined tools + schema-constrained + // streaming (#605) via `output_format` on the beta Messages endpoint. + // Earlier models fall back to the forced-tool-use workaround in + // `structuredOutput` (no real streaming). + return anthropicText( + (model || 'claude-sonnet-4-5') as 'claude-sonnet-4-5', + ) case 'grok': return grokText( (model || 'grok-4-1-fast-reasoning') as 'grok-4-1-fast-reasoning', @@ -155,6 +166,19 @@ function reasoningOptionsFor( // Responses does. Reasoning models still reason silently; no opt-in // option to inject here. return undefined + case 'anthropic': + // Claude 4.5+ extended thinking surfaces via REASONING_* events when + // enabled. budget_tokens is in addition to max_tokens, so keep it + // modest for the demo. Older Claude models (e.g. 3-5-haiku) reject + // the field — caller should drop this case there. + if ( + model?.startsWith('claude-opus-4-') || + model?.startsWith('claude-sonnet-4-') || + model?.startsWith('claude-haiku-4-') + ) { + return { thinking: { type: 'enabled', budget_tokens: 1024 } } + } + return undefined case 'groq': // Groq's Chat Completions only streams `delta.reasoning` when // `reasoning_format: 'parsed'`. 
Required for gpt-oss / qwen3 / kimi-k2 diff --git a/examples/ts-react-chat/src/routes/generations.structured-output.tsx b/examples/ts-react-chat/src/routes/generations.structured-output.tsx index e0155eb06..4613a7e13 100644 --- a/examples/ts-react-chat/src/routes/generations.structured-output.tsx +++ b/examples/ts-react-chat/src/routes/generations.structured-output.tsx @@ -8,6 +8,7 @@ const SAMPLE_PROMPT = type Provider = | 'openai' | 'openai-chat' + | 'anthropic' | 'grok' | 'groq' | 'openrouter' @@ -36,6 +37,19 @@ const PROVIDER_MODELS: Record< { value: 'gpt-5.1', label: 'GPT-5.1' }, { value: 'gpt-5.2', label: 'GPT-5.2 (frontier)' }, ], + // Anthropic: Claude 4.5+ models stream the schema-constrained JSON + // natively via the #605 combined-mode path (`output_format` + `tools` in + // one beta Messages call). Older models would fall back to the + // forced-tool-use workaround in `structuredOutput` (no real streaming), + // so they're omitted here. + anthropic: [ + { value: 'claude-sonnet-4-5', label: 'Claude Sonnet 4.5' }, + { value: 'claude-sonnet-4-6', label: 'Claude Sonnet 4.6' }, + { value: 'claude-opus-4-5', label: 'Claude Opus 4.5' }, + { value: 'claude-opus-4-6', label: 'Claude Opus 4.6' }, + { value: 'claude-opus-4-7', label: 'Claude Opus 4.7' }, + { value: 'claude-haiku-4-5', label: 'Claude Haiku 4.5' }, + ], grok: [ { value: 'grok-4-1-fast-reasoning', label: 'Grok 4.1 Fast (reasoning)' }, { @@ -350,6 +364,7 @@ function StructuredOutputPage() { > +