From 42807329348945e2ddba576dfe9b68822a64a09f Mon Sep 17 00:00:00 2001 From: Chris Read Date: Mon, 9 Mar 2026 19:46:52 -0700 Subject: [PATCH 01/11] feat: support custom model base URL for OpenAI-compatible providers Thread modelBaseURL from x-model-base-url header through to V3 options, enabling providers like ZhipuAI, Ollama, and other OpenAI-compatible endpoints. Uses Chat Completions API (not Responses API) when a custom baseURL is set, and adds robust response coercion for models without native structured output support. --- packages/core/lib/v3/llm/LLMProvider.ts | 6 ++ packages/core/lib/v3/llm/aisdk.ts | 93 +++++++++++++++---- .../server-v3/src/lib/InMemorySessionStore.ts | 1 + packages/server-v3/src/lib/SessionStore.ts | 2 + packages/server-v3/src/lib/header.ts | 17 ++++ packages/server-v3/src/lib/stream.ts | 3 + .../server-v3/src/routes/v1/sessions/start.ts | 9 +- 7 files changed, 112 insertions(+), 19 deletions(-) diff --git a/packages/core/lib/v3/llm/LLMProvider.ts b/packages/core/lib/v3/llm/LLMProvider.ts index 11986d3cb2..0d02d794bc 100644 --- a/packages/core/lib/v3/llm/LLMProvider.ts +++ b/packages/core/lib/v3/llm/LLMProvider.ts @@ -117,6 +117,12 @@ export function getAISDKLanguageModel( ); } const provider = creator(clientOptions); + // When a custom baseURL is set, use the chat completions API instead of + // the Responses API, since custom endpoints (e.g. ZhipuAI, Ollama) are + // OpenAI-compatible but don't support the Responses API. + if (subProvider === "openai" && clientOptions?.baseURL) { + return (provider as ReturnType).chat(subModelName); + } // Get the specific model from the provider return provider(subModelName); } else { diff --git a/packages/core/lib/v3/llm/aisdk.ts b/packages/core/lib/v3/llm/aisdk.ts index 6db3058fc9..d45e05603b 100644 --- a/packages/core/lib/v3/llm/aisdk.ts +++ b/packages/core/lib/v3/llm/aisdk.ts @@ -173,24 +173,83 @@ You must respond in JSON format. respond WITH JSON. Do not include any other tex } try { - objectResponse = await generateObject({ - model: this.model, - messages: formattedMessages, - schema: options.response_model.schema, - temperature, - providerOptions: isGPT5 - ? { - openai: { - textVerbosity: isCodex ? "medium" : "low", // codex models only support 'medium' - reasoningEffort: isCodex - ? "medium" - : usesLowReasoningEffort - ? "low" - : "minimal", - }, + if (needsPromptJsonFallback) { + // For models without native structured output support, use + // "no-schema" mode so the AI SDK doesn't send response_format. + // The prompt-based JSON instruction above guides the model instead. + const noSchemaResponse = await generateObject({ + model: this.model, + messages: formattedMessages, + output: "no-schema", + temperature, + }); + // Coerce the free-form response to match the expected schema. + // Models without structured output support may return stringified + // nested values (e.g. "[]" instead of []) or omit fields entirely. + const raw = noSchemaResponse.object as Record; + for (const [k, v] of Object.entries(raw)) { + if (typeof v === "string") { + try { + raw[k] = JSON.parse(v); + } catch { + // keep as string } - : undefined, - }); + } + } + + // First attempt: parse as-is. On failure, patch common issues + // (missing/stringified arrays) and retry. + let parsed: unknown; + const firstTry = options.response_model.schema.safeParse(raw); + if (firstTry.success) { + parsed = firstTry.data; + } else { + for (const issue of firstTry.error.issues) { + if ( + issue.code === "invalid_type" && + issue.expected === "array" && + issue.path.length === 1 + ) { + const key = issue.path[0] as string; + const val = raw[key]; + if (val === undefined || val === null) { + raw[key] = []; + } else if (typeof val === "string") { + try { + raw[key] = JSON.parse(val); + } catch { + raw[key] = []; + } + } + } + } + parsed = options.response_model.schema.parse(raw); + } + + objectResponse = { + ...noSchemaResponse, + object: parsed, + }; + } else { + objectResponse = await generateObject({ + model: this.model, + messages: formattedMessages, + schema: options.response_model.schema, + temperature, + providerOptions: isGPT5 + ? { + openai: { + textVerbosity: isCodex ? "medium" : "low", + reasoningEffort: isCodex + ? "medium" + : usesLowReasoningEffort + ? "low" + : "minimal", + }, + } + : undefined, + }); + } } catch (err) { // Log error response to maintain request/response pairing SessionFileLogger.logLlmResponse({ diff --git a/packages/server-v3/src/lib/InMemorySessionStore.ts b/packages/server-v3/src/lib/InMemorySessionStore.ts index 5c8be3e866..8d9c2dc424 100644 --- a/packages/server-v3/src/lib/InMemorySessionStore.ts +++ b/packages/server-v3/src/lib/InMemorySessionStore.ts @@ -211,6 +211,7 @@ export class InMemorySessionStore implements SessionStore { model: { modelName: params.modelName, apiKey: ctx.modelApiKey, + baseURL: ctx.modelBaseURL, }, verbose: params.verbose, systemPrompt: params.systemPrompt, diff --git a/packages/server-v3/src/lib/SessionStore.ts b/packages/server-v3/src/lib/SessionStore.ts index 387cb856fd..a042d094e3 100644 --- a/packages/server-v3/src/lib/SessionStore.ts +++ b/packages/server-v3/src/lib/SessionStore.ts @@ -67,6 +67,8 @@ export interface CreateSessionParams { export interface RequestContext { /** Model API key (from x-model-api-key header) */ modelApiKey?: string; + /** Model base URL override (from x-model-base-url header) */ + modelBaseURL?: string; /** Logger function for this request */ logger?: (message: LogLine) => void; } diff --git a/packages/server-v3/src/lib/header.ts b/packages/server-v3/src/lib/header.ts index daf1f6b629..c7f2b42248 100644 --- a/packages/server-v3/src/lib/header.ts +++ b/packages/server-v3/src/lib/header.ts @@ -76,6 +76,23 @@ export function getModelApiKey(request: FastifyRequest): string | undefined { return getOptionalHeader(request, "x-model-api-key"); } +/** + * Extracts the model base URL with precedence: + * 1. Per-request body baseURL (V3: body.options.model.baseURL) + * 2. Per-request header x-model-base-url + */ +export function getModelBaseURL(request: FastifyRequest): string | undefined { + const body = request.body as Record | undefined; + const options = body?.options as Record | undefined; + const model = options?.model as Record | undefined; + + if (typeof model?.baseURL === "string" && model.baseURL) { + return model.baseURL; + } + + return getOptionalHeader(request, "x-model-base-url"); +} + /** * Extracts the stream response value from either the request header or body. * Body parameter takes precedence over header. diff --git a/packages/server-v3/src/lib/stream.ts b/packages/server-v3/src/lib/stream.ts index 4866e41f12..a451101b43 100644 --- a/packages/server-v3/src/lib/stream.ts +++ b/packages/server-v3/src/lib/stream.ts @@ -7,6 +7,7 @@ import { z } from "zod/v4"; import { AppError } from "./errorHandler.js"; import { getModelApiKey, + getModelBaseURL, getOptionalHeader, shouldRespondWithSSE, } from "./header.js"; @@ -36,6 +37,7 @@ export async function createStreamingResponse({ }: StreamingResponseOptions) { const shouldStreamResponse = shouldRespondWithSSE(request); const modelApiKey = getModelApiKey(request); + const modelBaseURL = getModelBaseURL(request); const sessionStore = getSessionStore(); const sessionConfig = await sessionStore.getSessionConfig(sessionId); @@ -117,6 +119,7 @@ export async function createStreamingResponse({ const requestContext: RequestContext = { modelApiKey, + modelBaseURL, logger: shouldStreamResponse ? (message) => { sendData("log", { status: "running", message }); diff --git a/packages/server-v3/src/routes/v1/sessions/start.ts b/packages/server-v3/src/routes/v1/sessions/start.ts index 6aa0221751..f55843eab8 100644 --- a/packages/server-v3/src/routes/v1/sessions/start.ts +++ b/packages/server-v3/src/routes/v1/sessions/start.ts @@ -8,7 +8,11 @@ import { z } from "zod/v4"; import { authMiddleware } from "../../../lib/auth.js"; import { withErrorHandling } from "../../../lib/errorHandler.js"; -import { getModelApiKey, getOptionalHeader } from "../../../lib/header.js"; +import { + getModelApiKey, + getModelBaseURL, + getOptionalHeader, +} from "../../../lib/header.js"; import { error, success } from "../../../lib/response.js"; import { getSessionStore } from "../../../lib/sessionStoreManager.js"; import { AISDK_PROVIDERS } from "../../../types/model.js"; @@ -205,10 +209,11 @@ const startRouteHandler: RouteHandler = withErrorHandling( let finalCdpUrl = connectUrl ?? session.cdpUrl ?? ""; if (browserType === "local" && browser?.launchOptions && !browser?.cdpUrl) { const modelApiKey = getModelApiKey(request); + const modelBaseURL = getModelBaseURL(request); try { const stagehand = await sessionStore.getOrCreateStagehand( session.sessionId, - { modelApiKey }, + { modelApiKey, modelBaseURL }, ); finalCdpUrl = stagehand.connectURL(); } catch (err) { From cf1290dcfa34c138ec25eb303cee4d57f5c3355a Mon Sep 17 00:00:00 2001 From: Chris Read Date: Mon, 9 Mar 2026 21:23:10 -0700 Subject: [PATCH 02/11] feat: add chatcompletions provider for OpenAI-compatible endpoints Adds "chatcompletions" as a generic provider that uses the Chat Completions API (/chat/completions) instead of the Responses API, for endpoints like ZhipuAI and Ollama. Also simplifies response coercion for models without native structured output support. --- packages/core/lib/v3/llm/LLMProvider.ts | 10 ++++++---- packages/core/lib/v3/llm/aisdk.ts | 16 +++------------- packages/server-v3/src/types/model.ts | 1 + 3 files changed, 10 insertions(+), 17 deletions(-) diff --git a/packages/core/lib/v3/llm/LLMProvider.ts b/packages/core/lib/v3/llm/LLMProvider.ts index 0d02d794bc..d33c709278 100644 --- a/packages/core/lib/v3/llm/LLMProvider.ts +++ b/packages/core/lib/v3/llm/LLMProvider.ts @@ -50,6 +50,7 @@ const AISDKProviders: Record = { ollama, vertex, gateway, + chatcompletions: openai, }; const AISDKProvidersWithAPIKey: Record = { openai: createOpenAI, @@ -67,6 +68,7 @@ const AISDKProvidersWithAPIKey: Record = { perplexity: createPerplexity, ollama: createOllama, gateway: createGateway, + chatcompletions: createOpenAI, }; const modelToProviderMap: { [key in AvailableModel]: ModelProvider } = { @@ -117,10 +119,10 @@ export function getAISDKLanguageModel( ); } const provider = creator(clientOptions); - // When a custom baseURL is set, use the chat completions API instead of - // the Responses API, since custom endpoints (e.g. ZhipuAI, Ollama) are - // OpenAI-compatible but don't support the Responses API. - if (subProvider === "openai" && clientOptions?.baseURL) { + // "chatcompletions" and "zhipuai" use the Chat Completions API + // (/chat/completions) instead of the Responses API (/responses), + // for OpenAI-compatible endpoints that don't support /responses. + if (subProvider === "chatcompletions") { return (provider as ReturnType).chat(subModelName); } // Get the specific model from the provider diff --git a/packages/core/lib/v3/llm/aisdk.ts b/packages/core/lib/v3/llm/aisdk.ts index d45e05603b..cb04e27399 100644 --- a/packages/core/lib/v3/llm/aisdk.ts +++ b/packages/core/lib/v3/llm/aisdk.ts @@ -197,8 +197,8 @@ You must respond in JSON format. respond WITH JSON. Do not include any other tex } } - // First attempt: parse as-is. On failure, patch common issues - // (missing/stringified arrays) and retry. + // Parse the coerced response. On failure, default missing/invalid + // array fields to [] and retry. let parsed: unknown; const firstTry = options.response_model.schema.safeParse(raw); if (firstTry.success) { @@ -210,17 +210,7 @@ You must respond in JSON format. respond WITH JSON. Do not include any other tex issue.expected === "array" && issue.path.length === 1 ) { - const key = issue.path[0] as string; - const val = raw[key]; - if (val === undefined || val === null) { - raw[key] = []; - } else if (typeof val === "string") { - try { - raw[key] = JSON.parse(val); - } catch { - raw[key] = []; - } - } + raw[issue.path[0] as string] = []; } } parsed = options.response_model.schema.parse(raw); diff --git a/packages/server-v3/src/types/model.ts b/packages/server-v3/src/types/model.ts index 491b699b6a..eb9409da43 100644 --- a/packages/server-v3/src/types/model.ts +++ b/packages/server-v3/src/types/model.ts @@ -13,6 +13,7 @@ export const AISDK_PROVIDERS = [ "ollama", "vertex", "bedrock", + "chatcompletions", ] as const; export type AISDKProvider = (typeof AISDK_PROVIDERS)[number]; From 87a58012af7cbc14d3b4dac5fe52cf96828ef84a Mon Sep 17 00:00:00 2001 From: Chris Read Date: Mon, 9 Mar 2026 21:24:41 -0700 Subject: [PATCH 03/11] fix: remove stale zhipuai reference from comment --- packages/core/lib/v3/llm/LLMProvider.ts | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/packages/core/lib/v3/llm/LLMProvider.ts b/packages/core/lib/v3/llm/LLMProvider.ts index d33c709278..bf09211c76 100644 --- a/packages/core/lib/v3/llm/LLMProvider.ts +++ b/packages/core/lib/v3/llm/LLMProvider.ts @@ -119,9 +119,9 @@ export function getAISDKLanguageModel( ); } const provider = creator(clientOptions); - // "chatcompletions" and "zhipuai" use the Chat Completions API - // (/chat/completions) instead of the Responses API (/responses), - // for OpenAI-compatible endpoints that don't support /responses. + // "chatcompletions" uses the Chat Completions API (/chat/completions) + // instead of the Responses API (/responses), for OpenAI-compatible + // endpoints that don't support /responses. if (subProvider === "chatcompletions") { return (provider as ReturnType).chat(subModelName); } From 2fd62863eaf04c24cc44d3d8348bf3c5b382a200 Mon Sep 17 00:00:00 2001 From: Chris Read Date: Mon, 9 Mar 2026 21:47:31 -0700 Subject: [PATCH 04/11] fix: use schema-first fallback for chatcompletions no-schema path Try structured output (schema:) first for all models. Only fall back to no-schema + response coercion when the call fails and the model matches a known fallback pattern. This avoids degrading DeepSeek/Kimi which already work with schema:. --- .changeset/chatcompletions-provider.md | 8 ++ packages/core/lib/v3/llm/aisdk.ts | 137 ++++++++++++------------- 2 files changed, 73 insertions(+), 72 deletions(-) create mode 100644 .changeset/chatcompletions-provider.md diff --git a/.changeset/chatcompletions-provider.md b/.changeset/chatcompletions-provider.md new file mode 100644 index 0000000000..acb8bb832a --- /dev/null +++ b/.changeset/chatcompletions-provider.md @@ -0,0 +1,8 @@ +--- +"@browserbasehq/stagehand": minor +"@browserbasehq/stagehand-server-v3": minor +--- + +feat: add `chatcompletions` provider prefix and `modelBaseURL` support for OpenAI-compatible endpoints + +Adds a `chatcompletions/` model name prefix that forces the Chat Completions API (`/chat/completions`) instead of the Responses API (`/responses`), enabling support for OpenAI-compatible providers like ZhipuAI GLM. Also threads `modelBaseURL` through the server so SDKs can point to custom LLM endpoints. diff --git a/packages/core/lib/v3/llm/aisdk.ts b/packages/core/lib/v3/llm/aisdk.ts index cb04e27399..8dde96fd77 100644 --- a/packages/core/lib/v3/llm/aisdk.ts +++ b/packages/core/lib/v3/llm/aisdk.ts @@ -173,19 +173,39 @@ You must respond in JSON format. respond WITH JSON. Do not include any other tex } try { + // Try structured output first. If the provider doesn't support + // response_format (e.g. chatcompletions/ endpoints), this will throw + // and we fall back to no-schema mode with response coercion below. + objectResponse = await generateObject({ + model: this.model, + messages: formattedMessages, + schema: options.response_model.schema, + temperature, + providerOptions: isGPT5 + ? { + openai: { + textVerbosity: isCodex ? "medium" : "low", + reasoningEffort: isCodex + ? "medium" + : usesLowReasoningEffort + ? "low" + : "minimal", + }, + } + : undefined, + }); + } catch (err) { + // For models whose modelId matches a known fallback pattern, retry + // with output: "no-schema" and coerce the free-form JSON response. + // This handles OpenAI-compatible endpoints (chatcompletions/) that + // don't support structured output / response_format. if (needsPromptJsonFallback) { - // For models without native structured output support, use - // "no-schema" mode so the AI SDK doesn't send response_format. - // The prompt-based JSON instruction above guides the model instead. const noSchemaResponse = await generateObject({ model: this.model, messages: formattedMessages, output: "no-schema", temperature, }); - // Coerce the free-form response to match the expected schema. - // Models without structured output support may return stringified - // nested values (e.g. "[]" instead of []) or omit fields entirely. const raw = noSchemaResponse.object as Record; for (const [k, v] of Object.entries(raw)) { if (typeof v === "string") { @@ -196,9 +216,6 @@ You must respond in JSON format. respond WITH JSON. Do not include any other tex } } } - - // Parse the coerced response. On failure, default missing/invalid - // array fields to [] and retry. let parsed: unknown; const firstTry = options.response_model.schema.safeParse(raw); if (firstTry.success) { @@ -215,76 +232,52 @@ You must respond in JSON format. respond WITH JSON. Do not include any other tex } parsed = options.response_model.schema.parse(raw); } - - objectResponse = { - ...noSchemaResponse, - object: parsed, - }; + objectResponse = { ...noSchemaResponse, object: parsed }; } else { - objectResponse = await generateObject({ - model: this.model, - messages: formattedMessages, - schema: options.response_model.schema, - temperature, - providerOptions: isGPT5 - ? { - openai: { - textVerbosity: isCodex ? "medium" : "low", - reasoningEffort: isCodex - ? "medium" - : usesLowReasoningEffort - ? "low" - : "minimal", - }, - } - : undefined, + // Log error response to maintain request/response pairing + SessionFileLogger.logLlmResponse({ + requestId: llmRequestId, + model: this.model.modelId, + operation: "generateObject", + output: `[error: ${err instanceof Error ? err.message : "unknown"}]`, }); - } - } catch (err) { - // Log error response to maintain request/response pairing - SessionFileLogger.logLlmResponse({ - requestId: llmRequestId, - model: this.model.modelId, - operation: "generateObject", - output: `[error: ${err instanceof Error ? err.message : "unknown"}]`, - }); - if (NoObjectGeneratedError.isInstance(err)) { - this.logger?.({ - category: "AISDK error", - message: err.message, - level: 0, - auxiliary: { - cause: { - value: JSON.stringify(err.cause ?? {}), - type: "object", - }, - text: { - value: err.text ?? "", - type: "string", - }, - response: { - value: JSON.stringify(err.response ?? {}), - type: "object", - }, - usage: { - value: JSON.stringify(err.usage ?? {}), - type: "object", + if (NoObjectGeneratedError.isInstance(err)) { + this.logger?.({ + category: "AISDK error", + message: err.message, + level: 0, + auxiliary: { + cause: { + value: JSON.stringify(err.cause ?? {}), + type: "object", + }, + text: { + value: err.text ?? "", + type: "string", + }, + response: { + value: JSON.stringify(err.response ?? {}), + type: "object", + }, + usage: { + value: JSON.stringify(err.usage ?? {}), + type: "object", + }, + finishReason: { + value: err.finishReason ?? "unknown", + type: "string", + }, + requestId: { + value: options.requestId, + type: "string", + }, }, - finishReason: { - value: err.finishReason ?? "unknown", - type: "string", - }, - requestId: { - value: options.requestId, - type: "string", - }, - }, - }); + }); + } throw err; } - throw err; } const result = { From d2841c13d7c38871c870eb92b326c1af26bee48b Mon Sep 17 00:00:00 2001 From: Chris Read Date: Mon, 9 Mar 2026 21:52:39 -0700 Subject: [PATCH 05/11] fix: add comments to no-schema fallback pipeline, restore codex comment --- packages/core/lib/v3/llm/aisdk.ts | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/packages/core/lib/v3/llm/aisdk.ts b/packages/core/lib/v3/llm/aisdk.ts index 8dde96fd77..873914df22 100644 --- a/packages/core/lib/v3/llm/aisdk.ts +++ b/packages/core/lib/v3/llm/aisdk.ts @@ -184,7 +184,7 @@ You must respond in JSON format. respond WITH JSON. Do not include any other tex providerOptions: isGPT5 ? { openai: { - textVerbosity: isCodex ? "medium" : "low", + textVerbosity: isCodex ? "medium" : "low", // codex models only support 'medium' reasoningEffort: isCodex ? "medium" : usesLowReasoningEffort @@ -195,17 +195,18 @@ You must respond in JSON format. respond WITH JSON. Do not include any other tex : undefined, }); } catch (err) { - // For models whose modelId matches a known fallback pattern, retry - // with output: "no-schema" and coerce the free-form JSON response. - // This handles OpenAI-compatible endpoints (chatcompletions/) that + // Fallback for OpenAI-compatible endpoints (chatcompletions/) that // don't support structured output / response_format. + // Pipeline: call LLM → fix strings → fix missing arrays → validate if (needsPromptJsonFallback) { + // 1. Call LLM without schema (prompt instruction guides JSON output) const noSchemaResponse = await generateObject({ model: this.model, messages: formattedMessages, output: "no-schema", temperature, }); + // 2. Fix strings — models may return "[]" instead of [] const raw = noSchemaResponse.object as Record; for (const [k, v] of Object.entries(raw)) { if (typeof v === "string") { @@ -216,6 +217,7 @@ You must respond in JSON format. respond WITH JSON. Do not include any other tex } } } + // 3. Fix missing arrays — models may omit empty array fields entirely let parsed: unknown; const firstTry = options.response_model.schema.safeParse(raw); if (firstTry.success) { @@ -230,6 +232,7 @@ You must respond in JSON format. respond WITH JSON. Do not include any other tex raw[issue.path[0] as string] = []; } } + // 4. Validate against schema parsed = options.response_model.schema.parse(raw); } objectResponse = { ...noSchemaResponse, object: parsed }; From 531658382b78cb14075578a628eab2e8605ca29b Mon Sep 17 00:00:00 2001 From: Chris Read Date: Mon, 9 Mar 2026 22:14:41 -0700 Subject: [PATCH 06/11] fix: skip wasted schema call for chatcompletions, unify .chat() handling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Skip schema attempt for chatcompletions/ models (provider: openai.chat) since they can't do structured output — avoids a wasted LLM call per extract - Unify .chat() handling in getAISDKLanguageModel so chatcompletions/ works regardless of whether clientOptions are provided - Guard second schema.parse() with safeParse + descriptive error message --- packages/core/lib/v3/llm/LLMProvider.ts | 22 +-- packages/core/lib/v3/llm/aisdk.ts | 210 +++++++++++++----------- 2 files changed, 123 insertions(+), 109 deletions(-) diff --git a/packages/core/lib/v3/llm/LLMProvider.ts b/packages/core/lib/v3/llm/LLMProvider.ts index bf09211c76..402cb6c3cb 100644 --- a/packages/core/lib/v3/llm/LLMProvider.ts +++ b/packages/core/lib/v3/llm/LLMProvider.ts @@ -110,6 +110,7 @@ export function getAISDKLanguageModel( clientOptions && Object.values(clientOptions).some((v) => v !== undefined && v !== null); + let provider; if (hasValidOptions) { const creator = AISDKProvidersWithAPIKey[subProvider]; if (!creator) { @@ -118,25 +119,24 @@ export function getAISDKLanguageModel( Object.keys(AISDKProvidersWithAPIKey), ); } - const provider = creator(clientOptions); - // "chatcompletions" uses the Chat Completions API (/chat/completions) - // instead of the Responses API (/responses), for OpenAI-compatible - // endpoints that don't support /responses. - if (subProvider === "chatcompletions") { - return (provider as ReturnType).chat(subModelName); - } - // Get the specific model from the provider - return provider(subModelName); + provider = creator(clientOptions); } else { - const provider = AISDKProviders[subProvider]; + provider = AISDKProviders[subProvider]; if (!provider) { throw new UnsupportedAISDKModelProviderError( subProvider, Object.keys(AISDKProviders), ); } - return provider(subModelName); } + + // "chatcompletions" uses the Chat Completions API (/chat/completions) + // instead of the Responses API (/responses), for OpenAI-compatible + // endpoints that don't support /responses. + if (subProvider === "chatcompletions") { + return (provider as ReturnType).chat(subModelName); + } + return provider(subModelName); } export class LLMProvider { diff --git a/packages/core/lib/v3/llm/aisdk.ts b/packages/core/lib/v3/llm/aisdk.ts index 873914df22..c5108fecd8 100644 --- a/packages/core/lib/v3/llm/aisdk.ts +++ b/packages/core/lib/v3/llm/aisdk.ts @@ -172,115 +172,129 @@ You must respond in JSON format. respond WITH JSON. Do not include any other tex }); } - try { - // Try structured output first. If the provider doesn't support - // response_format (e.g. chatcompletions/ endpoints), this will throw - // and we fall back to no-schema mode with response coercion below. - objectResponse = await generateObject({ - model: this.model, - messages: formattedMessages, - schema: options.response_model.schema, - temperature, - providerOptions: isGPT5 - ? { - openai: { - textVerbosity: isCodex ? "medium" : "low", // codex models only support 'medium' - reasoningEffort: isCodex - ? "medium" - : usesLowReasoningEffort - ? "low" - : "minimal", - }, - } - : undefined, - }); - } catch (err) { - // Fallback for OpenAI-compatible endpoints (chatcompletions/) that - // don't support structured output / response_format. - // Pipeline: call LLM → fix strings → fix missing arrays → validate - if (needsPromptJsonFallback) { - // 1. Call LLM without schema (prompt instruction guides JSON output) - const noSchemaResponse = await generateObject({ + // chatcompletions/ models (provider: "openai.chat") can't do structured + // output — skip schema entirely to avoid a wasted LLM call. + // Other fallback models (deepseek, kimi) succeed with schema. + let useNoSchema = + needsPromptJsonFallback && this.model.provider === "openai.chat"; + + if (!useNoSchema) { + try { + objectResponse = await generateObject({ model: this.model, messages: formattedMessages, - output: "no-schema", + schema: options.response_model.schema, temperature, + providerOptions: isGPT5 + ? { + openai: { + textVerbosity: isCodex ? "medium" : "low", // codex models only support 'medium' + reasoningEffort: isCodex + ? "medium" + : usesLowReasoningEffort + ? "low" + : "minimal", + }, + } + : undefined, }); - // 2. Fix strings — models may return "[]" instead of [] - const raw = noSchemaResponse.object as Record; - for (const [k, v] of Object.entries(raw)) { - if (typeof v === "string") { - try { - raw[k] = JSON.parse(v); - } catch { - // keep as string - } + } catch (err) { + if (needsPromptJsonFallback) { + useNoSchema = true; + } else { + // Log error response to maintain request/response pairing + SessionFileLogger.logLlmResponse({ + requestId: llmRequestId, + model: this.model.modelId, + operation: "generateObject", + output: `[error: ${err instanceof Error ? err.message : "unknown"}]`, + }); + + if (NoObjectGeneratedError.isInstance(err)) { + this.logger?.({ + category: "AISDK error", + message: err.message, + level: 0, + auxiliary: { + cause: { + value: JSON.stringify(err.cause ?? {}), + type: "object", + }, + text: { + value: err.text ?? "", + type: "string", + }, + response: { + value: JSON.stringify(err.response ?? {}), + type: "object", + }, + usage: { + value: JSON.stringify(err.usage ?? {}), + type: "object", + }, + finishReason: { + value: err.finishReason ?? "unknown", + type: "string", + }, + requestId: { + value: options.requestId, + type: "string", + }, + }, + }); } + + throw err; } - // 3. Fix missing arrays — models may omit empty array fields entirely - let parsed: unknown; - const firstTry = options.response_model.schema.safeParse(raw); - if (firstTry.success) { - parsed = firstTry.data; - } else { - for (const issue of firstTry.error.issues) { - if ( - issue.code === "invalid_type" && - issue.expected === "array" && - issue.path.length === 1 - ) { - raw[issue.path[0] as string] = []; - } + } + } + + // No-schema fallback for models that can't do structured output. + // Pipeline: call LLM → fix strings → fix missing arrays → validate + if (useNoSchema) { + // 1. Call LLM without schema (prompt instruction guides JSON output) + const noSchemaResponse = await generateObject({ + model: this.model, + messages: formattedMessages, + output: "no-schema", + temperature, + }); + // 2. Fix strings — models may return "[]" instead of [] + const raw = noSchemaResponse.object as Record; + for (const [k, v] of Object.entries(raw)) { + if (typeof v === "string") { + try { + raw[k] = JSON.parse(v); + } catch { + // keep as string } - // 4. Validate against schema - parsed = options.response_model.schema.parse(raw); } - objectResponse = { ...noSchemaResponse, object: parsed }; + } + // 3. Fix missing arrays — models may omit empty array fields entirely + let parsed: unknown; + const firstTry = options.response_model.schema.safeParse(raw); + if (firstTry.success) { + parsed = firstTry.data; } else { - // Log error response to maintain request/response pairing - SessionFileLogger.logLlmResponse({ - requestId: llmRequestId, - model: this.model.modelId, - operation: "generateObject", - output: `[error: ${err instanceof Error ? err.message : "unknown"}]`, - }); - - if (NoObjectGeneratedError.isInstance(err)) { - this.logger?.({ - category: "AISDK error", - message: err.message, - level: 0, - auxiliary: { - cause: { - value: JSON.stringify(err.cause ?? {}), - type: "object", - }, - text: { - value: err.text ?? "", - type: "string", - }, - response: { - value: JSON.stringify(err.response ?? {}), - type: "object", - }, - usage: { - value: JSON.stringify(err.usage ?? {}), - type: "object", - }, - finishReason: { - value: err.finishReason ?? "unknown", - type: "string", - }, - requestId: { - value: options.requestId, - type: "string", - }, - }, - }); + for (const issue of firstTry.error.issues) { + if ( + issue.code === "invalid_type" && + issue.expected === "array" && + issue.path.length === 1 + ) { + raw[issue.path[0] as string] = []; + } } - - throw err; + // 4. Validate against schema + const secondTry = options.response_model.schema.safeParse(raw); + if (!secondTry.success) { + throw new Error( + `Model response could not be coerced into the expected schema: ${secondTry.error.message}`, + ); + } + parsed = secondTry.data; } + objectResponse = { ...noSchemaResponse, object: parsed }; } const result = { From 6b75973eda6f26031593b4e043e85a79386a4fb6 Mon Sep 17 00:00:00 2001 From: Chris Read Date: Tue, 10 Mar 2026 10:16:50 -0700 Subject: [PATCH 07/11] feat: add x-model-base-url header across client SDK, server-v4, OpenAPI specs, and stainless config --- packages/core/lib/v3/api.ts | 6 ++++++ packages/core/lib/v3/types/public/api.ts | 7 +++++++ packages/core/lib/v3/v3.ts | 1 + packages/server-v3/openapi.v3.yaml | 6 ++++++ packages/server-v4/openapi.v4.yaml | 6 ++++++ .../server-v4/src/lib/InMemorySessionStore.ts | 1 + packages/server-v4/src/lib/SessionStore.ts | 2 ++ packages/server-v4/src/lib/header.ts | 17 +++++++++++++++++ packages/server-v4/src/lib/stream.ts | 3 +++ .../server-v4/src/routes/v4/sessions/start.ts | 9 +++++++-- stainless.yml | 5 +++++ 11 files changed, 61 insertions(+), 2 deletions(-) diff --git a/packages/core/lib/v3/api.ts b/packages/core/lib/v3/api.ts index 9fe2892082..541026f769 100644 --- a/packages/core/lib/v3/api.ts +++ b/packages/core/lib/v3/api.ts @@ -104,6 +104,8 @@ interface StagehandAPIConstructorParams { interface ClientSessionStartParams extends Api.SessionStartRequest { /** Model API key - sent via x-model-api-key header, not in request body */ modelApiKey: string; + /** Model base URL - sent via x-model-base-url header, not in request body */ + modelBaseURL?: string; } /** @@ -177,6 +179,7 @@ export class StagehandAPIClient { private projectId?: string; private sessionId?: string; private modelApiKey: string; + private modelBaseURL?: string; private modelProvider?: string; private region?: BrowserbaseRegion; private logger: (message: LogLine) => void; @@ -202,6 +205,7 @@ export class StagehandAPIClient { async init({ modelName, modelApiKey, + modelBaseURL, domSettleTimeoutMs, verbose, systemPrompt, @@ -214,6 +218,7 @@ export class StagehandAPIClient { throw new StagehandAPIError("modelApiKey is required"); } this.modelApiKey = modelApiKey; + this.modelBaseURL = modelBaseURL; // Extract provider from modelName (e.g., "openai/gpt-5-nano" -> "openai") this.modelProvider = modelName?.includes("/") ? modelName.split("/")[0] @@ -849,6 +854,7 @@ export class StagehandAPIClient { // we want real-time logs, so we stream the response "x-stream-response": "true", "x-model-api-key": this.modelApiKey, + ...(this.modelBaseURL ? { "x-model-base-url": this.modelBaseURL } : {}), "x-language": "typescript", "x-sdk-version": STAGEHAND_VERSION, }; diff --git a/packages/core/lib/v3/types/public/api.ts b/packages/core/lib/v3/types/public/api.ts index 33ac3f73a6..fad48dc068 100644 --- a/packages/core/lib/v3/types/public/api.ts +++ b/packages/core/lib/v3/types/public/api.ts @@ -947,6 +947,13 @@ export const openApiSecuritySchemes = { name: "x-model-api-key", description: "API key for the AI model provider (OpenAI, Anthropic, etc.)", }, + ModelBaseUrl: { + type: "apiKey", + in: "header", + name: "x-model-base-url", + description: + "Base URL override for the AI model provider (for OpenAI-compatible endpoints)", + }, } as const; /** OpenAPI links for session operations (used in SessionStart response) */ diff --git a/packages/core/lib/v3/v3.ts b/packages/core/lib/v3/v3.ts index cde254843b..109b82132f 100644 --- a/packages/core/lib/v3/v3.ts +++ b/packages/core/lib/v3/v3.ts @@ -931,6 +931,7 @@ export class V3 { const { sessionId, available } = await this.apiClient.init({ modelName: this.modelName, modelApiKey: this.modelClientOptions.apiKey, + modelBaseURL: this.modelClientOptions.baseURL, domSettleTimeoutMs: this.domSettleTimeoutMs, verbose: this.verbose, systemPrompt: this.opts.systemPrompt, diff --git a/packages/server-v3/openapi.v3.yaml b/packages/server-v3/openapi.v3.yaml index 98f26f365c..d26fced198 100644 --- a/packages/server-v3/openapi.v3.yaml +++ b/packages/server-v3/openapi.v3.yaml @@ -38,6 +38,11 @@ components: in: header name: x-model-api-key description: API key for the AI model provider (OpenAI, Anthropic, etc.) + ModelBaseUrl: + type: apiKey + in: header + name: x-model-base-url + description: Base URL override for the AI model provider (for OpenAI-compatible endpoints) links: SessionAct: operationId: SessionAct @@ -2155,3 +2160,4 @@ security: - BrowserbaseApiKey: [] BrowserbaseProjectId: [] ModelApiKey: [] + ModelBaseUrl: [] diff --git a/packages/server-v4/openapi.v4.yaml b/packages/server-v4/openapi.v4.yaml index b7f443f44d..0f5a2dd88b 100644 --- a/packages/server-v4/openapi.v4.yaml +++ b/packages/server-v4/openapi.v4.yaml @@ -38,6 +38,11 @@ components: in: header name: x-model-api-key description: API key for the AI model provider (OpenAI, Anthropic, etc.) + ModelBaseUrl: + type: apiKey + in: header + name: x-model-base-url + description: Base URL override for the AI model provider (for OpenAI-compatible endpoints) links: SessionAct: operationId: SessionAct @@ -2155,3 +2160,4 @@ security: - BrowserbaseApiKey: [] BrowserbaseProjectId: [] ModelApiKey: [] + ModelBaseUrl: [] diff --git a/packages/server-v4/src/lib/InMemorySessionStore.ts b/packages/server-v4/src/lib/InMemorySessionStore.ts index 5c8be3e866..8d9c2dc424 100644 --- a/packages/server-v4/src/lib/InMemorySessionStore.ts +++ b/packages/server-v4/src/lib/InMemorySessionStore.ts @@ -211,6 +211,7 @@ export class InMemorySessionStore implements SessionStore { model: { modelName: params.modelName, apiKey: ctx.modelApiKey, + baseURL: ctx.modelBaseURL, }, verbose: params.verbose, systemPrompt: params.systemPrompt, diff --git a/packages/server-v4/src/lib/SessionStore.ts b/packages/server-v4/src/lib/SessionStore.ts index 387cb856fd..a042d094e3 100644 --- a/packages/server-v4/src/lib/SessionStore.ts +++ b/packages/server-v4/src/lib/SessionStore.ts @@ -67,6 +67,8 @@ export interface CreateSessionParams { export interface RequestContext { /** Model API key (from x-model-api-key header) */ modelApiKey?: string; + /** Model base URL override (from x-model-base-url header) */ + modelBaseURL?: string; /** Logger function for this request */ logger?: (message: LogLine) => void; } diff --git a/packages/server-v4/src/lib/header.ts b/packages/server-v4/src/lib/header.ts index daf1f6b629..c7f2b42248 100644 --- a/packages/server-v4/src/lib/header.ts +++ b/packages/server-v4/src/lib/header.ts @@ -76,6 +76,23 @@ export function getModelApiKey(request: FastifyRequest): string | undefined { return getOptionalHeader(request, "x-model-api-key"); } +/** + * Extracts the model base URL with precedence: + * 1. Per-request body baseURL (V3: body.options.model.baseURL) + * 2. Per-request header x-model-base-url + */ +export function getModelBaseURL(request: FastifyRequest): string | undefined { + const body = request.body as Record | undefined; + const options = body?.options as Record | undefined; + const model = options?.model as Record | undefined; + + if (typeof model?.baseURL === "string" && model.baseURL) { + return model.baseURL; + } + + return getOptionalHeader(request, "x-model-base-url"); +} + /** * Extracts the stream response value from either the request header or body. * Body parameter takes precedence over header. diff --git a/packages/server-v4/src/lib/stream.ts b/packages/server-v4/src/lib/stream.ts index 4866e41f12..a451101b43 100644 --- a/packages/server-v4/src/lib/stream.ts +++ b/packages/server-v4/src/lib/stream.ts @@ -7,6 +7,7 @@ import { z } from "zod/v4"; import { AppError } from "./errorHandler.js"; import { getModelApiKey, + getModelBaseURL, getOptionalHeader, shouldRespondWithSSE, } from "./header.js"; @@ -36,6 +37,7 @@ export async function createStreamingResponse({ }: StreamingResponseOptions) { const shouldStreamResponse = shouldRespondWithSSE(request); const modelApiKey = getModelApiKey(request); + const modelBaseURL = getModelBaseURL(request); const sessionStore = getSessionStore(); const sessionConfig = await sessionStore.getSessionConfig(sessionId); @@ -117,6 +119,7 @@ export async function createStreamingResponse({ const requestContext: RequestContext = { modelApiKey, + modelBaseURL, logger: shouldStreamResponse ? (message) => { sendData("log", { status: "running", message }); diff --git a/packages/server-v4/src/routes/v4/sessions/start.ts b/packages/server-v4/src/routes/v4/sessions/start.ts index 2019af4567..1797eef219 100644 --- a/packages/server-v4/src/routes/v4/sessions/start.ts +++ b/packages/server-v4/src/routes/v4/sessions/start.ts @@ -8,7 +8,11 @@ import { z } from "zod/v4"; import { authMiddleware } from "../../../lib/auth.js"; import { withErrorHandling } from "../../../lib/errorHandler.js"; -import { getModelApiKey, getOptionalHeader } from "../../../lib/header.js"; +import { + getModelApiKey, + getModelBaseURL, + getOptionalHeader, +} from "../../../lib/header.js"; import { error, success } from "../../../lib/response.js"; import { getSessionStore } from "../../../lib/sessionStoreManager.js"; import { AISDK_PROVIDERS } from "../../../types/model.js"; @@ -205,10 +209,11 @@ const startRouteHandler: RouteHandler = withErrorHandling( let finalCdpUrl = connectUrl ?? session.cdpUrl ?? ""; if (browserType === "local" && browser?.launchOptions && !browser?.cdpUrl) { const modelApiKey = getModelApiKey(request); + const modelBaseURL = getModelBaseURL(request); try { const stagehand = await sessionStore.getOrCreateStagehand( session.sessionId, - { modelApiKey }, + { modelApiKey, modelBaseURL }, ); finalCdpUrl = stagehand.connectURL(); } catch (err) { diff --git a/stainless.yml b/stainless.yml index 0270ad4d4f..bd76941a9d 100644 --- a/stainless.yml +++ b/stainless.yml @@ -256,11 +256,16 @@ security_schemes: type: apiKey in: header name: x-model-api-key + LLMModelBaseUrlAuth: + type: apiKey + in: header + name: x-model-base-url security: - BBApiKeyAuth: [] BBProjectIdAuth: [] LLMModelApiKeyAuth: [] + LLMModelBaseUrlAuth: [] # `readme` is used to configure the code snippets that will be rendered in the # README.md of various SDKs. From 48b9335c0b8af66e71d968a1e48a3705296b736e Mon Sep 17 00:00:00 2001 From: Chris Read Date: Tue, 10 Mar 2026 10:17:53 -0700 Subject: [PATCH 08/11] chore: add server-v4 to changeset --- .changeset/chatcompletions-provider.md | 1 + 1 file changed, 1 insertion(+) diff --git a/.changeset/chatcompletions-provider.md b/.changeset/chatcompletions-provider.md index acb8bb832a..ea4eaeda9b 100644 --- a/.changeset/chatcompletions-provider.md +++ b/.changeset/chatcompletions-provider.md @@ -1,6 +1,7 @@ --- "@browserbasehq/stagehand": minor "@browserbasehq/stagehand-server-v3": minor +"@browserbasehq/stagehand-server-v4": minor --- feat: add `chatcompletions` provider prefix and `modelBaseURL` support for OpenAI-compatible endpoints From 6f01035f36fdfc596e00e0015eb5d0e4b3d2011d Mon Sep 17 00:00:00 2001 From: Chris Read Date: Tue, 10 Mar 2026 10:28:15 -0700 Subject: [PATCH 09/11] fix: remove ModelBaseUrl from required security schemes (it's optional) --- packages/server-v3/openapi.v3.yaml | 1 - packages/server-v4/openapi.v4.yaml | 1 - stainless.yml | 1 - 3 files changed, 3 deletions(-) diff --git a/packages/server-v3/openapi.v3.yaml b/packages/server-v3/openapi.v3.yaml index d26fced198..e41fd7e920 100644 --- a/packages/server-v3/openapi.v3.yaml +++ b/packages/server-v3/openapi.v3.yaml @@ -2160,4 +2160,3 @@ security: - BrowserbaseApiKey: [] BrowserbaseProjectId: [] ModelApiKey: [] - ModelBaseUrl: [] diff --git a/packages/server-v4/openapi.v4.yaml b/packages/server-v4/openapi.v4.yaml index 0f5a2dd88b..6fa619eab1 100644 --- a/packages/server-v4/openapi.v4.yaml +++ b/packages/server-v4/openapi.v4.yaml @@ -2160,4 +2160,3 @@ security: - BrowserbaseApiKey: [] BrowserbaseProjectId: [] ModelApiKey: [] - ModelBaseUrl: [] diff --git a/stainless.yml b/stainless.yml index bd76941a9d..56eaa366de 100644 --- a/stainless.yml +++ b/stainless.yml @@ -265,7 +265,6 @@ security: - BBApiKeyAuth: [] BBProjectIdAuth: [] LLMModelApiKeyAuth: [] - LLMModelBaseUrlAuth: [] # `readme` is used to configure the code snippets that will be rendered in the # README.md of various SDKs. From 9243cc51730feabb677e53bf1c5044b5b8f8925c Mon Sep 17 00:00:00 2001 From: Chris Read Date: Tue, 10 Mar 2026 10:29:37 -0700 Subject: [PATCH 10/11] fix: add MODEL_BASE_URL as nullable client opt so Stainless generates it in SDKs --- stainless.yml | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/stainless.yml b/stainless.yml index 56eaa366de..3055aa2c4b 100644 --- a/stainless.yml +++ b/stainless.yml @@ -242,6 +242,13 @@ client_settings: nullable: false auth: security_scheme: LLMModelApiKeyAuth + MODEL_BASE_URL: + type: string + read_env: MODEL_BASE_URL + description: Base URL override for the AI model provider (for OpenAI-compatible endpoints) + nullable: true + auth: + security_scheme: LLMModelBaseUrlAuth security_schemes: BBApiKeyAuth: From e19a7e0bde0de8d8e6489940ccabac0e44d2f9ba Mon Sep 17 00:00:00 2001 From: Chris Read Date: Tue, 10 Mar 2026 10:56:46 -0700 Subject: [PATCH 11/11] chore: update changeset to reflect full modelBaseURL scope --- .changeset/chatcompletions-provider.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.changeset/chatcompletions-provider.md b/.changeset/chatcompletions-provider.md index ea4eaeda9b..0fa07cfcd0 100644 --- a/.changeset/chatcompletions-provider.md +++ b/.changeset/chatcompletions-provider.md @@ -6,4 +6,4 @@ feat: add `chatcompletions` provider prefix and `modelBaseURL` support for OpenAI-compatible endpoints -Adds a `chatcompletions/` model name prefix that forces the Chat Completions API (`/chat/completions`) instead of the Responses API (`/responses`), enabling support for OpenAI-compatible providers like ZhipuAI GLM. Also threads `modelBaseURL` through the server so SDKs can point to custom LLM endpoints. +Adds a `chatcompletions/` model name prefix that forces the Chat Completions API (`/chat/completions`) instead of the Responses API (`/responses`), enabling support for OpenAI-compatible providers like ZhipuAI GLM. Also adds `modelBaseURL` support end-to-end: client SDK sends `x-model-base-url` header, both server-v3 and server-v4 extract and thread it, and Stainless generates it as an optional parameter across all language SDKs.