Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ dependencies = [
[project.optional-dependencies]
# ------------ A2A protocol ------------
a2a = [
"a2a-sdk",
"a2a-sdk==0.3.26",
"httpx",
# TODO: split the card resolvers from the a2a dependency
"nacos-sdk-python>=3.0.0",
Expand Down
54 changes: 52 additions & 2 deletions src/agentscope/model/_openai_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -269,6 +269,8 @@ async def __call__(
start_datetime = datetime.now()

if structured_model:
import openai

if tools or tool_choice:
logger.warning(
"structured_model is provided. Both 'tools' and "
Expand Down Expand Up @@ -299,12 +301,13 @@ async def __call__(
response = self.client.chat.completions.stream(
**kwargs,
)
return self._parse_openai_stream_response(
return self._structured_stream_with_fallback(
start_datetime,
response,
structured_model,
kwargs,
)
except Exception as e:
except openai.BadRequestError as e:
logger.warning(
"response_format structured output failed (%s: %s), "
"falling back to tool-call based structured output. "
Expand Down Expand Up @@ -677,6 +680,53 @@ def _parse_openai_completion_response(

return ChatResponse(**resp_kwargs)

async def _structured_stream_with_fallback(
    self,
    start_datetime: datetime,
    response: Any,
    structured_model: Type[BaseModel],
    kwargs: dict,
) -> AsyncGenerator[ChatResponse, None]:
    """Relay a streamed ``response_format`` attempt, with a fallback.

    ``client.chat.completions.stream()`` defers the HTTP request until
    the stream is consumed, so a backend that rejects
    ``response_format`` (e.g. DeepSeek) raises only while the chunks are
    being iterated -- after ``__call__`` has already returned and left
    its own try/except. Without this wrapper the
    ``_structured_output_fallback`` flag would therefore never be set.

    Chunks from the primary stream are forwarded unchanged. If
    ``openai.BadRequestError`` is raised during consumption, a warning
    is logged, ``_structured_output_fallback`` is set to ``True`` and
    the output of ``_structured_via_tool_call`` is yielded instead.

    Args:
        start_datetime: Start time handed to the stream parser.
        response: The object returned by
            ``client.chat.completions.stream(**kwargs)``.
        structured_model: Pydantic model describing the expected
            structured output.
        kwargs: Request keyword arguments, forwarded to
            ``_structured_via_tool_call`` when falling back.

    Yields:
        ``ChatResponse`` objects from the primary stream, or from the
        tool-call fallback once the primary attempt has been rejected.
    """
    import openai

    try:
        # The parser is created inside the ``try`` so that anything it
        # raises, on creation or on iteration, is covered by the handler.
        primary_stream = self._parse_openai_stream_response(
            start_datetime,
            response,
            structured_model,
        )
        async for item in primary_stream:
            yield item
    except openai.BadRequestError as err:
        logger.warning(
            "response_format structured output failed during streaming "
            "(%s: %s), falling back to tool-call based structured "
            "output. Subsequent calls will use tool-call directly.",
            type(err).__name__,
            err,
        )
        # Remember the rejection before retrying.
        self._structured_output_fallback = True

        # The fallback is timed from its own start, not from the
        # original (failed) request.
        retry_started = datetime.now()
        fallback_result = await self._structured_via_tool_call(
            kwargs,
            structured_model,
            retry_started,
        )

        # The fallback may hand back one response or a stream of them.
        if not isinstance(fallback_result, AsyncGenerator):
            yield fallback_result
            return
        async for item in fallback_result:
            yield item

async def _structured_via_tool_call(
self,
kwargs: dict,
Expand Down
Loading