From 9b18edbd6d7f765408262d7aae2cc2964fb58065 Mon Sep 17 00:00:00 2001
From: qbc <qianbingchen.qbc@alibaba-inc.com>
Date: Mon, 27 Apr 2026 11:52:10 +0800
Subject: [PATCH] fix(openai): catch response_format errors during streaming to
 enable fallback (#1527)

---
 pyproject.toml                        |  2 +-
 src/agentscope/model/_openai_model.py | 54 ++++++++++++++++++++++++++-
 2 files changed, 53 insertions(+), 3 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index ff2c085b26..5a30a3c1ce 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -47,7 +47,7 @@ dependencies = [
 [project.optional-dependencies]
 # ------------ A2A protocol ------------
 a2a = [
-    "a2a-sdk",
+    "a2a-sdk==0.3.26",
     "httpx",
     # TODO: split the card resolvers from the a2a dependency
     "nacos-sdk-python>=3.0.0",
diff --git a/src/agentscope/model/_openai_model.py b/src/agentscope/model/_openai_model.py
index 548d639e42..21f02747cf 100644
--- a/src/agentscope/model/_openai_model.py
+++ b/src/agentscope/model/_openai_model.py
@@ -269,6 +269,8 @@ async def __call__(
         start_datetime = datetime.now()
 
         if structured_model:
+            import openai
+
             if tools or tool_choice:
                 logger.warning(
                     "structured_model is provided. Both 'tools' and "
@@ -299,12 +301,13 @@ async def __call__(
                         response = self.client.chat.completions.stream(
                             **kwargs,
                         )
-                        return self._parse_openai_stream_response(
+                        return self._structured_stream_with_fallback(
                             start_datetime,
                             response,
                             structured_model,
+                            kwargs,
                         )
-                except Exception as e:
+                except openai.BadRequestError as e:
                     logger.warning(
                         "response_format structured output failed (%s: %s), "
                         "falling back to tool-call based structured output. "
@@ -677,6 +680,53 @@ def _parse_openai_completion_response(
 
         return ChatResponse(**resp_kwargs)
 
+    async def _structured_stream_with_fallback(
+        self,
+        start_datetime: datetime,
+        response: Any,
+        structured_model: Type[BaseModel],
+        kwargs: dict,
+    ) -> AsyncGenerator[ChatResponse, None]:
+        """Stream a ``response_format`` attempt, falling back on rejection.
+
+        ``client.chat.completions.stream()`` is lazy -- the HTTP request is
+        deferred until the stream is consumed, so the ``BadRequestError`` of
+        an API that rejects ``response_format`` (e.g. DeepSeek) is raised
+        *outside* the try/except in ``__call__``. It is caught here: the
+        fallback flag is set and ``_structured_via_tool_call`` takes over.
+
+        Yields the parsed stream chunks; after a rejection, whatever the
+        tool-call fallback produces (its chunks, or its single result).
+        """
+        import openai
+
+        try:
+            async for chunk in self._parse_openai_stream_response(
+                start_datetime,
+                response,
+                structured_model,
+            ):
+                yield chunk
+        except openai.BadRequestError as e:
+            logger.warning(
+                "response_format structured output failed during streaming "
+                "(%s: %s), falling back to tool-call based structured "
+                "output. Subsequent calls will use tool-call directly.",
+                type(e).__name__,
+                e,
+            )
+            self._structured_output_fallback = True
+            fallback = await self._structured_via_tool_call(
+                kwargs,
+                structured_model,
+                datetime.now(),  # fallback is timed from its own start
+            )
+            if isinstance(fallback, AsyncGenerator):  # relay its chunks
+                async for chunk in fallback:
+                    yield chunk
+            else:  # not a generator: pass the result through as-is
+                yield fallback
+
     async def _structured_via_tool_call(
         self,
         kwargs: dict,