From 9c5084baf42233539bbcfb02863e07c212b76939 Mon Sep 17 00:00:00 2001
From: Yinhan Lu
Date: Thu, 2 Apr 2026 19:07:42 -0400
Subject: [PATCH] fix: extract system messages from Responses API input into
 instructions
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The upstream Codex Responses API rejects `role: "system"` messages in the
input array. This causes a 400 error when clients (e.g., langchain-openai
with use_responses_api=True) include SystemMessage in the input.

Modify `_normalize_input_messages` to extract system and developer messages
from the input array and merge them into the `instructions` field before
forwarding upstream. This matches the behavior already implemented in the
Chat Completions → Responses API converter.

Fixes #52

Co-Authored-By: Claude Opus 4.6 (1M context)
---
 ccproxy/plugins/codex/adapter.py         | 36 ++++++++---
 tests/plugins/codex/unit/test_adapter.py | 78 ++++++++++++++++++++++++
 2 files changed, 104 insertions(+), 10 deletions(-)

diff --git a/ccproxy/plugins/codex/adapter.py b/ccproxy/plugins/codex/adapter.py
index 7324d8fe..d86d66d4 100644
--- a/ccproxy/plugins/codex/adapter.py
+++ b/ccproxy/plugins/codex/adapter.py
@@ -647,22 +647,38 @@ def _normalize_input_messages(self, data: dict[str, Any]) -> dict[str, Any]:
             return data
 
         normalized_items: list[Any] = []
+        system_segments: list[str] = []
 
         for item in input_items:
-            if (
-                isinstance(item, dict)
-                and "type" not in item
-                and "role" in item
-                and "content" in item
-            ):
-                normalized_item = dict(item)
-                normalized_item["type"] = "message"
-                normalized_items.append(normalized_item)
-                continue
+            if isinstance(item, dict) and "role" in item and "content" in item:
+                role = item.get("role", "")
+                # Extract system/developer messages into instructions
+                # so they are not rejected by the upstream Responses API.
+                if role in ("system", "developer"):
+                    content = item.get("content")
+                    if isinstance(content, str) and content.strip():
+                        system_segments.append(content.strip())
+                    continue
+
+                if "type" not in item:
+                    normalized_item = dict(item)
+                    normalized_item["type"] = "message"
+                    normalized_items.append(normalized_item)
+                    continue
             normalized_items.append(item)
 
         result = dict(data)
         result["input"] = normalized_items
+
+        # Merge extracted system messages into the instructions field
+        if system_segments:
+            existing = result.get("instructions")
+            parts = []
+            if isinstance(existing, str) and existing.strip():
+                parts.append(existing.strip())
+            parts.extend(system_segments)
+            result["instructions"] = "\n\n".join(parts)
+
         return result
 
     def _request_body_is_encoded(self, headers: dict[str, str]) -> bool:
diff --git a/tests/plugins/codex/unit/test_adapter.py b/tests/plugins/codex/unit/test_adapter.py
index 4eb7400c..43869af4 100644
--- a/tests/plugins/codex/unit/test_adapter.py
+++ b/tests/plugins/codex/unit/test_adapter.py
@@ -441,6 +441,84 @@ async def test_prepare_provider_request_keeps_msaf_reasoning_when_detection_disa
         assert "temperature" not in result_data
         assert "max_tokens" not in result_data
 
+    @pytest.mark.asyncio
+    async def test_normalize_input_extracts_system_messages_to_instructions(
+        self, adapter_with_disabled_detection: CodexAdapter
+    ) -> None:
+        """System messages in input should be extracted into instructions.
+
+        The upstream Codex Responses API rejects role: system in the input
+        array. _normalize_input_messages must move them to the instructions
+        field so the request is accepted.
+        """
+        body = json.dumps(
+            {
+                "model": "gpt-5",
+                "input": [
+                    {"role": "system", "content": "You are a helpful assistant"},
+                    {"role": "user", "content": "Hello"},
+                ],
+            }
+        ).encode()
+
+        result_body, _ = await adapter_with_disabled_detection.prepare_provider_request(
+            body, {}, "/responses"
+        )
+        result_data = json.loads(result_body.decode())
+
+        # System message should be moved to instructions
+        assert result_data["instructions"] == "You are a helpful assistant"
+        # Only the user message should remain in input
+        assert len(result_data["input"]) == 1
+        assert result_data["input"][0]["role"] == "user"
+
+    @pytest.mark.asyncio
+    async def test_normalize_input_merges_system_with_existing_instructions(
+        self, adapter_with_disabled_detection: CodexAdapter
+    ) -> None:
+        """System messages should be appended to existing instructions."""
+        body = json.dumps(
+            {
+                "model": "gpt-5",
+                "instructions": "Existing instructions",
+                "input": [
+                    {"role": "system", "content": "Extra system context"},
+                    {"role": "user", "content": "Hello"},
+                ],
+            }
+        ).encode()
+
+        result_body, _ = await adapter_with_disabled_detection.prepare_provider_request(
+            body, {}, "/responses"
+        )
+        result_data = json.loads(result_body.decode())
+
+        assert result_data["instructions"] == "Existing instructions\n\nExtra system context"
+        assert len(result_data["input"]) == 1
+
+    @pytest.mark.asyncio
+    async def test_normalize_input_extracts_developer_messages(
+        self, adapter_with_disabled_detection: CodexAdapter
+    ) -> None:
+        """Developer role messages should also be extracted to instructions."""
+        body = json.dumps(
+            {
+                "model": "gpt-5",
+                "input": [
+                    {"role": "developer", "content": "Developer instructions"},
+                    {"role": "user", "content": "Hello"},
+                ],
+            }
+        ).encode()
+
+        result_body, _ = await adapter_with_disabled_detection.prepare_provider_request(
+            body, {}, "/responses"
+        )
+        result_data = json.loads(result_body.decode())
+
+        assert result_data["instructions"] == "Developer instructions"
+        assert len(result_data["input"]) == 1
+
     @pytest.mark.asyncio
     async def test_process_provider_response(self, adapter: CodexAdapter) -> None:
         """Test response processing and format conversion."""