From 9c5084baf42233539bbcfb02863e07c212b76939 Mon Sep 17 00:00:00 2001
From: Yinhan Lu
Date: Thu, 2 Apr 2026 19:07:42 -0400
Subject: [PATCH] fix: extract system messages from Responses API input into
 instructions
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The upstream Codex Responses API rejects `role: "system"` messages in the
input array. This causes a 400 error when clients (e.g., langchain-openai
with use_responses_api=True) include SystemMessage in the input.

Modify `_normalize_input_messages` to extract system and developer messages
from the input array and merge them into the `instructions` field before
forwarding upstream. This matches the behavior already implemented in the
Chat Completions → Responses API converter.

Fixes #52

Co-Authored-By: Claude Opus 4.6 (1M context)
---
 ccproxy/plugins/codex/adapter.py         | 36 ++++++++---
 tests/plugins/codex/unit/test_adapter.py | 78 ++++++++++++++++++++++++
 2 files changed, 104 insertions(+), 10 deletions(-)

diff --git a/ccproxy/plugins/codex/adapter.py b/ccproxy/plugins/codex/adapter.py
index 7324d8fe..d86d66d4 100644
--- a/ccproxy/plugins/codex/adapter.py
+++ b/ccproxy/plugins/codex/adapter.py
@@ -647,22 +647,38 @@ def _normalize_input_messages(self, data: dict[str, Any]) -> dict[str, Any]:
             return data
 
         normalized_items: list[Any] = []
+        system_segments: list[str] = []
 
         for item in input_items:
-            if (
-                isinstance(item, dict)
-                and "type" not in item
-                and "role" in item
-                and "content" in item
-            ):
-                normalized_item = dict(item)
-                normalized_item["type"] = "message"
-                normalized_items.append(normalized_item)
-                continue
+            if isinstance(item, dict) and "role" in item and "content" in item:
+                role = item.get("role", "")
+                # Extract system/developer messages into instructions
+                # so they are not rejected by the upstream Responses API.
+                if role in ("system", "developer"):
+                    content = item.get("content")
+                    if isinstance(content, str) and content.strip():
+                        system_segments.append(content.strip())
+                    continue
+
+                if "type" not in item:
+                    normalized_item = dict(item)
+                    normalized_item["type"] = "message"
+                    normalized_items.append(normalized_item)
+                    continue
             normalized_items.append(item)
 
         result = dict(data)
         result["input"] = normalized_items
+
+        # Merge extracted system messages into the instructions field
+        if system_segments:
+            existing = result.get("instructions")
+            parts = []
+            if isinstance(existing, str) and existing.strip():
+                parts.append(existing.strip())
+            parts.extend(system_segments)
+            result["instructions"] = "\n\n".join(parts)
+
         return result
 
     def _request_body_is_encoded(self, headers: dict[str, str]) -> bool:
diff --git a/tests/plugins/codex/unit/test_adapter.py b/tests/plugins/codex/unit/test_adapter.py
index 4eb7400c..43869af4 100644
--- a/tests/plugins/codex/unit/test_adapter.py
+++ b/tests/plugins/codex/unit/test_adapter.py
@@ -441,6 +441,84 @@ async def test_prepare_provider_request_keeps_msaf_reasoning_when_detection_disa
         assert "temperature" not in result_data
         assert "max_tokens" not in result_data
 
+    @pytest.mark.asyncio
+    async def test_normalize_input_extracts_system_messages_to_instructions(
+        self, adapter_with_disabled_detection: CodexAdapter
+    ) -> None:
+        """System messages in input should be extracted into instructions.
+
+        The upstream Codex Responses API rejects role: system in the input
+        array. _normalize_input_messages must move them to the instructions
+        field so the request is accepted.
+        """
+        body = json.dumps(
+            {
+                "model": "gpt-5",
+                "input": [
+                    {"role": "system", "content": "You are a helpful assistant"},
+                    {"role": "user", "content": "Hello"},
+                ],
+            }
+        ).encode()
+
+        result_body, _ = await adapter_with_disabled_detection.prepare_provider_request(
+            body, {}, "/responses"
+        )
+        result_data = json.loads(result_body.decode())
+
+        # System message should be moved to instructions
+        assert result_data["instructions"] == "You are a helpful assistant"
+        # Only the user message should remain in input
+        assert len(result_data["input"]) == 1
+        assert result_data["input"][0]["role"] == "user"
+
+    @pytest.mark.asyncio
+    async def test_normalize_input_merges_system_with_existing_instructions(
+        self, adapter_with_disabled_detection: CodexAdapter
+    ) -> None:
+        """System messages should be appended to existing instructions."""
+        body = json.dumps(
+            {
+                "model": "gpt-5",
+                "instructions": "Existing instructions",
+                "input": [
+                    {"role": "system", "content": "Extra system context"},
+                    {"role": "user", "content": "Hello"},
+                ],
+            }
+        ).encode()
+
+        result_body, _ = await adapter_with_disabled_detection.prepare_provider_request(
+            body, {}, "/responses"
+        )
+        result_data = json.loads(result_body.decode())
+
+        assert result_data["instructions"] == "Existing instructions\n\nExtra system context"
+        assert len(result_data["input"]) == 1
+
+    @pytest.mark.asyncio
+    async def test_normalize_input_extracts_developer_messages(
+        self, adapter_with_disabled_detection: CodexAdapter
+    ) -> None:
+        """Developer role messages should also be extracted to instructions."""
+        body = json.dumps(
+            {
+                "model": "gpt-5",
+                "input": [
+                    {"role": "developer", "content": "Developer instructions"},
+                    {"role": "user", "content": "Hello"},
+                ],
+            }
+        ).encode()
+
+        result_body, _ = await adapter_with_disabled_detection.prepare_provider_request(
+            body, {}, "/responses"
+        )
+        result_data = json.loads(result_body.decode())
+
+        assert result_data["instructions"] == "Developer instructions"
+        assert len(result_data["input"]) == 1
+
     @pytest.mark.asyncio
     async def test_process_provider_response(self, adapter: CodexAdapter) -> None:
         """Test response processing and format conversion."""