From 092c2049c504c02a5f9ae16b62106be316e6d365 Mon Sep 17 00:00:00 2001
From: hallerite <git@hallerite.com>
Date: Tue, 26 May 2026 14:48:03 +0000
Subject: [PATCH] fix(nemotron3): forward tools to parse_qwen35 for
 schema-aware coercion
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Nemotron3 uses the same XML-style wire format as Qwen3.5
(<tool_call><function=name><parameter=k>v</parameter>...), so values
arrive as raw strings between the tags. ``parse_response`` accepted
``tools`` but didn't forward it, with a ``# noqa: ARG002`` claiming
"args land in a JSON object, schema not needed" — which is incorrect
for the actual XML wire format.

Pre-fix, the no-schema branch of ``_coerce_arg_value`` did an
opportunistic ``json.loads`` with a raw-text fallback, so numeric and
boolean args were recovered by accident. Once #52's vLLM-parity
refactor lands (no schema → verbatim string, matching vLLM's
``extract_types_from_schema(None) → ["string"]``), every Nemotron3
tool-call arg would otherwise silently become a string.

Forwarding ``tools`` routes Nemotron3 through the same schema-aware
ladder the other five XML renderers (Qwen3.5/3.6, GLM-4.5/5,
MiniMax-M2, Laguna-XS.2) already use — which is what vLLM does for
the same wire format via its shared ``qwen3_coder`` parser.

Test plan: extends ``tests/test_tool_arg_type_preservation.py`` to
include Nemotron-3-Nano, exercising the same string-vs-coerced-type
matrix as the other XML renderers. All 141 tests pass.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 renderers/nemotron3.py                   | 3 ++-
 tests/test_tool_arg_type_preservation.py | 3 ++-
 2 files changed, 4 insertions(+), 2 deletions(-)
diff --git a/renderers/nemotron3.py b/renderers/nemotron3.py
index 0d87f8b..06d9d4d 100644
--- a/renderers/nemotron3.py
+++ b/renderers/nemotron3.py
@@ -430,7 +430,7 @@ def parse_response(
         self,
         token_ids: list[int],
         *,
-        tools: list[ToolSpec] | None = None,  # noqa: ARG002 — args land in a JSON object, schema not needed
+        tools: list[ToolSpec] | None = None,
     ) -> ParsedResponse:
         stop_ids = {self._im_end}
         if self._endoftext is not None:
@@ -443,6 +443,7 @@ def parse_response(
             think_end_id=self._think_end,
             tool_call_id=self._tool_call,
             tool_call_end_id=self._tool_call_end,
+            tools=tools,
         )
 
     def get_stop_token_ids(self) -> list[int]:
diff --git a/tests/test_tool_arg_type_preservation.py b/tests/test_tool_arg_type_preservation.py
index d8dbd57..cf2a179 100644
--- a/tests/test_tool_arg_type_preservation.py
+++ b/tests/test_tool_arg_type_preservation.py
@@ -26,7 +26,7 @@
 
 
 # (HuggingFace model name, renderer name). Two JSON-shaped controls
-# (string types already preserved by the wire format) + four XML-style
+# (string types already preserved by the wire format) + five XML-style
 # parsers that rely on the schema to preserve them.
 _MODELS = [
     ("Qwen/Qwen3-8B", "auto"),  # hermes JSON  — control
@@ -35,6 +35,7 @@
     ("zai-org/GLM-5", "auto"),  # XML
     ("MiniMaxAI/MiniMax-M2.5", "auto"),  # XML
     ("poolside/Laguna-XS.2", "auto"),  # XML
+    ("nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16", "auto"),  # XML
 ]