From 092c2049c504c02a5f9ae16b62106be316e6d365 Mon Sep 17 00:00:00 2001 From: hallerite Date: Tue, 26 May 2026 14:48:03 +0000 Subject: [PATCH] fix(nemotron3): forward tools to parse_qwen35 for schema-aware coercion MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Nemotron3 uses the same XML-style wire format as Qwen3.5 (``<function=name><parameter=key>value</parameter>...</function>``), so values arrive as raw strings between the tags. ``parse_response`` accepted ``tools`` but didn't forward it, with a ``# noqa: ARG002`` claiming "args land in a JSON object, schema not needed" — which is incorrect for the actual XML wire format. Pre-fix, the no-schema branch of ``_coerce_arg_value`` did an opportunistic ``json.loads`` with a raw-text fallback, so numeric and boolean args were recovered by accident. With #52's vLLM-parity refactor in flight (no schema → verbatim string, matching vLLM's ``extract_types_from_schema(None) → ["string"]``), every Nemotron3 tool-call arg would silently become a string. Forwarding ``tools`` routes Nemotron3 through the same schema-aware ladder the other five XML renderers (Qwen3.5/3.6, GLM-4.5/5, MiniMax-M2, Laguna-XS.2) already use — which is what vLLM does for the same wire format via its shared ``qwen3_coder`` parser. Test plan: extends ``tests/test_tool_arg_type_preservation.py`` to include Nemotron-3-Nano, exercising the same string-vs-coerced-type matrix as the other XML renderers. All 141 tests pass. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- renderers/nemotron3.py | 3 ++- tests/test_tool_arg_type_preservation.py | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/renderers/nemotron3.py b/renderers/nemotron3.py index 0d87f8b..06d9d4d 100644 --- a/renderers/nemotron3.py +++ b/renderers/nemotron3.py @@ -430,7 +430,7 @@ def parse_response( self, token_ids: list[int], *, - tools: list[ToolSpec] | None = None, # noqa: ARG002 — args land in a JSON object, schema not needed + tools: list[ToolSpec] | None = None, ) -> ParsedResponse: stop_ids = {self._im_end} if self._endoftext is not None: @@ -443,6 +443,7 @@ def parse_response( think_end_id=self._think_end, tool_call_id=self._tool_call, tool_call_end_id=self._tool_call_end, + tools=tools, ) def get_stop_token_ids(self) -> list[int]: diff --git a/tests/test_tool_arg_type_preservation.py b/tests/test_tool_arg_type_preservation.py index d8dbd57..cf2a179 100644 --- a/tests/test_tool_arg_type_preservation.py +++ b/tests/test_tool_arg_type_preservation.py @@ -26,7 +26,7 @@ # (HuggingFace model name, renderer name). Two JSON-shaped controls -# (string types already preserved by the wire format) + four XML-style +# (string types already preserved by the wire format) + five XML-style # parsers that rely on the schema to preserve them. _MODELS = [ ("Qwen/Qwen3-8B", "auto"), # hermes JSON — control @@ -35,6 +35,7 @@ ("zai-org/GLM-5", "auto"), # XML ("MiniMaxAI/MiniMax-M2.5", "auto"), # XML ("poolside/Laguna-XS.2", "auto"), # XML + ("nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16", "auto"), # XML ]