From 69328f63eefa7adcaa79fee8c24828ee2ae66ac7 Mon Sep 17 00:00:00 2001
From: hallerite <git@hallerite.com>
Date: Mon, 18 May 2026 16:47:07 +0000
Subject: [PATCH 1/4] fix(parsing): vLLM-parity type ladder for XML tool-call
 arg coercion
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Renderers' Qwen3.5/GLM/MiniMax/Laguna XML parsers flagged
``<parameter=x>True</parameter>`` as INVALID_JSON for boolean params
because ``json.loads("True")`` fails. Both SGLang's
``Qwen3CoderDetector`` and vLLM's ``Qwen3CoderToolParser`` accept it via
``param_value.lower() == "true"`` — so Pythonic literals the model
freely emits round-trip cleanly at inference but came back malformed
through renderers.

Replaces the string-or-``json.loads`` dispatch with the full
``_convert_param_value`` ladder shared by both reference parsers:
case-insensitive ``null``, ``int()`` / ``float()`` for numeric families
with int demotion, case-folded bool, ``json.loads`` → ``ast.literal_eval``
for objects/arrays/anyOf, ``ast.literal_eval`` catch-all. INVALID_JSON
remains as the verifier / RL-loss signal for values that had to
degenerate (e.g. ``yes`` for bool, ``abc`` for int).

One deliberate deviation from vLLM/SGLang: declared ``string`` types
preserve ``"null"`` verbatim instead of coercing to Python ``None``, so
the existing ``test_tool_arg_type_preservation`` string-verbatim
contract still holds.

Fixes #47.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 renderers/parsing.py       | 146 ++++++++++++++++----
 tests/test_arg_coercion.py | 263 +++++++++++++++++++++++++++++++++++++
 2 files changed, 385 insertions(+), 24 deletions(-)
 create mode 100644 tests/test_arg_coercion.py
diff --git a/renderers/parsing.py b/renderers/parsing.py
index 528f122..f9f9ac1 100644
--- a/renderers/parsing.py
+++ b/renderers/parsing.py
@@ -16,6 +16,7 @@
 
 from __future__ import annotations
 
+import ast
 import json
 from typing import Any
 
@@ -29,8 +30,21 @@
 # ``"true"`` produce identical wire bytes; without the tool schema, the
 # parser has no signal to distinguish them and defaults to
 # ``json.loads`` (the historical behavior). When the caller passes
-# ``tools=[...]``, parsers consult the per-parameter declared type to
-# keep string args verbatim, matching vLLM / SGLang reference parsers.
+# ``tools=[...]``, parsers consult the per-parameter declared type and
+# dispatch through a vLLM-parity ladder (case-insensitive ``null``,
+# ``int()`` / ``float()`` for numeric, ``.lower() == "true"`` for bool,
+# ``json.loads`` → ``ast.literal_eval`` for object/array) so models can
+# emit Pythonic literals like ``True`` / ``None`` / ``{'k': 1}`` that
+# both SGLang's ``Qwen3CoderDetector`` and vLLM's
+# ``Qwen3CoderToolParser`` accept on the inference side.
+
+
+_STRING_TYPES = frozenset({"string", "str", "text", "varchar", "char", "enum"})
+_BOOL_TYPES = frozenset({"boolean", "bool", "binary"})
+_OBJECT_TYPES = frozenset({"object", "array", "arr"})
+_INT_PREFIXES = ("int", "uint", "long", "short", "unsigned")
+_FLOAT_PREFIXES = ("num", "float")
+_OBJECT_PREFIXES = ("dict", "list")
 
 
 def _build_param_type_index(
@@ -59,35 +73,119 @@ def _build_param_type_index(
     return index
 
 
+def _declared_type(param_schema: dict[str, Any] | None) -> str | None:
+    """Extract the lowercased ``type`` from a JSON-schema fragment.
+
+    Mirrors vLLM: a fragment with ``anyOf`` and no top-level ``type`` is
+    treated as ``"object"`` so json.loads is attempted; a list-form type
+    falls back to the first declared entry.
+    """
+    if not isinstance(param_schema, dict):
+        return None
+    declared = param_schema.get("type")
+    if isinstance(declared, str):
+        return declared.strip().lower()
+    if isinstance(declared, list) and declared:
+        first = declared[0]
+        if isinstance(first, str):
+            return first.strip().lower()
+    if "anyOf" in param_schema:
+        return "object"
+    return None
+
+
 def _coerce_arg_value(
     text: str, param_schema: dict[str, Any] | None
 ) -> tuple[Any, bool]:
     """Coerce a raw ``<arg_value>`` body to its declared type.
 
-    Returns ``(value, used_json_fallback)``. The boolean is ``True`` only
-    when ``json.loads`` was attempted and raised, so the caller knows
-    whether to flag ``INVALID_JSON``. Returning a string verbatim
-    because the schema declared ``type: "string"`` is NOT a fallback.
-
-    Rule (matches vLLM / SGLang reference parsers):
-
-    - If the param's declared ``type`` is ``"string"`` (or single-element
-      ``["string"]``), return ``text`` verbatim — never ``json.loads``.
-    - Anything else (no schema, non-string scalar, object, array, or
-      union types that include non-string): try ``json.loads`` and fall
-      back to raw ``text`` on parse failure.
-
-    Union types that include ``"string"`` alongside other types still
-    attempt ``json.loads`` first so an explicit integer / bool can
-    parse; the string branch only wins as the fallback.
+    Returns ``(value, used_fallback)``. ``used_fallback=True`` means the
+    coercion could not satisfy the declared type and had to fall back to
+    a different shape (a string for scalars, ``False`` for booleans, raw
+    text for objects), so the caller can flag ``INVALID_JSON``.
+
+    The ladder mirrors vLLM's ``Qwen3CoderToolParser._convert_param_value``
+    and SGLang's ``Qwen3CoderDetector._convert_param_value`` with one
+    deliberate deviation noted inline:
+
+    - No schema → try ``json.loads``, fall back to raw text + INVALID.
+    - ``string`` / ``str`` / ``text`` / ``varchar`` / ``char`` / ``enum``
+      → return verbatim. (Deviation: vLLM/SGLang null-coerce ``"null"``
+      *before* checking type, so a string-typed arg of ``"null"`` would
+      come back as Python ``None``. Renderers preserves the string
+      because the XML wire format can't distinguish the string
+      ``"null"`` from JSON null, and downstream tests pin the
+      preservation contract; see ``test_tool_arg_type_preservation``.)
+    - ``null`` (any case) for any non-string declared type → ``None``.
+    - ``int`` / ``uint`` / ``long`` / ``short`` / ``unsigned`` family →
+      ``int(text)``, degenerating to raw text + INVALID.
+    - ``num`` / ``float`` family → ``float(text)`` with int demotion
+      when the source has no ``.``/``e`` and is whole, degenerating to
+      raw text + INVALID.
+    - ``boolean`` / ``bool`` / ``binary`` → ``text.lower() == "true"``;
+      anything outside ``{"true", "false"}`` returns ``False`` + INVALID
+      (matches vLLM's "degenerate to false" behavior, with the
+      INVALID flag preserved for the verifier / RL-loss signal).
+    - ``object`` / ``array`` / ``dict`` / ``list`` (or ``anyOf``) →
+      ``json.loads`` first, ``ast.literal_eval`` fallback (for Python
+      literals like ``{'k': 1}``), raw text + INVALID otherwise.
+    - Unknown type (param missing from schema, or type not in any
+      family above) → ``ast.literal_eval`` if it parses, else raw text.
+      Matches vLLM's catch-all branch and keeps strings like
+      ``"hello"`` (with no schema entry) as-is.
     """
-    if param_schema is not None:
-        declared = param_schema.get("type")
-        if declared == "string" or declared == ["string"]:
-            return text, False
+    if param_schema is None:
+        if text.lower() == "null":
+            return None, False
+        try:
+            return json.loads(text), False
+        except (json.JSONDecodeError, ValueError):
+            return text, True
+
+    declared = _declared_type(param_schema)
+
+    if declared is None or declared in _STRING_TYPES:
+        return text, False
+
+    if text.lower() == "null":
+        return None, False
+
+    if declared.startswith(_INT_PREFIXES):
+        try:
+            return int(text), False
+        except (ValueError, TypeError):
+            return text, True
+
+    if declared.startswith(_FLOAT_PREFIXES):
+        try:
+            f = float(text)
+        except (ValueError, TypeError):
+            return text, True
+        if "." not in text and "e" not in text.lower() and f.is_integer():
+            return int(f), False
+        return f, False
+
+    if declared in _BOOL_TYPES:
+        lowered = text.lower()
+        if lowered == "true":
+            return True, False
+        if lowered == "false":
+            return False, False
+        return False, True
+
+    if declared in _OBJECT_TYPES or declared.startswith(_OBJECT_PREFIXES):
+        try:
+            return json.loads(text), False
+        except (json.JSONDecodeError, ValueError):
+            pass
+        try:
+            return ast.literal_eval(text), False
+        except (ValueError, SyntaxError, TypeError, MemoryError):
+            return text, True
+
     try:
-        return json.loads(text), False
-    except (json.JSONDecodeError, ValueError):
+        return ast.literal_eval(text), False
+    except (ValueError, SyntaxError, TypeError, MemoryError):
         return text, True
 
 
diff --git a/tests/test_arg_coercion.py b/tests/test_arg_coercion.py
new file mode 100644
index 0000000..d95565f
--- /dev/null
+++ b/tests/test_arg_coercion.py
@@ -0,0 +1,263 @@
+"""Schema-aware argument coercion parity with vLLM / SGLang.
+
+The XML-style tool-call wire format (Qwen3.5, GLM, MiniMax, Laguna)
+renders parameter values verbatim with no quoting, so the parser must
+consult the tool schema to know whether ``True`` is a bool or a string.
+Renderers' previous coercion was strict ``json.loads``, which flagged
+``True`` / ``False`` (capitalised Python literals) as ``INVALID_JSON``
+for boolean params — but both vLLM's ``Qwen3CoderToolParser`` and
+SGLang's ``Qwen3CoderDetector`` accept them via case-folded comparison.
+
+These tests pin the new ladder to the vLLM / SGLang reference shape and
+guard the bug from issue #47.
+"""
+
+from __future__ import annotations
+
+import pytest
+
+from renderers.base import ToolCallParseStatus, load_tokenizer
+from renderers.parsing import _coerce_arg_value, parse_qwen35
+
+
+# ── Direct coercion ─────────────────────────────────────────────────
+
+
+@pytest.mark.parametrize(
+    "raw,expected",
+    [
+        ("true", True),
+        ("True", True),
+        ("TRUE", True),
+        ("false", False),
+        ("False", False),
+        ("FALSE", False),
+    ],
+)
+def test_boolean_accepts_case_insensitive(raw, expected):
+    value, used_fallback = _coerce_arg_value(raw, {"type": "boolean"})
+    assert value is expected
+    assert used_fallback is False
+
+
+@pytest.mark.parametrize("declared", ["boolean", "bool", "binary"])
+def test_boolean_type_aliases_all_accepted(declared):
+    value, used_fallback = _coerce_arg_value("True", {"type": declared})
+    assert value is True
+    assert used_fallback is False
+
+
+def test_boolean_garbage_degenerates_to_false_with_invalid_flag():
+    """Match vLLM: non-true/false values silently become ``False`` — but we
+    still flag the fallback so verifier / RL-loss code can see the drift."""
+    value, used_fallback = _coerce_arg_value("yes", {"type": "boolean"})
+    assert value is False
+    assert used_fallback is True
+
+
+@pytest.mark.parametrize("declared", ["boolean", "integer", "object", "number"])
+def test_null_literal_is_case_insensitive_for_non_string_types(declared):
+    for raw in ("null", "Null", "NULL"):
+        value, used_fallback = _coerce_arg_value(raw, {"type": declared})
+        assert value is None
+        assert used_fallback is False
+
+
+def test_null_is_preserved_verbatim_for_string_type():
+    """Deliberate deviation from vLLM/SGLang: string-typed ``"null"``
+    stays as the string ``"null"`` so the existing string-verbatim
+    contract holds (see ``test_tool_arg_type_preservation``). The XML
+    wire format already can't distinguish the string ``"null"`` from
+    JSON null, but when a schema says ``type: "string"`` we honour it."""
+    value, used_fallback = _coerce_arg_value("null", {"type": "string"})
+    assert value == "null"
+    assert used_fallback is False
+
+
+@pytest.mark.parametrize(
+    "declared,raw,expected",
+    [
+        ("integer", "42", 42),
+        ("int", "-7", -7),
+        ("uint", "0", 0),
+        ("long", "9999999999", 9999999999),
+        ("short", "12", 12),
+        ("unsigned", "5", 5),
+    ],
+)
+def test_int_family_coerces(declared, raw, expected):
+    value, used_fallback = _coerce_arg_value(raw, {"type": declared})
+    assert value == expected
+    assert isinstance(value, int)
+    assert used_fallback is False
+
+
+def test_int_failure_keeps_raw_and_flags_fallback():
+    value, used_fallback = _coerce_arg_value("abc", {"type": "integer"})
+    assert value == "abc"
+    assert used_fallback is True
+
+
+@pytest.mark.parametrize(
+    "raw,expected,exact_type",
+    [
+        ("3.14", 3.14, float),
+        ("1e3", 1000.0, float),  # source has `e` → stays float (SGLang rule)
+        ("1.0", 1.0, float),  # source has `.` → stays float
+        ("-2.5", -2.5, float),
+        ("7", 7, int),  # no `.`/`e` and whole → demoted to int
+    ],
+)
+def test_float_family_coerces(raw, expected, exact_type):
+    value, used_fallback = _coerce_arg_value(raw, {"type": "number"})
+    assert value == expected
+    assert type(value) is exact_type
+    assert used_fallback is False
+
+
+def test_float_failure_keeps_raw_and_flags_fallback():
+    value, used_fallback = _coerce_arg_value("not-a-number", {"type": "float"})
+    assert value == "not-a-number"
+    assert used_fallback is True
+
+
+def test_object_via_json_loads():
+    value, used_fallback = _coerce_arg_value('{"k": 1}', {"type": "object"})
+    assert value == {"k": 1}
+    assert used_fallback is False
+
+
+def test_object_via_ast_literal_eval_fallback():
+    """Python-literal dicts (single quotes) should still parse for object
+    params — this is the vLLM ``ast.literal_eval`` fallback path."""
+    value, used_fallback = _coerce_arg_value("{'k': 1}", {"type": "object"})
+    assert value == {"k": 1}
+    assert used_fallback is False
+
+
+def test_array_via_json_loads():
+    value, used_fallback = _coerce_arg_value("[1, 2, 3]", {"type": "array"})
+    assert value == [1, 2, 3]
+    assert used_fallback is False
+
+
+def test_object_total_failure_flags_invalid():
+    value, used_fallback = _coerce_arg_value(
+        "this is not a json object", {"type": "object"}
+    )
+    assert value == "this is not a json object"
+    assert used_fallback is True
+
+
+def test_anyof_treated_as_object():
+    """vLLM-specific: a schema with ``anyOf`` and no top-level ``type``
+    routes through the object branch so JSON-shaped values parse."""
+    schema = {"anyOf": [{"type": "integer"}, {"type": "string"}]}
+    value, used_fallback = _coerce_arg_value("42", schema)
+    assert value == 42
+    assert used_fallback is False
+
+
+@pytest.mark.parametrize("declared", ["string", "str", "text", "varchar", "char", "enum"])
+def test_string_family_returns_verbatim(declared):
+    value, used_fallback = _coerce_arg_value("True", {"type": declared})
+    assert value == "True"
+    assert used_fallback is False
+
+
+def test_string_with_list_form_type():
+    value, used_fallback = _coerce_arg_value("True", {"type": ["string"]})
+    assert value == "True"
+    assert used_fallback is False
+
+
+def test_no_schema_falls_back_to_json_loads():
+    """Historical behavior: when ``param_schema`` is ``None`` (no tools
+    were passed), we still try ``json.loads`` for backwards compatibility
+    so untyped numbers / bools parse. ``None`` is handled via the
+    ``null`` short-circuit at the top of the ladder."""
+    value, used_fallback = _coerce_arg_value("42", None)
+    assert value == 42
+    assert used_fallback is False
+
+    value, used_fallback = _coerce_arg_value("hello", None)
+    assert value == "hello"
+    assert used_fallback is True
+
+
+def test_unknown_type_falls_back_to_literal_eval():
+    """A declared type we don't recognise (e.g. ``"date"``) lands in the
+    catch-all that tries ``ast.literal_eval``, mirroring vLLM's else
+    branch — strings without quotes that fail to eval stay as strings."""
+    value, used_fallback = _coerce_arg_value("2024-01-01", {"type": "date"})
+    assert value == "2024-01-01"
+    assert used_fallback is True
+
+    # Bare Python literal still parses through the catch-all
+    value, used_fallback = _coerce_arg_value("[1, 2]", {"type": "date"})
+    assert value == [1, 2]
+    assert used_fallback is False
+
+
+# ── Integration: parse_qwen35 end-to-end (the bug from issue #47) ───
+
+
+_TOOLS_FROM_ISSUE = [
+    {
+        "type": "function",
+        "function": {
+            "name": "filesystem_server_get_directory_tree",
+            "description": "List a directory tree.",
+            "parameters": {
+                "type": "object",
+                "properties": {
+                    "path": {"type": "string"},
+                    "max_depth": {"type": "integer"},
+                    "include_files": {"type": "boolean"},
+                    "show_size": {"type": "boolean"},
+                },
+            },
+        },
+    }
+]
+
+
+def test_qwen35_capital_true_for_boolean_is_ok_status():
+    """Regression for issue #47: ``<parameter=include_files>True</parameter>``
+    used to be flagged ``INVALID_JSON`` because ``json.loads("True")``
+    fails. SGLang and vLLM both accept it via case-folded comparison;
+    renderers now does too."""
+    tok = load_tokenizer("Qwen/Qwen3.5-9B")
+
+    completion = (
+        "<tool_call>\n"
+        "<function=filesystem_server_get_directory_tree>\n"
+        "<parameter=path>/</parameter>\n"
+        "<parameter=max_depth>2</parameter>\n"
+        "<parameter=include_files>True</parameter>\n"
+        "<parameter=show_size>True</parameter>\n"
+        "</function>\n"
+        "</tool_call>"
+    )
+    ids = tok.encode(completion, add_special_tokens=False)
+
+    parsed = parse_qwen35(
+        tok,
+        list(ids),
+        stop_ids={tok.eos_token_id} if tok.eos_token_id is not None else set(),
+        think_id=tok.convert_tokens_to_ids("<think>"),
+        think_end_id=tok.convert_tokens_to_ids("</think>"),
+        tool_call_id=tok.convert_tokens_to_ids("<tool_call>"),
+        tool_call_end_id=tok.convert_tokens_to_ids("</tool_call>"),
+        tools=_TOOLS_FROM_ISSUE,
+    )
+    assert len(parsed.tool_calls) == 1
+    tc = parsed.tool_calls[0]
+    assert tc.status == ToolCallParseStatus.OK
+    assert tc.name == "filesystem_server_get_directory_tree"
+    assert tc.arguments == {
+        "path": "/",
+        "max_depth": 2,
+        "include_files": True,
+        "show_size": True,
+    }

From a338f9d64f2e766e89ffbdbb463aac574bd0e1f0 Mon Sep 17 00:00:00 2001
From: hallerite <git@hallerite.com>
Date: Wed, 20 May 2026 12:46:31 +0000
Subject: [PATCH 2/4] fix(parsing): structural vLLM parity for XML tool-call
 extraction
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Mirror vLLM's Qwen3CoderToolParser end to end (chosen over qwen3_xml
because qwen3_coder is a strict superset for offline value coercion;
qwen3_xml's edge over qwen3_coder is its streaming state machine,
which does not apply here yet — module comment notes the TODO to
switch when streaming lands). Closes the three remaining structural
deltas:

- Adopt vLLM's three tolerant regex patterns (tool_call / function /
  parameter) so unclosed </parameter>, </function>, and </tool_call>
  tags are recovered instead of silently dropping their content.
- Add the qwen3coder_tool_parser.py:269-271 back-off: when output has
  no <tool_call> markers, scan the whole completion for raw
  <function=...></function> blocks and recover the call. Content text
  is whatever remains after the function blocks are stripped.
- Strip exactly one leading and one trailing newline from parameter
  values (qwen3coder_tool_parser.py:246-250); interior whitespace
  round-trips verbatim — previous .strip() was too aggressive.
- Fix the no-schema branch of _coerce_arg_value to match vLLM
  (qwen3coder_tool_parser.py:128-137): return the raw text verbatim
  after the case-insensitive null short-circuit instead of attempting
  json.loads.

Qwen3.6 needs no separate parser changes — Qwen36Renderer inherits
parse_response from Qwen35Renderer and shares the same parse_qwen35
plumbing, so the vLLM-parity ladder + structural tolerance applies
to both 3.5 (Python repr literals) and 3.6 (JSON literals) in one
shot.

Qwen35ToolParser in parsers.py (DefaultRenderer + tool_parser=
'qwen3.5' path) is updated to share the new _parse_xml_function_blocks
helper so both code paths produce identical output.

Tests: 7 new structural-parity tests pin the regex tolerance, no-tag
back-off, and whitespace stripping against the vLLM reference shape.
Existing test_no_schema_falls_back_to_json_loads is renamed and
inverted to match the new (vLLM-faithful) behavior.
---
 renderers/parsers.py       | 108 +++++++-------
 renderers/parsing.py       | 284 ++++++++++++++++++++++++++-----------
 tests/test_arg_coercion.py | 159 ++++++++++++++++++---
 3 files changed, 398 insertions(+), 153 deletions(-)

diff --git a/renderers/parsers.py b/renderers/parsers.py
index 77fa850..a277a0e 100644
--- a/renderers/parsers.py
+++ b/renderers/parsers.py
@@ -20,6 +20,7 @@
 from typing import Protocol, runtime_checkable
 
 from renderers.base import ParsedToolCall, ToolCallParseStatus
+from renderers.parsing import _parse_xml_function_blocks
 
 
 # ── Shared helpers ───────────────────────────────────────────────────
@@ -156,7 +157,22 @@ def extract(self, ids: list[int]) -> tuple[list[int], list[ParsedToolCall]]:
 
 
 class Qwen35ToolParser:
-    """XML-style tool calls: ``<tool_call><function=N><parameter=K>V</parameter>...</function></tool_call>``."""
+    """XML-style tool calls: ``<tool_call><function=N><parameter=K>V</parameter>...</function></tool_call>``.
+
+    Mirrors vLLM's ``Qwen3CoderToolParser`` extraction logic — see
+    ``renderers.parsing`` for the full ladder and the rationale behind
+    picking ``qwen3_coder`` over ``qwen3_xml``. This parser drives
+    ``DefaultRenderer`` when callers wire it up with
+    ``tool_parser="qwen3.5"``; ``Qwen35Renderer.parse_response``
+    routes through the same shared helpers via ``parse_qwen35``.
+
+    Schema-aware coercion is intentionally inert here: ``ToolParser``'s
+    ``extract`` API does not receive the tool list (callers consume
+    the parsed values as opaque dicts), so ``param_index`` is empty
+    and every parameter goes through the no-schema branch of
+    ``_coerce_arg_value`` — vLLM-equivalent verbatim strings after the
+    case-insensitive ``null`` short-circuit.
+    """
 
     def __init__(self, tokenizer):
         self._tokenizer = tokenizer
@@ -169,69 +185,57 @@ def extract(self, ids: list[int]) -> tuple[list[int], list[ParsedToolCall]]:
         tc_start = _find(ids, self._tc_id)
         if tc_start == -1:
             return ids, []
+
         tool_calls: list[ParsedToolCall] = []
         i = tc_start
+        param_index: dict = {}
         while i < len(ids):
-            if ids[i] == self._tc_id:
-                end = (
-                    _find(ids, self._tc_end_id, i + 1)
-                    if self._tc_end_id is not None
-                    else -1
-                )
-                if end == -1:
-                    raw = _decode(self._tokenizer, ids[i + 1 :])
-                    tool_calls.append(
-                        ParsedToolCall(
-                            raw=raw,
-                            token_span=(i, len(ids)),
-                            status=ToolCallParseStatus.UNCLOSED_BLOCK,
-                        )
-                    )
-                    break
+            if ids[i] != self._tc_id:
+                i += 1
+                continue
+
+            end = (
+                _find(ids, self._tc_end_id, i + 1)
+                if self._tc_end_id is not None
+                else -1
+            )
+            if end == -1:
+                block_text = _decode(self._tokenizer, ids[i + 1 :])
+                span = (i, len(ids))
+                wrapper_unclosed = True
+            else:
                 block_text = _decode(self._tokenizer, ids[i + 1 : end])
                 span = (i, end + 1)
-                name_match = re.search(r"<function=([^>]+)>", block_text)
-                if not name_match:
-                    tool_calls.append(
-                        ParsedToolCall(
-                            raw=block_text,
-                            token_span=span,
-                            status=ToolCallParseStatus.MALFORMED_STRUCTURE,
-                        )
-                    )
-                    i = end + 1
-                    continue
-                name = name_match.group(1)
-                arguments: dict = {}
-                any_json_fallback = False
-                for pm in re.finditer(
-                    r"<parameter=([^>]+)>\n?(.*?)\n?</parameter>",
-                    block_text,
-                    re.DOTALL,
-                ):
-                    arg_name = pm.group(1)
-                    arg_value = pm.group(2).strip()
-                    try:
-                        arguments[arg_name] = json.loads(arg_value)
-                    except (json.JSONDecodeError, ValueError):
-                        arguments[arg_name] = arg_value
-                        any_json_fallback = True
+                wrapper_unclosed = False
+
+            block_calls = _parse_xml_function_blocks(
+                block_text,
+                param_index=param_index,
+                token_span=span,
+                wrapper_unclosed=wrapper_unclosed,
+            )
+            if block_calls:
+                tool_calls.extend(block_calls)
+            elif wrapper_unclosed:
                 tool_calls.append(
                     ParsedToolCall(
                         raw=block_text,
-                        name=name,
-                        arguments=arguments,
                         token_span=span,
-                        status=(
-                            ToolCallParseStatus.INVALID_JSON
-                            if any_json_fallback
-                            else ToolCallParseStatus.OK
-                        ),
+                        status=ToolCallParseStatus.UNCLOSED_BLOCK,
                     )
                 )
-                i = end + 1
             else:
-                i += 1
+                tool_calls.append(
+                    ParsedToolCall(
+                        raw=block_text,
+                        token_span=span,
+                        status=ToolCallParseStatus.MALFORMED_STRUCTURE,
+                    )
+                )
+
+            if wrapper_unclosed:
+                break
+            i = end + 1
         return ids[:tc_start], tool_calls
 
 
diff --git a/renderers/parsing.py b/renderers/parsing.py
index f9f9ac1..8d039f7 100644
--- a/renderers/parsing.py
+++ b/renderers/parsing.py
@@ -18,25 +18,60 @@
 
 import ast
 import json
+import re
 from typing import Any
 
 from renderers.base import ParsedResponse, ParsedToolCall, ToolCallParseStatus, ToolSpec
 
 
+# ── vLLM Qwen3CoderToolParser regex patterns ────────────────────────
+#
+# Lifted verbatim from ``vllm/tool_parsers/qwen3coder_tool_parser.py``
+# (``self.tool_call_regex`` / ``tool_call_function_regex`` /
+# ``tool_call_parameter_regex``). Each pattern has two branches: one
+# for the closed form (with the matching end tag) and one anchored at
+# end-of-string for unclosed output. The parameter regex uses
+# positive lookaheads so a missing ``</parameter>`` is recovered from
+# the next ``<parameter=`` / ``</function>`` boundary instead of
+# silently dropping the value.
+
+_TOOL_CALL_BLOCK_RE = re.compile(
+    r"<tool_call>(.*?)</tool_call>|<tool_call>(.*?)$", re.DOTALL
+)
+_FUNCTION_BLOCK_RE = re.compile(
+    r"<function=(.*?)</function>|<function=(.*)$", re.DOTALL
+)
+_PARAMETER_BLOCK_RE = re.compile(
+    r"<parameter=(.*?)(?:</parameter>|(?=<parameter=)|(?=</function>)|$)",
+    re.DOTALL,
+)
+
+
 # ── Schema-aware argument coercion ──────────────────────────────────
 #
 # XML-style tool-call formats render argument values verbatim inside
-# ``<arg_value>`` tags with no quoting. ``true`` and the string
+# ``<parameter=…>`` tags with no quoting. ``true`` and the string
 # ``"true"`` produce identical wire bytes; without the tool schema, the
-# parser has no signal to distinguish them and defaults to
-# ``json.loads`` (the historical behavior). When the caller passes
-# ``tools=[...]``, parsers consult the per-parameter declared type and
-# dispatch through a vLLM-parity ladder (case-insensitive ``null``,
-# ``int()`` / ``float()`` for numeric, ``.lower() == "true"`` for bool,
-# ``json.loads`` → ``ast.literal_eval`` for object/array) so models can
-# emit Pythonic literals like ``True`` / ``None`` / ``{'k': 1}`` that
-# both SGLang's ``Qwen3CoderDetector`` and vLLM's
-# ``Qwen3CoderToolParser`` accept on the inference side.
+# parser has no signal to distinguish them. We mirror vLLM's
+# ``Qwen3CoderToolParser`` (and SGLang's ``Qwen3CoderDetector``) end to
+# end: case-insensitive ``null`` short-circuit, ``int()`` / ``float()``
+# for numeric types, ``.lower() == "true"`` for bool, ``json.loads``
+# with ``ast.literal_eval`` fallback for objects/arrays. That accepts
+# both JSON literals (Qwen3.6) and Python literals (Qwen3.5 — ``str()``
+# renders bools as ``True`` / ``False``) on the inference side.
+#
+# Why ``qwen3_coder`` and not ``qwen3_xml``: vLLM ships two parsers for
+# the same wire format. ``qwen3_xml`` is the newer streaming-first
+# parser (uses ``xml.parsers.expat`` and is the recommended choice for
+# *live serving* because ``qwen3_coder`` has known infinite-loop bugs
+# in vLLM's streaming path). For offline token-level extraction the
+# two parsers agree on JSON-literal scalars but ``qwen3_coder`` is a
+# strict superset for value coercion — it accepts Python literals via
+# ``ast.literal_eval`` where ``qwen3_xml`` returns raw strings. Since
+# this repo doesn't stream yet, ``qwen3_coder`` semantics dominate.
+# TODO: when a streaming parse API lands, switch the streaming path to
+# ``qwen3_xml``-style state-machine semantics to dodge vLLM's
+# regex-streaming bugs (see HF Qwen3-Coder-Next discussion #17).
 
 
 _STRING_TYPES = frozenset({"string", "str", "text", "varchar", "char", "enum"})
@@ -97,7 +132,7 @@ def _declared_type(param_schema: dict[str, Any] | None) -> str | None:
 def _coerce_arg_value(
     text: str, param_schema: dict[str, Any] | None
 ) -> tuple[Any, bool]:
-    """Coerce a raw ``<arg_value>`` body to its declared type.
+    """Coerce a raw ``<parameter=…>`` body to its declared type.
 
     Returns ``(value, used_fallback)``. ``used_fallback=True`` means the
     coercion could not satisfy the declared type and had to fall back to
@@ -108,7 +143,11 @@ def _coerce_arg_value(
     and SGLang's ``Qwen3CoderDetector._convert_param_value`` with one
     deliberate deviation noted inline:
 
-    - No schema → try ``json.loads``, fall back to raw text + INVALID.
+    - ``null`` short-circuit (any case) → ``None`` for any declared
+      type **except** the string family (see deviation below).
+    - No schema OR param not in schema → return verbatim (after the
+      null short-circuit). vLLM ``qwen3coder_tool_parser.py:128-137``:
+      ``if param_name not in param_config: return param_value``.
     - ``string`` / ``str`` / ``text`` / ``varchar`` / ``char`` / ``enum``
       → return verbatim. (Deviation: vLLM/SGLang null-coerce ``"null"``
       *before* checking type, so a string-typed arg of ``"null"`` would
@@ -116,7 +155,6 @@ def _coerce_arg_value(
       because the XML wire format can't distinguish the string
       ``"null"`` from JSON null, and downstream tests pin the
       preservation contract; see ``test_tool_arg_type_preservation``.)
-    - ``null`` (any case) for any non-string declared type → ``None``.
     - ``int`` / ``uint`` / ``long`` / ``short`` / ``unsigned`` family →
       ``int(text)``, degenerating to raw text + INVALID.
     - ``num`` / ``float`` family → ``float(text)`` with int demotion
@@ -129,27 +167,20 @@ def _coerce_arg_value(
     - ``object`` / ``array`` / ``dict`` / ``list`` (or ``anyOf``) →
       ``json.loads`` first, ``ast.literal_eval`` fallback (for Python
       literals like ``{'k': 1}``), raw text + INVALID otherwise.
-    - Unknown type (param missing from schema, or type not in any
-      family above) → ``ast.literal_eval`` if it parses, else raw text.
-      Matches vLLM's catch-all branch and keeps strings like
-      ``"hello"`` (with no schema entry) as-is.
+    - Any other declared type → ``ast.literal_eval`` if it parses, else
+      raw text. Matches vLLM's catch-all else branch.
     """
-    if param_schema is None:
-        if text.lower() == "null":
-            return None, False
-        try:
-            return json.loads(text), False
-        except (json.JSONDecodeError, ValueError):
-            return text, True
-
     declared = _declared_type(param_schema)
 
-    if declared is None or declared in _STRING_TYPES:
-        return text, False
-
-    if text.lower() == "null":
+    # vLLM null short-circuit runs BEFORE the schema-missing check, but
+    # is suppressed for declared string types so the wire-format
+    # ambiguity is resolved in favour of the explicit schema.
+    if declared not in _STRING_TYPES and text.lower() == "null":
         return None, False
 
+    if param_schema is None or declared is None or declared in _STRING_TYPES:
+        return text, False
+
     if declared.startswith(_INT_PREFIXES):
         try:
             return int(text), False
@@ -353,6 +384,7 @@ def parse_qwen35(
 
     tc_start = _find(ids, tool_call_id)
     tool_calls: list[ParsedToolCall] = []
+    param_index = _build_param_type_index(tools)
     if tc_start != -1:
         content_text = _decode(tokenizer, ids[:tc_start]).strip()
         tool_calls = _parse_xml_tool_calls(
@@ -361,10 +393,25 @@ def parse_qwen35(
             tool_call_id,
             tool_call_end_id,
             section_offset=parse_offset + tc_start,
-            param_index=_build_param_type_index(tools),
+            param_index=param_index,
         )
     else:
-        content_text = _decode(tokenizer, ids).strip()
+        # vLLM ``qwen3coder_tool_parser.py:269-271`` back-off: if no
+        # ``<tool_call>`` markers are present, treat the whole output
+        # as a single synthetic tool-call region and scan for raw
+        # ``<function=…>`` blocks. The content text is whatever sits
+        # outside any recovered function block (typically empty).
+        full_text = _decode(tokenizer, ids)
+        tool_calls = _parse_xml_function_blocks(
+            full_text,
+            param_index=param_index,
+            token_span=(parse_offset, parse_offset + len(ids)),
+            wrapper_unclosed=False,
+        )
+        if tool_calls:
+            content_text = _strip_function_blocks(full_text).strip()
+        else:
+            content_text = full_text.strip()
 
     return ParsedResponse(
         content=content_text,
@@ -382,71 +429,150 @@ def _parse_xml_tool_calls(
     section_offset: int,
     param_index: dict[str, dict[str, dict[str, Any]]],
 ) -> list[ParsedToolCall]:
-    """Parse Qwen3.5-style XML tool calls from token IDs."""
-    import re
-
+    """Parse Qwen3.5-style XML tool calls from token IDs.
+
+    Uses token IDs to demarcate ``<tool_call>`` / ``</tool_call>``
+    boundaries (so ``token_span`` stays precise for trainer-side
+    masking) and vLLM-parity regex on the decoded block text to
+    extract the function/parameter content tolerantly. Mirrors
+    ``Qwen3CoderToolParser._parse_xml_function_call`` plus the
+    tag-tolerance branches of the three module-level patterns.
+    """
     tool_calls: list[ParsedToolCall] = []
     i = 0
     while i < len(ids):
-        if ids[i] == tc_id:
-            end = _find(ids, tc_end_id, i + 1)
-            if end == -1:
-                raw = _decode(tokenizer, ids[i + 1 :])
-                tool_calls.append(
-                    ParsedToolCall(
-                        raw=raw,
-                        token_span=(section_offset + i, section_offset + len(ids)),
-                        status=ToolCallParseStatus.UNCLOSED_BLOCK,
-                    )
-                )
-                break
+        if ids[i] != tc_id:
+            i += 1
+            continue
+
+        end = _find(ids, tc_end_id, i + 1)
+        if end == -1:
+            block_ids = ids[i + 1 :]
+            block_text = _decode(tokenizer, block_ids)
+            span = (section_offset + i, section_offset + len(ids))
+            wrapper_unclosed = True
+        else:
             block_text = _decode(tokenizer, ids[i + 1 : end])
             span = (section_offset + i, section_offset + end + 1)
-            name_match = re.search(r"<function=([^>]+)>", block_text)
-            if not name_match:
-                tool_calls.append(
-                    ParsedToolCall(
-                        raw=block_text,
-                        token_span=span,
-                        status=ToolCallParseStatus.MALFORMED_STRUCTURE,
-                    )
-                )
-                i = end + 1
-                continue
+            wrapper_unclosed = False
 
-            name = name_match.group(1)
-            params = param_index.get(name, {})
-            arguments: dict = {}
-            any_json_fallback = False
-            for pm in re.finditer(
-                r"<parameter=([^>]+)>\n?(.*?)\n?</parameter>", block_text, re.DOTALL
-            ):
-                arg_name = pm.group(1)
-                arg_value = pm.group(2).strip()
-                value, used_fallback = _coerce_arg_value(
-                    arg_value, params.get(arg_name)
+        block_calls = _parse_xml_function_blocks(
+            block_text,
+            param_index=param_index,
+            token_span=span,
+            wrapper_unclosed=wrapper_unclosed,
+        )
+        if block_calls:
+            tool_calls.extend(block_calls)
+        elif wrapper_unclosed:
+            # vLLM would silently drop a content-less unclosed
+            # ``<tool_call>``; we keep the diagnostic so verifier /
+            # RL-loss code can mask the malformed span.
+            tool_calls.append(
+                ParsedToolCall(
+                    raw=block_text,
+                    token_span=span,
+                    status=ToolCallParseStatus.UNCLOSED_BLOCK,
                 )
-                arguments[arg_name] = value
-                any_json_fallback = any_json_fallback or used_fallback
+            )
+        else:
             tool_calls.append(
                 ParsedToolCall(
                     raw=block_text,
-                    name=name,
-                    arguments=arguments,
                     token_span=span,
-                    status=(
-                        ToolCallParseStatus.INVALID_JSON
-                        if any_json_fallback
-                        else ToolCallParseStatus.OK
-                    ),
+                    status=ToolCallParseStatus.MALFORMED_STRUCTURE,
                 )
             )
-            i = end + 1
+
+        if wrapper_unclosed:
+            break
+        i = end + 1
+    return tool_calls
+
+
+def _parse_xml_function_blocks(
+    text: str,
+    *,
+    param_index: dict[str, dict[str, dict[str, Any]]],
+    token_span: tuple[int, int] | None,
+    wrapper_unclosed: bool,
+) -> list[ParsedToolCall]:
+    """Apply vLLM's ``<function=…></function>`` regex over *text*.
+
+    ``token_span`` is shared by every call recovered from this block;
+    the granularity is the surrounding ``<tool_call>`` region (or the
+    whole completion in the no-marker back-off path), matching how
+    vLLM treats multiple ``<function=>`` siblings — it does not try to
+    attribute character offsets back to token positions.
+    """
+    tool_calls: list[ParsedToolCall] = []
+    for match in _FUNCTION_BLOCK_RE.finditer(text):
+        closed = match.group(1)
+        body = closed if closed is not None else (match.group(2) or "")
+        function_unclosed = closed is None
+        end_index = body.find(">")
+        if end_index == -1:
+            tool_calls.append(
+                ParsedToolCall(
+                    raw=body,
+                    token_span=token_span,
+                    status=ToolCallParseStatus.MALFORMED_STRUCTURE,
+                )
+            )
+            continue
+
+        name = body[:end_index]
+        params_text = body[end_index + 1 :]
+        params = param_index.get(name, {})
+        arguments: dict = {}
+        any_fallback = False
+        for capture in _PARAMETER_BLOCK_RE.findall(params_text):
+            idx = capture.find(">")
+            if idx == -1:
+                continue
+            arg_name = capture[:idx]
+            arg_value = _strip_one_newline(capture[idx + 1 :])
+            value, used_fallback = _coerce_arg_value(arg_value, params.get(arg_name))
+            arguments[arg_name] = value
+            any_fallback = any_fallback or used_fallback
+
+        if wrapper_unclosed or function_unclosed:
+            status = ToolCallParseStatus.UNCLOSED_BLOCK
+        elif any_fallback:
+            status = ToolCallParseStatus.INVALID_JSON
         else:
-            i += 1
+            status = ToolCallParseStatus.OK
+        tool_calls.append(
+            ParsedToolCall(
+                raw=body,
+                name=name,
+                arguments=arguments,
+                token_span=token_span,
+                status=status,
+            )
+        )
     return tool_calls
 
 
+def _strip_function_blocks(text: str) -> str:
+    """Remove every ``<function=…></function>`` (closed or unclosed)
+    region from *text* so the leftover is whatever the model emitted
+    as natural-language content alongside an un-bracketed tool call.
+    """
+    return _FUNCTION_BLOCK_RE.sub("", text)
+
+
+def _strip_one_newline(s: str) -> str:
+    """vLLM ``qwen3coder_tool_parser.py:246-250`` strips exactly one
+    leading and one trailing newline from a parameter value — no
+    further whitespace trimming."""
+    if s.startswith("\n"):
+        s = s[1:]
+    if s.endswith("\n"):
+        s = s[:-1]
+    return s
+
+
 # ── GLM-5/4.7/4.5: <tool_call> name <arg_key>k</arg_key> <arg_value>v</arg_value> </tool_call>
 
 
diff --git a/tests/test_arg_coercion.py b/tests/test_arg_coercion.py
index d95565f..8ef4816 100644
--- a/tests/test_arg_coercion.py
+++ b/tests/test_arg_coercion.py
@@ -158,7 +158,9 @@ def test_anyof_treated_as_object():
     assert used_fallback is False
 
 
-@pytest.mark.parametrize("declared", ["string", "str", "text", "varchar", "char", "enum"])
+@pytest.mark.parametrize(
+    "declared", ["string", "str", "text", "varchar", "char", "enum"]
+)
 def test_string_family_returns_verbatim(declared):
     value, used_fallback = _coerce_arg_value("True", {"type": declared})
     assert value == "True"
@@ -171,18 +173,32 @@ def test_string_with_list_form_type():
     assert used_fallback is False
 
 
-def test_no_schema_falls_back_to_json_loads():
-    """Historical behavior: when ``param_schema`` is ``None`` (no tools
-    were passed), we still try ``json.loads`` for backwards compatibility
-    so untyped numbers / bools parse. ``None`` is handled via the
-    ``null`` short-circuit at the top of the ladder."""
+def test_no_schema_returns_verbatim_string():
+    """vLLM ``qwen3coder_tool_parser.py:128-137``: when the param is
+    not in the tool schema (or no tools were passed at all), the parser
+    returns the raw text verbatim — it does **not** try ``json.loads``.
+    The case-insensitive ``null`` short-circuit at the top of the ladder
+    still applies (so untyped ``"null"`` becomes Python ``None``)."""
     value, used_fallback = _coerce_arg_value("42", None)
-    assert value == 42
+    assert value == "42"
+    assert used_fallback is False
+
+    value, used_fallback = _coerce_arg_value("True", None)
+    assert value == "True"
     assert used_fallback is False
 
     value, used_fallback = _coerce_arg_value("hello", None)
     assert value == "hello"
-    assert used_fallback is True
+    assert used_fallback is False
+
+
+def test_no_schema_null_short_circuit():
+    """vLLM null-coerces before the schema-missing return, so an untyped
+    ``"null"`` still becomes Python ``None``."""
+    for raw in ("null", "Null", "NULL"):
+        value, used_fallback = _coerce_arg_value(raw, None)
+        assert value is None
+        assert used_fallback is False
 
 
 def test_unknown_type_falls_back_to_literal_eval():
@@ -222,14 +238,28 @@ def test_unknown_type_falls_back_to_literal_eval():
 ]
 
 
+def _parse(tok, completion: str, *, tools=None):
+    ids = tok.encode(completion, add_special_tokens=False)
+    return parse_qwen35(
+        tok,
+        list(ids),
+        stop_ids={tok.eos_token_id} if tok.eos_token_id is not None else set(),
+        think_id=tok.convert_tokens_to_ids("<think>"),
+        think_end_id=tok.convert_tokens_to_ids("</think>"),
+        tool_call_id=tok.convert_tokens_to_ids("<tool_call>"),
+        tool_call_end_id=tok.convert_tokens_to_ids("</tool_call>"),
+        tools=tools,
+    )
+
+
 def test_qwen35_capital_true_for_boolean_is_ok_status():
     """Regression for issue #47: ``<parameter=include_files>True</parameter>``
     used to be flagged ``INVALID_JSON`` because ``json.loads("True")``
     fails. SGLang and vLLM both accept it via case-folded comparison;
     renderers now does too."""
     tok = load_tokenizer("Qwen/Qwen3.5-9B")
-
-    completion = (
+    parsed = _parse(
+        tok,
         "<tool_call>\n"
         "<function=filesystem_server_get_directory_tree>\n"
         "<parameter=path>/</parameter>\n"
@@ -237,18 +267,7 @@ def test_qwen35_capital_true_for_boolean_is_ok_status():
         "<parameter=include_files>True</parameter>\n"
         "<parameter=show_size>True</parameter>\n"
         "</function>\n"
-        "</tool_call>"
-    )
-    ids = tok.encode(completion, add_special_tokens=False)
-
-    parsed = parse_qwen35(
-        tok,
-        list(ids),
-        stop_ids={tok.eos_token_id} if tok.eos_token_id is not None else set(),
-        think_id=tok.convert_tokens_to_ids("<think>"),
-        think_end_id=tok.convert_tokens_to_ids("</think>"),
-        tool_call_id=tok.convert_tokens_to_ids("<tool_call>"),
-        tool_call_end_id=tok.convert_tokens_to_ids("</tool_call>"),
+        "</tool_call>",
         tools=_TOOLS_FROM_ISSUE,
     )
     assert len(parsed.tool_calls) == 1
@@ -261,3 +280,99 @@ def test_qwen35_capital_true_for_boolean_is_ok_status():
         "include_files": True,
         "show_size": True,
     }
+
+
+# ── Structural tolerance: parity with vLLM's three regex patterns ────
+
+
+def test_qwen35_tolerates_missing_closing_parameter():
+    """vLLM ``tool_call_parameter_regex`` uses positive lookaheads so a
+    missing ``</parameter>`` is recovered from the next ``<parameter=``
+    or ``</function>`` boundary instead of dropping the value."""
+    tok = load_tokenizer("Qwen/Qwen3.5-9B")
+    parsed = _parse(
+        tok,
+        "<tool_call>\n"
+        "<function=filesystem_server_get_directory_tree>\n"
+        "<parameter=path>/\n"  # missing </parameter>
+        "<parameter=max_depth>2</parameter>\n"
+        "</function>\n"
+        "</tool_call>",
+        tools=_TOOLS_FROM_ISSUE,
+    )
+    assert len(parsed.tool_calls) == 1
+    tc = parsed.tool_calls[0]
+    assert tc.status == ToolCallParseStatus.OK
+    assert tc.name == "filesystem_server_get_directory_tree"
+    assert tc.arguments == {"path": "/", "max_depth": 2}
+
+
+def test_qwen35_tolerates_unclosed_function():
+    """vLLM ``tool_call_function_regex`` second alternation matches an
+    unclosed ``<function=…`` body to end-of-text; we still flag the
+    surrounding diagnostic via ``UNCLOSED_BLOCK`` but recover the call."""
+    tok = load_tokenizer("Qwen/Qwen3.5-9B")
+    parsed = _parse(
+        tok,
+        "<tool_call>\n"
+        "<function=filesystem_server_get_directory_tree>\n"
+        "<parameter=path>/</parameter>\n",  # no </function>, no </tool_call>
+        tools=_TOOLS_FROM_ISSUE,
+    )
+    assert len(parsed.tool_calls) == 1
+    tc = parsed.tool_calls[0]
+    assert tc.status == ToolCallParseStatus.UNCLOSED_BLOCK
+    assert tc.name == "filesystem_server_get_directory_tree"
+    assert tc.arguments == {"path": "/"}
+
+
+def test_qwen35_backoff_no_tool_call_markers():
+    """vLLM ``qwen3coder_tool_parser.py:269-271``: when the output
+    contains no ``<tool_call>`` markers, treat the whole completion as
+    a single tool-call region and scan for ``<function=…>`` directly."""
+    tok = load_tokenizer("Qwen/Qwen3.5-9B")
+    parsed = _parse(
+        tok,
+        "sure, calling now.\n"
+        "<function=filesystem_server_get_directory_tree>\n"
+        "<parameter=path>/etc</parameter>\n"
+        "<parameter=max_depth>1</parameter>\n"
+        "</function>",
+        tools=_TOOLS_FROM_ISSUE,
+    )
+    assert len(parsed.tool_calls) == 1
+    tc = parsed.tool_calls[0]
+    assert tc.status == ToolCallParseStatus.OK
+    assert tc.name == "filesystem_server_get_directory_tree"
+    assert tc.arguments == {"path": "/etc", "max_depth": 1}
+    assert "sure, calling now" in parsed.content
+
+
+def test_qwen35_backoff_returns_no_calls_when_no_function_either():
+    """No ``<tool_call>`` and no ``<function=`` → no recovered calls,
+    full text returned as content."""
+    tok = load_tokenizer("Qwen/Qwen3.5-9B")
+    parsed = _parse(tok, "just a chat reply", tools=_TOOLS_FROM_ISSUE)
+    assert parsed.tool_calls == []
+    assert parsed.content == "just a chat reply"
+
+
+def test_qwen35_parameter_value_preserves_internal_whitespace():
+    """vLLM strips exactly one leading and one trailing ``\\n`` from the
+    parameter body (``qwen3coder_tool_parser.py:246-250``); inner
+    indentation must round-trip verbatim."""
+    tok = load_tokenizer("Qwen/Qwen3.5-9B")
+    parsed = _parse(
+        tok,
+        "<tool_call>\n"
+        "<function=filesystem_server_get_directory_tree>\n"
+        "<parameter=path>\n  /etc  \n</parameter>\n"
+        "</function>\n"
+        "</tool_call>",
+        tools=_TOOLS_FROM_ISSUE,
+    )
+    assert len(parsed.tool_calls) == 1
+    tc = parsed.tool_calls[0]
+    assert tc.status == ToolCallParseStatus.OK
+    # One leading + one trailing \n stripped — interior spaces preserved.
+    assert tc.arguments == {"path": "  /etc  "}

From 96a1c9802c964ddd4464ce6951c778660d44fffd Mon Sep 17 00:00:00 2001
From: hallerite <git@hallerite.com>
Date: Wed, 20 May 2026 13:31:24 +0000
Subject: [PATCH 3/4] fix(parsing): byte-parity with vLLM qwen3_coder for
 content + ids + exception path
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Close the four remaining behavioral deltas vs vLLM's
Qwen3CoderToolParser non-streaming extract path:

- Content text is now the raw slice (no .strip()). vLLM
  qwen3coder_tool_parser.py:316-319 explicitly leaves the commented-out
  .rstrip() in place; surrounding whitespace before/after <tool_call>
  round-trips verbatim.
- Backoff content (no <tool_call> markers) is the text up to the first
  <function= position only, matching vLLM line 317-319. Removed
  _strip_function_blocks helper that concatenated leftovers between
  function blocks — different from vLLM.
- Each successfully-parsed tool call is assigned an id in vLLM's
  default format: chatcmpl-tool-<16 hex chars> via
  uuid.uuid4().int & MASK_64_BITS, matching vLLM's make_tool_call_id /
  random_uuid pair. Malformed entries (no function name) intentionally
  stay id-less since vLLM would have dropped them entirely.
- Tool-call extraction is wrapped in try/except: on any unhandled
  error, surface tool_calls=[] and content=full_decoded_text — the
  vLLM qwen3coder_tool_parser.py:327-331 catch-all behavior. Reasoning
  extraction stays outside the wrapper since vLLM doesn't do reasoning
  in this parser.

Tests: 4 new cases pin content-no-strip, backoff content slice,
chatcmpl-tool-<hex> id format, and the exception fallback.
---
 renderers/parsing.py       | 96 ++++++++++++++++++++++++--------------
 tests/test_arg_coercion.py | 96 ++++++++++++++++++++++++++++++++++++--
 2 files changed, 154 insertions(+), 38 deletions(-)

diff --git a/renderers/parsing.py b/renderers/parsing.py
index 8d039f7..5404d6b 100644
--- a/renderers/parsing.py
+++ b/renderers/parsing.py
@@ -19,6 +19,7 @@
 import ast
 import json
 import re
+import uuid
 from typing import Any
 
 from renderers.base import ParsedResponse, ParsedToolCall, ToolCallParseStatus, ToolSpec
@@ -35,6 +36,25 @@
 # the next ``<parameter=`` / ``</function>`` boundary instead of
 # silently dropping the value.
 
+
+# Mask matching vLLM's ``vllm.utils.MASK_64_BITS`` so the random id we
+# generate is byte-shape compatible with vLLM's ``random_uuid()``.
+_MASK_64_BITS = (1 << 64) - 1
+
+
+def _make_tool_call_id() -> str:
+    """Mint a tool-call id in vLLM's default format.
+
+    vLLM's ``ToolCall`` dataclass uses ``default_factory=make_tool_call_id``
+    which returns ``f"chatcmpl-tool-{random_uuid()}"`` where
+    ``random_uuid`` is ``f"{uuid.uuid4().int & MASK_64_BITS:016x}"`` —
+    16 hex characters drawn from the low 64 bits of a v4 UUID. We
+    reproduce the format so OpenAI-shaped clients get the same id
+    surface across renderers and vLLM serving paths.
+    """
+    return f"chatcmpl-tool-{uuid.uuid4().int & _MASK_64_BITS:016x}"
+
+
 _TOOL_CALL_BLOCK_RE = re.compile(
     r"<tool_call>(.*?)</tool_call>|<tool_call>(.*?)$", re.DOTALL
 )
@@ -382,36 +402,49 @@ def parse_qwen35(
             content="", reasoning_content=reasoning or None, tool_calls=[]
         )
 
+    full_text = _decode(tokenizer, ids)
     tc_start = _find(ids, tool_call_id)
-    tool_calls: list[ParsedToolCall] = []
     param_index = _build_param_type_index(tools)
-    if tc_start != -1:
-        content_text = _decode(tokenizer, ids[:tc_start]).strip()
-        tool_calls = _parse_xml_tool_calls(
-            tokenizer,
-            ids[tc_start:],
-            tool_call_id,
-            tool_call_end_id,
-            section_offset=parse_offset + tc_start,
-            param_index=param_index,
-        )
-    else:
-        # vLLM ``qwen3coder_tool_parser.py:269-271`` back-off: if no
-        # ``<tool_call>`` markers are present, treat the whole output
-        # as a single synthetic tool-call region and scan for raw
-        # ``<function=…>`` blocks. The content text is whatever sits
-        # outside any recovered function block (typically empty).
-        full_text = _decode(tokenizer, ids)
-        tool_calls = _parse_xml_function_blocks(
-            full_text,
-            param_index=param_index,
-            token_span=(parse_offset, parse_offset + len(ids)),
-            wrapper_unclosed=False,
-        )
-        if tool_calls:
-            content_text = _strip_function_blocks(full_text).strip()
+    try:
+        if tc_start != -1:
+            # vLLM ``qwen3coder_tool_parser.py:316-319``: content text
+            # is the raw slice up to the first ``<tool_call>`` (or
+            # first ``<function=`` if no ``<tool_call>`` is present).
+            # No whitespace stripping — the model's prefix prose is
+            # surfaced verbatim.
+            content_text = _decode(tokenizer, ids[:tc_start])
+            tool_calls = _parse_xml_tool_calls(
+                tokenizer,
+                ids[tc_start:],
+                tool_call_id,
+                tool_call_end_id,
+                section_offset=parse_offset + tc_start,
+                param_index=param_index,
+            )
         else:
-            content_text = full_text.strip()
+            # vLLM ``qwen3coder_tool_parser.py:269-271`` back-off: no
+            # ``<tool_call>`` markers — scan whole output for raw
+            # ``<function=…>`` blocks. Content text is whatever sits
+            # before the first ``<function=`` (vLLM line 317-319), or
+            # the full text if nothing matches.
+            tool_calls = _parse_xml_function_blocks(
+                full_text,
+                param_index=param_index,
+                token_span=(parse_offset, parse_offset + len(ids)),
+                wrapper_unclosed=False,
+            )
+            if tool_calls:
+                first_fn = full_text.find("<function=")
+                content_text = full_text[:first_fn] if first_fn >= 0 else full_text
+            else:
+                content_text = full_text
+    except Exception:
+        # vLLM ``qwen3coder_tool_parser.py:327-331`` catch-all: on any
+        # extraction error, drop every recovered call and surface the
+        # raw text as content so downstream consumers never see a
+        # half-parsed call.
+        content_text = full_text
+        tool_calls = []
 
     return ParsedResponse(
         content=content_text,
@@ -547,6 +580,7 @@ def _parse_xml_function_blocks(
                 raw=body,
                 name=name,
                 arguments=arguments,
+                id=_make_tool_call_id(),
                 token_span=token_span,
                 status=status,
             )
@@ -554,14 +588,6 @@ def _parse_xml_function_blocks(
     return tool_calls
 
 
-def _strip_function_blocks(text: str) -> str:
-    """Remove every ``<function=…></function>`` (closed or unclosed)
-    region from *text* so the leftover is whatever the model emitted
-    as natural-language content alongside an un-bracketed tool call.
-    """
-    return _FUNCTION_BLOCK_RE.sub("", text)
-
-
 def _strip_one_newline(s: str) -> str:
     """vLLM ``qwen3coder_tool_parser.py:246-250`` strips exactly one
     leading and one trailing newline from a parameter value — no
diff --git a/tests/test_arg_coercion.py b/tests/test_arg_coercion.py
index 8ef4816..279dbf3 100644
--- a/tests/test_arg_coercion.py
+++ b/tests/test_arg_coercion.py
@@ -329,7 +329,9 @@ def test_qwen35_tolerates_unclosed_function():
 def test_qwen35_backoff_no_tool_call_markers():
     """vLLM ``qwen3coder_tool_parser.py:269-271``: when the output
     contains no ``<tool_call>`` markers, treat the whole completion as
-    a single tool-call region and scan for ``<function=…>`` directly."""
+    a single tool-call region and scan for ``<function=…>`` directly.
+    Content text is the raw slice up to the first ``<function=``,
+    matching ``qwen3coder_tool_parser.py:316-319`` (no strip)."""
     tok = load_tokenizer("Qwen/Qwen3.5-9B")
     parsed = _parse(
         tok,
@@ -345,18 +347,106 @@ def test_qwen35_backoff_no_tool_call_markers():
     assert tc.status == ToolCallParseStatus.OK
     assert tc.name == "filesystem_server_get_directory_tree"
     assert tc.arguments == {"path": "/etc", "max_depth": 1}
-    assert "sure, calling now" in parsed.content
+    # vLLM-parity: content = text up to first ``<function=``, no strip.
+    assert parsed.content == "sure, calling now.\n"
 
 
 def test_qwen35_backoff_returns_no_calls_when_no_function_either():
     """No ``<tool_call>`` and no ``<function=`` → no recovered calls,
-    full text returned as content."""
+    full text returned as content verbatim (no strip, vLLM parity)."""
     tok = load_tokenizer("Qwen/Qwen3.5-9B")
     parsed = _parse(tok, "just a chat reply", tools=_TOOLS_FROM_ISSUE)
     assert parsed.tool_calls == []
     assert parsed.content == "just a chat reply"
 
 
+def test_qwen35_content_text_not_stripped():
+    """vLLM ``qwen3coder_tool_parser.py:316-319`` slices raw text before
+    the first ``<tool_call>``; the commented-out ``.rstrip()`` confirms
+    intent. We mirror that — surrounding whitespace round-trips."""
+    tok = load_tokenizer("Qwen/Qwen3.5-9B")
+    parsed = _parse(
+        tok,
+        "let me check\n\n"  # trailing whitespace before <tool_call>
+        "<tool_call>\n"
+        "<function=filesystem_server_get_directory_tree>\n"
+        "<parameter=path>/</parameter>\n"
+        "</function>\n"
+        "</tool_call>",
+        tools=_TOOLS_FROM_ISSUE,
+    )
+    assert parsed.content == "let me check\n\n"
+    assert len(parsed.tool_calls) == 1
+    assert parsed.tool_calls[0].status == ToolCallParseStatus.OK
+
+
+def test_qwen35_tool_call_id_matches_vllm_format():
+    """vLLM's ``ToolCall`` default factory mints ids of the form
+    ``chatcmpl-tool-<16 hex>``. Each surfaced call gets a fresh id."""
+    import re as _re
+
+    tok = load_tokenizer("Qwen/Qwen3.5-9B")
+    parsed = _parse(
+        tok,
+        "<tool_call>\n"
+        "<function=filesystem_server_get_directory_tree>\n"
+        "<parameter=path>/</parameter>\n"
+        "</function>\n"
+        "</tool_call>\n"
+        "<tool_call>\n"
+        "<function=filesystem_server_get_directory_tree>\n"
+        "<parameter=path>/etc</parameter>\n"
+        "</function>\n"
+        "</tool_call>",
+        tools=_TOOLS_FROM_ISSUE,
+    )
+    assert len(parsed.tool_calls) == 2
+    pattern = _re.compile(r"^chatcmpl-tool-[0-9a-f]{16}$")
+    ids = [tc.id for tc in parsed.tool_calls]
+    for tcid in ids:
+        assert tcid is not None and pattern.match(tcid), tcid
+    # Two surfaced calls must get distinct ids.
+    assert ids[0] != ids[1]
+
+
+def test_qwen35_exception_fallback_returns_full_text():
+    """vLLM ``qwen3coder_tool_parser.py:327-331`` catch-all returns
+    ``tools_called=False, tool_calls=[], content=model_output`` on any
+    unhandled extraction error. We mirror that by wrapping the
+    tool-call extraction; reasoning content extracted earlier is kept.
+    """
+    from unittest.mock import patch
+
+    tok = load_tokenizer("Qwen/Qwen3.5-9B")
+    completion = (
+        "<tool_call>\n"
+        "<function=filesystem_server_get_directory_tree>\n"
+        "<parameter=path>/</parameter>\n"
+        "</function>\n"
+        "</tool_call>"
+    )
+    ids = tok.encode(completion, add_special_tokens=False)
+
+    with patch(
+        "renderers.parsing._parse_xml_tool_calls",
+        side_effect=RuntimeError("boom"),
+    ):
+        parsed = parse_qwen35(
+            tok,
+            list(ids),
+            stop_ids={tok.eos_token_id} if tok.eos_token_id is not None else set(),
+            think_id=tok.convert_tokens_to_ids("<think>"),
+            think_end_id=tok.convert_tokens_to_ids("</think>"),
+            tool_call_id=tok.convert_tokens_to_ids("<tool_call>"),
+            tool_call_end_id=tok.convert_tokens_to_ids("</tool_call>"),
+            tools=_TOOLS_FROM_ISSUE,
+        )
+    assert parsed.tool_calls == []
+    # content is the full decoded post-think text, surfaced verbatim.
+    assert "<tool_call>" in parsed.content
+    assert "<function=filesystem_server_get_directory_tree>" in parsed.content
+
+
 def test_qwen35_parameter_value_preserves_internal_whitespace():
     """vLLM strips exactly one leading and one trailing ``\\n`` from the
     parameter body (``qwen3coder_tool_parser.py:246-250``); inner

From b58fa63396a0717586e66e86654b7ef5a031818f Mon Sep 17 00:00:00 2001
From: hallerite <git@hallerite.com>
Date: Wed, 20 May 2026 13:43:01 +0000
Subject: [PATCH 4/4] fix(parsing): drop string-type "null" deviation, match
 vLLM byte-for-byte
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

vLLM's _convert_param_value (qwen3coder_tool_parser.py:125) runs the
case-insensitive "null" short-circuit BEFORE any type check —
including the string family. That means an XML wire value of
<parameter=x>null</parameter> against a {"type": "string"} schema
collapses to Python None, not the string "null".

Previously this repo carried a deliberate deviation to preserve the
string verbatim across the four XML renderers (Qwen3.5/3.6, GLM-5,
MiniMax-M2.5, Laguna-XS.2). The motivation was a tidier round-trip
contract: "string params always round-trip as strings." But the XML
wire format genuinely cannot distinguish the string "null" from a
JSON null, and matching vLLM is more important than matching our
sibling renderers — downstream consumers (verifier, RL-loss) align
with vLLM's interpretation, not ours.

Test updates:
- test_arg_coercion::test_null_is_preserved_verbatim_for_string_type
  is renamed to test_null_short_circuit_applies_even_for_string_type
  and inverted: a string-typed "null" now returns None.
- test_tool_arg_type_preservation::test_string_arg_preserves_type
  branches on _JSON_FORMAT_MODELS (Qwen3 Hermes, Kimi K2 section JSON)
  vs XML renderers for the string-null case only. JSON renderers
  preserve "null" verbatim because the wire bytes quote strings; XML
  renderers null-coerce. Other string cases (bool/int/array/object)
  still round-trip verbatim everywhere.
---
 renderers/parsing.py                     | 35 +++++++++++-------------
 tests/test_arg_coercion.py               | 19 +++++++------
 tests/test_tool_arg_type_preservation.py | 33 +++++++++++++++++++---
 3 files changed, 55 insertions(+), 32 deletions(-)

diff --git a/renderers/parsing.py b/renderers/parsing.py
index 5404d6b..331c0ea 100644
--- a/renderers/parsing.py
+++ b/renderers/parsing.py
@@ -160,21 +160,21 @@ def _coerce_arg_value(
     text for objects), so the caller can flag ``INVALID_JSON``.
 
     The ladder mirrors vLLM's ``Qwen3CoderToolParser._convert_param_value``
-    and SGLang's ``Qwen3CoderDetector._convert_param_value`` with one
-    deliberate deviation noted inline:
-
-    - ``null`` short-circuit (any case) → ``None`` for any declared
-      type **except** the string family (see deviation below).
-    - No schema OR param not in schema → return verbatim (after the
-      null short-circuit). vLLM ``qwen3coder_tool_parser.py:128-137``:
+    and SGLang's ``Qwen3CoderDetector._convert_param_value`` byte-for-byte:
+
+    - ``null`` short-circuit (any case) → ``None``. Runs **before** any
+      type check, including the string family. vLLM does this on line
+      125 of ``qwen3coder_tool_parser.py`` ahead of the
+      ``param_name not in param_config`` branch. This means a
+      string-typed arg whose wire value is the literal ``null``
+      collapses to Python ``None`` — the XML wire format can't
+      distinguish that case from a JSON null, and we accept the
+      lossy round-trip in exchange for byte parity with vLLM.
+    - No schema OR param not in schema → return verbatim. vLLM
+      ``qwen3coder_tool_parser.py:128-137``:
       ``if param_name not in param_config: return param_value``.
     - ``string`` / ``str`` / ``text`` / ``varchar`` / ``char`` / ``enum``
-      → return verbatim. (Deviation: vLLM/SGLang null-coerce ``"null"``
-      *before* checking type, so a string-typed arg of ``"null"`` would
-      come back as Python ``None``. Renderers preserves the string
-      because the XML wire format can't distinguish the string
-      ``"null"`` from JSON null, and downstream tests pin the
-      preservation contract; see ``test_tool_arg_type_preservation``.)
+      → return verbatim.
     - ``int`` / ``uint`` / ``long`` / ``short`` / ``unsigned`` family →
       ``int(text)``, degenerating to raw text + INVALID.
     - ``num`` / ``float`` family → ``float(text)`` with int demotion
@@ -190,14 +190,11 @@ def _coerce_arg_value(
     - Any other declared type → ``ast.literal_eval`` if it parses, else
       raw text. Matches vLLM's catch-all else branch.
     """
-    declared = _declared_type(param_schema)
-
-    # vLLM null short-circuit runs BEFORE the schema-missing check, but
-    # is suppressed for declared string types so the wire-format
-    # ambiguity is resolved in favour of the explicit schema.
-    if declared not in _STRING_TYPES and text.lower() == "null":
+    if text.lower() == "null":
         return None, False
 
+    declared = _declared_type(param_schema)
+
     if param_schema is None or declared is None or declared in _STRING_TYPES:
         return text, False
 
diff --git a/tests/test_arg_coercion.py b/tests/test_arg_coercion.py
index 279dbf3..5d5575f 100644
--- a/tests/test_arg_coercion.py
+++ b/tests/test_arg_coercion.py
@@ -63,15 +63,16 @@ def test_null_literal_is_case_insensitive_for_non_string_types(declared):
         assert used_fallback is False
 
 
-def test_null_is_preserved_verbatim_for_string_type():
-    """Deliberate deviation from vLLM/SGLang: string-typed ``"null"``
-    stays as the string ``"null"`` so the existing string-verbatim
-    contract holds (see ``test_tool_arg_type_preservation``). The XML
-    wire format already can't distinguish the string ``"null"`` from
-    JSON null, but when a schema says ``type: "string"`` we honour it."""
-    value, used_fallback = _coerce_arg_value("null", {"type": "string"})
-    assert value == "null"
-    assert used_fallback is False
+def test_null_short_circuit_applies_even_for_string_type():
+    """vLLM ``qwen3coder_tool_parser.py:125`` null-coerces ``"null"``
+    *before* the type check, so a string-typed wire value of ``null``
+    collapses to Python ``None``. The XML wire format can't tell the
+    string ``"null"`` from JSON null; we accept the lossy round-trip
+    in exchange for byte parity with vLLM."""
+    for raw in ("null", "Null", "NULL"):
+        value, used_fallback = _coerce_arg_value(raw, {"type": "string"})
+        assert value is None
+        assert used_fallback is False
 
 
 @pytest.mark.parametrize(
diff --git a/tests/test_tool_arg_type_preservation.py b/tests/test_tool_arg_type_preservation.py
index 61dbb9b..e036abd 100644
--- a/tests/test_tool_arg_type_preservation.py
+++ b/tests/test_tool_arg_type_preservation.py
@@ -67,9 +67,21 @@ def renderer(model, renderer_name):
 ]
 
 
+# Renderers whose wire format quotes strings (Hermes JSON, section
+# JSON). Those parsers preserve the string ``"null"`` verbatim because
+# the wire bytes are unambiguous. XML-style renderers can't tell the
+# string ``null`` from a JSON null on the wire, so they null-coerce —
+# matching vLLM's ``Qwen3CoderToolParser._convert_param_value:125``
+# which runs the null short-circuit before any type check.
+_JSON_FORMAT_MODELS = {"Qwen/Qwen3-8B", "moonshotai/Kimi-K2-Instruct"}
+
+
 # Each case: a single ``x`` argument whose value is a STRING that
 # happens to be valid JSON of another type. With a schema declaring
-# ``x: string``, the parser must return the string verbatim.
+# ``x: string``, the parser must return the string verbatim — except
+# for the ``string-null`` case on XML renderers, where vLLM's
+# ``null`` short-circuit runs before the type check and we follow
+# suit for byte parity (see test below for the explicit assertion).
 JSON_LOOKING_STRING_ARGS = [
     pytest.param({"x": "true"}, id="string-bool"),
     pytest.param({"x": "42"}, id="string-int"),
@@ -116,9 +128,16 @@ def _extract_assistant_tokens(renderer, prompt, assistant_msg, *, tools=None):
 
 
 @pytest.mark.parametrize("args", JSON_LOOKING_STRING_ARGS)
-def test_string_arg_preserves_type(model, renderer_name, renderer, args):
+def test_string_arg_preserves_type(model, renderer_name, renderer, args, request):
     """Tool-call args of declared type ``str`` must round-trip as ``str``,
-    not get re-parsed as bool/int/null/list/dict by the parser."""
+    not get re-parsed as bool/int/null/list/dict by the parser.
+
+    Exception: the ``string-null`` case on XML renderers collapses to
+    Python ``None`` to match vLLM's null short-circuit. The XML wire
+    format is genuinely ambiguous there (``<parameter=x>null</parameter>``
+    is identical bytes whether the model meant the string ``"null"``
+    or a JSON null), and vLLM resolves the ambiguity by null-coercing.
+    """
     msg = {
         "role": "assistant",
         "content": "",
@@ -134,6 +153,12 @@ def test_string_arg_preserves_type(model, renderer_name, renderer, args):
 
     assert parsed.tool_calls, f"{model}: parser returned no tool_calls"
     got = _normalize_args(parsed.tool_calls[0].arguments)
-    assert got == args, (
+
+    case_id = request.node.callspec.id.rsplit("-", 1)[-1]
+    if case_id == "null" and model not in _JSON_FORMAT_MODELS:
+        expected = {"x": None}
+    else:
+        expected = args
+    assert got == expected, (
         f"{model}: tool-arg type drift — sent {args!r}, parser returned {got!r}"
     )