diff --git a/renderers/parsers.py b/renderers/parsers.py
index 77fa850..a277a0e 100644
--- a/renderers/parsers.py
+++ b/renderers/parsers.py
@@ -20,6 +20,7 @@
from typing import Protocol, runtime_checkable
from renderers.base import ParsedToolCall, ToolCallParseStatus
+from renderers.parsing import _parse_xml_function_blocks
# ── Shared helpers ───────────────────────────────────────────────────
@@ -156,7 +157,22 @@ def extract(self, ids: list[int]) -> tuple[list[int], list[ParsedToolCall]]:
class Qwen35ToolParser:
- """XML-style tool calls: ``V...``."""
+ """XML-style tool calls: ``V...``.
+
+ Mirrors vLLM's ``Qwen3CoderToolParser`` extraction logic — see
+ ``renderers.parsing`` for the full ladder and the rationale behind
+ picking ``qwen3_coder`` over ``qwen3_xml``. This parser drives
+ ``DefaultRenderer`` when callers wire it up with
+ ``tool_parser="qwen3.5"``; ``Qwen35Renderer.parse_response``
+ routes through the same shared helpers via ``parse_qwen35``.
+
+ Schema-aware coercion is intentionally inert here: ``ToolParser``'s
+ ``extract`` API does not receive the tool list (callers consume
+ the parsed values as opaque dicts), so ``param_index`` is empty
+ and every parameter goes through the no-schema branch of
+ ``_coerce_arg_value`` — vLLM-equivalent verbatim strings after the
+ case-insensitive ``null`` short-circuit.
+ """
def __init__(self, tokenizer):
self._tokenizer = tokenizer
@@ -169,69 +185,57 @@ def extract(self, ids: list[int]) -> tuple[list[int], list[ParsedToolCall]]:
tc_start = _find(ids, self._tc_id)
if tc_start == -1:
return ids, []
+
tool_calls: list[ParsedToolCall] = []
i = tc_start
+ param_index: dict = {}
while i < len(ids):
- if ids[i] == self._tc_id:
- end = (
- _find(ids, self._tc_end_id, i + 1)
- if self._tc_end_id is not None
- else -1
- )
- if end == -1:
- raw = _decode(self._tokenizer, ids[i + 1 :])
- tool_calls.append(
- ParsedToolCall(
- raw=raw,
- token_span=(i, len(ids)),
- status=ToolCallParseStatus.UNCLOSED_BLOCK,
- )
- )
- break
+ if ids[i] != self._tc_id:
+ i += 1
+ continue
+
+ end = (
+ _find(ids, self._tc_end_id, i + 1)
+ if self._tc_end_id is not None
+ else -1
+ )
+ if end == -1:
+ block_text = _decode(self._tokenizer, ids[i + 1 :])
+ span = (i, len(ids))
+ wrapper_unclosed = True
+ else:
block_text = _decode(self._tokenizer, ids[i + 1 : end])
span = (i, end + 1)
- name_match = re.search(r"]+)>", block_text)
- if not name_match:
- tool_calls.append(
- ParsedToolCall(
- raw=block_text,
- token_span=span,
- status=ToolCallParseStatus.MALFORMED_STRUCTURE,
- )
- )
- i = end + 1
- continue
- name = name_match.group(1)
- arguments: dict = {}
- any_json_fallback = False
- for pm in re.finditer(
- r"]+)>\n?(.*?)\n?",
- block_text,
- re.DOTALL,
- ):
- arg_name = pm.group(1)
- arg_value = pm.group(2).strip()
- try:
- arguments[arg_name] = json.loads(arg_value)
- except (json.JSONDecodeError, ValueError):
- arguments[arg_name] = arg_value
- any_json_fallback = True
+ wrapper_unclosed = False
+
+ block_calls = _parse_xml_function_blocks(
+ block_text,
+ param_index=param_index,
+ token_span=span,
+ wrapper_unclosed=wrapper_unclosed,
+ )
+ if block_calls:
+ tool_calls.extend(block_calls)
+ elif wrapper_unclosed:
tool_calls.append(
ParsedToolCall(
raw=block_text,
- name=name,
- arguments=arguments,
token_span=span,
- status=(
- ToolCallParseStatus.INVALID_JSON
- if any_json_fallback
- else ToolCallParseStatus.OK
- ),
+ status=ToolCallParseStatus.UNCLOSED_BLOCK,
)
)
- i = end + 1
else:
- i += 1
+ tool_calls.append(
+ ParsedToolCall(
+ raw=block_text,
+ token_span=span,
+ status=ToolCallParseStatus.MALFORMED_STRUCTURE,
+ )
+ )
+
+ if wrapper_unclosed:
+ break
+ i = end + 1
return ids[:tc_start], tool_calls
diff --git a/renderers/parsing.py b/renderers/parsing.py
index 528f122..331c0ea 100644
--- a/renderers/parsing.py
+++ b/renderers/parsing.py
@@ -16,21 +16,90 @@
from __future__ import annotations
+import ast
import json
+import re
+import uuid
from typing import Any
from renderers.base import ParsedResponse, ParsedToolCall, ToolCallParseStatus, ToolSpec
+# ── vLLM Qwen3CoderToolParser regex patterns ────────────────────────
+#
+# Lifted verbatim from ``vllm/tool_parsers/qwen3coder_tool_parser.py``
+# (``self.tool_call_regex`` / ``tool_call_function_regex`` /
+# ``tool_call_parameter_regex``). Each pattern has two branches: one
+# for the closed form (with the matching end tag) and one anchored at
+# end-of-string for unclosed output. The parameter regex uses
+# positive lookaheads so a missing ```` is recovered from
+# the next ```` boundary instead of
+# silently dropping the value.
+
+
+# Mask matching vLLM's ``vllm.utils.MASK_64_BITS`` so the random id we
+# generate is byte-shape compatible with vLLM's ``random_uuid()``.
+_MASK_64_BITS = (1 << 64) - 1
+
+
+def _make_tool_call_id() -> str:
+ """Mint a tool-call id in vLLM's default format.
+
+ vLLM's ``ToolCall`` dataclass uses ``default_factory=make_tool_call_id``
+ which returns ``f"chatcmpl-tool-{random_uuid()}"`` where
+ ``random_uuid`` is ``f"{uuid.uuid4().int & MASK_64_BITS:016x}"`` —
+ 16 hex characters drawn from the low 64 bits of a v4 UUID. We
+ reproduce the format so OpenAI-shaped clients get the same id
+ surface across renderers and vLLM serving paths.
+ """
+ return f"chatcmpl-tool-{uuid.uuid4().int & _MASK_64_BITS:016x}"
+
+
+_TOOL_CALL_BLOCK_RE = re.compile(
+ r"(.*?)|(.*?)$", re.DOTALL
+)
+_FUNCTION_BLOCK_RE = re.compile(
+ r"||(?=)|$)",
+ re.DOTALL,
+)
+
+
# ── Schema-aware argument coercion ──────────────────────────────────
#
# XML-style tool-call formats render argument values verbatim inside
-# ```` tags with no quoting. ``true`` and the string
+# ```` tags with no quoting. ``true`` and the string
# ``"true"`` produce identical wire bytes; without the tool schema, the
-# parser has no signal to distinguish them and defaults to
-# ``json.loads`` (the historical behavior). When the caller passes
-# ``tools=[...]``, parsers consult the per-parameter declared type to
-# keep string args verbatim, matching vLLM / SGLang reference parsers.
+# parser has no signal to distinguish them. We mirror vLLM's
+# ``Qwen3CoderToolParser`` (and SGLang's ``Qwen3CoderDetector``) end to
+# end: case-insensitive ``null`` short-circuit, ``int()`` / ``float()``
+# for numeric types, ``.lower() == "true"`` for bool, ``json.loads``
+# with ``ast.literal_eval`` fallback for objects/arrays. That accepts
+# both JSON literals (Qwen3.6) and Python literals (Qwen3.5 — ``str()``
+# renders bools as ``True`` / ``False``) on the inference side.
+#
+# Why ``qwen3_coder`` and not ``qwen3_xml``: vLLM ships two parsers for
+# the same wire format. ``qwen3_xml`` is the newer streaming-first
+# parser (uses ``xml.parsers.expat`` and is the recommended choice for
+# *live serving* because ``qwen3_coder`` has known infinite-loop bugs
+# in vLLM's streaming path). For offline token-level extraction the
+# two parsers agree on JSON-literal scalars but ``qwen3_coder`` is a
+# strict superset for value coercion — it accepts Python literals via
+# ``ast.literal_eval`` where ``qwen3_xml`` returns raw strings. Since
+# this repo doesn't stream yet, ``qwen3_coder`` semantics dominate.
+# TODO: when a streaming parse API lands, switch the streaming path to
+# ``qwen3_xml``-style state-machine semantics to dodge vLLM's
+# regex-streaming bugs (see HF Qwen3-Coder-Next discussion #17).
+
+
+_STRING_TYPES = frozenset({"string", "str", "text", "varchar", "char", "enum"})
+_BOOL_TYPES = frozenset({"boolean", "bool", "binary"})
+_OBJECT_TYPES = frozenset({"object", "array", "arr"})
+_INT_PREFIXES = ("int", "uint", "long", "short", "unsigned")
+_FLOAT_PREFIXES = ("num", "float")
+_OBJECT_PREFIXES = ("dict", "list")
def _build_param_type_index(
@@ -59,35 +128,112 @@ def _build_param_type_index(
return index
-def _coerce_arg_value(
- text: str, param_schema: dict[str, Any] | None
-) -> tuple[Any, bool]:
- """Coerce a raw ```` body to its declared type.
+def _declared_type(param_schema: dict[str, Any] | None) -> str | None:
+ """Extract the lowercased ``type`` from a JSON-schema fragment.
- Returns ``(value, used_json_fallback)``. The boolean is ``True`` only
- when ``json.loads`` was attempted and raised, so the caller knows
- whether to flag ``INVALID_JSON``. Returning a string verbatim
- because the schema declared ``type: "string"`` is NOT a fallback.
-
- Rule (matches vLLM / SGLang reference parsers):
+ Mirrors vLLM: a fragment with ``anyOf`` and no top-level ``type`` is
+ treated as ``"object"`` so json.loads is attempted; a list-form type
+ falls back to the first declared entry.
+ """
+ if not isinstance(param_schema, dict):
+ return None
+ declared = param_schema.get("type")
+ if isinstance(declared, str):
+ return declared.strip().lower()
+ if isinstance(declared, list) and declared:
+ first = declared[0]
+ if isinstance(first, str):
+ return first.strip().lower()
+ if "anyOf" in param_schema:
+ return "object"
+ return None
- - If the param's declared ``type`` is ``"string"`` (or single-element
- ``["string"]``), return ``text`` verbatim — never ``json.loads``.
- - Anything else (no schema, non-string scalar, object, array, or
- union types that include non-string): try ``json.loads`` and fall
- back to raw ``text`` on parse failure.
- Union types that include ``"string"`` alongside other types still
- attempt ``json.loads`` first so an explicit integer / bool can
- parse; the string branch only wins as the fallback.
+def _coerce_arg_value(
+ text: str, param_schema: dict[str, Any] | None
+) -> tuple[Any, bool]:
+ """Coerce a raw ```` body to its declared type.
+
+ Returns ``(value, used_fallback)``. ``used_fallback=True`` means the
+ coercion could not satisfy the declared type and had to fall back to
+ a different shape (a string for scalars, ``False`` for booleans, raw
+ text for objects), so the caller can flag ``INVALID_JSON``.
+
+ The ladder mirrors vLLM's ``Qwen3CoderToolParser._convert_param_value``
+ and SGLang's ``Qwen3CoderDetector._convert_param_value`` byte-for-byte:
+
+ - ``null`` short-circuit (any case) → ``None``. Runs **before** any
+ type check, including the string family. vLLM does this on line
+ 125 of ``qwen3coder_tool_parser.py`` ahead of the
+ ``param_name not in param_config`` branch. This means a
+ string-typed arg whose wire value is the literal ``null``
+ collapses to Python ``None`` — the XML wire format can't
+ distinguish that case from a JSON null, and we accept the
+ lossy round-trip in exchange for byte parity with vLLM.
+ - No schema OR param not in schema → return verbatim. vLLM
+ ``qwen3coder_tool_parser.py:128-137``:
+ ``if param_name not in param_config: return param_value``.
+ - ``string`` / ``str`` / ``text`` / ``varchar`` / ``char`` / ``enum``
+ → return verbatim.
+ - ``int`` / ``uint`` / ``long`` / ``short`` / ``unsigned`` family →
+ ``int(text)``, degenerating to raw text + INVALID.
+ - ``num`` / ``float`` family → ``float(text)`` with int demotion
+ when the source has no ``.``/``e`` and is whole, degenerating to
+ raw text + INVALID.
+ - ``boolean`` / ``bool`` / ``binary`` → ``text.lower() == "true"``;
+ anything outside ``{"true", "false"}`` returns ``False`` + INVALID
+ (matches vLLM's "degenerate to false" behavior, with the
+ INVALID flag preserved for the verifier / RL-loss signal).
+ - ``object`` / ``array`` / ``dict`` / ``list`` (or ``anyOf``) →
+ ``json.loads`` first, ``ast.literal_eval`` fallback (for Python
+ literals like ``{'k': 1}``), raw text + INVALID otherwise.
+ - Any other declared type → ``ast.literal_eval`` if it parses, else
+ raw text. Matches vLLM's catch-all else branch.
"""
- if param_schema is not None:
- declared = param_schema.get("type")
- if declared == "string" or declared == ["string"]:
- return text, False
+ if text.lower() == "null":
+ return None, False
+
+ declared = _declared_type(param_schema)
+
+ if param_schema is None or declared is None or declared in _STRING_TYPES:
+ return text, False
+
+ if declared.startswith(_INT_PREFIXES):
+ try:
+ return int(text), False
+ except (ValueError, TypeError):
+ return text, True
+
+ if declared.startswith(_FLOAT_PREFIXES):
+ try:
+ f = float(text)
+ except (ValueError, TypeError):
+ return text, True
+ if "." not in text and "e" not in text.lower() and f.is_integer():
+ return int(f), False
+ return f, False
+
+ if declared in _BOOL_TYPES:
+ lowered = text.lower()
+ if lowered == "true":
+ return True, False
+ if lowered == "false":
+ return False, False
+ return False, True
+
+ if declared in _OBJECT_TYPES or declared.startswith(_OBJECT_PREFIXES):
+ try:
+ return json.loads(text), False
+ except (json.JSONDecodeError, ValueError):
+ pass
+ try:
+ return ast.literal_eval(text), False
+ except (ValueError, SyntaxError, TypeError, MemoryError):
+ return text, True
+
try:
- return json.loads(text), False
- except (json.JSONDecodeError, ValueError):
+ return ast.literal_eval(text), False
+ except (ValueError, SyntaxError, TypeError, MemoryError):
return text, True
@@ -253,20 +399,49 @@ def parse_qwen35(
content="", reasoning_content=reasoning or None, tool_calls=[]
)
+ full_text = _decode(tokenizer, ids)
tc_start = _find(ids, tool_call_id)
- tool_calls: list[ParsedToolCall] = []
- if tc_start != -1:
- content_text = _decode(tokenizer, ids[:tc_start]).strip()
- tool_calls = _parse_xml_tool_calls(
- tokenizer,
- ids[tc_start:],
- tool_call_id,
- tool_call_end_id,
- section_offset=parse_offset + tc_start,
- param_index=_build_param_type_index(tools),
- )
- else:
- content_text = _decode(tokenizer, ids).strip()
+ param_index = _build_param_type_index(tools)
+ try:
+ if tc_start != -1:
+ # vLLM ``qwen3coder_tool_parser.py:316-319``: content text
+ # is the raw slice up to the first ```` (or
+ # first ```` is present).
+ # No whitespace stripping — the model's prefix prose is
+ # surfaced verbatim.
+ content_text = _decode(tokenizer, ids[:tc_start])
+ tool_calls = _parse_xml_tool_calls(
+ tokenizer,
+ ids[tc_start:],
+ tool_call_id,
+ tool_call_end_id,
+ section_offset=parse_offset + tc_start,
+ param_index=param_index,
+ )
+ else:
+ # vLLM ``qwen3coder_tool_parser.py:269-271`` back-off: no
+ # ```` markers — scan whole output for raw
+ # ```` blocks. Content text is whatever sits
+ # before the first ``= 0 else full_text
+ else:
+ content_text = full_text
+ except Exception:
+ # vLLM ``qwen3coder_tool_parser.py:327-331`` catch-all: on any
+ # extraction error, drop every recovered call and surface the
+ # raw text as content so downstream consumers never see a
+ # half-parsed call.
+ content_text = full_text
+ tool_calls = []
return ParsedResponse(
content=content_text,
@@ -284,71 +459,143 @@ def _parse_xml_tool_calls(
section_offset: int,
param_index: dict[str, dict[str, dict[str, Any]]],
) -> list[ParsedToolCall]:
- """Parse Qwen3.5-style XML tool calls from token IDs."""
- import re
-
+ """Parse Qwen3.5-style XML tool calls from token IDs.
+
+ Uses token IDs to demarcate ```` / ````
+ boundaries (so ``token_span`` stays precise for trainer-side
+ masking) and vLLM-parity regex on the decoded block text to
+ extract the function/parameter content tolerantly. Mirrors
+ ``Qwen3CoderToolParser._parse_xml_function_call`` plus the
+ tag-tolerance branches of the three module-level patterns.
+ """
tool_calls: list[ParsedToolCall] = []
i = 0
while i < len(ids):
- if ids[i] == tc_id:
- end = _find(ids, tc_end_id, i + 1)
- if end == -1:
- raw = _decode(tokenizer, ids[i + 1 :])
- tool_calls.append(
- ParsedToolCall(
- raw=raw,
- token_span=(section_offset + i, section_offset + len(ids)),
- status=ToolCallParseStatus.UNCLOSED_BLOCK,
- )
- )
- break
+ if ids[i] != tc_id:
+ i += 1
+ continue
+
+ end = _find(ids, tc_end_id, i + 1)
+ if end == -1:
+ block_ids = ids[i + 1 :]
+ block_text = _decode(tokenizer, block_ids)
+ span = (section_offset + i, section_offset + len(ids))
+ wrapper_unclosed = True
+ else:
block_text = _decode(tokenizer, ids[i + 1 : end])
span = (section_offset + i, section_offset + end + 1)
- name_match = re.search(r"]+)>", block_text)
- if not name_match:
- tool_calls.append(
- ParsedToolCall(
- raw=block_text,
- token_span=span,
- status=ToolCallParseStatus.MALFORMED_STRUCTURE,
- )
- )
- i = end + 1
- continue
+ wrapper_unclosed = False
- name = name_match.group(1)
- params = param_index.get(name, {})
- arguments: dict = {}
- any_json_fallback = False
- for pm in re.finditer(
- r"]+)>\n?(.*?)\n?", block_text, re.DOTALL
- ):
- arg_name = pm.group(1)
- arg_value = pm.group(2).strip()
- value, used_fallback = _coerce_arg_value(
- arg_value, params.get(arg_name)
+ block_calls = _parse_xml_function_blocks(
+ block_text,
+ param_index=param_index,
+ token_span=span,
+ wrapper_unclosed=wrapper_unclosed,
+ )
+ if block_calls:
+ tool_calls.extend(block_calls)
+ elif wrapper_unclosed:
+ # vLLM would silently drop a content-less unclosed
+ # ````; we keep the diagnostic so verifier /
+ # RL-loss code can mask the malformed span.
+ tool_calls.append(
+ ParsedToolCall(
+ raw=block_text,
+ token_span=span,
+ status=ToolCallParseStatus.UNCLOSED_BLOCK,
)
- arguments[arg_name] = value
- any_json_fallback = any_json_fallback or used_fallback
+ )
+ else:
tool_calls.append(
ParsedToolCall(
raw=block_text,
- name=name,
- arguments=arguments,
token_span=span,
- status=(
- ToolCallParseStatus.INVALID_JSON
- if any_json_fallback
- else ToolCallParseStatus.OK
- ),
+ status=ToolCallParseStatus.MALFORMED_STRUCTURE,
)
)
- i = end + 1
+
+ if wrapper_unclosed:
+ break
+ i = end + 1
+ return tool_calls
+
+
+def _parse_xml_function_blocks(
+ text: str,
+ *,
+ param_index: dict[str, dict[str, dict[str, Any]]],
+ token_span: tuple[int, int] | None,
+ wrapper_unclosed: bool,
+) -> list[ParsedToolCall]:
+ """Apply vLLM's ```` regex over *text*.
+
+ ``token_span`` is shared by every call recovered from this block;
+ the granularity is the surrounding ```` region (or the
+ whole completion in the no-marker back-off path), matching how
+ vLLM treats multiple ```` siblings — it does not try to
+ attribute character offsets back to token positions.
+ """
+ tool_calls: list[ParsedToolCall] = []
+ for match in _FUNCTION_BLOCK_RE.finditer(text):
+ closed = match.group(1)
+ body = closed if closed is not None else (match.group(2) or "")
+ function_unclosed = closed is None
+ end_index = body.find(">")
+ if end_index == -1:
+ tool_calls.append(
+ ParsedToolCall(
+ raw=body,
+ token_span=token_span,
+ status=ToolCallParseStatus.MALFORMED_STRUCTURE,
+ )
+ )
+ continue
+
+ name = body[:end_index]
+ params_text = body[end_index + 1 :]
+ params = param_index.get(name, {})
+ arguments: dict = {}
+ any_fallback = False
+ for capture in _PARAMETER_BLOCK_RE.findall(params_text):
+ idx = capture.find(">")
+ if idx == -1:
+ continue
+ arg_name = capture[:idx]
+ arg_value = _strip_one_newline(capture[idx + 1 :])
+ value, used_fallback = _coerce_arg_value(arg_value, params.get(arg_name))
+ arguments[arg_name] = value
+ any_fallback = any_fallback or used_fallback
+
+ if wrapper_unclosed or function_unclosed:
+ status = ToolCallParseStatus.UNCLOSED_BLOCK
+ elif any_fallback:
+ status = ToolCallParseStatus.INVALID_JSON
else:
- i += 1
+ status = ToolCallParseStatus.OK
+ tool_calls.append(
+ ParsedToolCall(
+ raw=body,
+ name=name,
+ arguments=arguments,
+ id=_make_tool_call_id(),
+ token_span=token_span,
+ status=status,
+ )
+ )
return tool_calls
+def _strip_one_newline(s: str) -> str:
+ """vLLM ``qwen3coder_tool_parser.py:246-250`` strips exactly one
+ leading and one trailing newline from a parameter value — no
+ further whitespace trimming."""
+ if s.startswith("\n"):
+ s = s[1:]
+ if s.endswith("\n"):
+ s = s[:-1]
+ return s
+
+
# ── GLM-5/4.7/4.5: name k v
diff --git a/tests/test_arg_coercion.py b/tests/test_arg_coercion.py
new file mode 100644
index 0000000..5d5575f
--- /dev/null
+++ b/tests/test_arg_coercion.py
@@ -0,0 +1,469 @@
+"""Schema-aware argument coercion parity with vLLM / SGLang.
+
+The XML-style tool-call wire format (Qwen3.5, GLM, MiniMax, Laguna)
+renders parameter values verbatim with no quoting, so the parser must
+consult the tool schema to know whether ``True`` is a bool or a string.
+Renderers' previous coercion was strict ``json.loads``, which flagged
+``True`` / ``False`` (capitalised Python literals) as ``INVALID_JSON``
+for boolean params — but both vLLM's ``Qwen3CoderToolParser`` and
+SGLang's ``Qwen3CoderDetector`` accept them via case-folded comparison.
+
+These tests pin the new ladder to the vLLM / SGLang reference shape and
+guard the bug from issue #47.
+"""
+
+from __future__ import annotations
+
+import pytest
+
+from renderers.base import ToolCallParseStatus, load_tokenizer
+from renderers.parsing import _coerce_arg_value, parse_qwen35
+
+
+# ── Direct coercion ─────────────────────────────────────────────────
+
+
+@pytest.mark.parametrize(
+ "raw,expected",
+ [
+ ("true", True),
+ ("True", True),
+ ("TRUE", True),
+ ("false", False),
+ ("False", False),
+ ("FALSE", False),
+ ],
+)
+def test_boolean_accepts_case_insensitive(raw, expected):
+ value, used_fallback = _coerce_arg_value(raw, {"type": "boolean"})
+ assert value is expected
+ assert used_fallback is False
+
+
+@pytest.mark.parametrize("declared", ["boolean", "bool", "binary"])
+def test_boolean_type_aliases_all_accepted(declared):
+ value, used_fallback = _coerce_arg_value("True", {"type": declared})
+ assert value is True
+ assert used_fallback is False
+
+
+def test_boolean_garbage_degenerates_to_false_with_invalid_flag():
+ """Match vLLM: non-true/false values silently become ``False`` — but we
+ still flag the fallback so verifier / RL-loss code can see the drift."""
+ value, used_fallback = _coerce_arg_value("yes", {"type": "boolean"})
+ assert value is False
+ assert used_fallback is True
+
+
+@pytest.mark.parametrize("declared", ["boolean", "integer", "object", "number"])
+def test_null_literal_is_case_insensitive_for_non_string_types(declared):
+ for raw in ("null", "Null", "NULL"):
+ value, used_fallback = _coerce_arg_value(raw, {"type": declared})
+ assert value is None
+ assert used_fallback is False
+
+
+def test_null_short_circuit_applies_even_for_string_type():
+ """vLLM ``qwen3coder_tool_parser.py:125`` null-coerces ``"null"``
+ *before* the type check, so a string-typed wire value of ``null``
+ collapses to Python ``None``. The XML wire format can't tell the
+ string ``"null"`` from JSON null; we accept the lossy round-trip
+ in exchange for byte parity with vLLM."""
+ for raw in ("null", "Null", "NULL"):
+ value, used_fallback = _coerce_arg_value(raw, {"type": "string"})
+ assert value is None
+ assert used_fallback is False
+
+
+@pytest.mark.parametrize(
+ "declared,raw,expected",
+ [
+ ("integer", "42", 42),
+ ("int", "-7", -7),
+ ("uint", "0", 0),
+ ("long", "9999999999", 9999999999),
+ ("short", "12", 12),
+ ("unsigned", "5", 5),
+ ],
+)
+def test_int_family_coerces(declared, raw, expected):
+ value, used_fallback = _coerce_arg_value(raw, {"type": declared})
+ assert value == expected
+ assert isinstance(value, int)
+ assert used_fallback is False
+
+
+def test_int_failure_keeps_raw_and_flags_fallback():
+ value, used_fallback = _coerce_arg_value("abc", {"type": "integer"})
+ assert value == "abc"
+ assert used_fallback is True
+
+
+@pytest.mark.parametrize(
+ "raw,expected,exact_type",
+ [
+ ("3.14", 3.14, float),
+ ("1e3", 1000.0, float), # source has `e` → stays float (SGLang rule)
+ ("1.0", 1.0, float), # source has `.` → stays float
+ ("-2.5", -2.5, float),
+ ("7", 7, int), # no `.`/`e` and whole → demoted to int
+ ],
+)
+def test_float_family_coerces(raw, expected, exact_type):
+ value, used_fallback = _coerce_arg_value(raw, {"type": "number"})
+ assert value == expected
+ assert type(value) is exact_type
+ assert used_fallback is False
+
+
+def test_float_failure_keeps_raw_and_flags_fallback():
+ value, used_fallback = _coerce_arg_value("not-a-number", {"type": "float"})
+ assert value == "not-a-number"
+ assert used_fallback is True
+
+
+def test_object_via_json_loads():
+ value, used_fallback = _coerce_arg_value('{"k": 1}', {"type": "object"})
+ assert value == {"k": 1}
+ assert used_fallback is False
+
+
+def test_object_via_ast_literal_eval_fallback():
+ """Python-literal dicts (single quotes) should still parse for object
+ params — this is the vLLM ``ast.literal_eval`` fallback path."""
+ value, used_fallback = _coerce_arg_value("{'k': 1}", {"type": "object"})
+ assert value == {"k": 1}
+ assert used_fallback is False
+
+
+def test_array_via_json_loads():
+ value, used_fallback = _coerce_arg_value("[1, 2, 3]", {"type": "array"})
+ assert value == [1, 2, 3]
+ assert used_fallback is False
+
+
+def test_object_total_failure_flags_invalid():
+ value, used_fallback = _coerce_arg_value(
+ "this is not a json object", {"type": "object"}
+ )
+ assert value == "this is not a json object"
+ assert used_fallback is True
+
+
+def test_anyof_treated_as_object():
+ """vLLM-specific: a schema with ``anyOf`` and no top-level ``type``
+ routes through the object branch so JSON-shaped values parse."""
+ schema = {"anyOf": [{"type": "integer"}, {"type": "string"}]}
+ value, used_fallback = _coerce_arg_value("42", schema)
+ assert value == 42
+ assert used_fallback is False
+
+
+@pytest.mark.parametrize(
+ "declared", ["string", "str", "text", "varchar", "char", "enum"]
+)
+def test_string_family_returns_verbatim(declared):
+ value, used_fallback = _coerce_arg_value("True", {"type": declared})
+ assert value == "True"
+ assert used_fallback is False
+
+
+def test_string_with_list_form_type():
+ value, used_fallback = _coerce_arg_value("True", {"type": ["string"]})
+ assert value == "True"
+ assert used_fallback is False
+
+
+def test_no_schema_returns_verbatim_string():
+ """vLLM ``qwen3coder_tool_parser.py:128-137``: when the param is
+ not in the tool schema (or no tools were passed at all), the parser
+ returns the raw text verbatim — it does **not** try ``json.loads``.
+ The case-insensitive ``null`` short-circuit at the top of the ladder
+ still applies (so untyped ``"null"`` becomes Python ``None``)."""
+ value, used_fallback = _coerce_arg_value("42", None)
+ assert value == "42"
+ assert used_fallback is False
+
+ value, used_fallback = _coerce_arg_value("True", None)
+ assert value == "True"
+ assert used_fallback is False
+
+ value, used_fallback = _coerce_arg_value("hello", None)
+ assert value == "hello"
+ assert used_fallback is False
+
+
+def test_no_schema_null_short_circuit():
+ """vLLM null-coerces before the schema-missing return, so an untyped
+ ``"null"`` still becomes Python ``None``."""
+ for raw in ("null", "Null", "NULL"):
+ value, used_fallback = _coerce_arg_value(raw, None)
+ assert value is None
+ assert used_fallback is False
+
+
+def test_unknown_type_falls_back_to_literal_eval():
+ """A declared type we don't recognise (e.g. ``"date"``) lands in the
+ catch-all that tries ``ast.literal_eval``, mirroring vLLM's else
+ branch — strings without quotes that fail to eval stay as strings."""
+ value, used_fallback = _coerce_arg_value("2024-01-01", {"type": "date"})
+ assert value == "2024-01-01"
+ assert used_fallback is True
+
+ # Bare Python literal still parses through the catch-all
+ value, used_fallback = _coerce_arg_value("[1, 2]", {"type": "date"})
+ assert value == [1, 2]
+ assert used_fallback is False
+
+
+# ── Integration: parse_qwen35 end-to-end (the bug from issue #47) ───
+
+
+_TOOLS_FROM_ISSUE = [
+ {
+ "type": "function",
+ "function": {
+ "name": "filesystem_server_get_directory_tree",
+ "description": "List a directory tree.",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "path": {"type": "string"},
+ "max_depth": {"type": "integer"},
+ "include_files": {"type": "boolean"},
+ "show_size": {"type": "boolean"},
+ },
+ },
+ },
+ }
+]
+
+
+def _parse(tok, completion: str, *, tools=None):
+ ids = tok.encode(completion, add_special_tokens=False)
+ return parse_qwen35(
+ tok,
+ list(ids),
+ stop_ids={tok.eos_token_id} if tok.eos_token_id is not None else set(),
+ think_id=tok.convert_tokens_to_ids(""),
+ think_end_id=tok.convert_tokens_to_ids(""),
+ tool_call_id=tok.convert_tokens_to_ids(""),
+ tool_call_end_id=tok.convert_tokens_to_ids(""),
+ tools=tools,
+ )
+
+
+def test_qwen35_capital_true_for_boolean_is_ok_status():
+ """Regression for issue #47: ``True``
+ used to be flagged ``INVALID_JSON`` because ``json.loads("True")``
+ fails. SGLang and vLLM both accept it via case-folded comparison;
+ renderers now does too."""
+ tok = load_tokenizer("Qwen/Qwen3.5-9B")
+ parsed = _parse(
+ tok,
+ "\n"
+ "\n"
+ "/\n"
+ "2\n"
+ "True\n"
+ "True\n"
+ "\n"
+ "",
+ tools=_TOOLS_FROM_ISSUE,
+ )
+ assert len(parsed.tool_calls) == 1
+ tc = parsed.tool_calls[0]
+ assert tc.status == ToolCallParseStatus.OK
+ assert tc.name == "filesystem_server_get_directory_tree"
+ assert tc.arguments == {
+ "path": "/",
+ "max_depth": 2,
+ "include_files": True,
+ "show_size": True,
+ }
+
+
+# ── Structural tolerance: parity with vLLM's three regex patterns ────
+
+
+def test_qwen35_tolerates_missing_closing_parameter():
+ """vLLM ``tool_call_parameter_regex`` uses positive lookaheads so a
+ missing ```` is recovered from the next ```` boundary instead of dropping the value."""
+ tok = load_tokenizer("Qwen/Qwen3.5-9B")
+ parsed = _parse(
+ tok,
+ "\n"
+ "\n"
+ "/\n" # missing
+ "2\n"
+ "\n"
+ "",
+ tools=_TOOLS_FROM_ISSUE,
+ )
+ assert len(parsed.tool_calls) == 1
+ tc = parsed.tool_calls[0]
+ assert tc.status == ToolCallParseStatus.OK
+ assert tc.name == "filesystem_server_get_directory_tree"
+ assert tc.arguments == {"path": "/", "max_depth": 2}
+
+
+def test_qwen35_tolerates_unclosed_function():
+ """vLLM ``tool_call_function_regex`` second alternation matches an
+ unclosed ``\n"
+ "\n"
+ "/\n", # no , no
+ tools=_TOOLS_FROM_ISSUE,
+ )
+ assert len(parsed.tool_calls) == 1
+ tc = parsed.tool_calls[0]
+ assert tc.status == ToolCallParseStatus.UNCLOSED_BLOCK
+ assert tc.name == "filesystem_server_get_directory_tree"
+ assert tc.arguments == {"path": "/"}
+
+
+def test_qwen35_backoff_no_tool_call_markers():
+ """vLLM ``qwen3coder_tool_parser.py:269-271``: when the output
+ contains no ```` markers, treat the whole completion as
+ a single tool-call region and scan for ```` directly.
+ Content text is the raw slice up to the first ``\n"
+ "/etc\n"
+ "1\n"
+ "",
+ tools=_TOOLS_FROM_ISSUE,
+ )
+ assert len(parsed.tool_calls) == 1
+ tc = parsed.tool_calls[0]
+ assert tc.status == ToolCallParseStatus.OK
+ assert tc.name == "filesystem_server_get_directory_tree"
+ assert tc.arguments == {"path": "/etc", "max_depth": 1}
+ # vLLM-parity: content = text up to first ```` and no ````; the commented-out ``.rstrip()`` confirms
+ intent. We mirror that — surrounding whitespace round-trips."""
+ tok = load_tokenizer("Qwen/Qwen3.5-9B")
+ parsed = _parse(
+ tok,
+ "let me check\n\n" # trailing whitespace before
+ "\n"
+ "\n"
+ "/\n"
+ "\n"
+ "",
+ tools=_TOOLS_FROM_ISSUE,
+ )
+ assert parsed.content == "let me check\n\n"
+ assert len(parsed.tool_calls) == 1
+ assert parsed.tool_calls[0].status == ToolCallParseStatus.OK
+
+
+def test_qwen35_tool_call_id_matches_vllm_format():
+ """vLLM's ``ToolCall`` default factory mints ids of the form
+ ``chatcmpl-tool-<16 hex>``. Each surfaced call gets a fresh id."""
+ import re as _re
+
+ tok = load_tokenizer("Qwen/Qwen3.5-9B")
+ parsed = _parse(
+ tok,
+ "\n"
+ "\n"
+ "/\n"
+ "\n"
+ "\n"
+ "\n"
+ "\n"
+ "/etc\n"
+ "\n"
+ "",
+ tools=_TOOLS_FROM_ISSUE,
+ )
+ assert len(parsed.tool_calls) == 2
+ pattern = _re.compile(r"^chatcmpl-tool-[0-9a-f]{16}$")
+ ids = [tc.id for tc in parsed.tool_calls]
+ for tcid in ids:
+ assert tcid is not None and pattern.match(tcid), tcid
+ # Two surfaced calls must get distinct ids.
+ assert ids[0] != ids[1]
+
+
+def test_qwen35_exception_fallback_returns_full_text():
+ """vLLM ``qwen3coder_tool_parser.py:327-331`` catch-all returns
+ ``tools_called=False, tool_calls=[], content=model_output`` on any
+ unhandled extraction error. We mirror that by wrapping the
+ tool-call extraction; reasoning content extracted earlier is kept.
+ """
+ from unittest.mock import patch
+
+ tok = load_tokenizer("Qwen/Qwen3.5-9B")
+ completion = (
+ "\n"
+ "\n"
+ "/\n"
+ "\n"
+ ""
+ )
+ ids = tok.encode(completion, add_special_tokens=False)
+
+ with patch(
+ "renderers.parsing._parse_xml_tool_calls",
+ side_effect=RuntimeError("boom"),
+ ):
+ parsed = parse_qwen35(
+ tok,
+ list(ids),
+ stop_ids={tok.eos_token_id} if tok.eos_token_id is not None else set(),
+ think_id=tok.convert_tokens_to_ids(""),
+ think_end_id=tok.convert_tokens_to_ids(""),
+ tool_call_id=tok.convert_tokens_to_ids(""),
+ tool_call_end_id=tok.convert_tokens_to_ids(""),
+ tools=_TOOLS_FROM_ISSUE,
+ )
+ assert parsed.tool_calls == []
+ # content is the full decoded post-think text, surfaced verbatim.
+ assert "" in parsed.content
+ assert "" in parsed.content
+
+
+def test_qwen35_parameter_value_preserves_internal_whitespace():
+ """vLLM strips exactly one leading and one trailing ``\\n`` from the
+ parameter body (``qwen3coder_tool_parser.py:246-250``); inner
+ indentation must round-trip verbatim."""
+ tok = load_tokenizer("Qwen/Qwen3.5-9B")
+ parsed = _parse(
+ tok,
+ "\n"
+ "\n"
+ "\n /etc \n\n"
+ "\n"
+ "",
+ tools=_TOOLS_FROM_ISSUE,
+ )
+ assert len(parsed.tool_calls) == 1
+ tc = parsed.tool_calls[0]
+ assert tc.status == ToolCallParseStatus.OK
+ # One leading + one trailing \n stripped — interior spaces preserved.
+ assert tc.arguments == {"path": " /etc "}
diff --git a/tests/test_tool_arg_type_preservation.py b/tests/test_tool_arg_type_preservation.py
index 61dbb9b..e036abd 100644
--- a/tests/test_tool_arg_type_preservation.py
+++ b/tests/test_tool_arg_type_preservation.py
@@ -67,9 +67,21 @@ def renderer(model, renderer_name):
]
+# Renderers whose wire format quotes strings (Hermes JSON, section
+# JSON). Those parsers preserve the string ``"null"`` verbatim because
+# the wire bytes are unambiguous. XML-style renderers can't tell the
+# string ``null`` from a JSON null on the wire, so they null-coerce —
+# matching vLLM's ``Qwen3CoderToolParser._convert_param_value:125``
+# which runs the null short-circuit before any type check.
+_JSON_FORMAT_MODELS = {"Qwen/Qwen3-8B", "moonshotai/Kimi-K2-Instruct"}
+
+
# Each case: a single ``x`` argument whose value is a STRING that
# happens to be valid JSON of another type. With a schema declaring
-# ``x: string``, the parser must return the string verbatim.
+# ``x: string``, the parser must return the string verbatim — except
+# for the ``string-null`` case on XML renderers, where vLLM's
+# ``null`` short-circuit runs before the type check and we follow
+# suit for byte parity (see test below for the explicit assertion).
JSON_LOOKING_STRING_ARGS = [
pytest.param({"x": "true"}, id="string-bool"),
pytest.param({"x": "42"}, id="string-int"),
@@ -116,9 +128,16 @@ def _extract_assistant_tokens(renderer, prompt, assistant_msg, *, tools=None):
@pytest.mark.parametrize("args", JSON_LOOKING_STRING_ARGS)
-def test_string_arg_preserves_type(model, renderer_name, renderer, args):
+def test_string_arg_preserves_type(model, renderer_name, renderer, args, request):
"""Tool-call args of declared type ``str`` must round-trip as ``str``,
- not get re-parsed as bool/int/null/list/dict by the parser."""
+ not get re-parsed as bool/int/null/list/dict by the parser.
+
+ Exception: the ``string-null`` case on XML renderers collapses to
+ Python ``None`` to match vLLM's null short-circuit. The XML wire
+ format is genuinely ambiguous there (``null``
+ is identical bytes whether the model meant the string ``"null"``
+ or a JSON null), and vLLM resolves the ambiguity by null-coercing.
+ """
msg = {
"role": "assistant",
"content": "",
@@ -134,6 +153,12 @@ def test_string_arg_preserves_type(model, renderer_name, renderer, args):
assert parsed.tool_calls, f"{model}: parser returned no tool_calls"
got = _normalize_args(parsed.tool_calls[0].arguments)
- assert got == args, (
+
+ case_id = request.node.callspec.id.rsplit("-", 1)[-1]
+ if case_id == "null" and model not in _JSON_FORMAT_MODELS:
+ expected = {"x": None}
+ else:
+ expected = args
+ assert got == expected, (
f"{model}: tool-arg type drift — sent {args!r}, parser returned {got!r}"
)