From 69328f63eefa7adcaa79fee8c24828ee2ae66ac7 Mon Sep 17 00:00:00 2001 From: hallerite Date: Mon, 18 May 2026 16:47:07 +0000 Subject: [PATCH 1/4] fix(parsing): vLLM-parity type ladder for XML tool-call arg coercion MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Renderers' Qwen3.5/GLM/MiniMax/Laguna XML parsers flagged ``True`` as INVALID_JSON for boolean params because ``json.loads("True")`` fails. Both SGLang's ``Qwen3CoderDetector`` and vLLM's ``Qwen3CoderToolParser`` accept it via ``param_value.lower() == "true"`` — so Pythonic literals the model freely emits round-trip cleanly at inference but came back malformed through renderers. Replaces the string-or-``json.loads`` dispatch with the full ``_convert_param_value`` ladder shared by both reference parsers: case-insensitive ``null``, ``int()`` / ``float()`` for numeric families with int demotion, case-folded bool, ``json.loads`` → ``ast.literal_eval`` for objects/arrays/anyOf, ``ast.literal_eval`` catch-all. INVALID_JSON remains as the verifier / RL-loss signal for values that had to degenerate (e.g. ``yes`` for bool, ``abc`` for int). One deliberate deviation from vLLM/SGLang: declared ``string`` types preserve ``"null"`` verbatim instead of coercing to Python ``None``, so the existing ``test_tool_arg_type_preservation`` string-verbatim contract still holds. Fixes #47. Co-Authored-By: Claude Opus 4.7 (1M context) --- renderers/parsing.py | 146 ++++++++++++++++---- tests/test_arg_coercion.py | 263 +++++++++++++++++++++++++++++++++++++ 2 files changed, 385 insertions(+), 24 deletions(-) create mode 100644 tests/test_arg_coercion.py diff --git a/renderers/parsing.py b/renderers/parsing.py index 528f122..f9f9ac1 100644 --- a/renderers/parsing.py +++ b/renderers/parsing.py @@ -16,6 +16,7 @@ from __future__ import annotations +import ast import json from typing import Any @@ -29,8 +30,21 @@ # ``"true"`` produce identical wire bytes; without the tool schema, the # parser has no signal to distinguish them and defaults to # ``json.loads`` (the historical behavior). When the caller passes -# ``tools=[...]``, parsers consult the per-parameter declared type to -# keep string args verbatim, matching vLLM / SGLang reference parsers. +# ``tools=[...]``, parsers consult the per-parameter declared type and +# dispatch through a vLLM-parity ladder (case-insensitive ``null``, +# ``int()`` / ``float()`` for numeric, ``.lower() == "true"`` for bool, +# ``json.loads`` → ``ast.literal_eval`` for object/array) so models can +# emit Pythonic literals like ``True`` / ``None`` / ``{'k': 1}`` that +# both SGLang's ``Qwen3CoderDetector`` and vLLM's +# ``Qwen3CoderToolParser`` accept on the inference side. + + +_STRING_TYPES = frozenset({"string", "str", "text", "varchar", "char", "enum"}) +_BOOL_TYPES = frozenset({"boolean", "bool", "binary"}) +_OBJECT_TYPES = frozenset({"object", "array", "arr"}) +_INT_PREFIXES = ("int", "uint", "long", "short", "unsigned") +_FLOAT_PREFIXES = ("num", "float") +_OBJECT_PREFIXES = ("dict", "list") def _build_param_type_index( @@ -59,35 +73,119 @@ def _build_param_type_index( return index +def _declared_type(param_schema: dict[str, Any] | None) -> str | None: + """Extract the lowercased ``type`` from a JSON-schema fragment. + + Mirrors vLLM: a fragment with ``anyOf`` and no top-level ``type`` is + treated as ``"object"`` so json.loads is attempted; a list-form type + falls back to the first declared entry. + """ + if not isinstance(param_schema, dict): + return None + declared = param_schema.get("type") + if isinstance(declared, str): + return declared.strip().lower() + if isinstance(declared, list) and declared: + first = declared[0] + if isinstance(first, str): + return first.strip().lower() + if "anyOf" in param_schema: + return "object" + return None + + def _coerce_arg_value( text: str, param_schema: dict[str, Any] | None ) -> tuple[Any, bool]: """Coerce a raw ```` body to its declared type. - Returns ``(value, used_json_fallback)``. The boolean is ``True`` only - when ``json.loads`` was attempted and raised, so the caller knows - whether to flag ``INVALID_JSON``. Returning a string verbatim - because the schema declared ``type: "string"`` is NOT a fallback. - - Rule (matches vLLM / SGLang reference parsers): - - - If the param's declared ``type`` is ``"string"`` (or single-element - ``["string"]``), return ``text`` verbatim — never ``json.loads``. - - Anything else (no schema, non-string scalar, object, array, or - union types that include non-string): try ``json.loads`` and fall - back to raw ``text`` on parse failure. - - Union types that include ``"string"`` alongside other types still - attempt ``json.loads`` first so an explicit integer / bool can - parse; the string branch only wins as the fallback. + Returns ``(value, used_fallback)``. ``used_fallback=True`` means the + coercion could not satisfy the declared type and had to fall back to + a different shape (a string for scalars, ``False`` for booleans, raw + text for objects), so the caller can flag ``INVALID_JSON``. + + The ladder mirrors vLLM's ``Qwen3CoderToolParser._convert_param_value`` + and SGLang's ``Qwen3CoderDetector._convert_param_value`` with one + deliberate deviation noted inline: + + - No schema → try ``json.loads``, fall back to raw text + INVALID. + - ``string`` / ``str`` / ``text`` / ``varchar`` / ``char`` / ``enum`` + → return verbatim. (Deviation: vLLM/SGLang null-coerce ``"null"`` + *before* checking type, so a string-typed arg of ``"null"`` would + come back as Python ``None``. Renderers preserves the string + because the XML wire format can't distinguish the string + ``"null"`` from JSON null, and downstream tests pin the + preservation contract; see ``test_tool_arg_type_preservation``.) + - ``null`` (any case) for any non-string declared type → ``None``. + - ``int`` / ``uint`` / ``long`` / ``short`` / ``unsigned`` family → + ``int(text)``, degenerating to raw text + INVALID. + - ``num`` / ``float`` family → ``float(text)`` with int demotion + when the source has no ``.``/``e`` and is whole, degenerating to + raw text + INVALID. + - ``boolean`` / ``bool`` / ``binary`` → ``text.lower() == "true"``; + anything outside ``{"true", "false"}`` returns ``False`` + INVALID + (matches vLLM's "degenerate to false" behavior, with the + INVALID flag preserved for the verifier / RL-loss signal). + - ``object`` / ``array`` / ``dict`` / ``list`` (or ``anyOf``) → + ``json.loads`` first, ``ast.literal_eval`` fallback (for Python + literals like ``{'k': 1}``), raw text + INVALID otherwise. + - Unknown type (param missing from schema, or type not in any + family above) → ``ast.literal_eval`` if it parses, else raw text. + Matches vLLM's catch-all branch and keeps strings like + ``"hello"`` (with no schema entry) as-is. """ - if param_schema is not None: - declared = param_schema.get("type") - if declared == "string" or declared == ["string"]: - return text, False + if param_schema is None: + if text.lower() == "null": + return None, False + try: + return json.loads(text), False + except (json.JSONDecodeError, ValueError): + return text, True + + declared = _declared_type(param_schema) + + if declared is None or declared in _STRING_TYPES: + return text, False + + if text.lower() == "null": + return None, False + + if declared.startswith(_INT_PREFIXES): + try: + return int(text), False + except (ValueError, TypeError): + return text, True + + if declared.startswith(_FLOAT_PREFIXES): + try: + f = float(text) + except (ValueError, TypeError): + return text, True + if "." not in text and "e" not in text.lower() and f.is_integer(): + return int(f), False + return f, False + + if declared in _BOOL_TYPES: + lowered = text.lower() + if lowered == "true": + return True, False + if lowered == "false": + return False, False + return False, True + + if declared in _OBJECT_TYPES or declared.startswith(_OBJECT_PREFIXES): + try: + return json.loads(text), False + except (json.JSONDecodeError, ValueError): + pass + try: + return ast.literal_eval(text), False + except (ValueError, SyntaxError, TypeError, MemoryError): + return text, True + try: - return json.loads(text), False - except (json.JSONDecodeError, ValueError): + return ast.literal_eval(text), False + except (ValueError, SyntaxError, TypeError, MemoryError): return text, True diff --git a/tests/test_arg_coercion.py b/tests/test_arg_coercion.py new file mode 100644 index 0000000..d95565f --- /dev/null +++ b/tests/test_arg_coercion.py @@ -0,0 +1,263 @@ +"""Schema-aware argument coercion parity with vLLM / SGLang. + +The XML-style tool-call wire format (Qwen3.5, GLM, MiniMax, Laguna) +renders parameter values verbatim with no quoting, so the parser must +consult the tool schema to know whether ``True`` is a bool or a string. +Renderers' previous coercion was strict ``json.loads``, which flagged +``True`` / ``False`` (capitalised Python literals) as ``INVALID_JSON`` +for boolean params — but both vLLM's ``Qwen3CoderToolParser`` and +SGLang's ``Qwen3CoderDetector`` accept them via case-folded comparison. + +These tests pin the new ladder to the vLLM / SGLang reference shape and +guard the bug from issue #47. +""" + +from __future__ import annotations + +import pytest + +from renderers.base import ToolCallParseStatus, load_tokenizer +from renderers.parsing import _coerce_arg_value, parse_qwen35 + + +# ── Direct coercion ───────────────────────────────────────────────── + + +@pytest.mark.parametrize( + "raw,expected", + [ + ("true", True), + ("True", True), + ("TRUE", True), + ("false", False), + ("False", False), + ("FALSE", False), + ], +) +def test_boolean_accepts_case_insensitive(raw, expected): + value, used_fallback = _coerce_arg_value(raw, {"type": "boolean"}) + assert value is expected + assert used_fallback is False + + +@pytest.mark.parametrize("declared", ["boolean", "bool", "binary"]) +def test_boolean_type_aliases_all_accepted(declared): + value, used_fallback = _coerce_arg_value("True", {"type": declared}) + assert value is True + assert used_fallback is False + + +def test_boolean_garbage_degenerates_to_false_with_invalid_flag(): + """Match vLLM: non-true/false values silently become ``False`` — but we + still flag the fallback so verifier / RL-loss code can see the drift.""" + value, used_fallback = _coerce_arg_value("yes", {"type": "boolean"}) + assert value is False + assert used_fallback is True + + +@pytest.mark.parametrize("declared", ["boolean", "integer", "object", "number"]) +def test_null_literal_is_case_insensitive_for_non_string_types(declared): + for raw in ("null", "Null", "NULL"): + value, used_fallback = _coerce_arg_value(raw, {"type": declared}) + assert value is None + assert used_fallback is False + + +def test_null_is_preserved_verbatim_for_string_type(): + """Deliberate deviation from vLLM/SGLang: string-typed ``"null"`` + stays as the string ``"null"`` so the existing string-verbatim + contract holds (see ``test_tool_arg_type_preservation``). The XML + wire format already can't distinguish the string ``"null"`` from + JSON null, but when a schema says ``type: "string"`` we honour it.""" + value, used_fallback = _coerce_arg_value("null", {"type": "string"}) + assert value == "null" + assert used_fallback is False + + +@pytest.mark.parametrize( + "declared,raw,expected", + [ + ("integer", "42", 42), + ("int", "-7", -7), + ("uint", "0", 0), + ("long", "9999999999", 9999999999), + ("short", "12", 12), + ("unsigned", "5", 5), + ], +) +def test_int_family_coerces(declared, raw, expected): + value, used_fallback = _coerce_arg_value(raw, {"type": declared}) + assert value == expected + assert isinstance(value, int) + assert used_fallback is False + + +def test_int_failure_keeps_raw_and_flags_fallback(): + value, used_fallback = _coerce_arg_value("abc", {"type": "integer"}) + assert value == "abc" + assert used_fallback is True + + +@pytest.mark.parametrize( + "raw,expected,exact_type", + [ + ("3.14", 3.14, float), + ("1e3", 1000.0, float), # source has `e` → stays float (SGLang rule) + ("1.0", 1.0, float), # source has `.` → stays float + ("-2.5", -2.5, float), + ("7", 7, int), # no `.`/`e` and whole → demoted to int + ], +) +def test_float_family_coerces(raw, expected, exact_type): + value, used_fallback = _coerce_arg_value(raw, {"type": "number"}) + assert value == expected + assert type(value) is exact_type + assert used_fallback is False + + +def test_float_failure_keeps_raw_and_flags_fallback(): + value, used_fallback = _coerce_arg_value("not-a-number", {"type": "float"}) + assert value == "not-a-number" + assert used_fallback is True + + +def test_object_via_json_loads(): + value, used_fallback = _coerce_arg_value('{"k": 1}', {"type": "object"}) + assert value == {"k": 1} + assert used_fallback is False + + +def test_object_via_ast_literal_eval_fallback(): + """Python-literal dicts (single quotes) should still parse for object + params — this is the vLLM ``ast.literal_eval`` fallback path.""" + value, used_fallback = _coerce_arg_value("{'k': 1}", {"type": "object"}) + assert value == {"k": 1} + assert used_fallback is False + + +def test_array_via_json_loads(): + value, used_fallback = _coerce_arg_value("[1, 2, 3]", {"type": "array"}) + assert value == [1, 2, 3] + assert used_fallback is False + + +def test_object_total_failure_flags_invalid(): + value, used_fallback = _coerce_arg_value( + "this is not a json object", {"type": "object"} + ) + assert value == "this is not a json object" + assert used_fallback is True + + +def test_anyof_treated_as_object(): + """vLLM-specific: a schema with ``anyOf`` and no top-level ``type`` + routes through the object branch so JSON-shaped values parse.""" + schema = {"anyOf": [{"type": "integer"}, {"type": "string"}]} + value, used_fallback = _coerce_arg_value("42", schema) + assert value == 42 + assert used_fallback is False + + +@pytest.mark.parametrize("declared", ["string", "str", "text", "varchar", "char", "enum"]) +def test_string_family_returns_verbatim(declared): + value, used_fallback = _coerce_arg_value("True", {"type": declared}) + assert value == "True" + assert used_fallback is False + + +def test_string_with_list_form_type(): + value, used_fallback = _coerce_arg_value("True", {"type": ["string"]}) + assert value == "True" + assert used_fallback is False + + +def test_no_schema_falls_back_to_json_loads(): + """Historical behavior: when ``param_schema`` is ``None`` (no tools + were passed), we still try ``json.loads`` for backwards compatibility + so untyped numbers / bools parse. ``None`` is handled via the + ``null`` short-circuit at the top of the ladder.""" + value, used_fallback = _coerce_arg_value("42", None) + assert value == 42 + assert used_fallback is False + + value, used_fallback = _coerce_arg_value("hello", None) + assert value == "hello" + assert used_fallback is True + + +def test_unknown_type_falls_back_to_literal_eval(): + """A declared type we don't recognise (e.g. ``"date"``) lands in the + catch-all that tries ``ast.literal_eval``, mirroring vLLM's else + branch — strings without quotes that fail to eval stay as strings.""" + value, used_fallback = _coerce_arg_value("2024-01-01", {"type": "date"}) + assert value == "2024-01-01" + assert used_fallback is True + + # Bare Python literal still parses through the catch-all + value, used_fallback = _coerce_arg_value("[1, 2]", {"type": "date"}) + assert value == [1, 2] + assert used_fallback is False + + +# ── Integration: parse_qwen35 end-to-end (the bug from issue #47) ─── + + +_TOOLS_FROM_ISSUE = [ + { + "type": "function", + "function": { + "name": "filesystem_server_get_directory_tree", + "description": "List a directory tree.", + "parameters": { + "type": "object", + "properties": { + "path": {"type": "string"}, + "max_depth": {"type": "integer"}, + "include_files": {"type": "boolean"}, + "show_size": {"type": "boolean"}, + }, + }, + }, + } +] + + +def test_qwen35_capital_true_for_boolean_is_ok_status(): + """Regression for issue #47: ``True`` + used to be flagged ``INVALID_JSON`` because ``json.loads("True")`` + fails. SGLang and vLLM both accept it via case-folded comparison; + renderers now does too.""" + tok = load_tokenizer("Qwen/Qwen3.5-9B") + + completion = ( + "\n" + "\n" + "/\n" + "2\n" + "True\n" + "True\n" + "\n" + "" + ) + ids = tok.encode(completion, add_special_tokens=False) + + parsed = parse_qwen35( + tok, + list(ids), + stop_ids={tok.eos_token_id} if tok.eos_token_id is not None else set(), + think_id=tok.convert_tokens_to_ids(""), + think_end_id=tok.convert_tokens_to_ids(""), + tool_call_id=tok.convert_tokens_to_ids(""), + tool_call_end_id=tok.convert_tokens_to_ids(""), + tools=_TOOLS_FROM_ISSUE, + ) + assert len(parsed.tool_calls) == 1 + tc = parsed.tool_calls[0] + assert tc.status == ToolCallParseStatus.OK + assert tc.name == "filesystem_server_get_directory_tree" + assert tc.arguments == { + "path": "/", + "max_depth": 2, + "include_files": True, + "show_size": True, + } From a338f9d64f2e766e89ffbdbb463aac574bd0e1f0 Mon Sep 17 00:00:00 2001 From: hallerite Date: Wed, 20 May 2026 12:46:31 +0000 Subject: [PATCH 2/4] fix(parsing): structural vLLM parity for XML tool-call extraction MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Mirror vLLM's Qwen3CoderToolParser end to end (chosen over qwen3_xml because qwen3_coder is a strict superset for offline value coercion; qwen3_xml's edge over qwen3_coder is its streaming state machine, which does not apply here yet — module comment notes the TODO to switch when streaming lands). Closes the three remaining structural deltas: - Adopt vLLM's three tolerant regex patterns (tool_call / function / parameter) so unclosed , , and tags are recovered instead of silently dropping their content. - Add the qwen3coder_tool_parser.py:269-271 back-off: when output has no markers, scan the whole completion for raw blocks and recover the call. Content text is whatever remains after the function blocks are stripped. - Strip exactly one leading and one trailing newline from parameter values (qwen3coder_tool_parser.py:246-250); interior whitespace round-trips verbatim — previous .strip() was too aggressive. - Fix the no-schema branch of _coerce_arg_value to match vLLM (qwen3coder_tool_parser.py:128-137): return the raw text verbatim after the case-insensitive null short-circuit instead of attempting json.loads. Qwen3.6 needs no separate parser changes — Qwen36Renderer inherits parse_response from Qwen35Renderer and shares the same parse_qwen35 plumbing, so the vLLM-parity ladder + structural tolerance applies to both 3.5 (Python repr literals) and 3.6 (JSON literals) in one shot. Qwen35ToolParser in parsers.py (DefaultRenderer + tool_parser= 'qwen3.5' path) is updated to share the new _parse_xml_function_blocks helper so both code paths produce identical output. Tests: 7 new structural-parity tests pin the regex tolerance, no-tag back-off, and whitespace stripping against the vLLM reference shape. Existing test_no_schema_falls_back_to_json_loads is renamed and inverted to match the new (vLLM-faithful) behavior. --- renderers/parsers.py | 108 +++++++------- renderers/parsing.py | 284 ++++++++++++++++++++++++++----------- tests/test_arg_coercion.py | 159 ++++++++++++++++++--- 3 files changed, 398 insertions(+), 153 deletions(-) diff --git a/renderers/parsers.py b/renderers/parsers.py index 77fa850..a277a0e 100644 --- a/renderers/parsers.py +++ b/renderers/parsers.py @@ -20,6 +20,7 @@ from typing import Protocol, runtime_checkable from renderers.base import ParsedToolCall, ToolCallParseStatus +from renderers.parsing import _parse_xml_function_blocks # ── Shared helpers ─────────────────────────────────────────────────── @@ -156,7 +157,22 @@ def extract(self, ids: list[int]) -> tuple[list[int], list[ParsedToolCall]]: class Qwen35ToolParser: - """XML-style tool calls: ``V...``.""" + """XML-style tool calls: ``V...``. + + Mirrors vLLM's ``Qwen3CoderToolParser`` extraction logic — see + ``renderers.parsing`` for the full ladder and the rationale behind + picking ``qwen3_coder`` over ``qwen3_xml``. This parser drives + ``DefaultRenderer`` when callers wire it up with + ``tool_parser="qwen3.5"``; ``Qwen35Renderer.parse_response`` + routes through the same shared helpers via ``parse_qwen35``. + + Schema-aware coercion is intentionally inert here: ``ToolParser``'s + ``extract`` API does not receive the tool list (callers consume + the parsed values as opaque dicts), so ``param_index`` is empty + and every parameter goes through the no-schema branch of + ``_coerce_arg_value`` — vLLM-equivalent verbatim strings after the + case-insensitive ``null`` short-circuit. + """ def __init__(self, tokenizer): self._tokenizer = tokenizer @@ -169,69 +185,57 @@ def extract(self, ids: list[int]) -> tuple[list[int], list[ParsedToolCall]]: tc_start = _find(ids, self._tc_id) if tc_start == -1: return ids, [] + tool_calls: list[ParsedToolCall] = [] i = tc_start + param_index: dict = {} while i < len(ids): - if ids[i] == self._tc_id: - end = ( - _find(ids, self._tc_end_id, i + 1) - if self._tc_end_id is not None - else -1 - ) - if end == -1: - raw = _decode(self._tokenizer, ids[i + 1 :]) - tool_calls.append( - ParsedToolCall( - raw=raw, - token_span=(i, len(ids)), - status=ToolCallParseStatus.UNCLOSED_BLOCK, - ) - ) - break + if ids[i] != self._tc_id: + i += 1 + continue + + end = ( + _find(ids, self._tc_end_id, i + 1) + if self._tc_end_id is not None + else -1 + ) + if end == -1: + block_text = _decode(self._tokenizer, ids[i + 1 :]) + span = (i, len(ids)) + wrapper_unclosed = True + else: block_text = _decode(self._tokenizer, ids[i + 1 : end]) span = (i, end + 1) - name_match = re.search(r"]+)>", block_text) - if not name_match: - tool_calls.append( - ParsedToolCall( - raw=block_text, - token_span=span, - status=ToolCallParseStatus.MALFORMED_STRUCTURE, - ) - ) - i = end + 1 - continue - name = name_match.group(1) - arguments: dict = {} - any_json_fallback = False - for pm in re.finditer( - r"]+)>\n?(.*?)\n?", - block_text, - re.DOTALL, - ): - arg_name = pm.group(1) - arg_value = pm.group(2).strip() - try: - arguments[arg_name] = json.loads(arg_value) - except (json.JSONDecodeError, ValueError): - arguments[arg_name] = arg_value - any_json_fallback = True + wrapper_unclosed = False + + block_calls = _parse_xml_function_blocks( + block_text, + param_index=param_index, + token_span=span, + wrapper_unclosed=wrapper_unclosed, + ) + if block_calls: + tool_calls.extend(block_calls) + elif wrapper_unclosed: tool_calls.append( ParsedToolCall( raw=block_text, - name=name, - arguments=arguments, token_span=span, - status=( - ToolCallParseStatus.INVALID_JSON - if any_json_fallback - else ToolCallParseStatus.OK - ), + status=ToolCallParseStatus.UNCLOSED_BLOCK, ) ) - i = end + 1 else: - i += 1 + tool_calls.append( + ParsedToolCall( + raw=block_text, + token_span=span, + status=ToolCallParseStatus.MALFORMED_STRUCTURE, + ) + ) + + if wrapper_unclosed: + break + i = end + 1 return ids[:tc_start], tool_calls diff --git a/renderers/parsing.py b/renderers/parsing.py index f9f9ac1..8d039f7 100644 --- a/renderers/parsing.py +++ b/renderers/parsing.py @@ -18,25 +18,60 @@ import ast import json +import re from typing import Any from renderers.base import ParsedResponse, ParsedToolCall, ToolCallParseStatus, ToolSpec +# ── vLLM Qwen3CoderToolParser regex patterns ──────────────────────── +# +# Lifted verbatim from ``vllm/tool_parsers/qwen3coder_tool_parser.py`` +# (``self.tool_call_regex`` / ``tool_call_function_regex`` / +# ``tool_call_parameter_regex``). Each pattern has two branches: one +# for the closed form (with the matching end tag) and one anchored at +# end-of-string for unclosed output. The parameter regex uses +# positive lookaheads so a missing ```` is recovered from +# the next ```` boundary instead of +# silently dropping the value. + +_TOOL_CALL_BLOCK_RE = re.compile( + r"(.*?)|(.*?)$", re.DOTALL +) +_FUNCTION_BLOCK_RE = re.compile( + r"||(?=)|$)", + re.DOTALL, +) + + # ── Schema-aware argument coercion ────────────────────────────────── # # XML-style tool-call formats render argument values verbatim inside -# ```` tags with no quoting. ``true`` and the string +# ```` tags with no quoting. ``true`` and the string # ``"true"`` produce identical wire bytes; without the tool schema, the -# parser has no signal to distinguish them and defaults to -# ``json.loads`` (the historical behavior). When the caller passes -# ``tools=[...]``, parsers consult the per-parameter declared type and -# dispatch through a vLLM-parity ladder (case-insensitive ``null``, -# ``int()`` / ``float()`` for numeric, ``.lower() == "true"`` for bool, -# ``json.loads`` → ``ast.literal_eval`` for object/array) so models can -# emit Pythonic literals like ``True`` / ``None`` / ``{'k': 1}`` that -# both SGLang's ``Qwen3CoderDetector`` and vLLM's -# ``Qwen3CoderToolParser`` accept on the inference side. +# parser has no signal to distinguish them. We mirror vLLM's +# ``Qwen3CoderToolParser`` (and SGLang's ``Qwen3CoderDetector``) end to +# end: case-insensitive ``null`` short-circuit, ``int()`` / ``float()`` +# for numeric types, ``.lower() == "true"`` for bool, ``json.loads`` +# with ``ast.literal_eval`` fallback for objects/arrays. That accepts +# both JSON literals (Qwen3.6) and Python literals (Qwen3.5 — ``str()`` +# renders bools as ``True`` / ``False``) on the inference side. +# +# Why ``qwen3_coder`` and not ``qwen3_xml``: vLLM ships two parsers for +# the same wire format. ``qwen3_xml`` is the newer streaming-first +# parser (uses ``xml.parsers.expat`` and is the recommended choice for +# *live serving* because ``qwen3_coder`` has known infinite-loop bugs +# in vLLM's streaming path). For offline token-level extraction the +# two parsers agree on JSON-literal scalars but ``qwen3_coder`` is a +# strict superset for value coercion — it accepts Python literals via +# ``ast.literal_eval`` where ``qwen3_xml`` returns raw strings. Since +# this repo doesn't stream yet, ``qwen3_coder`` semantics dominate. +# TODO: when a streaming parse API lands, switch the streaming path to +# ``qwen3_xml``-style state-machine semantics to dodge vLLM's +# regex-streaming bugs (see HF Qwen3-Coder-Next discussion #17). _STRING_TYPES = frozenset({"string", "str", "text", "varchar", "char", "enum"}) @@ -97,7 +132,7 @@ def _declared_type(param_schema: dict[str, Any] | None) -> str | None: def _coerce_arg_value( text: str, param_schema: dict[str, Any] | None ) -> tuple[Any, bool]: - """Coerce a raw ```` body to its declared type. + """Coerce a raw ```` body to its declared type. Returns ``(value, used_fallback)``. ``used_fallback=True`` means the coercion could not satisfy the declared type and had to fall back to @@ -108,7 +143,11 @@ def _coerce_arg_value( and SGLang's ``Qwen3CoderDetector._convert_param_value`` with one deliberate deviation noted inline: - - No schema → try ``json.loads``, fall back to raw text + INVALID. + - ``null`` short-circuit (any case) → ``None`` for any declared + type **except** the string family (see deviation below). + - No schema OR param not in schema → return verbatim (after the + null short-circuit). vLLM ``qwen3coder_tool_parser.py:128-137``: + ``if param_name not in param_config: return param_value``. - ``string`` / ``str`` / ``text`` / ``varchar`` / ``char`` / ``enum`` → return verbatim. (Deviation: vLLM/SGLang null-coerce ``"null"`` *before* checking type, so a string-typed arg of ``"null"`` would @@ -116,7 +155,6 @@ def _coerce_arg_value( because the XML wire format can't distinguish the string ``"null"`` from JSON null, and downstream tests pin the preservation contract; see ``test_tool_arg_type_preservation``.) - - ``null`` (any case) for any non-string declared type → ``None``. - ``int`` / ``uint`` / ``long`` / ``short`` / ``unsigned`` family → ``int(text)``, degenerating to raw text + INVALID. - ``num`` / ``float`` family → ``float(text)`` with int demotion @@ -129,27 +167,20 @@ def _coerce_arg_value( - ``object`` / ``array`` / ``dict`` / ``list`` (or ``anyOf``) → ``json.loads`` first, ``ast.literal_eval`` fallback (for Python literals like ``{'k': 1}``), raw text + INVALID otherwise. - - Unknown type (param missing from schema, or type not in any - family above) → ``ast.literal_eval`` if it parses, else raw text. - Matches vLLM's catch-all branch and keeps strings like - ``"hello"`` (with no schema entry) as-is. + - Any other declared type → ``ast.literal_eval`` if it parses, else + raw text. Matches vLLM's catch-all else branch. """ - if param_schema is None: - if text.lower() == "null": - return None, False - try: - return json.loads(text), False - except (json.JSONDecodeError, ValueError): - return text, True - declared = _declared_type(param_schema) - if declared is None or declared in _STRING_TYPES: - return text, False - - if text.lower() == "null": + # vLLM null short-circuit runs BEFORE the schema-missing check, but + # is suppressed for declared string types so the wire-format + # ambiguity is resolved in favour of the explicit schema. + if declared not in _STRING_TYPES and text.lower() == "null": return None, False + if param_schema is None or declared is None or declared in _STRING_TYPES: + return text, False + if declared.startswith(_INT_PREFIXES): try: return int(text), False @@ -353,6 +384,7 @@ def parse_qwen35( tc_start = _find(ids, tool_call_id) tool_calls: list[ParsedToolCall] = [] + param_index = _build_param_type_index(tools) if tc_start != -1: content_text = _decode(tokenizer, ids[:tc_start]).strip() tool_calls = _parse_xml_tool_calls( @@ -361,10 +393,25 @@ def parse_qwen35( tool_call_id, tool_call_end_id, section_offset=parse_offset + tc_start, - param_index=_build_param_type_index(tools), + param_index=param_index, ) else: - content_text = _decode(tokenizer, ids).strip() + # vLLM ``qwen3coder_tool_parser.py:269-271`` back-off: if no + # ```` markers are present, treat the whole output + # as a single synthetic tool-call region and scan for raw + # ```` blocks. The content text is whatever sits + # outside any recovered function block (typically empty). + full_text = _decode(tokenizer, ids) + tool_calls = _parse_xml_function_blocks( + full_text, + param_index=param_index, + token_span=(parse_offset, parse_offset + len(ids)), + wrapper_unclosed=False, + ) + if tool_calls: + content_text = _strip_function_blocks(full_text).strip() + else: + content_text = full_text.strip() return ParsedResponse( content=content_text, @@ -382,71 +429,150 @@ def _parse_xml_tool_calls( section_offset: int, param_index: dict[str, dict[str, dict[str, Any]]], ) -> list[ParsedToolCall]: - """Parse Qwen3.5-style XML tool calls from token IDs.""" - import re - + """Parse Qwen3.5-style XML tool calls from token IDs. + + Uses token IDs to demarcate ```` / ```` + boundaries (so ``token_span`` stays precise for trainer-side + masking) and vLLM-parity regex on the decoded block text to + extract the function/parameter content tolerantly. Mirrors + ``Qwen3CoderToolParser._parse_xml_function_call`` plus the + tag-tolerance branches of the three module-level patterns. + """ tool_calls: list[ParsedToolCall] = [] i = 0 while i < len(ids): - if ids[i] == tc_id: - end = _find(ids, tc_end_id, i + 1) - if end == -1: - raw = _decode(tokenizer, ids[i + 1 :]) - tool_calls.append( - ParsedToolCall( - raw=raw, - token_span=(section_offset + i, section_offset + len(ids)), - status=ToolCallParseStatus.UNCLOSED_BLOCK, - ) - ) - break + if ids[i] != tc_id: + i += 1 + continue + + end = _find(ids, tc_end_id, i + 1) + if end == -1: + block_ids = ids[i + 1 :] + block_text = _decode(tokenizer, block_ids) + span = (section_offset + i, section_offset + len(ids)) + wrapper_unclosed = True + else: block_text = _decode(tokenizer, ids[i + 1 : end]) span = (section_offset + i, section_offset + end + 1) - name_match = re.search(r"]+)>", block_text) - if not name_match: - tool_calls.append( - ParsedToolCall( - raw=block_text, - token_span=span, - status=ToolCallParseStatus.MALFORMED_STRUCTURE, - ) - ) - i = end + 1 - continue + wrapper_unclosed = False - name = name_match.group(1) - params = param_index.get(name, {}) - arguments: dict = {} - any_json_fallback = False - for pm in re.finditer( - r"]+)>\n?(.*?)\n?", block_text, re.DOTALL - ): - arg_name = pm.group(1) - arg_value = pm.group(2).strip() - value, used_fallback = _coerce_arg_value( - arg_value, params.get(arg_name) + block_calls = _parse_xml_function_blocks( + block_text, + param_index=param_index, + token_span=span, + wrapper_unclosed=wrapper_unclosed, + ) + if block_calls: + tool_calls.extend(block_calls) + elif wrapper_unclosed: + # vLLM would silently drop a content-less unclosed + # ````; we keep the diagnostic so verifier / + # RL-loss code can mask the malformed span. + tool_calls.append( + ParsedToolCall( + raw=block_text, + token_span=span, + status=ToolCallParseStatus.UNCLOSED_BLOCK, ) - arguments[arg_name] = value - any_json_fallback = any_json_fallback or used_fallback + ) + else: tool_calls.append( ParsedToolCall( raw=block_text, - name=name, - arguments=arguments, token_span=span, - status=( - ToolCallParseStatus.INVALID_JSON - if any_json_fallback - else ToolCallParseStatus.OK - ), + status=ToolCallParseStatus.MALFORMED_STRUCTURE, ) ) - i = end + 1 + + if wrapper_unclosed: + break + i = end + 1 + return tool_calls + + +def _parse_xml_function_blocks( + text: str, + *, + param_index: dict[str, dict[str, dict[str, Any]]], + token_span: tuple[int, int] | None, + wrapper_unclosed: bool, +) -> list[ParsedToolCall]: + """Apply vLLM's ```` regex over *text*. + + ``token_span`` is shared by every call recovered from this block; + the granularity is the surrounding ```` region (or the + whole completion in the no-marker back-off path), matching how + vLLM treats multiple ```` siblings — it does not try to + attribute character offsets back to token positions. + """ + tool_calls: list[ParsedToolCall] = [] + for match in _FUNCTION_BLOCK_RE.finditer(text): + closed = match.group(1) + body = closed if closed is not None else (match.group(2) or "") + function_unclosed = closed is None + end_index = body.find(">") + if end_index == -1: + tool_calls.append( + ParsedToolCall( + raw=body, + token_span=token_span, + status=ToolCallParseStatus.MALFORMED_STRUCTURE, + ) + ) + continue + + name = body[:end_index] + params_text = body[end_index + 1 :] + params = param_index.get(name, {}) + arguments: dict = {} + any_fallback = False + for capture in _PARAMETER_BLOCK_RE.findall(params_text): + idx = capture.find(">") + if idx == -1: + continue + arg_name = capture[:idx] + arg_value = _strip_one_newline(capture[idx + 1 :]) + value, used_fallback = _coerce_arg_value(arg_value, params.get(arg_name)) + arguments[arg_name] = value + any_fallback = any_fallback or used_fallback + + if wrapper_unclosed or function_unclosed: + status = ToolCallParseStatus.UNCLOSED_BLOCK + elif any_fallback: + status = ToolCallParseStatus.INVALID_JSON else: - i += 1 + status = ToolCallParseStatus.OK + tool_calls.append( + ParsedToolCall( + raw=body, + name=name, + arguments=arguments, + token_span=token_span, + status=status, + ) + ) return tool_calls +def _strip_function_blocks(text: str) -> str: + """Remove every ```` (closed or unclosed) + region from *text* so the leftover is whatever the model emitted + as natural-language content alongside an un-bracketed tool call. + """ + return _FUNCTION_BLOCK_RE.sub("", text) + + +def _strip_one_newline(s: str) -> str: + """vLLM ``qwen3coder_tool_parser.py:246-250`` strips exactly one + leading and one trailing newline from a parameter value — no + further whitespace trimming.""" + if s.startswith("\n"): + s = s[1:] + if s.endswith("\n"): + s = s[:-1] + return s + + # ── GLM-5/4.7/4.5: name k v diff --git a/tests/test_arg_coercion.py b/tests/test_arg_coercion.py index d95565f..8ef4816 100644 --- a/tests/test_arg_coercion.py +++ b/tests/test_arg_coercion.py @@ -158,7 +158,9 @@ def test_anyof_treated_as_object(): assert used_fallback is False -@pytest.mark.parametrize("declared", ["string", "str", "text", "varchar", "char", "enum"]) +@pytest.mark.parametrize( + "declared", ["string", "str", "text", "varchar", "char", "enum"] +) def test_string_family_returns_verbatim(declared): value, used_fallback = _coerce_arg_value("True", {"type": declared}) assert value == "True" @@ -171,18 +173,32 @@ def test_string_with_list_form_type(): assert used_fallback is False -def test_no_schema_falls_back_to_json_loads(): - """Historical behavior: when ``param_schema`` is ``None`` (no tools - were passed), we still try ``json.loads`` for backwards compatibility - so untyped numbers / bools parse. ``None`` is handled via the - ``null`` short-circuit at the top of the ladder.""" +def test_no_schema_returns_verbatim_string(): + """vLLM ``qwen3coder_tool_parser.py:128-137``: when the param is + not in the tool schema (or no tools were passed at all), the parser + returns the raw text verbatim — it does **not** try ``json.loads``. + The case-insensitive ``null`` short-circuit at the top of the ladder + still applies (so untyped ``"null"`` becomes Python ``None``).""" value, used_fallback = _coerce_arg_value("42", None) - assert value == 42 + assert value == "42" + assert used_fallback is False + + value, used_fallback = _coerce_arg_value("True", None) + assert value == "True" assert used_fallback is False value, used_fallback = _coerce_arg_value("hello", None) assert value == "hello" - assert used_fallback is True + assert used_fallback is False + + +def test_no_schema_null_short_circuit(): + """vLLM null-coerces before the schema-missing return, so an untyped + ``"null"`` still becomes Python ``None``.""" + for raw in ("null", "Null", "NULL"): + value, used_fallback = _coerce_arg_value(raw, None) + assert value is None + assert used_fallback is False def test_unknown_type_falls_back_to_literal_eval(): @@ -222,14 +238,28 @@ def test_unknown_type_falls_back_to_literal_eval(): ] +def _parse(tok, completion: str, *, tools=None): + ids = tok.encode(completion, add_special_tokens=False) + return parse_qwen35( + tok, + list(ids), + stop_ids={tok.eos_token_id} if tok.eos_token_id is not None else set(), + think_id=tok.convert_tokens_to_ids(""), + think_end_id=tok.convert_tokens_to_ids(""), + tool_call_id=tok.convert_tokens_to_ids(""), + tool_call_end_id=tok.convert_tokens_to_ids(""), + tools=tools, + ) + + def test_qwen35_capital_true_for_boolean_is_ok_status(): """Regression for issue #47: ``True`` used to be flagged ``INVALID_JSON`` because ``json.loads("True")`` fails. SGLang and vLLM both accept it via case-folded comparison; renderers now does too.""" tok = load_tokenizer("Qwen/Qwen3.5-9B") - - completion = ( + parsed = _parse( + tok, "\n" "\n" "/\n" @@ -237,18 +267,7 @@ def test_qwen35_capital_true_for_boolean_is_ok_status(): "True\n" "True\n" "\n" - "" - ) - ids = tok.encode(completion, add_special_tokens=False) - - parsed = parse_qwen35( - tok, - list(ids), - stop_ids={tok.eos_token_id} if tok.eos_token_id is not None else set(), - think_id=tok.convert_tokens_to_ids(""), - think_end_id=tok.convert_tokens_to_ids(""), - tool_call_id=tok.convert_tokens_to_ids(""), - tool_call_end_id=tok.convert_tokens_to_ids(""), + "", tools=_TOOLS_FROM_ISSUE, ) assert len(parsed.tool_calls) == 1 @@ -261,3 +280,99 @@ def test_qwen35_capital_true_for_boolean_is_ok_status(): "include_files": True, "show_size": True, } + + +# ── Structural tolerance: parity with vLLM's three regex patterns ──── + + +def test_qwen35_tolerates_missing_closing_parameter(): + """vLLM ``tool_call_parameter_regex`` uses positive lookaheads so a + missing ```` is recovered from the next ```` boundary instead of dropping the value.""" + tok = load_tokenizer("Qwen/Qwen3.5-9B") + parsed = _parse( + tok, + "\n" + "\n" + "/\n" # missing + "2\n" + "\n" + "", + tools=_TOOLS_FROM_ISSUE, + ) + assert len(parsed.tool_calls) == 1 + tc = parsed.tool_calls[0] + assert tc.status == ToolCallParseStatus.OK + assert tc.name == "filesystem_server_get_directory_tree" + assert tc.arguments == {"path": "/", "max_depth": 2} + + +def test_qwen35_tolerates_unclosed_function(): + """vLLM ``tool_call_function_regex`` second alternation matches an + unclosed ``\n" + "\n" + "/\n", # no , no + tools=_TOOLS_FROM_ISSUE, + ) + assert len(parsed.tool_calls) == 1 + tc = parsed.tool_calls[0] + assert tc.status == ToolCallParseStatus.UNCLOSED_BLOCK + assert tc.name == "filesystem_server_get_directory_tree" + assert tc.arguments == {"path": "/"} + + +def test_qwen35_backoff_no_tool_call_markers(): + """vLLM ``qwen3coder_tool_parser.py:269-271``: when the output + contains no ```` markers, treat the whole completion as + a single tool-call region and scan for ```` directly.""" + tok = load_tokenizer("Qwen/Qwen3.5-9B") + parsed = _parse( + tok, + "sure, calling now.\n" + "\n" + "/etc\n" + "1\n" + "", + tools=_TOOLS_FROM_ISSUE, + ) + assert len(parsed.tool_calls) == 1 + tc = parsed.tool_calls[0] + assert tc.status == ToolCallParseStatus.OK + assert tc.name == "filesystem_server_get_directory_tree" + assert tc.arguments == {"path": "/etc", "max_depth": 1} + assert "sure, calling now" in parsed.content + + +def test_qwen35_backoff_returns_no_calls_when_no_function_either(): + """No ```` and no ``\n" + "\n" + "\n /etc \n\n" + "\n" + "", + tools=_TOOLS_FROM_ISSUE, + ) + assert len(parsed.tool_calls) == 1 + tc = parsed.tool_calls[0] + assert tc.status == ToolCallParseStatus.OK + # One leading + one trailing \n stripped — interior spaces preserved. + assert tc.arguments == {"path": " /etc "} From 96a1c9802c964ddd4464ce6951c778660d44fffd Mon Sep 17 00:00:00 2001 From: hallerite Date: Wed, 20 May 2026 13:31:24 +0000 Subject: [PATCH 3/4] fix(parsing): byte-parity with vLLM qwen3_coder for content + ids + exception path MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Close the four remaining behavioral deltas vs vLLM's Qwen3CoderToolParser non-streaming extract path: - Content text is now the raw slice (no .strip()). vLLM qwen3coder_tool_parser.py:316-319 explicitly leaves the commented-out .rstrip() in place; surrounding whitespace before/after round-trips verbatim. - Backoff content (no markers) is the text up to the first via uuid.uuid4().int & MASK_64_BITS, matching vLLM's make_tool_call_id / random_uuid pair. Malformed entries (no function name) intentionally stay id-less since vLLM would have dropped them entirely. - Tool-call extraction is wrapped in try/except: on any unhandled error, surface tool_calls=[] and content=full_decoded_text — the vLLM qwen3coder_tool_parser.py:327-331 catch-all behavior. Reasoning extraction stays outside the wrapper since vLLM doesn't do reasoning in this parser. Tests: 4 new cases pin content-no-strip, backoff content slice, chatcmpl-tool- id format, and the exception fallback. --- renderers/parsing.py | 96 ++++++++++++++++++++++++-------------- tests/test_arg_coercion.py | 96 ++++++++++++++++++++++++++++++++++++-- 2 files changed, 154 insertions(+), 38 deletions(-) diff --git a/renderers/parsing.py b/renderers/parsing.py index 8d039f7..5404d6b 100644 --- a/renderers/parsing.py +++ b/renderers/parsing.py @@ -19,6 +19,7 @@ import ast import json import re +import uuid from typing import Any from renderers.base import ParsedResponse, ParsedToolCall, ToolCallParseStatus, ToolSpec @@ -35,6 +36,25 @@ # the next ```` boundary instead of # silently dropping the value. + +# Mask matching vLLM's ``vllm.utils.MASK_64_BITS`` so the random id we +# generate is byte-shape compatible with vLLM's ``random_uuid()``. +_MASK_64_BITS = (1 << 64) - 1 + + +def _make_tool_call_id() -> str: + """Mint a tool-call id in vLLM's default format. + + vLLM's ``ToolCall`` dataclass uses ``default_factory=make_tool_call_id`` + which returns ``f"chatcmpl-tool-{random_uuid()}"`` where + ``random_uuid`` is ``f"{uuid.uuid4().int & MASK_64_BITS:016x}"`` — + 16 hex characters drawn from the low 64 bits of a v4 UUID. We + reproduce the format so OpenAI-shaped clients get the same id + surface across renderers and vLLM serving paths. + """ + return f"chatcmpl-tool-{uuid.uuid4().int & _MASK_64_BITS:016x}" + + _TOOL_CALL_BLOCK_RE = re.compile( r"(.*?)|(.*?)$", re.DOTALL ) @@ -382,36 +402,49 @@ def parse_qwen35( content="", reasoning_content=reasoning or None, tool_calls=[] ) + full_text = _decode(tokenizer, ids) tc_start = _find(ids, tool_call_id) - tool_calls: list[ParsedToolCall] = [] param_index = _build_param_type_index(tools) - if tc_start != -1: - content_text = _decode(tokenizer, ids[:tc_start]).strip() - tool_calls = _parse_xml_tool_calls( - tokenizer, - ids[tc_start:], - tool_call_id, - tool_call_end_id, - section_offset=parse_offset + tc_start, - param_index=param_index, - ) - else: - # vLLM ``qwen3coder_tool_parser.py:269-271`` back-off: if no - # ```` markers are present, treat the whole output - # as a single synthetic tool-call region and scan for raw - # ```` blocks. The content text is whatever sits - # outside any recovered function block (typically empty). - full_text = _decode(tokenizer, ids) - tool_calls = _parse_xml_function_blocks( - full_text, - param_index=param_index, - token_span=(parse_offset, parse_offset + len(ids)), - wrapper_unclosed=False, - ) - if tool_calls: - content_text = _strip_function_blocks(full_text).strip() + try: + if tc_start != -1: + # vLLM ``qwen3coder_tool_parser.py:316-319``: content text + # is the raw slice up to the first ```` (or + # first ```` is present). + # No whitespace stripping — the model's prefix prose is + # surfaced verbatim. + content_text = _decode(tokenizer, ids[:tc_start]) + tool_calls = _parse_xml_tool_calls( + tokenizer, + ids[tc_start:], + tool_call_id, + tool_call_end_id, + section_offset=parse_offset + tc_start, + param_index=param_index, + ) else: - content_text = full_text.strip() + # vLLM ``qwen3coder_tool_parser.py:269-271`` back-off: no + # ```` markers — scan whole output for raw + # ```` blocks. Content text is whatever sits + # before the first ``= 0 else full_text + else: + content_text = full_text + except Exception: + # vLLM ``qwen3coder_tool_parser.py:327-331`` catch-all: on any + # extraction error, drop every recovered call and surface the + # raw text as content so downstream consumers never see a + # half-parsed call. + content_text = full_text + tool_calls = [] return ParsedResponse( content=content_text, @@ -547,6 +580,7 @@ def _parse_xml_function_blocks( raw=body, name=name, arguments=arguments, + id=_make_tool_call_id(), token_span=token_span, status=status, ) @@ -554,14 +588,6 @@ def _parse_xml_function_blocks( return tool_calls -def _strip_function_blocks(text: str) -> str: - """Remove every ```` (closed or unclosed) - region from *text* so the leftover is whatever the model emitted - as natural-language content alongside an un-bracketed tool call. - """ - return _FUNCTION_BLOCK_RE.sub("", text) - - def _strip_one_newline(s: str) -> str: """vLLM ``qwen3coder_tool_parser.py:246-250`` strips exactly one leading and one trailing newline from a parameter value — no diff --git a/tests/test_arg_coercion.py b/tests/test_arg_coercion.py index 8ef4816..279dbf3 100644 --- a/tests/test_arg_coercion.py +++ b/tests/test_arg_coercion.py @@ -329,7 +329,9 @@ def test_qwen35_tolerates_unclosed_function(): def test_qwen35_backoff_no_tool_call_markers(): """vLLM ``qwen3coder_tool_parser.py:269-271``: when the output contains no ```` markers, treat the whole completion as - a single tool-call region and scan for ```` directly.""" + a single tool-call region and scan for ```` directly. + Content text is the raw slice up to the first ```` and no ````; the commented-out ``.rstrip()`` confirms + intent. We mirror that — surrounding whitespace round-trips.""" + tok = load_tokenizer("Qwen/Qwen3.5-9B") + parsed = _parse( + tok, + "let me check\n\n" # trailing whitespace before + "\n" + "\n" + "/\n" + "\n" + "", + tools=_TOOLS_FROM_ISSUE, + ) + assert parsed.content == "let me check\n\n" + assert len(parsed.tool_calls) == 1 + assert parsed.tool_calls[0].status == ToolCallParseStatus.OK + + +def test_qwen35_tool_call_id_matches_vllm_format(): + """vLLM's ``ToolCall`` default factory mints ids of the form + ``chatcmpl-tool-<16 hex>``. Each surfaced call gets a fresh id.""" + import re as _re + + tok = load_tokenizer("Qwen/Qwen3.5-9B") + parsed = _parse( + tok, + "\n" + "\n" + "/\n" + "\n" + "\n" + "\n" + "\n" + "/etc\n" + "\n" + "", + tools=_TOOLS_FROM_ISSUE, + ) + assert len(parsed.tool_calls) == 2 + pattern = _re.compile(r"^chatcmpl-tool-[0-9a-f]{16}$") + ids = [tc.id for tc in parsed.tool_calls] + for tcid in ids: + assert tcid is not None and pattern.match(tcid), tcid + # Two surfaced calls must get distinct ids. + assert ids[0] != ids[1] + + +def test_qwen35_exception_fallback_returns_full_text(): + """vLLM ``qwen3coder_tool_parser.py:327-331`` catch-all returns + ``tools_called=False, tool_calls=[], content=model_output`` on any + unhandled extraction error. We mirror that by wrapping the + tool-call extraction; reasoning content extracted earlier is kept. + """ + from unittest.mock import patch + + tok = load_tokenizer("Qwen/Qwen3.5-9B") + completion = ( + "\n" + "\n" + "/\n" + "\n" + "" + ) + ids = tok.encode(completion, add_special_tokens=False) + + with patch( + "renderers.parsing._parse_xml_tool_calls", + side_effect=RuntimeError("boom"), + ): + parsed = parse_qwen35( + tok, + list(ids), + stop_ids={tok.eos_token_id} if tok.eos_token_id is not None else set(), + think_id=tok.convert_tokens_to_ids(""), + think_end_id=tok.convert_tokens_to_ids(""), + tool_call_id=tok.convert_tokens_to_ids(""), + tool_call_end_id=tok.convert_tokens_to_ids(""), + tools=_TOOLS_FROM_ISSUE, + ) + assert parsed.tool_calls == [] + # content is the full decoded post-think text, surfaced verbatim. + assert "" in parsed.content + assert "" in parsed.content + + def test_qwen35_parameter_value_preserves_internal_whitespace(): """vLLM strips exactly one leading and one trailing ``\\n`` from the parameter body (``qwen3coder_tool_parser.py:246-250``); inner From b58fa63396a0717586e66e86654b7ef5a031818f Mon Sep 17 00:00:00 2001 From: hallerite Date: Wed, 20 May 2026 13:43:01 +0000 Subject: [PATCH 4/4] fix(parsing): drop string-type "null" deviation, match vLLM byte-for-byte MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit vLLM's _convert_param_value (qwen3coder_tool_parser.py:125) runs the case-insensitive "null" short-circuit BEFORE any type check — including the string family. That means an XML wire value of null against a {"type": "string"} schema collapses to Python None, not the string "null". Previously this repo carried a deliberate deviation to preserve the string verbatim across the four XML renderers (Qwen3.5/3.6, GLM-5, MiniMax-M2.5, Laguna-XS.2). The motivation was a tidier round-trip contract: "string params always round-trip as strings." But the XML wire format genuinely cannot distinguish the string "null" from a JSON null, and matching vLLM is more important than matching our sibling renderers — downstream consumers (verifier, RL-loss) align with vLLM's interpretation, not ours. Test updates: - test_arg_coercion::test_null_is_preserved_verbatim_for_string_type is renamed to test_null_short_circuit_applies_even_for_string_type and inverted: a string-typed "null" now returns None. - test_tool_arg_type_preservation::test_string_arg_preserves_type branches on _JSON_FORMAT_MODELS (Qwen3 Hermes, Kimi K2 section JSON) vs XML renderers for the string-null case only. JSON renderers preserve "null" verbatim because the wire bytes quote strings; XML renderers null-coerce. Other string cases (bool/int/array/object) still round-trip verbatim everywhere. --- renderers/parsing.py | 35 +++++++++++------------- tests/test_arg_coercion.py | 19 +++++++------ tests/test_tool_arg_type_preservation.py | 33 +++++++++++++++++++--- 3 files changed, 55 insertions(+), 32 deletions(-) diff --git a/renderers/parsing.py b/renderers/parsing.py index 5404d6b..331c0ea 100644 --- a/renderers/parsing.py +++ b/renderers/parsing.py @@ -160,21 +160,21 @@ def _coerce_arg_value( text for objects), so the caller can flag ``INVALID_JSON``. The ladder mirrors vLLM's ``Qwen3CoderToolParser._convert_param_value`` - and SGLang's ``Qwen3CoderDetector._convert_param_value`` with one - deliberate deviation noted inline: - - - ``null`` short-circuit (any case) → ``None`` for any declared - type **except** the string family (see deviation below). - - No schema OR param not in schema → return verbatim (after the - null short-circuit). vLLM ``qwen3coder_tool_parser.py:128-137``: + and SGLang's ``Qwen3CoderDetector._convert_param_value`` byte-for-byte: + + - ``null`` short-circuit (any case) → ``None``. Runs **before** any + type check, including the string family. vLLM does this on line + 125 of ``qwen3coder_tool_parser.py`` ahead of the + ``param_name not in param_config`` branch. This means a + string-typed arg whose wire value is the literal ``null`` + collapses to Python ``None`` — the XML wire format can't + distinguish that case from a JSON null, and we accept the + lossy round-trip in exchange for byte parity with vLLM. + - No schema OR param not in schema → return verbatim. vLLM + ``qwen3coder_tool_parser.py:128-137``: ``if param_name not in param_config: return param_value``. - ``string`` / ``str`` / ``text`` / ``varchar`` / ``char`` / ``enum`` - → return verbatim. (Deviation: vLLM/SGLang null-coerce ``"null"`` - *before* checking type, so a string-typed arg of ``"null"`` would - come back as Python ``None``. Renderers preserves the string - because the XML wire format can't distinguish the string - ``"null"`` from JSON null, and downstream tests pin the - preservation contract; see ``test_tool_arg_type_preservation``.) + → return verbatim. - ``int`` / ``uint`` / ``long`` / ``short`` / ``unsigned`` family → ``int(text)``, degenerating to raw text + INVALID. - ``num`` / ``float`` family → ``float(text)`` with int demotion @@ -190,14 +190,11 @@ def _coerce_arg_value( - Any other declared type → ``ast.literal_eval`` if it parses, else raw text. Matches vLLM's catch-all else branch. """ - declared = _declared_type(param_schema) - - # vLLM null short-circuit runs BEFORE the schema-missing check, but - # is suppressed for declared string types so the wire-format - # ambiguity is resolved in favour of the explicit schema. - if declared not in _STRING_TYPES and text.lower() == "null": + if text.lower() == "null": return None, False + declared = _declared_type(param_schema) + if param_schema is None or declared is None or declared in _STRING_TYPES: return text, False diff --git a/tests/test_arg_coercion.py b/tests/test_arg_coercion.py index 279dbf3..5d5575f 100644 --- a/tests/test_arg_coercion.py +++ b/tests/test_arg_coercion.py @@ -63,15 +63,16 @@ def test_null_literal_is_case_insensitive_for_non_string_types(declared): assert used_fallback is False -def test_null_is_preserved_verbatim_for_string_type(): - """Deliberate deviation from vLLM/SGLang: string-typed ``"null"`` - stays as the string ``"null"`` so the existing string-verbatim - contract holds (see ``test_tool_arg_type_preservation``). The XML - wire format already can't distinguish the string ``"null"`` from - JSON null, but when a schema says ``type: "string"`` we honour it.""" - value, used_fallback = _coerce_arg_value("null", {"type": "string"}) - assert value == "null" - assert used_fallback is False +def test_null_short_circuit_applies_even_for_string_type(): + """vLLM ``qwen3coder_tool_parser.py:125`` null-coerces ``"null"`` + *before* the type check, so a string-typed wire value of ``null`` + collapses to Python ``None``. The XML wire format can't tell the + string ``"null"`` from JSON null; we accept the lossy round-trip + in exchange for byte parity with vLLM.""" + for raw in ("null", "Null", "NULL"): + value, used_fallback = _coerce_arg_value(raw, {"type": "string"}) + assert value is None + assert used_fallback is False @pytest.mark.parametrize( diff --git a/tests/test_tool_arg_type_preservation.py b/tests/test_tool_arg_type_preservation.py index 61dbb9b..e036abd 100644 --- a/tests/test_tool_arg_type_preservation.py +++ b/tests/test_tool_arg_type_preservation.py @@ -67,9 +67,21 @@ def renderer(model, renderer_name): ] +# Renderers whose wire format quotes strings (Hermes JSON, section +# JSON). Those parsers preserve the string ``"null"`` verbatim because +# the wire bytes are unambiguous. XML-style renderers can't tell the +# string ``null`` from a JSON null on the wire, so they null-coerce — +# matching vLLM's ``Qwen3CoderToolParser._convert_param_value:125`` +# which runs the null short-circuit before any type check. +_JSON_FORMAT_MODELS = {"Qwen/Qwen3-8B", "moonshotai/Kimi-K2-Instruct"} + + # Each case: a single ``x`` argument whose value is a STRING that # happens to be valid JSON of another type. With a schema declaring -# ``x: string``, the parser must return the string verbatim. +# ``x: string``, the parser must return the string verbatim — except +# for the ``string-null`` case on XML renderers, where vLLM's +# ``null`` short-circuit runs before the type check and we follow +# suit for byte parity (see test below for the explicit assertion). JSON_LOOKING_STRING_ARGS = [ pytest.param({"x": "true"}, id="string-bool"), pytest.param({"x": "42"}, id="string-int"), @@ -116,9 +128,16 @@ def _extract_assistant_tokens(renderer, prompt, assistant_msg, *, tools=None): @pytest.mark.parametrize("args", JSON_LOOKING_STRING_ARGS) -def test_string_arg_preserves_type(model, renderer_name, renderer, args): +def test_string_arg_preserves_type(model, renderer_name, renderer, args, request): """Tool-call args of declared type ``str`` must round-trip as ``str``, - not get re-parsed as bool/int/null/list/dict by the parser.""" + not get re-parsed as bool/int/null/list/dict by the parser. + + Exception: the ``string-null`` case on XML renderers collapses to + Python ``None`` to match vLLM's null short-circuit. The XML wire + format is genuinely ambiguous there (``null`` + is identical bytes whether the model meant the string ``"null"`` + or a JSON null), and vLLM resolves the ambiguity by null-coercing. + """ msg = { "role": "assistant", "content": "", @@ -134,6 +153,12 @@ def test_string_arg_preserves_type(model, renderer_name, renderer, args): assert parsed.tool_calls, f"{model}: parser returned no tool_calls" got = _normalize_args(parsed.tool_calls[0].arguments) - assert got == args, ( + + case_id = request.node.callspec.id.rsplit("-", 1)[-1] + if case_id == "null" and model not in _JSON_FORMAT_MODELS: + expected = {"x": None} + else: + expected = args + assert got == expected, ( f"{model}: tool-arg type drift — sent {args!r}, parser returned {got!r}" )