diff --git a/renderers/parsers.py b/renderers/parsers.py index 77fa850..a277a0e 100644 --- a/renderers/parsers.py +++ b/renderers/parsers.py @@ -20,6 +20,7 @@ from typing import Protocol, runtime_checkable from renderers.base import ParsedToolCall, ToolCallParseStatus +from renderers.parsing import _parse_xml_function_blocks # ── Shared helpers ─────────────────────────────────────────────────── @@ -156,7 +157,22 @@ def extract(self, ids: list[int]) -> tuple[list[int], list[ParsedToolCall]]: class Qwen35ToolParser: - """XML-style tool calls: ``V...``.""" + """XML-style tool calls: ``V...``. + + Mirrors vLLM's ``Qwen3CoderToolParser`` extraction logic — see + ``renderers.parsing`` for the full ladder and the rationale behind + picking ``qwen3_coder`` over ``qwen3_xml``. This parser drives + ``DefaultRenderer`` when callers wire it up with + ``tool_parser="qwen3.5"``; ``Qwen35Renderer.parse_response`` + routes through the same shared helpers via ``parse_qwen35``. + + Schema-aware coercion is intentionally inert here: ``ToolParser``'s + ``extract`` API does not receive the tool list (callers consume + the parsed values as opaque dicts), so ``param_index`` is empty + and every parameter goes through the no-schema branch of + ``_coerce_arg_value`` — vLLM-equivalent verbatim strings after the + case-insensitive ``null`` short-circuit. + """ def __init__(self, tokenizer): self._tokenizer = tokenizer @@ -169,69 +185,57 @@ def extract(self, ids: list[int]) -> tuple[list[int], list[ParsedToolCall]]: tc_start = _find(ids, self._tc_id) if tc_start == -1: return ids, [] + tool_calls: list[ParsedToolCall] = [] i = tc_start + param_index: dict = {} while i < len(ids): - if ids[i] == self._tc_id: - end = ( - _find(ids, self._tc_end_id, i + 1) - if self._tc_end_id is not None - else -1 - ) - if end == -1: - raw = _decode(self._tokenizer, ids[i + 1 :]) - tool_calls.append( - ParsedToolCall( - raw=raw, - token_span=(i, len(ids)), - status=ToolCallParseStatus.UNCLOSED_BLOCK, - ) - ) - break + if ids[i] != self._tc_id: + i += 1 + continue + + end = ( + _find(ids, self._tc_end_id, i + 1) + if self._tc_end_id is not None + else -1 + ) + if end == -1: + block_text = _decode(self._tokenizer, ids[i + 1 :]) + span = (i, len(ids)) + wrapper_unclosed = True + else: block_text = _decode(self._tokenizer, ids[i + 1 : end]) span = (i, end + 1) - name_match = re.search(r"]+)>", block_text) - if not name_match: - tool_calls.append( - ParsedToolCall( - raw=block_text, - token_span=span, - status=ToolCallParseStatus.MALFORMED_STRUCTURE, - ) - ) - i = end + 1 - continue - name = name_match.group(1) - arguments: dict = {} - any_json_fallback = False - for pm in re.finditer( - r"]+)>\n?(.*?)\n?", - block_text, - re.DOTALL, - ): - arg_name = pm.group(1) - arg_value = pm.group(2).strip() - try: - arguments[arg_name] = json.loads(arg_value) - except (json.JSONDecodeError, ValueError): - arguments[arg_name] = arg_value - any_json_fallback = True + wrapper_unclosed = False + + block_calls = _parse_xml_function_blocks( + block_text, + param_index=param_index, + token_span=span, + wrapper_unclosed=wrapper_unclosed, + ) + if block_calls: + tool_calls.extend(block_calls) + elif wrapper_unclosed: tool_calls.append( ParsedToolCall( raw=block_text, - name=name, - arguments=arguments, token_span=span, - status=( - ToolCallParseStatus.INVALID_JSON - if any_json_fallback - else ToolCallParseStatus.OK - ), + status=ToolCallParseStatus.UNCLOSED_BLOCK, ) ) - i = end + 1 else: - i += 1 + tool_calls.append( + ParsedToolCall( + raw=block_text, + token_span=span, + status=ToolCallParseStatus.MALFORMED_STRUCTURE, + ) + ) + + if wrapper_unclosed: + break + i = end + 1 return ids[:tc_start], tool_calls diff --git a/renderers/parsing.py b/renderers/parsing.py index 528f122..331c0ea 100644 --- a/renderers/parsing.py +++ b/renderers/parsing.py @@ -16,21 +16,90 @@ from __future__ import annotations +import ast import json +import re +import uuid from typing import Any from renderers.base import ParsedResponse, ParsedToolCall, ToolCallParseStatus, ToolSpec +# ── vLLM Qwen3CoderToolParser regex patterns ──────────────────────── +# +# Lifted verbatim from ``vllm/tool_parsers/qwen3coder_tool_parser.py`` +# (``self.tool_call_regex`` / ``tool_call_function_regex`` / +# ``tool_call_parameter_regex``). Each pattern has two branches: one +# for the closed form (with the matching end tag) and one anchored at +# end-of-string for unclosed output. The parameter regex uses +# positive lookaheads so a missing ```` is recovered from +# the next ```` boundary instead of +# silently dropping the value. + + +# Mask matching vLLM's ``vllm.utils.MASK_64_BITS`` so the random id we +# generate is byte-shape compatible with vLLM's ``random_uuid()``. +_MASK_64_BITS = (1 << 64) - 1 + + +def _make_tool_call_id() -> str: + """Mint a tool-call id in vLLM's default format. + + vLLM's ``ToolCall`` dataclass uses ``default_factory=make_tool_call_id`` + which returns ``f"chatcmpl-tool-{random_uuid()}"`` where + ``random_uuid`` is ``f"{uuid.uuid4().int & MASK_64_BITS:016x}"`` — + 16 hex characters drawn from the low 64 bits of a v4 UUID. We + reproduce the format so OpenAI-shaped clients get the same id + surface across renderers and vLLM serving paths. + """ + return f"chatcmpl-tool-{uuid.uuid4().int & _MASK_64_BITS:016x}" + + +_TOOL_CALL_BLOCK_RE = re.compile( + r"(.*?)|(.*?)$", re.DOTALL +) +_FUNCTION_BLOCK_RE = re.compile( + r"||(?=)|$)", + re.DOTALL, +) + + # ── Schema-aware argument coercion ────────────────────────────────── # # XML-style tool-call formats render argument values verbatim inside -# ```` tags with no quoting. ``true`` and the string +# ```` tags with no quoting. ``true`` and the string # ``"true"`` produce identical wire bytes; without the tool schema, the -# parser has no signal to distinguish them and defaults to -# ``json.loads`` (the historical behavior). When the caller passes -# ``tools=[...]``, parsers consult the per-parameter declared type to -# keep string args verbatim, matching vLLM / SGLang reference parsers. +# parser has no signal to distinguish them. We mirror vLLM's +# ``Qwen3CoderToolParser`` (and SGLang's ``Qwen3CoderDetector``) end to +# end: case-insensitive ``null`` short-circuit, ``int()`` / ``float()`` +# for numeric types, ``.lower() == "true"`` for bool, ``json.loads`` +# with ``ast.literal_eval`` fallback for objects/arrays. That accepts +# both JSON literals (Qwen3.6) and Python literals (Qwen3.5 — ``str()`` +# renders bools as ``True`` / ``False``) on the inference side. +# +# Why ``qwen3_coder`` and not ``qwen3_xml``: vLLM ships two parsers for +# the same wire format. ``qwen3_xml`` is the newer streaming-first +# parser (uses ``xml.parsers.expat`` and is the recommended choice for +# *live serving* because ``qwen3_coder`` has known infinite-loop bugs +# in vLLM's streaming path). For offline token-level extraction the +# two parsers agree on JSON-literal scalars but ``qwen3_coder`` is a +# strict superset for value coercion — it accepts Python literals via +# ``ast.literal_eval`` where ``qwen3_xml`` returns raw strings. Since +# this repo doesn't stream yet, ``qwen3_coder`` semantics dominate. +# TODO: when a streaming parse API lands, switch the streaming path to +# ``qwen3_xml``-style state-machine semantics to dodge vLLM's +# regex-streaming bugs (see HF Qwen3-Coder-Next discussion #17). + + +_STRING_TYPES = frozenset({"string", "str", "text", "varchar", "char", "enum"}) +_BOOL_TYPES = frozenset({"boolean", "bool", "binary"}) +_OBJECT_TYPES = frozenset({"object", "array", "arr"}) +_INT_PREFIXES = ("int", "uint", "long", "short", "unsigned") +_FLOAT_PREFIXES = ("num", "float") +_OBJECT_PREFIXES = ("dict", "list") def _build_param_type_index( @@ -59,35 +128,112 @@ def _build_param_type_index( return index -def _coerce_arg_value( - text: str, param_schema: dict[str, Any] | None -) -> tuple[Any, bool]: - """Coerce a raw ```` body to its declared type. +def _declared_type(param_schema: dict[str, Any] | None) -> str | None: + """Extract the lowercased ``type`` from a JSON-schema fragment. - Returns ``(value, used_json_fallback)``. The boolean is ``True`` only - when ``json.loads`` was attempted and raised, so the caller knows - whether to flag ``INVALID_JSON``. Returning a string verbatim - because the schema declared ``type: "string"`` is NOT a fallback. - - Rule (matches vLLM / SGLang reference parsers): + Mirrors vLLM: a fragment with ``anyOf`` and no top-level ``type`` is + treated as ``"object"`` so json.loads is attempted; a list-form type + falls back to the first declared entry. + """ + if not isinstance(param_schema, dict): + return None + declared = param_schema.get("type") + if isinstance(declared, str): + return declared.strip().lower() + if isinstance(declared, list) and declared: + first = declared[0] + if isinstance(first, str): + return first.strip().lower() + if "anyOf" in param_schema: + return "object" + return None - - If the param's declared ``type`` is ``"string"`` (or single-element - ``["string"]``), return ``text`` verbatim — never ``json.loads``. - - Anything else (no schema, non-string scalar, object, array, or - union types that include non-string): try ``json.loads`` and fall - back to raw ``text`` on parse failure. - Union types that include ``"string"`` alongside other types still - attempt ``json.loads`` first so an explicit integer / bool can - parse; the string branch only wins as the fallback. +def _coerce_arg_value( + text: str, param_schema: dict[str, Any] | None +) -> tuple[Any, bool]: + """Coerce a raw ```` body to its declared type. + + Returns ``(value, used_fallback)``. ``used_fallback=True`` means the + coercion could not satisfy the declared type and had to fall back to + a different shape (a string for scalars, ``False`` for booleans, raw + text for objects), so the caller can flag ``INVALID_JSON``. + + The ladder mirrors vLLM's ``Qwen3CoderToolParser._convert_param_value`` + and SGLang's ``Qwen3CoderDetector._convert_param_value`` byte-for-byte: + + - ``null`` short-circuit (any case) → ``None``. Runs **before** any + type check, including the string family. vLLM does this on line + 125 of ``qwen3coder_tool_parser.py`` ahead of the + ``param_name not in param_config`` branch. This means a + string-typed arg whose wire value is the literal ``null`` + collapses to Python ``None`` — the XML wire format can't + distinguish that case from a JSON null, and we accept the + lossy round-trip in exchange for byte parity with vLLM. + - No schema OR param not in schema → return verbatim. vLLM + ``qwen3coder_tool_parser.py:128-137``: + ``if param_name not in param_config: return param_value``. + - ``string`` / ``str`` / ``text`` / ``varchar`` / ``char`` / ``enum`` + → return verbatim. + - ``int`` / ``uint`` / ``long`` / ``short`` / ``unsigned`` family → + ``int(text)``, degenerating to raw text + INVALID. + - ``num`` / ``float`` family → ``float(text)`` with int demotion + when the source has no ``.``/``e`` and is whole, degenerating to + raw text + INVALID. + - ``boolean`` / ``bool`` / ``binary`` → ``text.lower() == "true"``; + anything outside ``{"true", "false"}`` returns ``False`` + INVALID + (matches vLLM's "degenerate to false" behavior, with the + INVALID flag preserved for the verifier / RL-loss signal). + - ``object`` / ``array`` / ``dict`` / ``list`` (or ``anyOf``) → + ``json.loads`` first, ``ast.literal_eval`` fallback (for Python + literals like ``{'k': 1}``), raw text + INVALID otherwise. + - Any other declared type → ``ast.literal_eval`` if it parses, else + raw text. Matches vLLM's catch-all else branch. """ - if param_schema is not None: - declared = param_schema.get("type") - if declared == "string" or declared == ["string"]: - return text, False + if text.lower() == "null": + return None, False + + declared = _declared_type(param_schema) + + if param_schema is None or declared is None or declared in _STRING_TYPES: + return text, False + + if declared.startswith(_INT_PREFIXES): + try: + return int(text), False + except (ValueError, TypeError): + return text, True + + if declared.startswith(_FLOAT_PREFIXES): + try: + f = float(text) + except (ValueError, TypeError): + return text, True + if "." not in text and "e" not in text.lower() and f.is_integer(): + return int(f), False + return f, False + + if declared in _BOOL_TYPES: + lowered = text.lower() + if lowered == "true": + return True, False + if lowered == "false": + return False, False + return False, True + + if declared in _OBJECT_TYPES or declared.startswith(_OBJECT_PREFIXES): + try: + return json.loads(text), False + except (json.JSONDecodeError, ValueError): + pass + try: + return ast.literal_eval(text), False + except (ValueError, SyntaxError, TypeError, MemoryError): + return text, True + try: - return json.loads(text), False - except (json.JSONDecodeError, ValueError): + return ast.literal_eval(text), False + except (ValueError, SyntaxError, TypeError, MemoryError): return text, True @@ -253,20 +399,49 @@ def parse_qwen35( content="", reasoning_content=reasoning or None, tool_calls=[] ) + full_text = _decode(tokenizer, ids) tc_start = _find(ids, tool_call_id) - tool_calls: list[ParsedToolCall] = [] - if tc_start != -1: - content_text = _decode(tokenizer, ids[:tc_start]).strip() - tool_calls = _parse_xml_tool_calls( - tokenizer, - ids[tc_start:], - tool_call_id, - tool_call_end_id, - section_offset=parse_offset + tc_start, - param_index=_build_param_type_index(tools), - ) - else: - content_text = _decode(tokenizer, ids).strip() + param_index = _build_param_type_index(tools) + try: + if tc_start != -1: + # vLLM ``qwen3coder_tool_parser.py:316-319``: content text + # is the raw slice up to the first ```` (or + # first ```` is present). + # No whitespace stripping — the model's prefix prose is + # surfaced verbatim. + content_text = _decode(tokenizer, ids[:tc_start]) + tool_calls = _parse_xml_tool_calls( + tokenizer, + ids[tc_start:], + tool_call_id, + tool_call_end_id, + section_offset=parse_offset + tc_start, + param_index=param_index, + ) + else: + # vLLM ``qwen3coder_tool_parser.py:269-271`` back-off: no + # ```` markers — scan whole output for raw + # ```` blocks. Content text is whatever sits + # before the first ``= 0 else full_text + else: + content_text = full_text + except Exception: + # vLLM ``qwen3coder_tool_parser.py:327-331`` catch-all: on any + # extraction error, drop every recovered call and surface the + # raw text as content so downstream consumers never see a + # half-parsed call. + content_text = full_text + tool_calls = [] return ParsedResponse( content=content_text, @@ -284,71 +459,143 @@ def _parse_xml_tool_calls( section_offset: int, param_index: dict[str, dict[str, dict[str, Any]]], ) -> list[ParsedToolCall]: - """Parse Qwen3.5-style XML tool calls from token IDs.""" - import re - + """Parse Qwen3.5-style XML tool calls from token IDs. + + Uses token IDs to demarcate ```` / ```` + boundaries (so ``token_span`` stays precise for trainer-side + masking) and vLLM-parity regex on the decoded block text to + extract the function/parameter content tolerantly. Mirrors + ``Qwen3CoderToolParser._parse_xml_function_call`` plus the + tag-tolerance branches of the three module-level patterns. + """ tool_calls: list[ParsedToolCall] = [] i = 0 while i < len(ids): - if ids[i] == tc_id: - end = _find(ids, tc_end_id, i + 1) - if end == -1: - raw = _decode(tokenizer, ids[i + 1 :]) - tool_calls.append( - ParsedToolCall( - raw=raw, - token_span=(section_offset + i, section_offset + len(ids)), - status=ToolCallParseStatus.UNCLOSED_BLOCK, - ) - ) - break + if ids[i] != tc_id: + i += 1 + continue + + end = _find(ids, tc_end_id, i + 1) + if end == -1: + block_ids = ids[i + 1 :] + block_text = _decode(tokenizer, block_ids) + span = (section_offset + i, section_offset + len(ids)) + wrapper_unclosed = True + else: block_text = _decode(tokenizer, ids[i + 1 : end]) span = (section_offset + i, section_offset + end + 1) - name_match = re.search(r"]+)>", block_text) - if not name_match: - tool_calls.append( - ParsedToolCall( - raw=block_text, - token_span=span, - status=ToolCallParseStatus.MALFORMED_STRUCTURE, - ) - ) - i = end + 1 - continue + wrapper_unclosed = False - name = name_match.group(1) - params = param_index.get(name, {}) - arguments: dict = {} - any_json_fallback = False - for pm in re.finditer( - r"]+)>\n?(.*?)\n?", block_text, re.DOTALL - ): - arg_name = pm.group(1) - arg_value = pm.group(2).strip() - value, used_fallback = _coerce_arg_value( - arg_value, params.get(arg_name) + block_calls = _parse_xml_function_blocks( + block_text, + param_index=param_index, + token_span=span, + wrapper_unclosed=wrapper_unclosed, + ) + if block_calls: + tool_calls.extend(block_calls) + elif wrapper_unclosed: + # vLLM would silently drop a content-less unclosed + # ````; we keep the diagnostic so verifier / + # RL-loss code can mask the malformed span. + tool_calls.append( + ParsedToolCall( + raw=block_text, + token_span=span, + status=ToolCallParseStatus.UNCLOSED_BLOCK, ) - arguments[arg_name] = value - any_json_fallback = any_json_fallback or used_fallback + ) + else: tool_calls.append( ParsedToolCall( raw=block_text, - name=name, - arguments=arguments, token_span=span, - status=( - ToolCallParseStatus.INVALID_JSON - if any_json_fallback - else ToolCallParseStatus.OK - ), + status=ToolCallParseStatus.MALFORMED_STRUCTURE, ) ) - i = end + 1 + + if wrapper_unclosed: + break + i = end + 1 + return tool_calls + + +def _parse_xml_function_blocks( + text: str, + *, + param_index: dict[str, dict[str, dict[str, Any]]], + token_span: tuple[int, int] | None, + wrapper_unclosed: bool, +) -> list[ParsedToolCall]: + """Apply vLLM's ```` regex over *text*. + + ``token_span`` is shared by every call recovered from this block; + the granularity is the surrounding ```` region (or the + whole completion in the no-marker back-off path), matching how + vLLM treats multiple ```` siblings — it does not try to + attribute character offsets back to token positions. + """ + tool_calls: list[ParsedToolCall] = [] + for match in _FUNCTION_BLOCK_RE.finditer(text): + closed = match.group(1) + body = closed if closed is not None else (match.group(2) or "") + function_unclosed = closed is None + end_index = body.find(">") + if end_index == -1: + tool_calls.append( + ParsedToolCall( + raw=body, + token_span=token_span, + status=ToolCallParseStatus.MALFORMED_STRUCTURE, + ) + ) + continue + + name = body[:end_index] + params_text = body[end_index + 1 :] + params = param_index.get(name, {}) + arguments: dict = {} + any_fallback = False + for capture in _PARAMETER_BLOCK_RE.findall(params_text): + idx = capture.find(">") + if idx == -1: + continue + arg_name = capture[:idx] + arg_value = _strip_one_newline(capture[idx + 1 :]) + value, used_fallback = _coerce_arg_value(arg_value, params.get(arg_name)) + arguments[arg_name] = value + any_fallback = any_fallback or used_fallback + + if wrapper_unclosed or function_unclosed: + status = ToolCallParseStatus.UNCLOSED_BLOCK + elif any_fallback: + status = ToolCallParseStatus.INVALID_JSON else: - i += 1 + status = ToolCallParseStatus.OK + tool_calls.append( + ParsedToolCall( + raw=body, + name=name, + arguments=arguments, + id=_make_tool_call_id(), + token_span=token_span, + status=status, + ) + ) return tool_calls +def _strip_one_newline(s: str) -> str: + """vLLM ``qwen3coder_tool_parser.py:246-250`` strips exactly one + leading and one trailing newline from a parameter value — no + further whitespace trimming.""" + if s.startswith("\n"): + s = s[1:] + if s.endswith("\n"): + s = s[:-1] + return s + + # ── GLM-5/4.7/4.5: name k v diff --git a/tests/test_arg_coercion.py b/tests/test_arg_coercion.py new file mode 100644 index 0000000..5d5575f --- /dev/null +++ b/tests/test_arg_coercion.py @@ -0,0 +1,469 @@ +"""Schema-aware argument coercion parity with vLLM / SGLang. + +The XML-style tool-call wire format (Qwen3.5, GLM, MiniMax, Laguna) +renders parameter values verbatim with no quoting, so the parser must +consult the tool schema to know whether ``True`` is a bool or a string. +Renderers' previous coercion was strict ``json.loads``, which flagged +``True`` / ``False`` (capitalised Python literals) as ``INVALID_JSON`` +for boolean params — but both vLLM's ``Qwen3CoderToolParser`` and +SGLang's ``Qwen3CoderDetector`` accept them via case-folded comparison. + +These tests pin the new ladder to the vLLM / SGLang reference shape and +guard the bug from issue #47. +""" + +from __future__ import annotations + +import pytest + +from renderers.base import ToolCallParseStatus, load_tokenizer +from renderers.parsing import _coerce_arg_value, parse_qwen35 + + +# ── Direct coercion ───────────────────────────────────────────────── + + +@pytest.mark.parametrize( + "raw,expected", + [ + ("true", True), + ("True", True), + ("TRUE", True), + ("false", False), + ("False", False), + ("FALSE", False), + ], +) +def test_boolean_accepts_case_insensitive(raw, expected): + value, used_fallback = _coerce_arg_value(raw, {"type": "boolean"}) + assert value is expected + assert used_fallback is False + + +@pytest.mark.parametrize("declared", ["boolean", "bool", "binary"]) +def test_boolean_type_aliases_all_accepted(declared): + value, used_fallback = _coerce_arg_value("True", {"type": declared}) + assert value is True + assert used_fallback is False + + +def test_boolean_garbage_degenerates_to_false_with_invalid_flag(): + """Match vLLM: non-true/false values silently become ``False`` — but we + still flag the fallback so verifier / RL-loss code can see the drift.""" + value, used_fallback = _coerce_arg_value("yes", {"type": "boolean"}) + assert value is False + assert used_fallback is True + + +@pytest.mark.parametrize("declared", ["boolean", "integer", "object", "number"]) +def test_null_literal_is_case_insensitive_for_non_string_types(declared): + for raw in ("null", "Null", "NULL"): + value, used_fallback = _coerce_arg_value(raw, {"type": declared}) + assert value is None + assert used_fallback is False + + +def test_null_short_circuit_applies_even_for_string_type(): + """vLLM ``qwen3coder_tool_parser.py:125`` null-coerces ``"null"`` + *before* the type check, so a string-typed wire value of ``null`` + collapses to Python ``None``. The XML wire format can't tell the + string ``"null"`` from JSON null; we accept the lossy round-trip + in exchange for byte parity with vLLM.""" + for raw in ("null", "Null", "NULL"): + value, used_fallback = _coerce_arg_value(raw, {"type": "string"}) + assert value is None + assert used_fallback is False + + +@pytest.mark.parametrize( + "declared,raw,expected", + [ + ("integer", "42", 42), + ("int", "-7", -7), + ("uint", "0", 0), + ("long", "9999999999", 9999999999), + ("short", "12", 12), + ("unsigned", "5", 5), + ], +) +def test_int_family_coerces(declared, raw, expected): + value, used_fallback = _coerce_arg_value(raw, {"type": declared}) + assert value == expected + assert isinstance(value, int) + assert used_fallback is False + + +def test_int_failure_keeps_raw_and_flags_fallback(): + value, used_fallback = _coerce_arg_value("abc", {"type": "integer"}) + assert value == "abc" + assert used_fallback is True + + +@pytest.mark.parametrize( + "raw,expected,exact_type", + [ + ("3.14", 3.14, float), + ("1e3", 1000.0, float), # source has `e` → stays float (SGLang rule) + ("1.0", 1.0, float), # source has `.` → stays float + ("-2.5", -2.5, float), + ("7", 7, int), # no `.`/`e` and whole → demoted to int + ], +) +def test_float_family_coerces(raw, expected, exact_type): + value, used_fallback = _coerce_arg_value(raw, {"type": "number"}) + assert value == expected + assert type(value) is exact_type + assert used_fallback is False + + +def test_float_failure_keeps_raw_and_flags_fallback(): + value, used_fallback = _coerce_arg_value("not-a-number", {"type": "float"}) + assert value == "not-a-number" + assert used_fallback is True + + +def test_object_via_json_loads(): + value, used_fallback = _coerce_arg_value('{"k": 1}', {"type": "object"}) + assert value == {"k": 1} + assert used_fallback is False + + +def test_object_via_ast_literal_eval_fallback(): + """Python-literal dicts (single quotes) should still parse for object + params — this is the vLLM ``ast.literal_eval`` fallback path.""" + value, used_fallback = _coerce_arg_value("{'k': 1}", {"type": "object"}) + assert value == {"k": 1} + assert used_fallback is False + + +def test_array_via_json_loads(): + value, used_fallback = _coerce_arg_value("[1, 2, 3]", {"type": "array"}) + assert value == [1, 2, 3] + assert used_fallback is False + + +def test_object_total_failure_flags_invalid(): + value, used_fallback = _coerce_arg_value( + "this is not a json object", {"type": "object"} + ) + assert value == "this is not a json object" + assert used_fallback is True + + +def test_anyof_treated_as_object(): + """vLLM-specific: a schema with ``anyOf`` and no top-level ``type`` + routes through the object branch so JSON-shaped values parse.""" + schema = {"anyOf": [{"type": "integer"}, {"type": "string"}]} + value, used_fallback = _coerce_arg_value("42", schema) + assert value == 42 + assert used_fallback is False + + +@pytest.mark.parametrize( + "declared", ["string", "str", "text", "varchar", "char", "enum"] +) +def test_string_family_returns_verbatim(declared): + value, used_fallback = _coerce_arg_value("True", {"type": declared}) + assert value == "True" + assert used_fallback is False + + +def test_string_with_list_form_type(): + value, used_fallback = _coerce_arg_value("True", {"type": ["string"]}) + assert value == "True" + assert used_fallback is False + + +def test_no_schema_returns_verbatim_string(): + """vLLM ``qwen3coder_tool_parser.py:128-137``: when the param is + not in the tool schema (or no tools were passed at all), the parser + returns the raw text verbatim — it does **not** try ``json.loads``. + The case-insensitive ``null`` short-circuit at the top of the ladder + still applies (so untyped ``"null"`` becomes Python ``None``).""" + value, used_fallback = _coerce_arg_value("42", None) + assert value == "42" + assert used_fallback is False + + value, used_fallback = _coerce_arg_value("True", None) + assert value == "True" + assert used_fallback is False + + value, used_fallback = _coerce_arg_value("hello", None) + assert value == "hello" + assert used_fallback is False + + +def test_no_schema_null_short_circuit(): + """vLLM null-coerces before the schema-missing return, so an untyped + ``"null"`` still becomes Python ``None``.""" + for raw in ("null", "Null", "NULL"): + value, used_fallback = _coerce_arg_value(raw, None) + assert value is None + assert used_fallback is False + + +def test_unknown_type_falls_back_to_literal_eval(): + """A declared type we don't recognise (e.g. ``"date"``) lands in the + catch-all that tries ``ast.literal_eval``, mirroring vLLM's else + branch — strings without quotes that fail to eval stay as strings.""" + value, used_fallback = _coerce_arg_value("2024-01-01", {"type": "date"}) + assert value == "2024-01-01" + assert used_fallback is True + + # Bare Python literal still parses through the catch-all + value, used_fallback = _coerce_arg_value("[1, 2]", {"type": "date"}) + assert value == [1, 2] + assert used_fallback is False + + +# ── Integration: parse_qwen35 end-to-end (the bug from issue #47) ─── + + +_TOOLS_FROM_ISSUE = [ + { + "type": "function", + "function": { + "name": "filesystem_server_get_directory_tree", + "description": "List a directory tree.", + "parameters": { + "type": "object", + "properties": { + "path": {"type": "string"}, + "max_depth": {"type": "integer"}, + "include_files": {"type": "boolean"}, + "show_size": {"type": "boolean"}, + }, + }, + }, + } +] + + +def _parse(tok, completion: str, *, tools=None): + ids = tok.encode(completion, add_special_tokens=False) + return parse_qwen35( + tok, + list(ids), + stop_ids={tok.eos_token_id} if tok.eos_token_id is not None else set(), + think_id=tok.convert_tokens_to_ids(""), + think_end_id=tok.convert_tokens_to_ids(""), + tool_call_id=tok.convert_tokens_to_ids(""), + tool_call_end_id=tok.convert_tokens_to_ids(""), + tools=tools, + ) + + +def test_qwen35_capital_true_for_boolean_is_ok_status(): + """Regression for issue #47: ``True`` + used to be flagged ``INVALID_JSON`` because ``json.loads("True")`` + fails. SGLang and vLLM both accept it via case-folded comparison; + renderers now does too.""" + tok = load_tokenizer("Qwen/Qwen3.5-9B") + parsed = _parse( + tok, + "\n" + "\n" + "/\n" + "2\n" + "True\n" + "True\n" + "\n" + "", + tools=_TOOLS_FROM_ISSUE, + ) + assert len(parsed.tool_calls) == 1 + tc = parsed.tool_calls[0] + assert tc.status == ToolCallParseStatus.OK + assert tc.name == "filesystem_server_get_directory_tree" + assert tc.arguments == { + "path": "/", + "max_depth": 2, + "include_files": True, + "show_size": True, + } + + +# ── Structural tolerance: parity with vLLM's three regex patterns ──── + + +def test_qwen35_tolerates_missing_closing_parameter(): + """vLLM ``tool_call_parameter_regex`` uses positive lookaheads so a + missing ```` is recovered from the next ```` boundary instead of dropping the value.""" + tok = load_tokenizer("Qwen/Qwen3.5-9B") + parsed = _parse( + tok, + "\n" + "\n" + "/\n" # missing + "2\n" + "\n" + "", + tools=_TOOLS_FROM_ISSUE, + ) + assert len(parsed.tool_calls) == 1 + tc = parsed.tool_calls[0] + assert tc.status == ToolCallParseStatus.OK + assert tc.name == "filesystem_server_get_directory_tree" + assert tc.arguments == {"path": "/", "max_depth": 2} + + +def test_qwen35_tolerates_unclosed_function(): + """vLLM ``tool_call_function_regex`` second alternation matches an + unclosed ``\n" + "\n" + "/\n", # no , no + tools=_TOOLS_FROM_ISSUE, + ) + assert len(parsed.tool_calls) == 1 + tc = parsed.tool_calls[0] + assert tc.status == ToolCallParseStatus.UNCLOSED_BLOCK + assert tc.name == "filesystem_server_get_directory_tree" + assert tc.arguments == {"path": "/"} + + +def test_qwen35_backoff_no_tool_call_markers(): + """vLLM ``qwen3coder_tool_parser.py:269-271``: when the output + contains no ```` markers, treat the whole completion as + a single tool-call region and scan for ```` directly. + Content text is the raw slice up to the first ``\n" + "/etc\n" + "1\n" + "", + tools=_TOOLS_FROM_ISSUE, + ) + assert len(parsed.tool_calls) == 1 + tc = parsed.tool_calls[0] + assert tc.status == ToolCallParseStatus.OK + assert tc.name == "filesystem_server_get_directory_tree" + assert tc.arguments == {"path": "/etc", "max_depth": 1} + # vLLM-parity: content = text up to first ```` and no ````; the commented-out ``.rstrip()`` confirms + intent. We mirror that — surrounding whitespace round-trips.""" + tok = load_tokenizer("Qwen/Qwen3.5-9B") + parsed = _parse( + tok, + "let me check\n\n" # trailing whitespace before + "\n" + "\n" + "/\n" + "\n" + "", + tools=_TOOLS_FROM_ISSUE, + ) + assert parsed.content == "let me check\n\n" + assert len(parsed.tool_calls) == 1 + assert parsed.tool_calls[0].status == ToolCallParseStatus.OK + + +def test_qwen35_tool_call_id_matches_vllm_format(): + """vLLM's ``ToolCall`` default factory mints ids of the form + ``chatcmpl-tool-<16 hex>``. Each surfaced call gets a fresh id.""" + import re as _re + + tok = load_tokenizer("Qwen/Qwen3.5-9B") + parsed = _parse( + tok, + "\n" + "\n" + "/\n" + "\n" + "\n" + "\n" + "\n" + "/etc\n" + "\n" + "", + tools=_TOOLS_FROM_ISSUE, + ) + assert len(parsed.tool_calls) == 2 + pattern = _re.compile(r"^chatcmpl-tool-[0-9a-f]{16}$") + ids = [tc.id for tc in parsed.tool_calls] + for tcid in ids: + assert tcid is not None and pattern.match(tcid), tcid + # Two surfaced calls must get distinct ids. + assert ids[0] != ids[1] + + +def test_qwen35_exception_fallback_returns_full_text(): + """vLLM ``qwen3coder_tool_parser.py:327-331`` catch-all returns + ``tools_called=False, tool_calls=[], content=model_output`` on any + unhandled extraction error. We mirror that by wrapping the + tool-call extraction; reasoning content extracted earlier is kept. + """ + from unittest.mock import patch + + tok = load_tokenizer("Qwen/Qwen3.5-9B") + completion = ( + "\n" + "\n" + "/\n" + "\n" + "" + ) + ids = tok.encode(completion, add_special_tokens=False) + + with patch( + "renderers.parsing._parse_xml_tool_calls", + side_effect=RuntimeError("boom"), + ): + parsed = parse_qwen35( + tok, + list(ids), + stop_ids={tok.eos_token_id} if tok.eos_token_id is not None else set(), + think_id=tok.convert_tokens_to_ids(""), + think_end_id=tok.convert_tokens_to_ids(""), + tool_call_id=tok.convert_tokens_to_ids(""), + tool_call_end_id=tok.convert_tokens_to_ids(""), + tools=_TOOLS_FROM_ISSUE, + ) + assert parsed.tool_calls == [] + # content is the full decoded post-think text, surfaced verbatim. + assert "" in parsed.content + assert "" in parsed.content + + +def test_qwen35_parameter_value_preserves_internal_whitespace(): + """vLLM strips exactly one leading and one trailing ``\\n`` from the + parameter body (``qwen3coder_tool_parser.py:246-250``); inner + indentation must round-trip verbatim.""" + tok = load_tokenizer("Qwen/Qwen3.5-9B") + parsed = _parse( + tok, + "\n" + "\n" + "\n /etc \n\n" + "\n" + "", + tools=_TOOLS_FROM_ISSUE, + ) + assert len(parsed.tool_calls) == 1 + tc = parsed.tool_calls[0] + assert tc.status == ToolCallParseStatus.OK + # One leading + one trailing \n stripped — interior spaces preserved. + assert tc.arguments == {"path": " /etc "} diff --git a/tests/test_tool_arg_type_preservation.py b/tests/test_tool_arg_type_preservation.py index 61dbb9b..e036abd 100644 --- a/tests/test_tool_arg_type_preservation.py +++ b/tests/test_tool_arg_type_preservation.py @@ -67,9 +67,21 @@ def renderer(model, renderer_name): ] +# Renderers whose wire format quotes strings (Hermes JSON, section +# JSON). Those parsers preserve the string ``"null"`` verbatim because +# the wire bytes are unambiguous. XML-style renderers can't tell the +# string ``null`` from a JSON null on the wire, so they null-coerce — +# matching vLLM's ``Qwen3CoderToolParser._convert_param_value:125`` +# which runs the null short-circuit before any type check. +_JSON_FORMAT_MODELS = {"Qwen/Qwen3-8B", "moonshotai/Kimi-K2-Instruct"} + + # Each case: a single ``x`` argument whose value is a STRING that # happens to be valid JSON of another type. With a schema declaring -# ``x: string``, the parser must return the string verbatim. +# ``x: string``, the parser must return the string verbatim — except +# for the ``string-null`` case on XML renderers, where vLLM's +# ``null`` short-circuit runs before the type check and we follow +# suit for byte parity (see test below for the explicit assertion). JSON_LOOKING_STRING_ARGS = [ pytest.param({"x": "true"}, id="string-bool"), pytest.param({"x": "42"}, id="string-int"), @@ -116,9 +128,16 @@ def _extract_assistant_tokens(renderer, prompt, assistant_msg, *, tools=None): @pytest.mark.parametrize("args", JSON_LOOKING_STRING_ARGS) -def test_string_arg_preserves_type(model, renderer_name, renderer, args): +def test_string_arg_preserves_type(model, renderer_name, renderer, args, request): """Tool-call args of declared type ``str`` must round-trip as ``str``, - not get re-parsed as bool/int/null/list/dict by the parser.""" + not get re-parsed as bool/int/null/list/dict by the parser. + + Exception: the ``string-null`` case on XML renderers collapses to + Python ``None`` to match vLLM's null short-circuit. The XML wire + format is genuinely ambiguous there (``null`` + is identical bytes whether the model meant the string ``"null"`` + or a JSON null), and vLLM resolves the ambiguity by null-coercing. + """ msg = { "role": "assistant", "content": "", @@ -134,6 +153,12 @@ def test_string_arg_preserves_type(model, renderer_name, renderer, args): assert parsed.tool_calls, f"{model}: parser returned no tool_calls" got = _normalize_args(parsed.tool_calls[0].arguments) - assert got == args, ( + + case_id = request.node.callspec.id.rsplit("-", 1)[-1] + if case_id == "null" and model not in _JSON_FORMAT_MODELS: + expected = {"x": None} + else: + expected = args + assert got == expected, ( f"{model}: tool-arg type drift — sent {args!r}, parser returned {got!r}" )