Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 17 additions & 11 deletions renderers/parsing.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,30 +65,36 @@ def _coerce_arg_value(
"""Coerce a raw ``<arg_value>`` body to its declared type.

Returns ``(value, used_json_fallback)``. The boolean is ``True`` only
when ``json.loads`` was attempted and raised, so the caller knows
whether to flag ``INVALID_JSON``. Returning a string verbatim
because the schema declared ``type: "string"`` is NOT a fallback.
when ``json.loads`` was attempted, raised, AND the schema doesn't
permit a string. Returning a string verbatim because the schema
permits strings is NOT a fallback.

Rule (matches vLLM / SGLang reference parsers):

- If the param's declared ``type`` is ``"string"`` (or single-element
``["string"]``), return ``text`` verbatim — never ``json.loads``.
- Anything else (no schema, non-string scalar, object, array, or
union types that include non-string): try ``json.loads`` and fall
back to raw ``text`` on parse failure.

Union types that include ``"string"`` alongside other types still
attempt ``json.loads`` first so an explicit integer / bool can
parse; the string branch only wins as the fallback.
- Otherwise try ``json.loads``. If that fails, return raw ``text``.
The ``used_json_fallback`` flag is ``True`` only when the schema
does NOT permit a string — i.e. the fallback is truly suspect.

Union types (``anyOf``/``oneOf``) that include ``"string"`` alongside
other types still attempt ``json.loads`` first so an explicit
integer / bool can parse; the string branch wins as fallback, and
landing there is expected — not a malformed-JSON signal.
"""
string_is_allowed = False
if param_schema is not None:
declared = param_schema.get("type")
if declared == "string" or declared == ["string"]:
return text, False
for branch in param_schema.get("anyOf") or param_schema.get("oneOf") or []:
if isinstance(branch, dict) and branch.get("type") == "string":
string_is_allowed = True
break
try:
return json.loads(text), False
except (json.JSONDecodeError, ValueError):
return text, True
return text, not string_is_allowed


def _find(ids: list[int], target: int, start: int = 0) -> int:
Expand Down
72 changes: 72 additions & 0 deletions tests/test_tool_arg_type_preservation.py
Original file line number Diff line number Diff line change
Expand Up @@ -137,3 +137,75 @@ def test_string_arg_preserves_type(model, renderer_name, renderer, args):
assert got == args, (
f"{model}: tool-arg type drift — sent {args!r}, parser returned {got!r}"
)


# Schemas where ``string`` is one branch of a union (``anyOf`` / ``oneOf``).
# These are common in practice — e.g. ``form_input.value: str | bool`` in
# Pydantic serialises to ``{"anyOf": [{"type": "string"}, {"type": "boolean"}]}``.
# Without the union-aware check, the XML parser's ``json.loads`` falls back
# to raw text for bare strings, but flags the call ``INVALID_JSON`` because
# no top-level ``type`` key declared a string — silently dropping otherwise
# valid tool calls in the renderer client.
UNION_WITH_STRING_SCHEMAS = [
pytest.param(
{"anyOf": [{"type": "string"}, {"type": "boolean"}]},
id="anyOf-string-boolean",
),
pytest.param(
{"anyOf": [{"type": "string"}, {"type": "null"}]},
id="anyOf-string-null",
),
pytest.param(
{"oneOf": [{"type": "string"}, {"type": "integer"}]},
id="oneOf-string-integer",
),
]


@pytest.mark.parametrize("param_schema", UNION_WITH_STRING_SCHEMAS)
def test_union_with_string_emits_ok_status(
model, renderer_name, renderer, param_schema
):
"""Union schemas containing ``string`` must yield ``status=OK`` when
the model emits a bare string. Pre-fix, ``_coerce_arg_value`` flagged
this as ``INVALID_JSON`` because the top-level ``type`` key was
absent (the string branch was under ``anyOf`` / ``oneOf``)."""
from renderers.base import ToolCallParseStatus

tools = [
{
"type": "function",
"function": {
"name": "f",
"description": "Test tool with one union-typed parameter.",
"parameters": {
"type": "object",
"properties": {"x": param_schema},
"required": ["x"],
},
},
}
]
args = {"x": "abc"}
msg = {
"role": "assistant",
"content": "",
"tool_calls": [
{
"id": "functions.f:0",
"function": {"name": "f", "arguments": args},
}
],
}
completion_ids = _extract_assistant_tokens(renderer, PROMPT, msg, tools=tools)
parsed = renderer.parse_response(completion_ids, tools=tools)

assert parsed.tool_calls, f"{model}: parser returned no tool_calls"
tc = parsed.tool_calls[0]
assert tc.status == ToolCallParseStatus.OK, (
f"{model}: union-with-string schema flagged {tc.status} on bare string"
)
got = _normalize_args(tc.arguments)
assert got == args, (
f"{model}: tool-arg drift — sent {args!r}, parser returned {got!r}"
)
Loading