Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -110,14 +110,15 @@ Each break fragments a rollout into multiple training samples — every fragment

## Compaction overrides

`create_renderer` and `create_renderer_pool` accept two constructor-only flags:
`create_renderer` and `create_renderer_pool` accept constructor-time template controls:

```python
chat_template_kwargs: dict | None = None
preserve_all_thinking: bool = False
preserve_thinking_between_tool_calls: bool = False
```

Defaults preserve byte-identity with the model's chat template. Flipping a flag at construction restores `reasoning_content` the template would otherwise drop:
`chat_template_kwargs` binds template toggles that must be fixed for the renderer instance, such as `enable_thinking` and `reasoning_effort`. Defaults preserve byte-identity with the model's chat template. Flipping a preserve flag at construction restores `reasoning_content` the template would otherwise drop:

- `preserve_all_thinking=True` — every past assistant's reasoning is kept.
- `preserve_thinking_between_tool_calls=True` — reasoning is kept on assistants in the in-flight tool cycle (no-op for current renderers; reserved for future templates that drop it).
Expand Down
74 changes: 70 additions & 4 deletions renderers/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -870,6 +870,14 @@ def bridge_to_next_turn(self, *args: Any, **kwargs: Any) -> "RenderedTokens | No


RENDERER_REGISTRY: dict[str, type] = {}
# Keyword names that ``create_renderer`` already takes as explicit
# parameters. ``_reject_renderer_constructor_kwargs`` raises ``ValueError``
# when any of them appears as a key of ``chat_template_kwargs``.
_RENDERER_CONSTRUCTOR_KWARGS = frozenset(
{
"tool_parser",
"reasoning_parser",
"preserve_all_thinking",
"preserve_thinking_between_tool_calls",
}
)

# Exact canonical HF model names → renderer. We do NOT use prefix
# matching because models with the same architecture may ship different
Expand Down Expand Up @@ -1171,6 +1179,7 @@ def create_renderer_pool(
*,
tool_parser: str | None = None,
reasoning_parser: str | None = None,
chat_template_kwargs: dict[str, Any] | None = None,
preserve_all_thinking: bool = False,
preserve_thinking_between_tool_calls: bool = False,
) -> RendererPool:
Expand All @@ -1183,6 +1192,11 @@ def create_renderer_pool(
``tool_parser`` and ``reasoning_parser`` are forwarded to
``create_renderer`` when the pool falls back to ``DefaultRenderer``.

``chat_template_kwargs`` are forwarded to each renderer constructor.
Hand-coded renderers accept only the kwargs they explicitly model
(for example ``enable_thinking`` or ``reasoning_effort``); the default
renderer forwards them to ``tokenizer.apply_chat_template``.

``preserve_all_thinking`` and ``preserve_thinking_between_tool_calls``
are forwarded to each pooled renderer's constructor — every slot in
the pool shares one configuration. To run with a different
Expand All @@ -1200,6 +1214,7 @@ def factory() -> Renderer:
renderer=renderer,
tool_parser=tool_parser,
reasoning_parser=reasoning_parser,
chat_template_kwargs=chat_template_kwargs,
preserve_all_thinking=preserve_all_thinking,
preserve_thinking_between_tool_calls=preserve_thinking_between_tool_calls,
)
Expand All @@ -1213,6 +1228,7 @@ def create_renderer(
*,
tool_parser: str | None = None,
reasoning_parser: str | None = None,
chat_template_kwargs: dict[str, Any] | None = None,
preserve_all_thinking: bool = False,
preserve_thinking_between_tool_calls: bool = False,
) -> Renderer:
Expand All @@ -1229,6 +1245,11 @@ def create_renderer(
have their own parsing wired in.
reasoning_parser: Name of a reasoning parser registered in
``renderers.parsers``. Only consumed by DefaultRenderer.
chat_template_kwargs: Template-control kwargs bound to the renderer.
Hand-coded renderers accept only the kwargs they explicitly
model (for example ``enable_thinking`` or
``reasoning_effort``); DefaultRenderer forwards all kwargs
to ``tokenizer.apply_chat_template``.
preserve_all_thinking: Forwarded to the renderer's constructor.
When ``True``, the instance restores ``reasoning_content``
the chat template would otherwise drop on historical
Expand All @@ -1254,6 +1275,8 @@ def create_renderer(
"preserve_all_thinking": preserve_all_thinking,
"preserve_thinking_between_tool_calls": preserve_thinking_between_tool_calls,
}
template_kwargs = dict(chat_template_kwargs or {})
_reject_renderer_constructor_kwargs(template_kwargs)

if renderer != "auto":
cls = RENDERER_REGISTRY.get(renderer)
Expand All @@ -1262,15 +1285,21 @@ def create_renderer(
f"Unknown renderer {renderer!r}. Available: {', '.join(sorted(RENDERER_REGISTRY))}"
)
if renderer == "default":
return cls(tokenizer, **default_kwargs, **preserve_kwargs)
return cls(
tokenizer, **default_kwargs, **template_kwargs, **preserve_kwargs
)
if default_kwargs:
logger.info(
"tool_parser / reasoning_parser are only consumed by "
"DefaultRenderer; ignoring for renderer=%r which has "
"built-in behavior.",
renderer,
)
return cls(tokenizer, **preserve_kwargs)
return cls(
tokenizer,
**_model_renderer_chat_template_kwargs(renderer, cls, template_kwargs),
**preserve_kwargs,
)

# Auto-detect from model name via exact match on the canonical HF id.
# Fine-tunes and renamed checkpoints miss on purpose — their chat
Expand All @@ -1280,7 +1309,15 @@ def create_renderer(
model_name = getattr(tokenizer, "name_or_path", "")
renderer_name = MODEL_RENDERER_MAP.get(model_name)
if renderer_name is not None:
return RENDERER_REGISTRY[renderer_name](tokenizer, **preserve_kwargs)
return RENDERER_REGISTRY[renderer_name](
tokenizer,
**_model_renderer_chat_template_kwargs(
renderer_name,
RENDERER_REGISTRY[renderer_name],
template_kwargs,
),
**preserve_kwargs,
)

# No match. For VLMs this must be fatal: DefaultRenderer only knows
# ``apply_chat_template`` + text tokens, so it would silently drop
Expand All @@ -1307,7 +1344,36 @@ def create_renderer(
"reasoning_parser=<name> to enable structured output parsing.",
model_name or "<unnamed tokenizer>",
)
return RENDERER_REGISTRY["default"](tokenizer, **default_kwargs, **preserve_kwargs)
return RENDERER_REGISTRY["default"](
tokenizer, **default_kwargs, **template_kwargs, **preserve_kwargs
)
Comment thread
cursor[bot] marked this conversation as resolved.


def _reject_renderer_constructor_kwargs(
    chat_template_kwargs: dict[str, Any],
) -> None:
    """Refuse template kwargs whose names are reserved for the constructor.

    Args:
        chat_template_kwargs: Template-control kwargs supplied by the caller.

    Raises:
        ValueError: If any key is also a member of
            ``_RENDERER_CONSTRUCTOR_KWARGS``. The message lists the
            offending names in sorted order, comma-separated.
    """
    clashes = _RENDERER_CONSTRUCTOR_KWARGS.intersection(chat_template_kwargs)
    if not clashes:
        return

    names = ", ".join(sorted(clashes))
    raise ValueError(
        "chat_template_kwargs cannot contain renderer constructor kwargs: "
        f"{names}"
    )


def _model_renderer_chat_template_kwargs(
    renderer: str, renderer_cls: type, chat_template_kwargs: dict[str, Any]
) -> dict[str, Any]:
    """Check template kwargs against what a renderer class declares it accepts.

    Args:
        renderer: Renderer name; used only in the error message.
        renderer_cls: Renderer class. Its optional ``CHAT_TEMPLATE_KWARGS``
            attribute names the accepted kwargs; when the attribute is
            absent, no kwargs are accepted.
        chat_template_kwargs: Template-control kwargs supplied by the caller.

    Returns:
        A new dict holding the same items as ``chat_template_kwargs``, or an
        empty dict when ``chat_template_kwargs`` is empty.

    Raises:
        ValueError: If any key is not declared by ``renderer_cls``. The
            message lists the unsupported names in sorted order.
    """
    if chat_template_kwargs:
        supported = frozenset(getattr(renderer_cls, "CHAT_TEMPLATE_KWARGS", ()))
        rejected = sorted(
            key for key in chat_template_kwargs if key not in supported
        )
        if rejected:
            raise ValueError(
                f"renderer={renderer!r} does not support chat_template_kwargs: "
                f"{', '.join(rejected)}"
            )
        # Hand back a copy so the caller's mapping is never shared.
        return {**chat_template_kwargs}
    return {}


# ---------------------------------------------------------------------------
Expand Down
2 changes: 2 additions & 0 deletions renderers/deepseek_v3.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,8 @@ def _ds_token(name: str) -> str:
class DeepSeekV3Renderer:
"""Deterministic message → token renderer for DeepSeek V3 models."""

CHAT_TEMPLATE_KWARGS = frozenset({"enable_thinking"})

def __init__(
self,
tokenizer: PreTrainedTokenizer,
Expand Down
2 changes: 2 additions & 0 deletions renderers/glm45.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,8 @@
class GLM45Renderer:
"""Deterministic message → token renderer for GLM-4.5 Air models."""

CHAT_TEMPLATE_KWARGS = frozenset({"enable_thinking"})

def __init__(
self,
tokenizer: PreTrainedTokenizer,
Expand Down
2 changes: 2 additions & 0 deletions renderers/glm5.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,8 @@
class GLM5Renderer:
"""Deterministic message → token renderer for GLM-5 models."""

CHAT_TEMPLATE_KWARGS = frozenset({"enable_thinking"})

# GLM-5.1 flips this on: even when the most-recent assistant has no
# reasoning content, the template wraps it with ``<think></think>``
# instead of just emitting ``</think>`` as a separator. Subclassed in
Expand Down
2 changes: 2 additions & 0 deletions renderers/gpt_oss.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,8 @@ def _arguments_to_str(arguments: Any) -> str:
class GptOssRenderer:
"""Deterministic message → token renderer for OpenAI gpt-oss (harmony)."""

CHAT_TEMPLATE_KWARGS = frozenset({"reasoning_effort"})

def __init__(
self,
tokenizer: PreTrainedTokenizer,
Expand Down
2 changes: 2 additions & 0 deletions renderers/kimi_k25.py
Original file line number Diff line number Diff line change
Expand Up @@ -570,6 +570,8 @@ class KimiK25Renderer:
The tokenizer should be ``moonshotai/Kimi-K2-Instruct`` (same as K2).
"""

CHAT_TEMPLATE_KWARGS = frozenset({"enable_thinking"})

def __init__(
self,
tokenizer: PreTrainedTokenizer,
Expand Down
2 changes: 2 additions & 0 deletions renderers/laguna_xs2.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,8 @@


class LagunaXS2Renderer:
CHAT_TEMPLATE_KWARGS = frozenset({"enable_thinking"})

def __init__(
self,
tokenizer: PreTrainedTokenizer,
Expand Down
2 changes: 2 additions & 0 deletions renderers/nemotron3.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,8 @@ def _render_extra_keys(obj: dict[str, Any], handled_keys: set[str]) -> list[str]
class Nemotron3Renderer:
"""Deterministic message → token renderer for Nemotron 3 models."""

CHAT_TEMPLATE_KWARGS = frozenset({"enable_thinking"})

def __init__(
self,
tokenizer: PreTrainedTokenizer,
Expand Down
2 changes: 2 additions & 0 deletions renderers/qwen3.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,8 @@
class Qwen3Renderer:
"""Deterministic message → token renderer for Qwen3 models."""

CHAT_TEMPLATE_KWARGS = frozenset({"enable_thinking"})

def __init__(
self,
tokenizer: PreTrainedTokenizer,
Expand Down
2 changes: 2 additions & 0 deletions renderers/qwen35.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,8 @@ def _detect_enable_thinking_default(tokenizer: PreTrainedTokenizer) -> bool:
class Qwen35Renderer:
"""Deterministic message → token renderer for Qwen3.5 models."""

CHAT_TEMPLATE_KWARGS = frozenset({"enable_thinking"})

def __init__(
self,
tokenizer: PreTrainedTokenizer,
Expand Down
71 changes: 71 additions & 0 deletions tests/test_chat_template_kwargs.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
from types import SimpleNamespace

import pytest

from renderers import base


class _FakeRenderer:
    """Minimal stand-in renderer that records its constructor arguments.

    It declares ``enable_thinking`` as its only supported chat-template
    kwarg and stores every argument on the instance unchanged, so tests can
    assert on exactly what the factory forwarded.
    """

    CHAT_TEMPLATE_KWARGS = frozenset(("enable_thinking",))

    def __init__(
        self,
        tokenizer,
        *,
        enable_thinking: bool = True,
        preserve_all_thinking: bool = False,
        preserve_thinking_between_tool_calls: bool = False,
    ):
        # Positional dependency first, then the keyword-only switches.
        self.tokenizer = tokenizer
        self.enable_thinking = enable_thinking
        self.preserve_all_thinking = preserve_all_thinking
        self.preserve_thinking_between_tool_calls = (
            preserve_thinking_between_tool_calls
        )


def _register_fake_renderer(monkeypatch) -> None:
    """Expose ``_FakeRenderer`` in the renderer registry as ``"fake-qwen"``.

    ``base._populate_registry()`` is called before the entry is added; the
    entry itself goes in through ``monkeypatch.setitem``.
    """
    base._populate_registry()
    registry = base.RENDERER_REGISTRY
    monkeypatch.setitem(registry, "fake-qwen", _FakeRenderer)


def test_create_renderer_forwards_model_chat_template_kwargs(monkeypatch):
    """A supported template kwarg reaches an explicitly named renderer."""
    _register_fake_renderer(monkeypatch)
    tokenizer = SimpleNamespace(name_or_path="unused")
    template_kwargs = {"enable_thinking": False}

    built = base.create_renderer(
        tokenizer,
        renderer="fake-qwen",
        chat_template_kwargs=template_kwargs,
    )

    assert built.enable_thinking is False


def test_create_renderer_rejects_unsupported_model_chat_template_kwargs(monkeypatch):
    """A kwarg the renderer does not declare raises ``ValueError`` naming it."""
    _register_fake_renderer(monkeypatch)
    tokenizer = SimpleNamespace(name_or_path="unused")
    undeclared = {"reasoning_effort": "high"}

    with pytest.raises(ValueError, match="reasoning_effort"):
        base.create_renderer(
            tokenizer,
            renderer="fake-qwen",
            chat_template_kwargs=undeclared,
        )


def test_create_renderer_rejects_constructor_kwargs_in_chat_template_kwargs():
    """A constructor kwarg passed as a template kwarg raises ``ValueError``."""
    tokenizer = SimpleNamespace(name_or_path="unused")
    reserved = {"preserve_all_thinking": True}

    with pytest.raises(ValueError, match="preserve_all_thinking"):
        base.create_renderer(
            tokenizer,
            renderer="default",
            chat_template_kwargs=reserved,
        )
Comment thread
cursor[bot] marked this conversation as resolved.


def test_create_renderer_auto_forwards_model_chat_template_kwargs(monkeypatch):
    """Template kwargs reach a renderer chosen from the tokenizer's model name.

    No ``renderer`` argument is passed; ``"fake/model"`` is mapped to
    ``"fake-qwen"`` in ``MODEL_RENDERER_MAP`` for the duration of the test.
    """
    _register_fake_renderer(monkeypatch)
    monkeypatch.setitem(base.MODEL_RENDERER_MAP, "fake/model", "fake-qwen")
    tokenizer = SimpleNamespace(name_or_path="fake/model")

    built = base.create_renderer(
        tokenizer,
        chat_template_kwargs={"enable_thinking": False},
    )

    assert built.enable_thinking is False
Loading