diff --git a/README.md b/README.md index 51e4d19..2336c3e 100644 --- a/README.md +++ b/README.md @@ -110,14 +110,15 @@ Each break fragments a rollout into multiple training samples — every fragment ## Compaction overrides -`create_renderer` and `create_renderer_pool` accept two constructor-only flags: +`create_renderer` and `create_renderer_pool` accept constructor-time template controls: ```python +chat_template_kwargs: dict | None = None preserve_all_thinking: bool = False preserve_thinking_between_tool_calls: bool = False ``` -Defaults preserve byte-identity with the model's chat template. Flipping a flag at construction restores `reasoning_content` the template would otherwise drop: +`chat_template_kwargs` binds template toggles that must be fixed for the renderer instance, such as `enable_thinking` and `reasoning_effort`. Defaults preserve byte-identity with the model's chat template. Flipping a preserve flag at construction restores `reasoning_content` the template would otherwise drop: - `preserve_all_thinking=True` — every past assistant's reasoning is kept. - `preserve_thinking_between_tool_calls=True` — reasoning is kept on assistants in the in-flight tool cycle (no-op for current renderers; reserved for future templates that drop it). diff --git a/renderers/base.py b/renderers/base.py index b861872..e8f4b3c 100644 --- a/renderers/base.py +++ b/renderers/base.py @@ -870,6 +870,14 @@ def bridge_to_next_turn(self, *args: Any, **kwargs: Any) -> "RenderedTokens | No RENDERER_REGISTRY: dict[str, type] = {} +_RENDERER_CONSTRUCTOR_KWARGS = frozenset( + { + "tool_parser", + "reasoning_parser", + "preserve_all_thinking", + "preserve_thinking_between_tool_calls", + } +) # Exact canonical HF model names → renderer. We do NOT use prefix # matching because models with the same architecture may ship different @@ -1171,6 +1179,7 @@ def create_renderer_pool( *, tool_parser: str | None = None, reasoning_parser: str | None = None, + chat_template_kwargs: dict[str, Any] | None = None, preserve_all_thinking: bool = False, preserve_thinking_between_tool_calls: bool = False, ) -> RendererPool: @@ -1183,6 +1192,11 @@ def create_renderer_pool( ``tool_parser`` and ``reasoning_parser`` are forwarded to ``create_renderer`` when the pool falls back to ``DefaultRenderer``. + ``chat_template_kwargs`` are forwarded to each renderer constructor. + Hand-coded renderers accept only the kwargs they explicitly model + (for example ``enable_thinking`` or ``reasoning_effort``); the default + renderer forwards them to ``tokenizer.apply_chat_template``. + ``preserve_all_thinking`` and ``preserve_thinking_between_tool_calls`` are forwarded to each pooled renderer's constructor — every slot in the pool shares one configuration. To run with a different @@ -1200,6 +1214,7 @@ def factory() -> Renderer: renderer=renderer, tool_parser=tool_parser, reasoning_parser=reasoning_parser, + chat_template_kwargs=chat_template_kwargs, preserve_all_thinking=preserve_all_thinking, preserve_thinking_between_tool_calls=preserve_thinking_between_tool_calls, ) @@ -1213,6 +1228,7 @@ def create_renderer( *, tool_parser: str | None = None, reasoning_parser: str | None = None, + chat_template_kwargs: dict[str, Any] | None = None, preserve_all_thinking: bool = False, preserve_thinking_between_tool_calls: bool = False, ) -> Renderer: @@ -1229,6 +1245,11 @@ def create_renderer( have their own parsing wired in. reasoning_parser: Name of a reasoning parser registered in ``renderers.parsers``. Only consumed by DefaultRenderer. + chat_template_kwargs: Template-control kwargs bound to the renderer. + Hand-coded renderers accept only the kwargs they explicitly + model (for example ``enable_thinking`` or + ``reasoning_effort``); DefaultRenderer forwards all kwargs + to ``tokenizer.apply_chat_template``. preserve_all_thinking: Forwarded to the renderer's constructor. When ``True``, the instance restores ``reasoning_content`` the chat template would otherwise drop on historical @@ -1254,6 +1275,8 @@ def create_renderer( "preserve_all_thinking": preserve_all_thinking, "preserve_thinking_between_tool_calls": preserve_thinking_between_tool_calls, } + template_kwargs = dict(chat_template_kwargs or {}) + _reject_renderer_constructor_kwargs(template_kwargs) if renderer != "auto": cls = RENDERER_REGISTRY.get(renderer) @@ -1262,7 +1285,9 @@ def create_renderer( f"Unknown renderer {renderer!r}. Available: {', '.join(sorted(RENDERER_REGISTRY))}" ) if renderer == "default": - return cls(tokenizer, **default_kwargs, **preserve_kwargs) + return cls( + tokenizer, **default_kwargs, **template_kwargs, **preserve_kwargs + ) if default_kwargs: logger.info( "tool_parser / reasoning_parser are only consumed by " @@ -1270,7 +1295,11 @@ def create_renderer( "built-in behavior.", renderer, ) - return cls(tokenizer, **preserve_kwargs) + return cls( + tokenizer, + **_model_renderer_chat_template_kwargs(renderer, cls, template_kwargs), + **preserve_kwargs, + ) # Auto-detect from model name via exact match on the canonical HF id. # Fine-tunes and renamed checkpoints miss on purpose — their chat @@ -1280,7 +1309,15 @@ def create_renderer( model_name = getattr(tokenizer, "name_or_path", "") renderer_name = MODEL_RENDERER_MAP.get(model_name) if renderer_name is not None: - return RENDERER_REGISTRY[renderer_name](tokenizer, **preserve_kwargs) + return RENDERER_REGISTRY[renderer_name]( + tokenizer, + **_model_renderer_chat_template_kwargs( + renderer_name, + RENDERER_REGISTRY[renderer_name], + template_kwargs, + ), + **preserve_kwargs, + ) # No match. For VLMs this must be fatal: DefaultRenderer only knows # ``apply_chat_template`` + text tokens, so it would silently drop @@ -1307,7 +1344,36 @@ def create_renderer( "reasoning_parser= to enable structured output parsing.", model_name or "", ) - return RENDERER_REGISTRY["default"](tokenizer, **default_kwargs, **preserve_kwargs) + return RENDERER_REGISTRY["default"]( + tokenizer, **default_kwargs, **template_kwargs, **preserve_kwargs + ) + + +def _reject_renderer_constructor_kwargs( + chat_template_kwargs: dict[str, Any], +) -> None: + reserved = sorted(set(chat_template_kwargs) & _RENDERER_CONSTRUCTOR_KWARGS) + if reserved: + raise ValueError( + "chat_template_kwargs cannot contain renderer constructor kwargs: " + f"{', '.join(reserved)}" + ) + + +def _model_renderer_chat_template_kwargs( + renderer: str, renderer_cls: type, chat_template_kwargs: dict[str, Any] +) -> dict[str, Any]: + if not chat_template_kwargs: + return {} + + allowed = set(getattr(renderer_cls, "CHAT_TEMPLATE_KWARGS", ())) + unsupported = sorted(set(chat_template_kwargs) - allowed) + if unsupported: + raise ValueError( + f"renderer={renderer!r} does not support chat_template_kwargs: " + f"{', '.join(unsupported)}" + ) + return dict(chat_template_kwargs) # --------------------------------------------------------------------------- diff --git a/renderers/deepseek_v3.py b/renderers/deepseek_v3.py index 507d81d..7531e69 100644 --- a/renderers/deepseek_v3.py +++ b/renderers/deepseek_v3.py @@ -41,6 +41,8 @@ def _ds_token(name: str) -> str: class DeepSeekV3Renderer: """Deterministic message → token renderer for DeepSeek V3 models.""" + CHAT_TEMPLATE_KWARGS = frozenset({"enable_thinking"}) + def __init__( self, tokenizer: PreTrainedTokenizer, diff --git a/renderers/glm45.py b/renderers/glm45.py index 206f366..109d546 100644 --- a/renderers/glm45.py +++ b/renderers/glm45.py @@ -50,6 +50,8 @@ class GLM45Renderer: """Deterministic message → token renderer for GLM-4.5 Air models.""" + CHAT_TEMPLATE_KWARGS = frozenset({"enable_thinking"}) + def __init__( self, tokenizer: PreTrainedTokenizer, diff --git a/renderers/glm5.py b/renderers/glm5.py index 6de6ba3..ad61038 100644 --- a/renderers/glm5.py +++ b/renderers/glm5.py @@ -48,6 +48,8 @@ class GLM5Renderer: """Deterministic message → token renderer for GLM-5 models.""" + CHAT_TEMPLATE_KWARGS = frozenset({"enable_thinking"}) + # GLM-5.1 flips this on: even when the most-recent assistant has no # reasoning content, the template wraps it with ```` # instead of just emitting ```` as a separator. Subclassed in diff --git a/renderers/gpt_oss.py b/renderers/gpt_oss.py index 9939de1..c1944a3 100644 --- a/renderers/gpt_oss.py +++ b/renderers/gpt_oss.py @@ -118,6 +118,8 @@ def _arguments_to_str(arguments: Any) -> str: class GptOssRenderer: """Deterministic message → token renderer for OpenAI gpt-oss (harmony).""" + CHAT_TEMPLATE_KWARGS = frozenset({"reasoning_effort"}) + def __init__( self, tokenizer: PreTrainedTokenizer, diff --git a/renderers/kimi_k25.py b/renderers/kimi_k25.py index b2a45e6..5467568 100644 --- a/renderers/kimi_k25.py +++ b/renderers/kimi_k25.py @@ -570,6 +570,8 @@ class KimiK25Renderer: The tokenizer should be ``moonshotai/Kimi-K2-Instruct`` (same as K2). """ + CHAT_TEMPLATE_KWARGS = frozenset({"enable_thinking"}) + def __init__( self, tokenizer: PreTrainedTokenizer, diff --git a/renderers/laguna_xs2.py b/renderers/laguna_xs2.py index ce85037..2a5e56c 100644 --- a/renderers/laguna_xs2.py +++ b/renderers/laguna_xs2.py @@ -76,6 +76,8 @@ class LagunaXS2Renderer: + CHAT_TEMPLATE_KWARGS = frozenset({"enable_thinking"}) + def __init__( self, tokenizer: PreTrainedTokenizer, diff --git a/renderers/nemotron3.py b/renderers/nemotron3.py index e97790d..4098a2e 100644 --- a/renderers/nemotron3.py +++ b/renderers/nemotron3.py @@ -76,6 +76,8 @@ def _render_extra_keys(obj: dict[str, Any], handled_keys: set[str]) -> list[str] class Nemotron3Renderer: """Deterministic message → token renderer for Nemotron 3 models.""" + CHAT_TEMPLATE_KWARGS = frozenset({"enable_thinking"}) + def __init__( self, tokenizer: PreTrainedTokenizer, diff --git a/renderers/qwen3.py b/renderers/qwen3.py index 4562546..a7c0416 100644 --- a/renderers/qwen3.py +++ b/renderers/qwen3.py @@ -45,6 +45,8 @@ class Qwen3Renderer: """Deterministic message → token renderer for Qwen3 models.""" + CHAT_TEMPLATE_KWARGS = frozenset({"enable_thinking"}) + def __init__( self, tokenizer: PreTrainedTokenizer, diff --git a/renderers/qwen35.py b/renderers/qwen35.py index 2deefcf..680c68c 100644 --- a/renderers/qwen35.py +++ b/renderers/qwen35.py @@ -103,6 +103,8 @@ def _detect_enable_thinking_default(tokenizer: PreTrainedTokenizer) -> bool: class Qwen35Renderer: """Deterministic message → token renderer for Qwen3.5 models.""" + CHAT_TEMPLATE_KWARGS = frozenset({"enable_thinking"}) + def __init__( self, tokenizer: PreTrainedTokenizer, diff --git a/tests/test_chat_template_kwargs.py b/tests/test_chat_template_kwargs.py new file mode 100644 index 0000000..2243e46 --- /dev/null +++ b/tests/test_chat_template_kwargs.py @@ -0,0 +1,71 @@ +from types import SimpleNamespace + +import pytest + +from renderers import base + + +class _FakeRenderer: + CHAT_TEMPLATE_KWARGS = frozenset({"enable_thinking"}) + + def __init__( + self, + tokenizer, + *, + enable_thinking: bool = True, + preserve_all_thinking: bool = False, + preserve_thinking_between_tool_calls: bool = False, + ): + self.tokenizer = tokenizer + self.enable_thinking = enable_thinking + self.preserve_all_thinking = preserve_all_thinking + self.preserve_thinking_between_tool_calls = preserve_thinking_between_tool_calls + + +def _register_fake_renderer(monkeypatch) -> None: + base._populate_registry() + monkeypatch.setitem(base.RENDERER_REGISTRY, "fake-qwen", _FakeRenderer) + + +def test_create_renderer_forwards_model_chat_template_kwargs(monkeypatch): + _register_fake_renderer(monkeypatch) + + renderer = base.create_renderer( + SimpleNamespace(name_or_path="unused"), + renderer="fake-qwen", + chat_template_kwargs={"enable_thinking": False}, + ) + + assert renderer.enable_thinking is False + + +def test_create_renderer_rejects_unsupported_model_chat_template_kwargs(monkeypatch): + _register_fake_renderer(monkeypatch) + + with pytest.raises(ValueError, match="reasoning_effort"): + base.create_renderer( + SimpleNamespace(name_or_path="unused"), + renderer="fake-qwen", + chat_template_kwargs={"reasoning_effort": "high"}, + ) + + +def test_create_renderer_rejects_constructor_kwargs_in_chat_template_kwargs(): + with pytest.raises(ValueError, match="preserve_all_thinking"): + base.create_renderer( + SimpleNamespace(name_or_path="unused"), + renderer="default", + chat_template_kwargs={"preserve_all_thinking": True}, + ) + + +def test_create_renderer_auto_forwards_model_chat_template_kwargs(monkeypatch): + _register_fake_renderer(monkeypatch) + monkeypatch.setitem(base.MODEL_RENDERER_MAP, "fake/model", "fake-qwen") + + renderer = base.create_renderer( + SimpleNamespace(name_or_path="fake/model"), + chat_template_kwargs={"enable_thinking": False}, + ) + + assert renderer.enable_thinking is False