From b37d3f2ca76bf08f417acebf8fd524db5208f7cd Mon Sep 17 00:00:00 2001 From: eligotts <78387377+eligotts@users.noreply.github.com> Date: Fri, 22 May 2026 18:48:26 -0700 Subject: [PATCH 1/5] Add renderer chat template kwargs passthrough --- README.md | 5 ++- renderers/base.py | 52 ++++++++++++++++++++++++-- renderers/deepseek_v3.py | 2 + renderers/glm45.py | 2 + renderers/glm5.py | 2 + renderers/gpt_oss.py | 2 + renderers/kimi_k2.py | 2 + renderers/kimi_k25.py | 2 + renderers/laguna_xs2.py | 2 + renderers/nemotron3.py | 2 + renderers/qwen3.py | 2 + renderers/qwen35.py | 2 + tests/test_chat_template_kwargs.py | 59 ++++++++++++++++++++++++++++++ 13 files changed, 130 insertions(+), 6 deletions(-) create mode 100644 tests/test_chat_template_kwargs.py diff --git a/README.md b/README.md index 51e4d19..2336c3e 100644 --- a/README.md +++ b/README.md @@ -110,14 +110,15 @@ Each break fragments a rollout into multiple training samples — every fragment ## Compaction overrides -`create_renderer` and `create_renderer_pool` accept two constructor-only flags: +`create_renderer` and `create_renderer_pool` accept constructor-time template controls: ```python +chat_template_kwargs: dict | None = None preserve_all_thinking: bool = False preserve_thinking_between_tool_calls: bool = False ``` -Defaults preserve byte-identity with the model's chat template. Flipping a flag at construction restores `reasoning_content` the template would otherwise drop: +`chat_template_kwargs` binds template toggles that must be fixed for the renderer instance, such as `enable_thinking` and `reasoning_effort`. Defaults preserve byte-identity with the model's chat template. Flipping a preserve flag at construction restores `reasoning_content` the template would otherwise drop: - `preserve_all_thinking=True` — every past assistant's reasoning is kept. 
- `preserve_thinking_between_tool_calls=True` — reasoning is kept on assistants in the in-flight tool cycle (no-op for current renderers; reserved for future templates that drop it). diff --git a/renderers/base.py b/renderers/base.py index b861872..6720cc5 100644 --- a/renderers/base.py +++ b/renderers/base.py @@ -1171,6 +1171,7 @@ def create_renderer_pool( *, tool_parser: str | None = None, reasoning_parser: str | None = None, + chat_template_kwargs: dict[str, Any] | None = None, preserve_all_thinking: bool = False, preserve_thinking_between_tool_calls: bool = False, ) -> RendererPool: @@ -1183,6 +1184,11 @@ def create_renderer_pool( ``tool_parser`` and ``reasoning_parser`` are forwarded to ``create_renderer`` when the pool falls back to ``DefaultRenderer``. + ``chat_template_kwargs`` are forwarded to each renderer constructor. + Hand-coded renderers accept only the kwargs they explicitly model + (for example ``enable_thinking`` or ``reasoning_effort``); the default + renderer forwards them to ``tokenizer.apply_chat_template``. + ``preserve_all_thinking`` and ``preserve_thinking_between_tool_calls`` are forwarded to each pooled renderer's constructor — every slot in the pool shares one configuration. To run with a different @@ -1200,6 +1206,7 @@ def factory() -> Renderer: renderer=renderer, tool_parser=tool_parser, reasoning_parser=reasoning_parser, + chat_template_kwargs=chat_template_kwargs, preserve_all_thinking=preserve_all_thinking, preserve_thinking_between_tool_calls=preserve_thinking_between_tool_calls, ) @@ -1213,6 +1220,7 @@ def create_renderer( *, tool_parser: str | None = None, reasoning_parser: str | None = None, + chat_template_kwargs: dict[str, Any] | None = None, preserve_all_thinking: bool = False, preserve_thinking_between_tool_calls: bool = False, ) -> Renderer: @@ -1229,6 +1237,11 @@ def create_renderer( have their own parsing wired in. reasoning_parser: Name of a reasoning parser registered in ``renderers.parsers``. 
Only consumed by DefaultRenderer. + chat_template_kwargs: Template-control kwargs bound to the renderer. + Hand-coded renderers accept only the kwargs they explicitly + model (for example ``enable_thinking`` or + ``reasoning_effort``); DefaultRenderer forwards all kwargs + to ``tokenizer.apply_chat_template``. preserve_all_thinking: Forwarded to the renderer's constructor. When ``True``, the instance restores ``reasoning_content`` the chat template would otherwise drop on historical @@ -1254,6 +1267,7 @@ def create_renderer( "preserve_all_thinking": preserve_all_thinking, "preserve_thinking_between_tool_calls": preserve_thinking_between_tool_calls, } + template_kwargs = dict(chat_template_kwargs or {}) if renderer != "auto": cls = RENDERER_REGISTRY.get(renderer) @@ -1262,7 +1276,7 @@ def create_renderer( f"Unknown renderer {renderer!r}. Available: {', '.join(sorted(RENDERER_REGISTRY))}" ) if renderer == "default": - return cls(tokenizer, **default_kwargs, **preserve_kwargs) + return cls(tokenizer, **default_kwargs, **template_kwargs, **preserve_kwargs) if default_kwargs: logger.info( "tool_parser / reasoning_parser are only consumed by " @@ -1270,7 +1284,11 @@ def create_renderer( "built-in behavior.", renderer, ) - return cls(tokenizer, **preserve_kwargs) + return cls( + tokenizer, + **_model_renderer_chat_template_kwargs(renderer, cls, template_kwargs), + **preserve_kwargs, + ) # Auto-detect from model name via exact match on the canonical HF id. # Fine-tunes and renamed checkpoints miss on purpose — their chat @@ -1280,7 +1298,15 @@ def create_renderer( model_name = getattr(tokenizer, "name_or_path", "") renderer_name = MODEL_RENDERER_MAP.get(model_name) if renderer_name is not None: - return RENDERER_REGISTRY[renderer_name](tokenizer, **preserve_kwargs) + return RENDERER_REGISTRY[renderer_name]( + tokenizer, + **_model_renderer_chat_template_kwargs( + renderer_name, + RENDERER_REGISTRY[renderer_name], + template_kwargs, + ), + **preserve_kwargs, + ) # No match. 
For VLMs this must be fatal: DefaultRenderer only knows # ``apply_chat_template`` + text tokens, so it would silently drop @@ -1307,7 +1333,25 @@ def create_renderer( "reasoning_parser= to enable structured output parsing.", model_name or "", ) - return RENDERER_REGISTRY["default"](tokenizer, **default_kwargs, **preserve_kwargs) + return RENDERER_REGISTRY["default"]( + tokenizer, **default_kwargs, **template_kwargs, **preserve_kwargs + ) + + +def _model_renderer_chat_template_kwargs( + renderer: str, renderer_cls: type, chat_template_kwargs: dict[str, Any] +) -> dict[str, Any]: + if not chat_template_kwargs: + return {} + + allowed = set(getattr(renderer_cls, "CHAT_TEMPLATE_KWARGS", ())) + unsupported = sorted(set(chat_template_kwargs) - allowed) + if unsupported: + raise ValueError( + f"renderer={renderer!r} does not support chat_template_kwargs: " + f"{', '.join(unsupported)}" + ) + return dict(chat_template_kwargs) # --------------------------------------------------------------------------- diff --git a/renderers/deepseek_v3.py b/renderers/deepseek_v3.py index 507d81d..7531e69 100644 --- a/renderers/deepseek_v3.py +++ b/renderers/deepseek_v3.py @@ -41,6 +41,8 @@ def _ds_token(name: str) -> str: class DeepSeekV3Renderer: """Deterministic message → token renderer for DeepSeek V3 models.""" + CHAT_TEMPLATE_KWARGS = frozenset({"enable_thinking"}) + def __init__( self, tokenizer: PreTrainedTokenizer, diff --git a/renderers/glm45.py b/renderers/glm45.py index 206f366..109d546 100644 --- a/renderers/glm45.py +++ b/renderers/glm45.py @@ -50,6 +50,8 @@ class GLM45Renderer: """Deterministic message → token renderer for GLM-4.5 Air models.""" + CHAT_TEMPLATE_KWARGS = frozenset({"enable_thinking"}) + def __init__( self, tokenizer: PreTrainedTokenizer, diff --git a/renderers/glm5.py b/renderers/glm5.py index 6de6ba3..ad61038 100644 --- a/renderers/glm5.py +++ b/renderers/glm5.py @@ -48,6 +48,8 @@ class GLM5Renderer: """Deterministic message → token renderer for GLM-5 
models.""" + CHAT_TEMPLATE_KWARGS = frozenset({"enable_thinking"}) + # GLM-5.1 flips this on: even when the most-recent assistant has no # reasoning content, the template wraps it with ``<think></think>`` # instead of just emitting ``</think>`` as a separator. Subclassed in diff --git a/renderers/gpt_oss.py b/renderers/gpt_oss.py index 9939de1..c1944a3 100644 --- a/renderers/gpt_oss.py +++ b/renderers/gpt_oss.py @@ -118,6 +118,8 @@ def _arguments_to_str(arguments: Any) -> str: class GptOssRenderer: """Deterministic message → token renderer for OpenAI gpt-oss (harmony).""" + CHAT_TEMPLATE_KWARGS = frozenset({"reasoning_effort"}) + def __init__( self, tokenizer: PreTrainedTokenizer, diff --git a/renderers/kimi_k2.py b/renderers/kimi_k2.py index 9e08141..3b2c3d8 100644 --- a/renderers/kimi_k2.py +++ b/renderers/kimi_k2.py @@ -34,6 +34,8 @@ class KimiK2Renderer: """Deterministic message → token renderer for Kimi K2 models.""" + CHAT_TEMPLATE_KWARGS = frozenset({"enable_thinking"}) + def __init__( self, tokenizer: PreTrainedTokenizer, diff --git a/renderers/kimi_k25.py b/renderers/kimi_k25.py index b2a45e6..5467568 100644 --- a/renderers/kimi_k25.py +++ b/renderers/kimi_k25.py @@ -570,6 +570,8 @@ class KimiK25Renderer: The tokenizer should be ``moonshotai/Kimi-K2-Instruct`` (same as K2). 
""" + CHAT_TEMPLATE_KWARGS = frozenset({"enable_thinking"}) + def __init__( self, tokenizer: PreTrainedTokenizer, diff --git a/renderers/laguna_xs2.py b/renderers/laguna_xs2.py index ce85037..2a5e56c 100644 --- a/renderers/laguna_xs2.py +++ b/renderers/laguna_xs2.py @@ -76,6 +76,8 @@ class LagunaXS2Renderer: + CHAT_TEMPLATE_KWARGS = frozenset({"enable_thinking"}) + def __init__( self, tokenizer: PreTrainedTokenizer, diff --git a/renderers/nemotron3.py b/renderers/nemotron3.py index e97790d..4098a2e 100644 --- a/renderers/nemotron3.py +++ b/renderers/nemotron3.py @@ -76,6 +76,8 @@ def _render_extra_keys(obj: dict[str, Any], handled_keys: set[str]) -> list[str] class Nemotron3Renderer: """Deterministic message → token renderer for Nemotron 3 models.""" + CHAT_TEMPLATE_KWARGS = frozenset({"enable_thinking"}) + def __init__( self, tokenizer: PreTrainedTokenizer, diff --git a/renderers/qwen3.py b/renderers/qwen3.py index 4562546..a7c0416 100644 --- a/renderers/qwen3.py +++ b/renderers/qwen3.py @@ -45,6 +45,8 @@ class Qwen3Renderer: """Deterministic message → token renderer for Qwen3 models.""" + CHAT_TEMPLATE_KWARGS = frozenset({"enable_thinking"}) + def __init__( self, tokenizer: PreTrainedTokenizer, diff --git a/renderers/qwen35.py b/renderers/qwen35.py index 2deefcf..680c68c 100644 --- a/renderers/qwen35.py +++ b/renderers/qwen35.py @@ -103,6 +103,8 @@ def _detect_enable_thinking_default(tokenizer: PreTrainedTokenizer) -> bool: class Qwen35Renderer: """Deterministic message → token renderer for Qwen3.5 models.""" + CHAT_TEMPLATE_KWARGS = frozenset({"enable_thinking"}) + def __init__( self, tokenizer: PreTrainedTokenizer, diff --git a/tests/test_chat_template_kwargs.py b/tests/test_chat_template_kwargs.py new file mode 100644 index 0000000..9108af3 --- /dev/null +++ b/tests/test_chat_template_kwargs.py @@ -0,0 +1,59 @@ +from types import SimpleNamespace + +import pytest + +from renderers import base + + +class _FakeRenderer: + CHAT_TEMPLATE_KWARGS = 
frozenset({"enable_thinking"}) + + def __init__( + self, + tokenizer, + *, + enable_thinking: bool = True, + preserve_all_thinking: bool = False, + preserve_thinking_between_tool_calls: bool = False, + ): + self.tokenizer = tokenizer + self.enable_thinking = enable_thinking + self.preserve_all_thinking = preserve_all_thinking + self.preserve_thinking_between_tool_calls = ( + preserve_thinking_between_tool_calls + ) + + +def test_create_renderer_forwards_model_chat_template_kwargs(monkeypatch): + monkeypatch.setitem(base.RENDERER_REGISTRY, "fake-qwen", _FakeRenderer) + + renderer = base.create_renderer( + SimpleNamespace(name_or_path="unused"), + renderer="fake-qwen", + chat_template_kwargs={"enable_thinking": False}, + ) + + assert renderer.enable_thinking is False + + +def test_create_renderer_rejects_unsupported_model_chat_template_kwargs(monkeypatch): + monkeypatch.setitem(base.RENDERER_REGISTRY, "fake-qwen", _FakeRenderer) + + with pytest.raises(ValueError, match="reasoning_effort"): + base.create_renderer( + SimpleNamespace(name_or_path="unused"), + renderer="fake-qwen", + chat_template_kwargs={"reasoning_effort": "high"}, + ) + + +def test_create_renderer_auto_forwards_model_chat_template_kwargs(monkeypatch): + monkeypatch.setitem(base.RENDERER_REGISTRY, "fake-qwen", _FakeRenderer) + monkeypatch.setitem(base.MODEL_RENDERER_MAP, "fake/model", "fake-qwen") + + renderer = base.create_renderer( + SimpleNamespace(name_or_path="fake/model"), + chat_template_kwargs={"enable_thinking": False}, + ) + + assert renderer.enable_thinking is False From 0bd7e6dec885166959c2f109630f2b29257667a0 Mon Sep 17 00:00:00 2001 From: eligotts <78387377+eligotts@users.noreply.github.com> Date: Fri, 22 May 2026 19:42:59 -0700 Subject: [PATCH 2/5] Reject constructor kwargs in chat template kwargs --- renderers/base.py | 22 ++++++++++++++++++++++ tests/test_chat_template_kwargs.py | 9 +++++++++ 2 files changed, 31 insertions(+) diff --git a/renderers/base.py b/renderers/base.py index 
6720cc5..d92af47 100644 --- a/renderers/base.py +++ b/renderers/base.py @@ -870,6 +870,14 @@ def bridge_to_next_turn(self, *args: Any, **kwargs: Any) -> "RenderedTokens | No RENDERER_REGISTRY: dict[str, type] = {} +_RENDERER_CONSTRUCTOR_KWARGS = frozenset( + { + "tool_parser", + "reasoning_parser", + "preserve_all_thinking", + "preserve_thinking_between_tool_calls", + } +) # Exact canonical HF model names → renderer. We do NOT use prefix # matching because models with the same architecture may ship different @@ -1276,6 +1284,7 @@ def create_renderer( f"Unknown renderer {renderer!r}. Available: {', '.join(sorted(RENDERER_REGISTRY))}" ) if renderer == "default": + _reject_renderer_constructor_kwargs(renderer, template_kwargs) return cls(tokenizer, **default_kwargs, **template_kwargs, **preserve_kwargs) if default_kwargs: logger.info( @@ -1333,17 +1342,30 @@ def create_renderer( "reasoning_parser= to enable structured output parsing.", model_name or "", ) + _reject_renderer_constructor_kwargs("default", template_kwargs) return RENDERER_REGISTRY["default"]( tokenizer, **default_kwargs, **template_kwargs, **preserve_kwargs ) +def _reject_renderer_constructor_kwargs( + renderer: str, chat_template_kwargs: dict[str, Any] +) -> None: + reserved = sorted(set(chat_template_kwargs) & _RENDERER_CONSTRUCTOR_KWARGS) + if reserved: + raise ValueError( + f"renderer={renderer!r} chat_template_kwargs cannot contain " + f"renderer constructor kwargs: {', '.join(reserved)}" + ) + + def _model_renderer_chat_template_kwargs( renderer: str, renderer_cls: type, chat_template_kwargs: dict[str, Any] ) -> dict[str, Any]: if not chat_template_kwargs: return {} + _reject_renderer_constructor_kwargs(renderer, chat_template_kwargs) allowed = set(getattr(renderer_cls, "CHAT_TEMPLATE_KWARGS", ())) unsupported = sorted(set(chat_template_kwargs) - allowed) if unsupported: diff --git a/tests/test_chat_template_kwargs.py b/tests/test_chat_template_kwargs.py index 9108af3..7426a87 100644 --- 
a/tests/test_chat_template_kwargs.py +++ b/tests/test_chat_template_kwargs.py @@ -47,6 +47,15 @@ def test_create_renderer_rejects_unsupported_model_chat_template_kwargs(monkeypa ) +def test_create_renderer_rejects_constructor_kwargs_in_chat_template_kwargs(): + with pytest.raises(ValueError, match="preserve_all_thinking"): + base.create_renderer( + SimpleNamespace(name_or_path="unused"), + renderer="default", + chat_template_kwargs={"preserve_all_thinking": True}, + ) + + def test_create_renderer_auto_forwards_model_chat_template_kwargs(monkeypatch): monkeypatch.setitem(base.RENDERER_REGISTRY, "fake-qwen", _FakeRenderer) monkeypatch.setitem(base.MODEL_RENDERER_MAP, "fake/model", "fake-qwen") From d80d4acf3fa7fd8a0785d1d859320f8f033c68b3 Mon Sep 17 00:00:00 2001 From: eligotts <78387377+eligotts@users.noreply.github.com> Date: Fri, 22 May 2026 19:50:23 -0700 Subject: [PATCH 3/5] Simplify chat template kwargs validation --- renderers/base.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/renderers/base.py b/renderers/base.py index d92af47..6adcddf 100644 --- a/renderers/base.py +++ b/renderers/base.py @@ -1276,6 +1276,7 @@ def create_renderer( "preserve_thinking_between_tool_calls": preserve_thinking_between_tool_calls, } template_kwargs = dict(chat_template_kwargs or {}) + _reject_renderer_constructor_kwargs(template_kwargs) if renderer != "auto": cls = RENDERER_REGISTRY.get(renderer) @@ -1284,7 +1285,6 @@ def create_renderer( f"Unknown renderer {renderer!r}. 
Available: {', '.join(sorted(RENDERER_REGISTRY))}" ) if renderer == "default": - _reject_renderer_constructor_kwargs(renderer, template_kwargs) return cls(tokenizer, **default_kwargs, **template_kwargs, **preserve_kwargs) if default_kwargs: logger.info( @@ -1342,20 +1342,19 @@ def create_renderer( "reasoning_parser= to enable structured output parsing.", model_name or "", ) - _reject_renderer_constructor_kwargs("default", template_kwargs) return RENDERER_REGISTRY["default"]( tokenizer, **default_kwargs, **template_kwargs, **preserve_kwargs ) def _reject_renderer_constructor_kwargs( - renderer: str, chat_template_kwargs: dict[str, Any] + chat_template_kwargs: dict[str, Any], ) -> None: reserved = sorted(set(chat_template_kwargs) & _RENDERER_CONSTRUCTOR_KWARGS) if reserved: raise ValueError( - f"renderer={renderer!r} chat_template_kwargs cannot contain " - f"renderer constructor kwargs: {', '.join(reserved)}" + "chat_template_kwargs cannot contain renderer constructor kwargs: " + f"{', '.join(reserved)}" ) @@ -1365,7 +1364,6 @@ def _model_renderer_chat_template_kwargs( if not chat_template_kwargs: return {} - _reject_renderer_constructor_kwargs(renderer, chat_template_kwargs) allowed = set(getattr(renderer_cls, "CHAT_TEMPLATE_KWARGS", ())) unsupported = sorted(set(chat_template_kwargs) - allowed) if unsupported: From 7fbf39004f7782077f8c11721a4affa6d0e92e9a Mon Sep 17 00:00:00 2001 From: eligotts <78387377+eligotts@users.noreply.github.com> Date: Fri, 22 May 2026 19:52:24 -0700 Subject: [PATCH 4/5] Format chat template kwargs changes --- renderers/base.py | 4 +++- tests/test_chat_template_kwargs.py | 4 +--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/renderers/base.py b/renderers/base.py index 6adcddf..e8f4b3c 100644 --- a/renderers/base.py +++ b/renderers/base.py @@ -1285,7 +1285,9 @@ def create_renderer( f"Unknown renderer {renderer!r}. 
Available: {', '.join(sorted(RENDERER_REGISTRY))}" ) if renderer == "default": - return cls(tokenizer, **default_kwargs, **template_kwargs, **preserve_kwargs) + return cls( + tokenizer, **default_kwargs, **template_kwargs, **preserve_kwargs + ) if default_kwargs: logger.info( "tool_parser / reasoning_parser are only consumed by " diff --git a/tests/test_chat_template_kwargs.py b/tests/test_chat_template_kwargs.py index 7426a87..715e139 100644 --- a/tests/test_chat_template_kwargs.py +++ b/tests/test_chat_template_kwargs.py @@ -19,9 +19,7 @@ def __init__( self.tokenizer = tokenizer self.enable_thinking = enable_thinking self.preserve_all_thinking = preserve_all_thinking - self.preserve_thinking_between_tool_calls = ( - preserve_thinking_between_tool_calls - ) + self.preserve_thinking_between_tool_calls = preserve_thinking_between_tool_calls def test_create_renderer_forwards_model_chat_template_kwargs(monkeypatch): From b543277145262e4248b7b8f787549c218dec0c65 Mon Sep 17 00:00:00 2001 From: eligotts <78387377+eligotts@users.noreply.github.com> Date: Fri, 22 May 2026 20:16:27 -0700 Subject: [PATCH 5/5] Address chat template kwargs review comments --- renderers/kimi_k2.py | 2 -- tests/test_chat_template_kwargs.py | 11 ++++++++--- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/renderers/kimi_k2.py b/renderers/kimi_k2.py index 3b2c3d8..9e08141 100644 --- a/renderers/kimi_k2.py +++ b/renderers/kimi_k2.py @@ -34,8 +34,6 @@ class KimiK2Renderer: """Deterministic message → token renderer for Kimi K2 models.""" - CHAT_TEMPLATE_KWARGS = frozenset({"enable_thinking"}) - def __init__( self, tokenizer: PreTrainedTokenizer, diff --git a/tests/test_chat_template_kwargs.py b/tests/test_chat_template_kwargs.py index 715e139..2243e46 100644 --- a/tests/test_chat_template_kwargs.py +++ b/tests/test_chat_template_kwargs.py @@ -22,9 +22,14 @@ def __init__( self.preserve_thinking_between_tool_calls = preserve_thinking_between_tool_calls -def 
test_create_renderer_forwards_model_chat_template_kwargs(monkeypatch): +def _register_fake_renderer(monkeypatch) -> None: + base._populate_registry() monkeypatch.setitem(base.RENDERER_REGISTRY, "fake-qwen", _FakeRenderer) + +def test_create_renderer_forwards_model_chat_template_kwargs(monkeypatch): + _register_fake_renderer(monkeypatch) + renderer = base.create_renderer( SimpleNamespace(name_or_path="unused"), renderer="fake-qwen", @@ -35,7 +40,7 @@ def test_create_renderer_forwards_model_chat_template_kwargs(monkeypatch): def test_create_renderer_rejects_unsupported_model_chat_template_kwargs(monkeypatch): - monkeypatch.setitem(base.RENDERER_REGISTRY, "fake-qwen", _FakeRenderer) + _register_fake_renderer(monkeypatch) with pytest.raises(ValueError, match="reasoning_effort"): base.create_renderer( @@ -55,7 +60,7 @@ def test_create_renderer_rejects_constructor_kwargs_in_chat_template_kwargs(): def test_create_renderer_auto_forwards_model_chat_template_kwargs(monkeypatch): - monkeypatch.setitem(base.RENDERER_REGISTRY, "fake-qwen", _FakeRenderer) + _register_fake_renderer(monkeypatch) monkeypatch.setitem(base.MODEL_RENDERER_MAP, "fake/model", "fake-qwen") renderer = base.create_renderer(