From 109aed7ba2f411f00d58524516d1d9cd00d24d01 Mon Sep 17 00:00:00 2001 From: Gabor Szabo Date: Mon, 18 May 2026 17:16:41 +0200 Subject: [PATCH 1/2] fix(agents): wire FallbackModel so a primary 503 retries the fallback (#183) --- app/features/agents/agents/base.py | 51 +++++++++++++++++++++ app/features/agents/agents/experiment.py | 10 ++-- app/features/agents/agents/rag_assistant.py | 10 ++-- app/features/agents/tests/test_base.py | 40 ++++++++++++++++ 4 files changed, 99 insertions(+), 12 deletions(-) diff --git a/app/features/agents/agents/base.py b/app/features/agents/agents/base.py index abade6c2..272e59e8 100644 --- a/app/features/agents/agents/base.py +++ b/app/features/agents/agents/base.py @@ -14,6 +14,7 @@ import structlog from pydantic_ai import ModelRetry from pydantic_ai.models import Model +from pydantic_ai.models.fallback import FallbackModel from pydantic_ai.models.openai import OpenAIChatModel from pydantic_ai.providers.ollama import OllamaProvider @@ -120,6 +121,56 @@ def get_fallback_model() -> str: return settings.agent_fallback_model +def build_agent_model_with_fallback() -> Model | str: + """Build the PydanticAI ``model`` argument, wrapping primary + fallback. + + When the primary model raises a provider error — HTTP 5xx, rate limit, + timeout, i.e. any ``pydantic_ai.exceptions.ModelAPIError`` — PydanticAI's + :class:`FallbackModel` transparently retries the request against + ``agent_fallback_model``. This keeps an agent run alive through a transient + provider outage (e.g. a Gemini ``503 UNAVAILABLE``) instead of surfacing a + hard error. ``FallbackModel``'s default ``fallback_on=(ModelAPIError,)`` + already covers that case. + + The primary model is returned alone (no fallback wrapper) when: + + - no fallback is configured, or it equals the primary identifier; or + - the fallback provider has no API key — wrapping it would only move the + failure, so the agent runs primary-only and logs a warning. + + Returns: + A :class:`FallbackModel` (primary then fallback) when a usable fallback + is configured, otherwise the primary model argument from + :func:`build_agent_model`. + + Raises: + ValueError: If the primary provider's API key is not configured + (fail-fast — an agent with no usable primary cannot run). + """ + primary_id = get_model_identifier() + validate_api_key_for_model(primary_id) # fail-fast on the primary + primary = build_agent_model(primary_id) + + fallback_id = get_fallback_model() + if not fallback_id or fallback_id == primary_id: + return primary + + try: + validate_api_key_for_model(fallback_id) + except ValueError: + logger.warning( + "agents.fallback_disabled", + reason="missing_api_key", + primary=primary_id, + fallback=fallback_id, + ) + return primary + + fallback = build_agent_model(fallback_id) + logger.info("agents.fallback_enabled", primary=primary_id, fallback=fallback_id) + return FallbackModel(primary, fallback) + + def get_agent_retries() -> int: """Get the configured retry budget for agent tool calls and output validation. diff --git a/app/features/agents/agents/experiment.py b/app/features/agents/agents/experiment.py index 22311139..7a2de475 100644 --- a/app/features/agents/agents/experiment.py +++ b/app/features/agents/agents/experiment.py @@ -19,13 +19,11 @@ SAFETY_INSTRUCTIONS, SYSTEM_PROMPT_HEADER, TOOL_USAGE_INSTRUCTIONS, - build_agent_model, + build_agent_model_with_fallback, get_agent_retries, - get_model_identifier, get_model_settings, recoverable, requires_approval, - validate_api_key_for_model, ) from app.features.agents.deps import AgentDeps from app.features.agents.schemas import ExperimentReport @@ -83,9 +81,9 @@ def create_experiment_agent() -> Agent[AgentDeps, ExperimentReport]: Returns: Configured Agent instance with tools registered. """ - identifier = get_model_identifier() - validate_api_key_for_model(identifier) # Fail-fast validation - model = build_agent_model(identifier) # str for cloud, Model object for ollama + # Primary model, wrapped in a FallbackModel so a transient provider error + # (HTTP 5xx, rate limit) on the primary transparently retries the fallback. + model = build_agent_model_with_fallback() retries = get_agent_retries() agent: Agent[AgentDeps, ExperimentReport] = Agent( diff --git a/app/features/agents/agents/rag_assistant.py b/app/features/agents/agents/rag_assistant.py index c2288409..8095c597 100644 --- a/app/features/agents/agents/rag_assistant.py +++ b/app/features/agents/agents/rag_assistant.py @@ -18,12 +18,10 @@ from app.features.agents.agents.base import ( SAFETY_INSTRUCTIONS, SYSTEM_PROMPT_HEADER, - build_agent_model, + build_agent_model_with_fallback, get_agent_retries, - get_model_identifier, get_model_settings, recoverable, - validate_api_key_for_model, ) from app.features.agents.deps import AgentDeps from app.features.agents.schemas import RAGAnswer @@ -78,9 +76,9 @@ def create_rag_assistant_agent() -> Agent[AgentDeps, RAGAnswer]: Returns: Configured Agent instance with tools registered. """ - identifier = get_model_identifier() - validate_api_key_for_model(identifier) # Fail-fast validation - model = build_agent_model(identifier) # str for cloud, Model object for ollama + # Primary model, wrapped in a FallbackModel so a transient provider error + # (HTTP 5xx, rate limit) on the primary transparently retries the fallback. + model = build_agent_model_with_fallback() retries = get_agent_retries() agent: Agent[AgentDeps, RAGAnswer] = Agent( diff --git a/app/features/agents/tests/test_base.py b/app/features/agents/tests/test_base.py index 83193aed..1580a4c8 100644 --- a/app/features/agents/tests/test_base.py +++ b/app/features/agents/tests/test_base.py @@ -8,6 +8,7 @@ import pytest from pydantic_ai import ModelRetry from pydantic_ai.messages import ModelMessage, ModelResponse, TextPart +from pydantic_ai.models.fallback import FallbackModel from pydantic_ai.models.function import AgentInfo, FunctionModel from pydantic_ai.models.openai import OpenAIChatModel @@ -15,6 +16,7 @@ from app.features.agents.agents.base import ( TOOL_USAGE_INSTRUCTIONS, build_agent_model, + build_agent_model_with_fallback, get_agent_retries, recoverable, validate_api_key_for_model, @@ -62,6 +64,44 @@ def test_validate_api_key_for_model_ollama_skips_key_check(): validate_api_key_for_model("ollama:llama3.1") +def test_build_agent_model_with_fallback_wraps_primary_and_fallback(): + """A distinct, key-backed fallback yields a FallbackModel(primary, fallback).""" + settings = get_settings() + settings.agent_default_model = "anthropic:claude-sonnet-4-5" + settings.agent_fallback_model = "openai:gpt-4o" + settings.anthropic_api_key = "test-anthropic-key" + settings.openai_api_key = "test-openai-key" + + model = build_agent_model_with_fallback() + + assert isinstance(model, FallbackModel) + + +def test_build_agent_model_with_fallback_primary_only_when_fallback_key_missing(): + """With no API key for the fallback provider, the primary is returned alone.""" + settings = get_settings() + settings.agent_default_model = "anthropic:claude-sonnet-4-5" + settings.agent_fallback_model = "openai:gpt-4o" + settings.anthropic_api_key = "test-anthropic-key" + settings.openai_api_key = "" + + model = build_agent_model_with_fallback() + + assert model == "anthropic:claude-sonnet-4-5" + + +def test_build_agent_model_with_fallback_primary_only_when_fallback_equals_primary(): + """A fallback identical to the primary adds no resilience — primary returned alone.""" + settings = get_settings() + settings.agent_default_model = "anthropic:claude-sonnet-4-5" + settings.agent_fallback_model = "anthropic:claude-sonnet-4-5" + settings.anthropic_api_key = "test-anthropic-key" + + model = build_agent_model_with_fallback() + + assert model == "anthropic:claude-sonnet-4-5" + + def test_prompts_only_reference_registered_tool_names() -> None: """Every `tool_*` name in the agent prompts must be an actually-registered tool. From ea3b97bebead5af9df5d90fdd89b0f25159b2987 Mon Sep 17 00:00:00 2001 From: Gabor Szabo Date: Mon, 18 May 2026 17:20:47 +0200 Subject: [PATCH 2/2] test(agents): assert FallbackModel wiring order and primary fail-fast (#183) --- app/features/agents/tests/test_base.py | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/app/features/agents/tests/test_base.py b/app/features/agents/tests/test_base.py index 1580a4c8..ddfcd02b 100644 --- a/app/features/agents/tests/test_base.py +++ b/app/features/agents/tests/test_base.py @@ -65,7 +65,13 @@ def test_validate_api_key_for_model_ollama_skips_key_check(): def test_build_agent_model_with_fallback_wraps_primary_and_fallback(): - """A distinct, key-backed fallback yields a FallbackModel(primary, fallback).""" + """A distinct, key-backed fallback yields a FallbackModel wired primary-then-fallback. + + Asserts the *order* via the public ``FallbackModel.models`` list — ``models[0]`` + must be the primary (``agent_default_model``) and ``models[1]`` the fallback + (``agent_fallback_model``) — so a swap or misconfiguration is caught, not just + the wrapper type. + """ settings = get_settings() settings.agent_default_model = "anthropic:claude-sonnet-4-5" settings.agent_fallback_model = "openai:gpt-4o" @@ -75,6 +81,22 @@ def test_build_agent_model_with_fallback_wraps_primary_and_fallback(): model = build_agent_model_with_fallback() assert isinstance(model, FallbackModel) + # Each member model exposes its provider via `.system` and name via + # `.model_name`; recombine to the `provider:model` identifier we configured. + wired = [f"{m.system}:{m.model_name}" for m in model.models] + assert wired == [settings.agent_default_model, settings.agent_fallback_model] + + +def test_build_agent_model_with_fallback_raises_when_primary_api_key_missing(): + """Fail fast: a non-Ollama primary with no API key raises before any wrapping.""" + settings = get_settings() + settings.agent_default_model = "anthropic:claude-sonnet-4-5" + settings.agent_fallback_model = "openai:gpt-4o" + settings.anthropic_api_key = "" + settings.openai_api_key = "test-openai-key" + + with pytest.raises(ValueError, match="Anthropic API key not configured"): + build_agent_model_with_fallback() def test_build_agent_model_with_fallback_primary_only_when_fallback_key_missing():