Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions .env.example
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,10 @@ RAG_HNSW_EF_CONSTRUCTION=64
# - openai: GPT models (gpt-4o, gpt-4o-mini, etc.)
# - google-gla: Gemini models via Google AI Studio (gemini-2-5-flash, gemini-3-flash, gemini-3-pro)
# - google-vertex: Gemini models via Vertex AI (gemini-*) [requires GCP auth]
# - ollama: local models via Ollama's OpenAI-compatible endpoint (no API key)
# e.g. AGENT_DEFAULT_MODEL=ollama:llama3.1 (requires `ollama serve` + `ollama pull llama3.1`)
# Runtime-editable: the /admin "AI Models" tab persists overrides in the
# app_config table and applies them live — no .env edit or restart needed.
AGENT_DEFAULT_MODEL=anthropic:claude-sonnet-4-5
AGENT_FALLBACK_MODEL=openai:gpt-4o

Expand Down
674 changes: 674 additions & 0 deletions PRPs/PRP-18-ai-model-admin-console.md

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ Portfolio-grade end-to-end retail demand forecasting system.
- **RAG Knowledge Base**: Postgres pgvector embeddings + evidence-grounded answers with citations
- **Agentic Layer**: PydanticAI agents for autonomous experimentation and evidence-grounded Q&A with human-in-the-loop approval
- **Data Seeder (The Forge)**: Reproducible synthetic data generator with realistic time-series patterns, scenario presets, and retail effects
- **AI Models Console**: `/admin` → AI Models tab — swap the agent LLM (incl. fully-local Ollama), the RAG embedding model, and provider API keys at runtime; changes apply live with no restart

## Quick Start

Expand Down
1 change: 1 addition & 0 deletions alembic/env.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@

# Import all models for Alembic autogenerate detection
from app.features.agents import models as agents_models # noqa: F401
from app.features.config import models as config_models # noqa: F401
from app.features.data_platform import models as data_platform_models # noqa: F401
from app.features.jobs import models as jobs_models # noqa: F401
from app.features.rag import models as rag_models # noqa: F401
Expand Down
46 changes: 46 additions & 0 deletions alembic/versions/378c112e4b32_create_app_config_table.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
"""create app_config table

Revision ID: 378c112e4b32
Revises: a8b9c0d1e234
Create Date: 2026-05-18 12:38:56.878929

"""

from __future__ import annotations

from collections.abc import Sequence

import sqlalchemy as sa
from alembic import op
from sqlalchemy.dialects import postgresql

# revision identifiers, used by Alembic.
revision: str = "378c112e4b32"
down_revision: str | None = "a8b9c0d1e234"
branch_labels: str | Sequence[str] | None = None
depends_on: str | Sequence[str] | None = None


def upgrade() -> None:
    """Apply migration - create the ``app_config`` key/value override table.

    Each row holds a string ``key`` (the primary key, up to 100 characters),
    a non-null JSONB ``value``, and a timezone-aware ``updated_at`` timestamp
    whose server-side default is ``now()``.
    """
    key_column = sa.Column("key", sa.String(length=100), nullable=False)
    value_column = sa.Column(
        "value",
        postgresql.JSONB(astext_type=sa.Text()),
        nullable=False,
    )
    updated_at_column = sa.Column(
        "updated_at",
        sa.DateTime(timezone=True),
        server_default=sa.text("now()"),
        nullable=False,
    )

    op.create_table(
        "app_config",
        key_column,
        value_column,
        updated_at_column,
        sa.PrimaryKeyConstraint("key"),
    )


def downgrade() -> None:
    """Revert migration - remove the ``app_config`` table created by ``upgrade``."""
    table_name = "app_config"
    op.drop_table(table_name)
88 changes: 55 additions & 33 deletions app/core/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,58 @@
from pydantic import field_validator
from pydantic_settings import BaseSettings, SettingsConfigDict

# Provider prefixes accepted in a "provider:model-name" agent model identifier.
# "ollama" runs the agent fully local via Ollama's OpenAI-compatible endpoint.
VALID_MODEL_PROVIDERS: tuple[str, ...] = (
    "anthropic",
    "openai",
    "google-gla",
    "google-vertex",
    "ollama",
)


def validate_model_identifier(v: str) -> str:
    """Check that ``v`` is a well-formed ``provider:model-name`` identifier.

    The same check backs both the ``Settings`` field validators and the
    runtime config service (``app/features/config``), so a model chosen in
    the UI is held to the same rules as one supplied via an env var.

    Args:
        v: Candidate identifier, e.g. ``anthropic:claude-sonnet-4-5`` or
            ``ollama:llama3.1``. Only the first ``:`` separates the provider
            from the model name.

    Returns:
        ``v`` exactly as passed in.

    Raises:
        ValueError: If ``v`` contains no ``:``, if the part after the first
            ``:`` is empty or whitespace-only, or if the provider prefix is
            not listed in :data:`VALID_MODEL_PROVIDERS`.
    """
    format_hint = (
        "Expected format: 'provider:model-name' "
        "(e.g., 'anthropic:claude-sonnet-4-5', 'ollama:llama3.1')"
    )

    if ":" not in v:
        raise ValueError(f"Invalid model identifier '{v}'. {format_hint}")

    provider, _, model_name = v.partition(":")

    # An empty string and a whitespace-only string both strip to "".
    if not model_name.strip():
        raise ValueError(
            f"Invalid model identifier '{v}'. "
            "Model name after ':' cannot be empty or blank. "
            f"{format_hint}"
        )

    if provider in VALID_MODEL_PROVIDERS:
        return v

    raise ValueError(
        f"Unknown provider '{provider}'. Valid providers: {list(VALID_MODEL_PROVIDERS)}"
    )


class Settings(BaseSettings):
"""Application settings loaded from environment variables."""
Expand Down Expand Up @@ -130,39 +182,9 @@ class Settings(BaseSettings):

@field_validator("agent_default_model", "agent_fallback_model")
@classmethod
def validate_model_identifier(cls, v: str) -> str:
    """Validate model identifier format (provider:model-name).

    The accepted provider list includes ``ollama`` so that this validator
    agrees with the provider prefixes documented for ``AGENT_DEFAULT_MODEL``;
    without it a local ``ollama:<model>`` identifier would be rejected.

    Args:
        v: Model identifier string. Only the first ``:`` separates the
            provider from the model name.

    Returns:
        Validated model identifier, unchanged.

    Raises:
        ValueError: If the ``:`` separator is missing, the model name is
            empty or whitespace-only, or the provider is not recognised.
    """
    if ":" not in v:
        raise ValueError(
            f"Invalid model identifier '{v}'. "
            "Expected format: 'provider:model-name' "
            "(e.g., 'anthropic:claude-sonnet-4-5', 'google-gla:gemini-3-flash')"
        )
    provider, model_name = v.split(":", 1)

    # Validate model name is non-empty and not just whitespace
    if not model_name or not model_name.strip():
        raise ValueError(
            f"Invalid model identifier '{v}'. "
            "Model name after ':' cannot be empty or blank. "
            "Expected format: 'provider:model-name' "
            "(e.g., 'anthropic:claude-sonnet-4-5', 'google-gla:gemini-3-flash')"
        )

    # "ollama" is the keyless local provider; it must be accepted here too.
    valid_providers = ["anthropic", "openai", "google-gla", "google-vertex", "ollama"]
    if provider not in valid_providers:
        raise ValueError(f"Unknown provider '{provider}'. Valid providers: {valid_providers}")
    return v
def _validate_agent_model(cls, v: str) -> str:
    """Delegate agent model validation to :func:`validate_model_identifier`.

    Args:
        v: Agent model identifier to check.

    Returns:
        Whatever :func:`validate_model_identifier` returns for ``v``.
    """
    validated = validate_model_identifier(v)
    return validated

@property
def is_development(self) -> bool:
Expand Down
47 changes: 47 additions & 0 deletions app/features/agents/agents/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,54 @@
from typing import Any

import structlog
from pydantic_ai.models import Model
from pydantic_ai.models.openai import OpenAIChatModel
from pydantic_ai.providers.ollama import OllamaProvider

from app.core.config import get_settings

logger = structlog.get_logger()


def build_agent_model(identifier: str) -> str | Model:
    """Build the PydanticAI ``model`` argument for an agent identifier.

    Cloud providers accept a plain ``provider:model-name`` string. Ollama does
    not — it needs an :class:`OpenAIChatModel` bound to an :class:`OllamaProvider`
    pointed at the host's OpenAI-compatible ``/v1`` endpoint.

    Args:
        identifier: Model identifier (e.g. ``anthropic:claude-sonnet-4-5``,
            ``ollama:llama3.1``).

    Returns:
        The identifier string unchanged for every provider other than
        ``ollama``, or a configured :class:`OpenAIChatModel` for ``ollama``.

    Raises:
        ValueError: If the provider is ``ollama`` but no (non-blank) model
            name follows the ``:`` separator.
    """
    # partition() never raises, unlike split(":", 1)[1] on a colon-less string.
    provider, _, model_name = identifier.partition(":")
    if provider != "ollama":
        return identifier

    if not model_name.strip():
        raise ValueError(
            f"Invalid model identifier '{identifier}'. "
            "The 'ollama' provider requires a model name (e.g., 'ollama:llama3.1')"
        )

    settings = get_settings()
    # CRITICAL: Ollama's OpenAI-compatible base ends in /v1. Strip trailing
    # slashes and an already-present /v1 first so a base URL configured as
    # ".../v1" or ".../v1/" does not turn into ".../v1/v1".
    host = settings.ollama_base_url.rstrip("/").removesuffix("/v1").rstrip("/")
    base_url = host + "/v1"
    return OpenAIChatModel(model_name, provider=OllamaProvider(base_url=base_url))


def reset_agent_caches() -> None:
    """Clear every cached agent singleton so the next build re-reads config.

    Intended to be called by the config service once a model/key change has
    succeeded. The reset helpers are imported inside the function because the
    agent modules import from this module, and a top-level import would
    create a cycle.
    """
    from app.features.agents.agents.experiment import reset_experiment_agent
    from app.features.agents.agents.rag_assistant import reset_rag_assistant_agent

    # Experiment agent first, then the RAG assistant.
    for reset_cached_agent in (reset_experiment_agent, reset_rag_assistant_agent):
        reset_cached_agent()


def get_model_identifier() -> str:
"""Get the configured model identifier for agents.

Expand Down Expand Up @@ -68,6 +110,11 @@ def validate_api_key_for_model(model: str) -> None:
settings = get_settings()
provider = model.split(":")[0]

if provider == "ollama":
# Local Ollama runs without an API key — nothing to validate or export.
logger.debug("agents.api_key_validated", provider=provider, model=model)
return

if provider == "anthropic":
if not settings.anthropic_api_key:
raise ValueError(
Expand Down
16 changes: 14 additions & 2 deletions app/features/agents/agents/experiment.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
SAFETY_INSTRUCTIONS,
SYSTEM_PROMPT_HEADER,
TOOL_USAGE_INSTRUCTIONS,
build_agent_model,
get_model_identifier,
get_model_settings,
requires_approval,
Expand Down Expand Up @@ -74,8 +75,9 @@ def create_experiment_agent() -> Agent[AgentDeps, ExperimentReport]:
Returns:
Configured Agent instance with tools registered.
"""
model = get_model_identifier()
validate_api_key_for_model(model) # Fail-fast validation
identifier = get_model_identifier()
validate_api_key_for_model(identifier) # Fail-fast validation
model = build_agent_model(identifier) # str for cloud, Model object for ollama

agent: Agent[AgentDeps, ExperimentReport] = Agent(
model=model,
Expand Down Expand Up @@ -351,3 +353,13 @@ def get_experiment_agent() -> Agent[AgentDeps, ExperimentReport]:
if _experiment_agent is None:
_experiment_agent = create_experiment_agent()
return _experiment_agent


def reset_experiment_agent() -> None:
    """Forget the cached experiment agent.

    Sets the module-level ``_experiment_agent`` back to ``None``. After a
    runtime model/key change this lets the next ``get_*`` call build a fresh
    agent, so the new configuration applies without restarting the process.
    """
    global _experiment_agent
    # None is the "not built yet" marker the getter checks for.
    _experiment_agent = None
16 changes: 14 additions & 2 deletions app/features/agents/agents/rag_assistant.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
from app.features.agents.agents.base import (
SAFETY_INSTRUCTIONS,
SYSTEM_PROMPT_HEADER,
build_agent_model,
get_model_identifier,
get_model_settings,
validate_api_key_for_model,
Expand Down Expand Up @@ -75,8 +76,9 @@ def create_rag_assistant_agent() -> Agent[AgentDeps, RAGAnswer]:
Returns:
Configured Agent instance with tools registered.
"""
model = get_model_identifier()
validate_api_key_for_model(model) # Fail-fast validation
identifier = get_model_identifier()
validate_api_key_for_model(identifier) # Fail-fast validation
model = build_agent_model(identifier) # str for cloud, Model object for ollama

agent: Agent[AgentDeps, RAGAnswer] = Agent(
model=model,
Expand Down Expand Up @@ -209,3 +211,13 @@ def get_rag_assistant_agent() -> Agent[AgentDeps, RAGAnswer]:
if _rag_assistant_agent is None:
_rag_assistant_agent = create_rag_assistant_agent()
return _rag_assistant_agent


def reset_rag_assistant_agent() -> None:
    """Forget the cached RAG assistant agent.

    Sets the module-level ``_rag_assistant_agent`` back to ``None``. After a
    runtime model/key change this lets the next ``get_*`` call build a fresh
    agent, so the new configuration applies without restarting the process.
    """
    global _rag_assistant_agent
    # None is the "not built yet" marker the getter checks for.
    _rag_assistant_agent = None
43 changes: 43 additions & 0 deletions app/features/agents/tests/test_base.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
"""Unit tests for agent base helpers (Ollama-aware model factory)."""

from collections.abc import Iterator

import pytest
from pydantic_ai.models.openai import OpenAIChatModel

from app.core.config import get_settings
from app.features.agents.agents.base import build_agent_model, validate_api_key_for_model


@pytest.fixture(autouse=True)
def _reset_settings() -> Iterator[None]:
    """Clear the ``get_settings`` cache before and after every test.

    Tests in this module mutate API keys on the settings object; clearing the
    cache on both sides keeps those mutations from leaking between tests.
    """
    clear_settings_cache = get_settings.cache_clear
    clear_settings_cache()
    yield
    clear_settings_cache()


def test_build_agent_model_cloud_returns_string():
    """An anthropic identifier passes through unchanged (plain-string Agent path)."""
    identifier = "anthropic:claude-sonnet-4-5"
    assert build_agent_model(identifier) == identifier


def test_build_agent_model_openai_returns_string():
    """An openai identifier likewise passes through unchanged."""
    identifier = "openai:gpt-4o"
    assert build_agent_model(identifier) == identifier


def test_build_agent_model_ollama_returns_model_object():
    """An ollama identifier yields an OpenAIChatModel instance, not a string."""
    assert isinstance(build_agent_model("ollama:llama3.1"), OpenAIChatModel)


def test_validate_api_key_for_model_ollama_skips_key_check():
    """With every cloud key blanked, validating an ollama model must not raise."""
    settings = get_settings()
    # Blank out all cloud provider keys on the cached settings object.
    for key_attr in ("anthropic_api_key", "openai_api_key", "google_api_key"):
        setattr(settings, key_attr, "")
    # The ollama provider needs no API key, so this returns without raising.
    validate_api_key_for_model("ollama:llama3.1")
6 changes: 6 additions & 0 deletions app/features/config/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
"""Runtime-editable application configuration slice.

Exposes the ``app_config`` key/value override store, the ``/config`` REST
surface, and the service that applies persisted overrides onto the live
``Settings`` singleton (agent LLM model, RAG embedding model, provider keys).
"""
Loading