Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
a864ae3
feat(data): implement MarkdownGenerator age_days trigger via heuristi…
w7-mgfcode May 14, 2026
1b4447d
feat(api,docs): e2e demo pipeline + showcase script (#128) (#129)
w7-mgfcode May 14, 2026
7034e48
chore(repo): bump authlib + fastmcp to clear Socket-flagged CVEs (#13…
w7-mgfcode May 14, 2026
51d9149
feat(api,ui): in-product demo showcase page (#132) (#133)
w7-mgfcode May 17, 2026
ec212de
chore(repo): back-merge main into dev to absorb v0.2.10 release commi…
w7-mgfcode May 18, 2026
7774ef1
docs(docs): fix broken PRP-0/INITIAL-0 relative links in phase 0 doc …
w7-mgfcode May 18, 2026
3358b04
docs(repo): fix readme dev-deps command and stale .github template pl…
w7-mgfcode May 18, 2026
087bc58
docs(docs): fill DEV_GUIDE.md onboarding stub sections (#142) (#143)
w7-mgfcode May 18, 2026
d505788
docs(repo): refresh stale CLAUDE.md note, document /demo API, align P…
w7-mgfcode May 18, 2026
c77ab4c
fix(ui): chart series render black — drop hsl() wrapper on oklch char…
w7-mgfcode May 18, 2026
25e0934
fix(jobs): backtest job result keeps fold metrics, stability and base…
w7-mgfcode May 18, 2026
ba901f6
fix(ui): forecast page reads forecasts/forecast from predict job resu…
w7-mgfcode May 18, 2026
ca843c5
fix(registry): tolerate multiple matches in _find_duplicate (#146) (#…
w7-mgfcode May 18, 2026
acfbf72
fix(ui): derive TimeSeriesChart line stroke from the config key (#156…
w7-mgfcode May 18, 2026
2545b95
feat(ui): job picker dropdown on forecast and backtest pages (#154) (…
w7-mgfcode May 18, 2026
d7783e2
chore(repo): back-merge main into dev after v0.2.11 (#160) (#161)
w7-mgfcode May 18, 2026
db530d5
feat(api,ui): add AI model admin console with Ollama support (#162) (…
w7-mgfcode May 18, 2026
864a2e1
fix(agents): handle model tool-retry crash gracefully (#164) (#165)
w7-mgfcode May 18, 2026
4d61666
fix(agents): round-trip agent message history through pydantic-ai typ…
w7-mgfcode May 18, 2026
516208f
fix(agents): apply configured agent_retry_attempts to the agents (#17…
w7-mgfcode May 18, 2026
0258eea
fix(agents): complete tool-using runs — sequential session use + Prom…
w7-mgfcode May 18, 2026
5db34e9
fix(agents): correct prompt tool names and recover from tool errors (…
w7-mgfcode May 18, 2026
3f66b61
chore(repo): back-merge main into dev after v0.2.11 (#179)
w7-mgfcode May 18, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions .env.example
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,10 @@ RAG_HNSW_EF_CONSTRUCTION=64
# - openai: GPT models (gpt-4o, gpt-4o-mini, etc.)
# - google-gla: Gemini models via Google AI Studio (gemini-2-5-flash, gemini-3-flash, gemini-3-pro)
# - google-vertex: Gemini models via Vertex AI (gemini-*) [requires GCP auth]
# - ollama: local models via Ollama's OpenAI-compatible endpoint (no API key)
# e.g. AGENT_DEFAULT_MODEL=ollama:llama3.1 (requires `ollama serve` + `ollama pull llama3.1`)
# Runtime-editable: the /admin "AI Models" tab persists overrides in the
# app_config table and applies them live — no .env edit or restart needed.
AGENT_DEFAULT_MODEL=anthropic:claude-sonnet-4-5
AGENT_FALLBACK_MODEL=openai:gpt-4o

Expand Down
674 changes: 674 additions & 0 deletions PRPs/PRP-18-ai-model-admin-console.md

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ Portfolio-grade end-to-end retail demand forecasting system.
- **RAG Knowledge Base**: Postgres pgvector embeddings + evidence-grounded answers with citations
- **Agentic Layer**: PydanticAI agents for autonomous experimentation and evidence-grounded Q&A with human-in-the-loop approval
- **Data Seeder (The Forge)**: Reproducible synthetic data generator with realistic time-series patterns, scenario presets, and retail effects
- **AI Models Console**: `/admin` → AI Models tab — swap the agent LLM (incl. fully-local Ollama), the RAG embedding model, and provider API keys at runtime; changes apply live with no restart

## Quick Start

Expand Down
1 change: 1 addition & 0 deletions alembic/env.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@

# Import all models for Alembic autogenerate detection
from app.features.agents import models as agents_models # noqa: F401
from app.features.config import models as config_models # noqa: F401
from app.features.data_platform import models as data_platform_models # noqa: F401
from app.features.jobs import models as jobs_models # noqa: F401
from app.features.rag import models as rag_models # noqa: F401
Expand Down
46 changes: 46 additions & 0 deletions alembic/versions/378c112e4b32_create_app_config_table.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
"""create app_config table

Revision ID: 378c112e4b32
Revises: a8b9c0d1e234
Create Date: 2026-05-18 12:38:56.878929

"""

from __future__ import annotations

from collections.abc import Sequence

import sqlalchemy as sa
from alembic import op
from sqlalchemy.dialects import postgresql

# revision identifiers, used by Alembic.
revision: str = "378c112e4b32"
down_revision: str | None = "a8b9c0d1e234"
branch_labels: str | Sequence[str] | None = None
depends_on: str | Sequence[str] | None = None


def upgrade() -> None:
    """Create the ``app_config`` key/value override table.

    The table has three non-null columns:

    * ``key`` - string of at most 100 characters, used as the primary key.
    * ``value`` - JSONB payload.
    * ``updated_at`` - timezone-aware timestamp whose server default is
      ``now()``.
    """
    key_column = sa.Column("key", sa.String(length=100), nullable=False)
    value_column = sa.Column(
        "value",
        postgresql.JSONB(astext_type=sa.Text()),
        nullable=False,
    )
    updated_at_column = sa.Column(
        "updated_at",
        sa.DateTime(timezone=True),
        server_default=sa.text("now()"),
        nullable=False,
    )

    op.create_table(
        "app_config",
        key_column,
        value_column,
        updated_at_column,
        sa.PrimaryKeyConstraint("key"),
    )


def downgrade() -> None:
    """Undo :func:`upgrade` by dropping the ``app_config`` table."""
    table_name = "app_config"
    op.drop_table(table_name)
88 changes: 55 additions & 33 deletions app/core/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,58 @@
from pydantic import field_validator
from pydantic_settings import BaseSettings, SettingsConfigDict

# Provider prefixes accepted in a "provider:model-name" agent model identifier.
# validate_model_identifier() rejects any prefix that is not listed here.
# "ollama" runs the agent fully local via Ollama's OpenAI-compatible endpoint.
VALID_MODEL_PROVIDERS: tuple[str, ...] = (
    "anthropic",
    "openai",
    "google-gla",
    "google-vertex",
    "ollama",
)


def validate_model_identifier(v: str) -> str:
    """Check that ``v`` is a well-formed ``provider:model-name`` identifier.

    The same check backs both the ``Settings`` field validators and the
    runtime config service (``app/features/config``), so a model chosen in the
    UI is held to the same rules as one supplied through an env var.

    Args:
        v: Candidate identifier, e.g. ``anthropic:claude-sonnet-4-5`` or
            ``ollama:llama3.1``.

    Returns:
        ``v`` itself, unmodified, when it passes every check.

    Raises:
        ValueError: If ``v`` contains no ``:``, if the part after the first
            ``:`` is empty or whitespace-only, or if the part before it is not
            listed in :data:`VALID_MODEL_PROVIDERS`.
    """
    format_hint = (
        "Expected format: 'provider:model-name' "
        "(e.g., 'anthropic:claude-sonnet-4-5', 'ollama:llama3.1')"
    )

    if ":" not in v:
        raise ValueError(f"Invalid model identifier '{v}'. {format_hint}")

    # Only the first ':' separates provider from model name; any later colons
    # stay inside the model name.
    provider, _, model_name = v.partition(":")

    # An empty string and a whitespace-only string both strip to "".
    if not model_name.strip():
        raise ValueError(
            f"Invalid model identifier '{v}'. "
            "Model name after ':' cannot be empty or blank. "
            f"{format_hint}"
        )

    if provider in VALID_MODEL_PROVIDERS:
        return v

    raise ValueError(
        f"Unknown provider '{provider}'. Valid providers: {list(VALID_MODEL_PROVIDERS)}"
    )


class Settings(BaseSettings):
"""Application settings loaded from environment variables."""
Expand Down Expand Up @@ -130,39 +182,9 @@ class Settings(BaseSettings):

@field_validator("agent_default_model", "agent_fallback_model")
@classmethod
def validate_model_identifier(cls, v: str) -> str:
"""Validate model identifier format (provider:model-name).

Args:
v: Model identifier string.

Returns:
Validated model identifier.

Raises:
ValueError: If format is invalid or model name is missing.
"""
if ":" not in v:
raise ValueError(
f"Invalid model identifier '{v}'. "
"Expected format: 'provider:model-name' "
"(e.g., 'anthropic:claude-sonnet-4-5', 'google-gla:gemini-3-flash')"
)
provider, model_name = v.split(":", 1)

# Validate model name is non-empty and not just whitespace
if not model_name or not model_name.strip():
raise ValueError(
f"Invalid model identifier '{v}'. "
"Model name after ':' cannot be empty or blank. "
"Expected format: 'provider:model-name' "
"(e.g., 'anthropic:claude-sonnet-4-5', 'google-gla:gemini-3-flash')"
)

valid_providers = ["anthropic", "openai", "google-gla", "google-vertex"]
if provider not in valid_providers:
raise ValueError(f"Unknown provider '{provider}'. Valid providers: {valid_providers}")
return v
def _validate_agent_model(cls, v: str) -> str:
"""Validate agent model identifiers via :func:`validate_model_identifier`."""
return validate_model_identifier(v)

@property
def is_development(self) -> bool:
Expand Down
117 changes: 110 additions & 7 deletions app/features/agents/agents/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,16 +5,101 @@

from __future__ import annotations

import functools
import inspect
import os
from collections.abc import Awaitable, Callable
from typing import Any

import structlog
from pydantic_ai import ModelRetry
from pydantic_ai.models import Model
from pydantic_ai.models.openai import OpenAIChatModel
from pydantic_ai.providers.ollama import OllamaProvider

from app.core.config import get_settings

logger = structlog.get_logger()


def recoverable[**P, ToolReturnT](
func: Callable[P, Awaitable[ToolReturnT]],
) -> Callable[P, Awaitable[ToolReturnT]]:
"""Wrap an async agent tool so an expected ``ValueError`` becomes a ``ModelRetry``.

Input-driven failures (no data for a store, an unknown run id, a malformed
date) should let the model correct its arguments on the next turn instead of
crashing the whole run (issue #176). Other exception types still propagate
as genuine errors.

Args:
func: The async tool function to wrap.

Returns:
The wrapped tool function, signature preserved for PydanticAI schema
extraction.

Raises:
TypeError: If ``func`` is not a coroutine function. The wrapper
``await``s ``func``, so wrapping a sync callable would only fail
(with an opaque "not awaitable" error) when the tool is first
called — this guard surfaces the mistake at decoration time.
"""
if not inspect.iscoroutinefunction(func):
raise TypeError(
f"@recoverable wraps async tool functions only; "
f"{getattr(func, '__qualname__', func)!r} is not a coroutine function."
)

@functools.wraps(func)
async def wrapper(*args: P.args, **kwargs: P.kwargs) -> ToolReturnT:
try:
return await func(*args, **kwargs)
except ValueError as exc:
raise ModelRetry(str(exc)) from exc

return wrapper


def build_agent_model(identifier: str) -> str | Model:
    """Build the PydanticAI ``model`` argument for an agent identifier.

    Cloud providers accept a plain ``provider:model-name`` string. Ollama does
    not — it needs an :class:`OpenAIChatModel` bound to an :class:`OllamaProvider`
    pointed at the host's OpenAI-compatible ``/v1`` endpoint.

    Args:
        identifier: Model identifier (e.g. ``anthropic:claude-sonnet-4-5``,
            ``ollama:llama3.1``).

    Returns:
        The identifier string unchanged for every non-``ollama`` provider, or
        a configured :class:`OpenAIChatModel` for the ``ollama`` provider.

    Raises:
        ValueError: If the provider is ``ollama`` but no model name follows it
            (``"ollama"``, ``"ollama:"`` or a whitespace-only name).
    """
    # Split once; only the first ':' separates provider from model name.
    provider, _, model_name = identifier.partition(":")
    if provider != "ollama":
        return identifier

    # Fail with a clear message instead of an IndexError (bare "ollama") or a
    # model silently built with an empty name ("ollama:").
    if not model_name.strip():
        raise ValueError(
            f"Invalid Ollama model identifier '{identifier}'. "
            "Expected format: 'ollama:model-name' (e.g., 'ollama:llama3.1')"
        )

    settings = get_settings()
    # CRITICAL: Ollama's OpenAI-compatible base ends in /v1. Append it only
    # when missing so a configured URL that already includes /v1 is not
    # turned into .../v1/v1.
    base_url = settings.ollama_base_url.rstrip("/")
    if not base_url.endswith("/v1"):
        base_url += "/v1"
    return OpenAIChatModel(model_name, provider=OllamaProvider(base_url=base_url))


def reset_agent_caches() -> None:
    """Invalidate the cached agent singletons so they are rebuilt with current config.

    The config service calls this after a successful model/key change. The
    agent modules import from this module, so their reset hooks are imported
    inside the function body to avoid an import cycle.
    """
    from app.features.agents.agents.experiment import reset_experiment_agent
    from app.features.agents.agents.rag_assistant import reset_rag_assistant_agent

    # Experiment agent first, then the RAG assistant.
    for reset_hook in (reset_experiment_agent, reset_rag_assistant_agent):
        reset_hook()


def get_model_identifier() -> str:
"""Get the configured model identifier for agents.

Expand All @@ -35,6 +120,19 @@ def get_fallback_model() -> str:
return settings.agent_fallback_model


def get_agent_retries() -> int:
    """Return the retry budget for agent tool calls and output validation.

    The value is read from the ``agent_retry_attempts`` setting. PydanticAI
    defaults to 1 retry, so unless this value is passed to the agents the
    configured setting is silently ignored.

    Returns:
        Number of retry attempts for tool calls and structured-output validation.
    """
    return get_settings().agent_retry_attempts


def get_model_settings() -> dict[str, Any]:
"""Get model settings from configuration for PydanticAI Agent.

Expand Down Expand Up @@ -68,6 +166,11 @@ def validate_api_key_for_model(model: str) -> None:
settings = get_settings()
provider = model.split(":")[0]

if provider == "ollama":
# Local Ollama runs without an API key — nothing to validate or export.
logger.debug("agents.api_key_validated", provider=provider, model=model)
return

if provider == "anthropic":
if not settings.anthropic_api_key:
raise ValueError(
Expand Down Expand Up @@ -123,13 +226,13 @@ def requires_approval(action_name: str) -> bool:
"""

TOOL_USAGE_INSTRUCTIONS = """
TOOL USAGE:
- Use list_runs to find existing experiments
- Use run_backtest to evaluate model performance
- Use compare_runs to analyze differences between runs
- Use create_alias to deploy successful models (requires approval)
- Use archive_run to clean up old experiments (requires approval)
- Use retrieve_context to find documentation
TOOL USAGE (call tools by these EXACT names):
- Use tool_list_runs to find existing experiments
- Use tool_run_backtest to evaluate model performance
- Use tool_compare_backtest_results to compare two backtest results
- Use tool_compare_runs to analyze differences between registered runs
- Use tool_create_alias to deploy successful models (requires approval)
- Use tool_archive_run to clean up old experiments (requires approval)
"""

SAFETY_INSTRUCTIONS = """
Expand Down
Loading