From f8a0f1cd260c975195471ab6b7c8d90da75b8153 Mon Sep 17 00:00:00 2001 From: Gabor Szabo Date: Mon, 18 May 2026 18:38:12 +0200 Subject: [PATCH 1/4] feat(api): expose agent session limits on GET /config/ai (#185) --- app/features/config/schemas.py | 7 +++++ app/features/config/service.py | 5 ++++ app/features/config/tests/test_routes.py | 21 +++++++++++++++ app/features/config/tests/test_schemas.py | 32 +++++++++++++++++++++++ app/features/config/tests/test_service.py | 18 +++++++++++++ 5 files changed, 83 insertions(+) diff --git a/app/features/config/schemas.py b/app/features/config/schemas.py index cca1ae0d..60d75c1e 100644 --- a/app/features/config/schemas.py +++ b/app/features/config/schemas.py @@ -72,6 +72,13 @@ class AIModelConfig(BaseModel): agent_thinking_budget: int | None = Field( description="Extended-reasoning token budget (Gemini 2.5+); None disables it" ) + agent_max_tool_calls: int = Field(description="Per-session tool-call cap") + agent_timeout_seconds: int = Field(description="Per-run agent timeout (seconds)") + agent_retry_attempts: int = Field(description="Agent retry attempts on failure") + agent_session_ttl_minutes: int = Field(description="Session time-to-live (minutes)") + agent_require_approval: list[str] = Field( + description="Tool names gated by human-in-the-loop approval" + ) rag_embedding_provider: str = Field(description="RAG embedding provider: 'openai' | 'ollama'") rag_embedding_model: str = Field(description="OpenAI embedding model name") rag_embedding_dimension: int = Field(description="Embedding vector dimension") diff --git a/app/features/config/service.py b/app/features/config/service.py index a9c5da6e..b4967ef8 100644 --- a/app/features/config/service.py +++ b/app/features/config/service.py @@ -144,6 +144,11 @@ async def get_effective_config(db: AsyncSession) -> AIModelConfig: agent_temperature=settings.agent_temperature, agent_max_tokens=settings.agent_max_tokens, agent_thinking_budget=settings.agent_thinking_budget, + 
agent_max_tool_calls=settings.agent_max_tool_calls, + agent_timeout_seconds=settings.agent_timeout_seconds, + agent_retry_attempts=settings.agent_retry_attempts, + agent_session_ttl_minutes=settings.agent_session_ttl_minutes, + agent_require_approval=list(settings.agent_require_approval), rag_embedding_provider=settings.rag_embedding_provider, rag_embedding_model=settings.rag_embedding_model, rag_embedding_dimension=settings.rag_embedding_dimension, diff --git a/app/features/config/tests/test_routes.py b/app/features/config/tests/test_routes.py index 2f064afe..4675d145 100644 --- a/app/features/config/tests/test_routes.py +++ b/app/features/config/tests/test_routes.py @@ -27,6 +27,11 @@ def _sample_config( agent_temperature=agent_temperature, agent_max_tokens=4096, agent_thinking_budget=None, + agent_max_tool_calls=10, + agent_timeout_seconds=120, + agent_retry_attempts=3, + agent_session_ttl_minutes=120, + agent_require_approval=["create_alias", "archive_run"], rag_embedding_provider="openai", rag_embedding_model="text-embedding-3-small", rag_embedding_dimension=1536, @@ -65,6 +70,22 @@ def test_returns_effective_config(self, client): assert data["agent_default_model"] == "anthropic:claude-sonnet-4-5" assert data["api_keys"][0]["masked"] == "sk-ant-…1234" + def test_returns_agent_session_limits(self, client): + """GET /config/ai exposes the read-only agent session-limit fields.""" + with patch( + "app.features.config.routes.service.get_effective_config", + new=AsyncMock(return_value=_sample_config()), + ): + response = client.get("/config/ai") + + assert response.status_code == 200 + data = response.json() + assert data["agent_max_tool_calls"] == 10 + assert data["agent_timeout_seconds"] == 120 + assert data["agent_retry_attempts"] == 3 + assert data["agent_session_ttl_minutes"] == 120 + assert data["agent_require_approval"] == ["create_alias", "archive_run"] + class TestUpdateAIConfig: """Tests for PATCH /config/ai.""" diff --git 
a/app/features/config/tests/test_schemas.py b/app/features/config/tests/test_schemas.py index c11232df..c1fd8712 100644 --- a/app/features/config/tests/test_schemas.py +++ b/app/features/config/tests/test_schemas.py @@ -105,6 +105,11 @@ def test_ai_model_config_constructs(self): agent_temperature=0.1, agent_max_tokens=4096, agent_thinking_budget=None, + agent_max_tool_calls=10, + agent_timeout_seconds=120, + agent_retry_attempts=3, + agent_session_ttl_minutes=120, + agent_require_approval=["create_alias", "archive_run"], rag_embedding_provider="openai", rag_embedding_model="text-embedding-3-small", rag_embedding_dimension=1536, @@ -115,6 +120,33 @@ def test_ai_model_config_constructs(self): ) assert cfg.agent_thinking_budget is None + def test_ai_model_config_carries_agent_limits(self): + """AIModelConfig exposes the read-only agent session-limit fields.""" + cfg = AIModelConfig( + agent_default_model="anthropic:claude-sonnet-4-5", + agent_fallback_model="openai:gpt-4o", + agent_temperature=0.1, + agent_max_tokens=4096, + agent_thinking_budget=None, + agent_max_tool_calls=10, + agent_timeout_seconds=120, + agent_retry_attempts=3, + agent_session_ttl_minutes=120, + agent_require_approval=["create_alias", "archive_run"], + rag_embedding_provider="openai", + rag_embedding_model="text-embedding-3-small", + rag_embedding_dimension=1536, + ollama_base_url="http://localhost:11434", + ollama_embedding_model="nomic-embed-text", + api_keys=[], + overridden_keys=[], + ) + assert cfg.agent_max_tool_calls == 10 + assert cfg.agent_timeout_seconds == 120 + assert cfg.agent_retry_attempts == 3 + assert cfg.agent_session_ttl_minutes == 120 + assert cfg.agent_require_approval == ["create_alias", "archive_run"] + def test_api_key_status(self): """ApiKeyStatus carries presence + a masked preview.""" status = ApiKeyStatus(provider="anthropic", is_set=True, masked="sk-ant-…3f9a") diff --git a/app/features/config/tests/test_service.py b/app/features/config/tests/test_service.py index 
edaf5866..e97009c1 100644 --- a/app/features/config/tests/test_service.py +++ b/app/features/config/tests/test_service.py @@ -97,6 +97,24 @@ async def test_get_effective_config_masks_secrets(self): assert anthropic.masked is not None assert "supersecretvalue" not in config.model_dump_json() + @pytest.mark.asyncio + async def test_get_effective_config_maps_agent_limits(self): + """The agent session-limit fields are sourced from the Settings singleton.""" + settings = get_settings() + settings.agent_max_tool_calls = 7 + settings.agent_timeout_seconds = 99 + settings.agent_retry_attempts = 2 + settings.agent_session_ttl_minutes = 45 + settings.agent_require_approval = ["create_alias"] + + config = await service.get_effective_config(_mock_db()) + + assert config.agent_max_tool_calls == 7 + assert config.agent_timeout_seconds == 99 + assert config.agent_retry_attempts == 2 + assert config.agent_session_ttl_minutes == 45 + assert config.agent_require_approval == ["create_alias"] + # ============================================================================= # Unit tests — update_config From 60d1c948900a330db4486d7d570ba933e657aa71 Mon Sep 17 00:00:00 2001 From: Gabor Szabo Date: Mon, 18 May 2026 18:38:18 +0200 Subject: [PATCH 2/4] feat(ui): add knowledge and agent guide pages with nav (#185) --- frontend/src/App.tsx | 18 ++ frontend/src/hooks/use-rag-sources.ts | 18 +- frontend/src/lib/constants.ts | 4 + frontend/src/lib/knowledge-utils.ts | 43 +++ frontend/src/pages/chat.tsx | 12 +- frontend/src/pages/guide.tsx | 362 ++++++++++++++++++++++++++ frontend/src/pages/knowledge.tsx | 344 ++++++++++++++++++++++++ frontend/src/types/api.ts | 34 +++ 8 files changed, 832 insertions(+), 3 deletions(-) create mode 100644 frontend/src/lib/knowledge-utils.ts create mode 100644 frontend/src/pages/guide.tsx create mode 100644 frontend/src/pages/knowledge.tsx diff --git a/frontend/src/App.tsx b/frontend/src/App.tsx index c853e202..b18eb325 100644 --- a/frontend/src/App.tsx +++ 
b/frontend/src/App.tsx @@ -18,6 +18,8 @@ const JobsMonitorPage = lazy(() => import('@/pages/explorer/jobs')) const ForecastPage = lazy(() => import('@/pages/visualize/forecast')) const BacktestPage = lazy(() => import('@/pages/visualize/backtest')) const ChatPage = lazy(() => import('@/pages/chat')) +const KnowledgePage = lazy(() => import('@/pages/knowledge')) +const GuidePage = lazy(() => import('@/pages/guide')) const AdminPage = lazy(() => import('@/pages/admin')) function PageLoader() { @@ -103,6 +105,14 @@ function App() { } /> + }> + + + } + /> } /> + }> + + + } + /> + api('/rag/retrieve', { method: 'POST', body }), + }) +} diff --git a/frontend/src/lib/constants.ts b/frontend/src/lib/constants.ts index ff5acf62..b95c4466 100644 --- a/frontend/src/lib/constants.ts +++ b/frontend/src/lib/constants.ts @@ -13,7 +13,9 @@ export const ROUTES = { FORECAST: '/visualize/forecast', BACKTEST: '/visualize/backtest', }, + KNOWLEDGE: '/knowledge', CHAT: '/chat', + GUIDE: '/guide', ADMIN: '/admin', } as const @@ -38,7 +40,9 @@ export const NAV_ITEMS = [ { label: 'Backtest Results', href: ROUTES.VISUALIZE.BACKTEST }, ], }, + { label: 'Knowledge', href: ROUTES.KNOWLEDGE }, { label: 'Chat', href: ROUTES.CHAT }, + { label: 'Agent Guide', href: ROUTES.GUIDE }, { label: 'Admin', href: ROUTES.ADMIN }, ] as const diff --git a/frontend/src/lib/knowledge-utils.ts b/frontend/src/lib/knowledge-utils.ts new file mode 100644 index 00000000..386bac4d --- /dev/null +++ b/frontend/src/lib/knowledge-utils.ts @@ -0,0 +1,43 @@ +// Pure, React-free helpers for the Knowledge page. Kept separate from the page +// component so they are cheap to unit-test (see knowledge-utils.test.ts) — +// mirrors the use-demo-pipeline.ts / status-utils.ts precedent. +import type { RagSource, ChunkResult } from '@/types/api' + +/** + * Convert a relevance score (0..1) into a display percentage string. + * Out-of-range scores are clamped; non-finite scores render as "0%": + * 0.873 -> "87%", 1.4 -> "100%", -0.2 -> "0%", NaN / Infinity -> "0%". 
+ */ +export function formatRelevance(score: number): string { + const safe = Number.isFinite(score) ? score : 0 + const clamped = Math.min(1, Math.max(0, safe)) + return `${Math.round(clamped * 100)}%` +} + +/** + * Group indexed sources by their source_type (e.g. "markdown", "openapi"). + * An empty array yields an empty object; a missing type falls back to "unknown". + */ +export function groupSourcesByType(sources: RagSource[]): Record { + const groups: Record = {} + for (const source of sources) { + const key = source.source_type || 'unknown' + if (!groups[key]) { + groups[key] = [] + } + groups[key].push(source) + } + return groups +} + +/** + * Single-line excerpt of a chunk's content for a result card. Collapses runs of + * whitespace and truncates with an ellipsis past `maxChars` (default 240). + */ +export function chunkExcerpt(chunk: ChunkResult, maxChars = 240): string { + const collapsed = chunk.content.replace(/\s+/g, ' ').trim() + if (collapsed.length <= maxChars) { + return collapsed + } + return `${collapsed.slice(0, maxChars).trimEnd()}…` +} diff --git a/frontend/src/pages/chat.tsx b/frontend/src/pages/chat.tsx index f1da0a15..1a987401 100644 --- a/frontend/src/pages/chat.tsx +++ b/frontend/src/pages/chat.tsx @@ -1,5 +1,6 @@ import { useState, useRef, useEffect, useCallback } from 'react' -import { Bot, Plus } from 'lucide-react' +import { Link } from 'react-router-dom' +import { Bot, Plus, BookOpen } from 'lucide-react' import { useWebSocket } from '@/hooks/use-websocket' import { ChatMessage, StreamingMessage } from '@/components/chat/chat-message' import { ChatInput, ApprovalPrompt } from '@/components/chat/chat-input' @@ -15,7 +16,7 @@ import { SelectValue, } from '@/components/ui/select' import { api } from '@/lib/api' -import { WS_URL } from '@/lib/constants' +import { WS_URL, ROUTES } from '@/lib/constants' import type { ChatMessage as ChatMessageType, AgentStreamEvent, AgentType, AgentSession } from '@/types/api' export default function 
ChatPage() { @@ -217,6 +218,13 @@ export default function ChatPage() { > {isCreatingSession ? 'Creating...' : 'Start Session'} + + + New here? Read the Agent Guide + diff --git a/frontend/src/pages/guide.tsx b/frontend/src/pages/guide.tsx new file mode 100644 index 00000000..787a24e8 --- /dev/null +++ b/frontend/src/pages/guide.tsx @@ -0,0 +1,362 @@ +import { Link } from 'react-router-dom' +import { + Bot, + Search, + FlaskConical, + ShieldCheck, + Workflow, + Gauge, + MessageSquare, + ArrowRight, + Settings, + AlertTriangle, +} from 'lucide-react' +import { useAIConfig } from '@/hooks/use-config' +import { Button } from '@/components/ui/button' +import { Card, CardContent, CardDescription, CardHeader, CardTitle } from '@/components/ui/card' +import { Badge } from '@/components/ui/badge' +import { Skeleton } from '@/components/ui/skeleton' +import { + Table, + TableBody, + TableCell, + TableHead, + TableHeader, + TableRow, +} from '@/components/ui/table' +import { ROUTES } from '@/lib/constants' + +// Tool inventories — kept verbatim in sync with the agent definitions +// (app/features/agents/agents/experiment.py + rag_assistant.py). The +// `approval` flag mirrors agent_require_approval (create_alias / archive_run). +interface ToolInfo { + name: string + desc: string + approval?: boolean +} + +const RAG_TOOLS: ToolInfo[] = [ + { name: 'tool_retrieve_context', desc: 'Semantic search over the indexed knowledge base.' }, + { name: 'tool_list_sources', desc: 'List indexed sources and chunk counts.' }, + { name: 'tool_format_citations', desc: 'Turn retrieval results into stable citations.' }, + { name: 'tool_check_evidence', desc: 'Decide whether the evidence is sufficient to answer.' }, +] + +const EXPERIMENT_TOOLS: ToolInfo[] = [ + { name: 'tool_list_runs', desc: 'Browse existing model runs in the registry.' }, + { name: 'tool_get_run', desc: 'Fetch the full detail of one model run.' 
}, + { name: 'tool_run_backtest', desc: 'Run a time-series backtest for a store / product.' }, + { + name: 'tool_compare_backtest_results', + desc: 'Compare two backtest results and recommend a winner.', + }, + { name: 'tool_compare_runs', desc: 'Diff two registered runs (config + metrics).' }, + { name: 'tool_create_alias', desc: 'Promote a successful run to a deployment alias.', approval: true }, + { name: 'tool_archive_run', desc: 'Archive a model run.', approval: true }, +] + +const SESSION_STEPS = [ + 'Open Chat, pick an agent type, and click "Start Session".', + 'Type a message and send it.', + 'Watch the reply stream token-by-token; tool calls appear as chips (start → end).', + 'If the agent proposes a guarded action, an approval prompt appears — approve or reject it.', + '"New Session" starts a fresh conversation with a clean history.', +] + +const RAG_PROMPTS = [ + 'What forecasting models does ForecastLabAI support?', + 'How does backtesting prevent data leakage?', + 'What is in your knowledge base?', +] + +const EXPERIMENT_PROMPTS = [ + 'Backtest a seasonal_naive model for store 1 product 1 over the last 90 days and compare it to the naive baseline.', + 'List the most recent model runs and tell me which has the lowest WAPE.', +] + +export default function GuidePage() { + const { data: config, isLoading: configLoading } = useAIConfig() + + return ( +
+ {/* Header */} +
+

Agent Guide

+

How to use the Chat agents.

+
+ + {/* Live model callout */} + {config && ( +
+ + + Agents currently run on{' '} + {config.agent_default_model}. + + + + Manage in Admin → AI Models + +
+ )} + + {/* The two agents */} +
+ + See what it can answer from → Knowledge + + + } + /> + + See the runs it acts on → Model Runs + + + } + /> +
+ + {/* How a chat session works */} + + +
+ + How a chat session works +
+ + Each session is one conversation. Replies stream over a WebSocket — text arrives as{' '} + text_delta events and tool + calls as tool_call_start /{' '} + tool_call_end events. + +
+ +
    + {SESSION_STEPS.map((step, i) => ( +
  1. + + {i + 1} + + {step} +
  2. + ))} +
+
+
+ + {/* Human-in-the-loop approval */} + + +
+ + Human-in-the-loop approval +
+ + Tools that change registry state never run unattended. + +
+ +

+ When an agent calls a guarded tool, the run pauses and the Chat page shows an approval + prompt. The action only proceeds once you approve it; rejecting it returns control to + the agent. This keeps every mutation of the model registry under human control. +

+
+ Approval-gated tools: + {config ? ( + config.agent_require_approval.map((tool) => ( + + + {tool} + + )) + ) : ( + + )} +
+
+
+ + {/* Session limits */} + + +
+ + Session limits +
+ + Live from GET /config/ai. + These are the configured defaults — an operator can change them in Admin → AI Models. + +
+ + {configLoading && } + {config && ( + + + + Limit + Value + + + + + Token budget per session + {config.agent_max_tokens.toLocaleString()} tokens + + + Tool calls per session + {config.agent_max_tool_calls} + + + Per-run timeout + {config.agent_timeout_seconds} seconds + + + Retry attempts + {config.agent_retry_attempts} + + + Session time-to-live + {config.agent_session_ttl_minutes} minutes + + + Approval-gated tools + + {config.agent_require_approval.join(', ') || 'none'} + + + +
+ )} + {!configLoading && !config && ( +

+ Session limits are unavailable right now — the configuration endpoint could not be + reached. +

+ )} +
+
+ + {/* Example prompts */} + + +
+ + Example prompts +
+ Copy one of these into Chat to get started. +
+ + + + +
+ + {/* CTA */} +
+ +
+
+ ) +} + +function AgentCard({ + icon: Icon, + title, + agentId, + purpose, + tools, + footer, +}: { + icon: React.ComponentType<{ className?: string }> + title: string + agentId: string + purpose: string + tools: ToolInfo[] + footer: React.ReactNode +}) { + return ( + + +
+ + {title} + + {agentId} + +
+ {purpose} +
+ +
+

Tools

+
    + {tools.map((tool) => ( +
  • +
    + {tool.name} + {tool.approval && ( + + + requires approval + + )} +
    +

    {tool.desc}

    +
  • + ))} +
+
+
{footer}
+
+
+ ) +} + +function PromptList({ title, prompts }: { title: string; prompts: string[] }) { + return ( +
+

{title}

+ {prompts.map((prompt) => ( + + {prompt} + + ))} +
+ ) +} diff --git a/frontend/src/pages/knowledge.tsx b/frontend/src/pages/knowledge.tsx new file mode 100644 index 00000000..1335ae88 --- /dev/null +++ b/frontend/src/pages/knowledge.tsx @@ -0,0 +1,344 @@ +import { useState } from 'react' +import { Link } from 'react-router-dom' +import { format } from 'date-fns' +import { + Library, + Search, + FileText, + Loader2, + Store, + Package, + TrendingUp, + CalendarRange, + Database, + Tag, + ArrowRight, + FolderOpen, +} from 'lucide-react' +import { useRagSources, useRetrieve } from '@/hooks/use-rag-sources' +import { useSeederStatus } from '@/hooks/use-seeder' +import { useRuns, useAliases } from '@/hooks/use-runs' +import { LoadingState } from '@/components/common/loading-state' +import { ErrorDisplay } from '@/components/common/error-display' +import { Button } from '@/components/ui/button' +import { Card, CardContent, CardDescription, CardHeader, CardTitle } from '@/components/ui/card' +import { Badge } from '@/components/ui/badge' +import { Input } from '@/components/ui/input' +import { Skeleton } from '@/components/ui/skeleton' +import { ApiError, getErrorMessage } from '@/lib/api' +import { ROUTES } from '@/lib/constants' +import { formatRelevance, chunkExcerpt, groupSourcesByType } from '@/lib/knowledge-utils' + +export default function KnowledgePage() { + return ( +
+ {/* Header */} +
+

Knowledge

+

+ Everything ForecastLabAI can currently draw on — the RAG knowledge base its assistant + answers from, and the live data its experiment agent acts on. +

+
+ + + + +
+ ) +} + +// === Section 1 — Knowledge Base (indexed RAG sources, read-only) === + +function KnowledgeBaseSection() { + const { data, isLoading, error, refetch } = useRagSources() + + if (error) { + return + } + if (isLoading) { + return + } + + const sources = data?.sources ?? [] + const byType = groupSourcesByType(sources) + + return ( + + +
+ + Knowledge Base +
+ + {data?.total_sources ?? 0} sources • {data?.total_chunks ?? 0} chunks + {sources.length > 0 && ( + <> + {' • '} + {Object.entries(byType) + .map(([type, items]) => `${items.length} ${type}`) + .join(', ')} + + )} + +
+ + {sources.length > 0 ? ( +
+ {sources.map((source) => ( +
+
+ +
+

{source.source_path}

+

+ {source.chunk_count} chunks • Indexed{' '} + {format(new Date(source.indexed_at), 'MMM d, yyyy')} +

+
+
+ + {source.source_type} + +
+ ))} +
+ ) : ( +
+ +
+

No documents indexed yet

+

+ The RAG assistant has nothing to answer from. Index documents in Admin → RAG + Sources, or run the RAG seeder scenario. +

+
+ +
+ )} +
+
+ ) +} + +// === Section 2 — Semantic Search (POST /rag/retrieve) === + +function SemanticSearchSection() { + const [query, setQuery] = useState('') + const retrieve = useRetrieve() + + const trimmed = query.trim() + + const handleSubmit = (e: React.FormEvent) => { + e.preventDefault() + if (!trimmed || retrieve.isPending) return + retrieve.mutate({ query: trimmed, top_k: 5 }) + } + + const searchUnavailable = retrieve.error instanceof ApiError && retrieve.error.status === 502 + const results = retrieve.data?.results ?? [] + + return ( + + +
+ + Semantic Search +
+ + Search the indexed knowledge base the way the RAG assistant does — by meaning, not + keywords. + +
+ +
+ setQuery(e.target.value)} + placeholder="e.g. How does backtesting prevent data leakage?" + aria-label="Semantic search query" + /> + +
+ + {searchUnavailable && ( +

+ Semantic search is unavailable — configure an embedding provider in{' '} + + Admin → AI Models + + . The source list above does not need embeddings and still works. +

+ )} + + {retrieve.isError && !searchUnavailable && ( +

+ {getErrorMessage(retrieve.error)} +

+ )} + + {retrieve.isSuccess && results.length === 0 && ( +

+ No matching content found. Try rephrasing the query. +

+ )} + + {results.length > 0 && ( +
+

+ {results.length} match{results.length === 1 ? '' : 'es'} •{' '} + {retrieve.data?.total_chunks_searched ?? 0} chunks searched in{' '} + {Math.round(retrieve.data?.search_time_ms ?? 0)} ms +

+ {results.map((chunk) => ( +
+
+

+ {chunk.source_path} + ({chunk.source_type}) +

+ + {formatRelevance(chunk.relevance_score)} match + +
+

{chunkExcerpt(chunk)}

+
+ ))} +
+ )} +
+
+ ) +} + +// === Section 3 — Live System State (what the experiment agent acts on) === + +function StatCard({ + icon: Icon, + label, + value, +}: { + icon: React.ComponentType<{ className?: string }> + label: string + value: string | number +}) { + return ( +
+ +

{typeof value === 'number' ? value.toLocaleString() : value}

+

{label}

+
+ ) +} + +function LiveSystemStateSection() { + const { data: status, isLoading: statusLoading } = useSeederStatus() + const { data: runs, isLoading: runsLoading } = useRuns({ page: 1, pageSize: 1 }) + const { data: aliases, isLoading: aliasesLoading } = useAliases() + + const dateRange = + status?.date_range_start && status?.date_range_end + ? `${status.date_range_start} → ${status.date_range_end}` + : 'No data' + + return ( + + +
+ + Live System State +
+ + The seeded data and registered models the experiment agent can query through its tools. + +
+ + {/* Seeded data tiles */} + {statusLoading ? ( +
+ {Array.from({ length: 4 }).map((_, i) => ( + + ))} +
+ ) : ( +
+ + + + +
+ )} + + {/* Registry summary */} +
+
+
+ +

Registered model runs

+
+

+ {runsLoading ? '—' : (runs?.total ?? 0).toLocaleString()} +

+ + Browse all runs + + +
+ +
+
+ +

Deployment aliases

+
+ {aliasesLoading ? ( +

Loading…

+ ) : aliases && aliases.length > 0 ? ( +
    + {aliases.map((alias) => ( +
  • + {alias.alias_name} + {alias.model_type} +
  • + ))} +
+ ) : ( +

No aliases yet.

+ )} +
+
+ + {/* Explainer */} +

+ The RAG assistant answers from the Knowledge Base above; the experiment agent acts on this + Live System State. Learn how to use them in the{' '} + + Agent Guide + + , or start a conversation in{' '} + + Chat + + . +

+
+
+ ) +} diff --git a/frontend/src/types/api.ts b/frontend/src/types/api.ts index c356c6ab..f0eceadd 100644 --- a/frontend/src/types/api.ts +++ b/frontend/src/types/api.ts @@ -181,6 +181,35 @@ export interface IndexDocumentResponse { chunks_created: number } +// Semantic-search request for POST /rag/retrieve. +// Mirrors app/features/rag/schemas.py RetrieveRequest (extra="forbid" — send +// nothing beyond these fields). Omit similarity_threshold to use the server default. +export interface RetrieveRequest { + query: string + top_k?: number // 1..50, server default 5 + similarity_threshold?: number // 0..1 + filters?: Record | null +} + +// One matching chunk from a semantic search. +export interface ChunkResult { + chunk_id: string + source_id: string + source_path: string + source_type: string + content: string + relevance_score: number // 0..1 + metadata: Record | null +} + +// Response from POST /rag/retrieve. +export interface RetrieveResponse { + results: ChunkResult[] + query_embedding_time_ms: number + search_time_ms: number + total_chunks_searched: number +} + // === Agents WebSocket === export type AgentEventType = | 'text_delta' @@ -373,6 +402,11 @@ export interface AIModelConfig { agent_temperature: number agent_max_tokens: number agent_thinking_budget: number | null + agent_max_tool_calls: number + agent_timeout_seconds: number + agent_retry_attempts: number + agent_session_ttl_minutes: number + agent_require_approval: string[] rag_embedding_provider: string rag_embedding_model: string rag_embedding_dimension: number From 622a1f94e2d7dde9de627f6eee06ddeeb2047988 Mon Sep 17 00:00:00 2001 From: Gabor Szabo Date: Mon, 18 May 2026 18:38:24 +0200 Subject: [PATCH 3/4] test(ui): cover knowledge-utils pure helpers (#185) --- frontend/src/lib/knowledge-utils.test.ts | 91 ++++++++++++++++++++++++ 1 file changed, 91 insertions(+) create mode 100644 frontend/src/lib/knowledge-utils.test.ts diff --git a/frontend/src/lib/knowledge-utils.test.ts 
b/frontend/src/lib/knowledge-utils.test.ts new file mode 100644 index 00000000..c00e0663 --- /dev/null +++ b/frontend/src/lib/knowledge-utils.test.ts @@ -0,0 +1,91 @@ +import { describe, it, expect } from 'vitest' +import { formatRelevance, groupSourcesByType, chunkExcerpt } from './knowledge-utils' +import type { RagSource, ChunkResult } from '@/types/api' + +/** Build a RagSource with sensible defaults for the fields not under test. */ +function makeSource(partial: Partial & Pick): RagSource { + return { + source_id: partial.source_id, + source_type: partial.source_type ?? 'markdown', + source_path: partial.source_path ?? 'docs/example.md', + chunk_count: partial.chunk_count ?? 3, + content_hash: partial.content_hash ?? 'hash', + indexed_at: partial.indexed_at ?? '2026-05-18T00:00:00Z', + metadata: partial.metadata ?? null, + } +} + +/** Build a ChunkResult with sensible defaults for the fields not under test. */ +function makeChunk(partial: Partial & Pick): ChunkResult { + return { + chunk_id: partial.chunk_id ?? 'chunk-1', + source_id: partial.source_id ?? 'src-1', + source_path: partial.source_path ?? 'docs/example.md', + source_type: partial.source_type ?? 'markdown', + content: partial.content, + relevance_score: partial.relevance_score ?? 0.8, + metadata: partial.metadata ?? 
null, + } +} + +describe('formatRelevance', () => { + it('renders a score as a rounded percentage', () => { + expect(formatRelevance(0.873)).toBe('87%') + }) + + it('clamps the endpoints', () => { + expect(formatRelevance(0)).toBe('0%') + expect(formatRelevance(1)).toBe('100%') + }) + + it('clamps out-of-range values', () => { + expect(formatRelevance(1.4)).toBe('100%') + expect(formatRelevance(-0.2)).toBe('0%') + }) + + it('treats a non-finite score as zero', () => { + expect(formatRelevance(Number.NaN)).toBe('0%') + expect(formatRelevance(Number.POSITIVE_INFINITY)).toBe('0%') + }) +}) + +describe('groupSourcesByType', () => { + it('groups a mixed source list into per-type buckets', () => { + const groups = groupSourcesByType([ + makeSource({ source_id: 'a', source_type: 'markdown' }), + makeSource({ source_id: 'b', source_type: 'openapi' }), + makeSource({ source_id: 'c', source_type: 'markdown' }), + ]) + expect(Object.keys(groups).sort()).toEqual(['markdown', 'openapi']) + expect(groups.markdown).toHaveLength(2) + expect(groups.openapi).toHaveLength(1) + }) + + it('returns an empty object for an empty array', () => { + expect(groupSourcesByType([])).toEqual({}) + }) + + it('falls back to "unknown" for a blank source_type', () => { + const groups = groupSourcesByType([makeSource({ source_id: 'a', source_type: '' })]) + expect(groups.unknown).toHaveLength(1) + }) +}) + +describe('chunkExcerpt', () => { + it('returns short content intact with collapsed whitespace', () => { + const chunk = makeChunk({ content: ' hello world\n\tagain ' }) + expect(chunkExcerpt(chunk)).toBe('hello world again') + }) + + it('truncates long content with an ellipsis', () => { + const chunk = makeChunk({ content: 'x'.repeat(500) }) + const excerpt = chunkExcerpt(chunk) + expect(excerpt.endsWith('…')).toBe(true) + expect(excerpt.length).toBeLessThanOrEqual(241) + }) + + it('honours a custom maxChars', () => { + const chunk = makeChunk({ content: 'abcdefghij' }) + 
expect(chunkExcerpt(chunk, 5)).toBe('abcde…') + }) +}) From 4cf06419d71ca83377801d65046ebe7af8393b0b Mon Sep 17 00:00:00 2001 From: Gabor Szabo Date: Mon, 18 May 2026 18:38:24 +0200 Subject: [PATCH 4/4] docs(docs): document the knowledge and agent guide pages (#185) --- .../PRP-19-knowledge-and-agent-guide-pages.md | 952 ++++++++++++++++++ README.md | 2 + docs/_base/REPO_MAP_INDEX.md | 2 + 3 files changed, 956 insertions(+) create mode 100644 PRPs/PRP-19-knowledge-and-agent-guide-pages.md diff --git a/PRPs/PRP-19-knowledge-and-agent-guide-pages.md b/PRPs/PRP-19-knowledge-and-agent-guide-pages.md new file mode 100644 index 00000000..2991a0df --- /dev/null +++ b/PRPs/PRP-19-knowledge-and-agent-guide-pages.md @@ -0,0 +1,952 @@ +name: "PRP-19 — Knowledge page + Agent Guide page (in-product self-documentation)" +description: | + Add two new React pages to the ForecastLabAI dashboard, frontend-led and + fully additive: + + 1. **Knowledge** (`/knowledge`) — presents, in detail, *what ForecastLabAI + currently knows*: the RAG knowledge base (indexed sources + a live semantic + search box) plus a summary of the live system state the agents can query + (seeded data, registered model runs, deployment aliases). + 2. **Agent Guide** (`/guide`) — explains, in detail, *how to use the Chat + agents*: the two agent types, their tools, the human-in-the-loop approval + flow, session limits, the streaming protocol, and copy-paste example prompts. + + Frontend-led, with one small additive backend change: the existing + `GET /config/ai` response gains read-only agent-limit fields so the Guide + shows session limits live. No new backend slice, no migration, no new env var. + Every other endpoint these pages consume already exists with a frontend hook. + +## Purpose +Close the in-product self-documentation gap. 
Today a dashboard visitor can open +`/chat` and talk to an agent, but nothing in the UI tells them (a) what the +RAG assistant actually has indexed to answer from, or (b) how the agents work, +what they can do, or how the approval gate behaves. The two pages turn implicit +system knowledge into a visible, browsable surface — a natural onboarding pair: +**Knowledge** = "what it knows" → **Agent Guide** = "how to ask it". + +> **PRP numbering:** `PRP-16` is reserved (Phase-2 LightGBM, per PRP-15). +> `PRP-17` (Showcase) and `PRP-18` (AI Model console) are used. This is `PRP-19`. + +## Core Principles +1. **Context is King** — every endpoint shape, hook name, schema field, and + pattern referenced below is linked to a real source file + line. +2. **Reuse existing patterns** — both pages are lazy routes registered exactly + like `Showcase` (PRP-17); data comes through existing TanStack Query hooks + (`useRagSources`, `useSeederStatus`, `useAIConfig`, …); UI uses existing + shadcn primitives (`Card`, `Badge`, `Input`, `Tabs`, `Button`). No new + streaming primitive, no new fetch wrapper. +3. **Additive only** — no new backend slice, no Alembic migration, no new + `.env` var. The one backend change is additive: read-only agent-limit fields + appended to the existing `AIModelConfig` (`GET /config/ai`) response. Plus + one new hook (`useRetrieve`), three new TS interfaces, two new pages, one + pure-helper module. +4. **Read-only, no duplication** — the Knowledge page is *presentational*. It + does NOT duplicate Admin's RAG management (index / delete) — those stay in + `frontend/src/pages/admin.tsx`. It adds the semantic-search exploration that + Admin lacks. +5. **Strict gates honored** — `pnpm tsc --noEmit` + `pnpm lint` + `pnpm test` + green; AND because the `config` slice `.py` files change, the repo-wide + `ruff`/`mypy`/`pyright`/`pytest` CI jobs must be run and stay green — the + `/config/ai` change ships with `config` slice tests. +6. 
**UI through skills** — pages built via `frontend-design` + `shadcn-ui` and + dogfooded via `webapp-testing` / `agent-browser` per `.claude/rules/ui-design.md`. + A green type-check is NOT proof the UI works. + +--- + +## Goal +Two new nav items route to two new pages. + +**`/knowledge` — Knowledge** +- A **Knowledge Base** section: `total_sources` / `total_chunks` summary, a + read-only list of every indexed RAG source (path, type badge, chunk count, + indexed date), and a **semantic search box** that POSTs to `/rag/retrieve` + and renders the matching chunks with relevance scores + source citations. +- A **Live System State** section: the seeded-data summary (stores / products / + sales / date range), the count of registered model runs, and the deployment + aliases — i.e. what the *experiment* agent can query through its tools. +- A short explainer tying it together: "the RAG assistant answers from the + Knowledge Base; the experiment agent acts on the Live System State." + +**`/guide` — Agent Guide** +- Describes the **two agents** (`rag_assistant`, `experiment`), each with its + purpose, its exact tool names, and what it returns. +- Walks through **how a chat session works**: pick agent → Start Session → + send a message → streamed text + tool-call chips → approval prompts → + New Session. +- Explains the **human-in-the-loop approval gate** (`create_alias`, + `archive_run`). +- Lists **session limits** (token budget, tool-call cap, timeout, TTL, retries) + — rendered **live** from `/config/ai`, which is extended to return them. +- Gives **copy-paste example prompts** per agent. +- Surfaces the **currently configured agent model** (live, from `/config/ai`) + and links to Chat and Admin → AI Models. +- Reachable both from a flat top-level nav item AND from a help link on the + Chat page. + +## Why +- **Portfolio identity.** `.claude/rules/product-vision.md` principle 1 — + "portfolio-grade, end-to-end … every phase ships working code". 
The agentic + layer (PRP-10) and RAG layer (PRP-9) are fully built but invisible as + *capabilities* — a reviewer has to read code to learn what the agents do. +- **Onboarding.** A first-time user opening `/chat` has no idea what to ask the + RAG assistant (it can only answer from indexed docs) or that the experiment + agent can run real backtests. These two pages remove that guesswork. +- **Low-cost surface.** Almost everything needed already exists server-side; + the only backend work is a small additive `/config/ai` extension. This is + high-value-per-line work: mostly composition of shipped endpoints into two + polished pages. + +## What +Frontend-led. Two lazy-loaded pages mirroring the `Showcase` registration +(PRP-17), two new `ROUTES` entries, two `NAV_ITEMS` entries, a help link to +`/guide` on the Chat page, one new mutation hook (`useRetrieve` for +`POST /rag/retrieve`), three new TS interfaces, and one pure-helper module with +a vitest. Plus one additive backend change: the `config` slice's `AIModelConfig` +schema + `get_effective_config` service gain read-only agent-limit fields +(`agent_max_tool_calls`, `agent_timeout_seconds`, `agent_retry_attempts`, +`agent_session_ttl_minutes`, `agent_require_approval`) so the Guide's limits are +live; shipped with `config` slice tests. No migration, no new env var. + +### Success Criteria +- [ ] `GET /knowledge` in the running SPA renders the Knowledge Base section + (source list + summary) and the Live System State section. +- [ ] The semantic search box on `/knowledge` POSTs `/rag/retrieve` and renders + `ChunkResult`s with a relevance score; an empty query is rejected client-side; + a `502` (no embedding provider) shows a graceful "search unavailable" state + while the source list still renders. +- [ ] An empty knowledge base shows a friendly empty state pointing at + Admin → RAG Sources (not a crash, not a blank card). 
+- [ ] `GET /guide` renders both agent cards with the **exact** tool names from + the agent definitions, the approval-gate explainer, the example prompts, + and the session limits + agent model rendered **live** from `/config/ai`. +- [ ] Both pages appear in the top nav (desktop + mobile sheet) and in `App.tsx` + as lazy ``s wrapped in ``; the Chat page links to `/guide`. +- [ ] `cd frontend && pnpm tsc --noEmit && pnpm lint && pnpm test --run` all clean. +- [ ] `frontend/src/lib/knowledge-utils.test.ts` passes (pure-helper coverage). +- [ ] `GET /config/ai` returns the five additive agent-limit fields; the `config` + slice tests (`test_schemas.py`/`test_service.py`/`test_routes.py`) cover + them and `ruff`/`mypy`/`pyright`/`pytest` stay green. +- [ ] Only the `config` slice changes server-side; no Alembic migration; no + `.env`/`.env.example` var. +- [ ] Admin's RAG index/delete management is untouched and NOT duplicated. +- [ ] Both pages dogfooded in a real browser (screenshot captured). + +--- + +## All Needed Context + +### Documentation & References +```yaml +- url: https://tanstack.com/query/latest/docs/framework/react/guides/queries + why: useQuery (GET) vs useMutation (POST) — the Knowledge search is a mutation + critical: | + GET data → useQuery({ queryKey, queryFn }). POST actions → useMutation({ + mutationFn }). The repo's hooks follow this exactly (see use-rag-sources.ts). + Semantic search is a POST → a useMutation, NOT a useQuery. + +- url: https://reactrouter.com/en/main/route/lazy + why: react-router v6 route registration; the repo lazy-loads every page + critical: Mirror App.tsx — `lazy(() => import('@/pages/x'))` + ``. + +- file: PRPs/PRP-17-demo-showcase-page.md + why: The most recent "add a new page" PRP. Its frontend tasks (constants + + App.tsx + lazy route + nav entry) are the exact pattern to copy. + critical: This PRP follows PRP-17's frontend half precisely; the only deltas + are "two pages instead of one" and "no backend slice". 
+ +- file: frontend/src/App.tsx + why: Lazy-route registration. Add `KnowledgePage` and `GuidePage` lazily and a + `` / `` exactly + like the existing `ShowcasePage` block (lines 12, 42-49). + critical: Pages are `lazy(() => import(...))`; each route element is wrapped in + `}>`. + +- file: frontend/src/lib/constants.ts + why: ROUTES + NAV_ITEMS. Add `KNOWLEDGE: '/knowledge'` and `GUIDE: '/guide'` + to ROUTES, and two NAV_ITEMS entries. + critical: | + NAV_ITEMS is `as const`. Knowledge and Agent Guide are flat top-level items + (not grouped). Place `Knowledge` after `Visualize` and `Agent Guide` after + `Chat` so the nav reads: Dashboard · Showcase · Explorer · Visualize · + Knowledge · Chat · Agent Guide · Admin (a "know it → chat → how to chat" + cluster). No new WS URL needed. + +- file: frontend/src/pages/admin.tsx + why: THE reference page. `RagSourcesPanel` (lines 116-253) already lists + `/rag/sources` data — copy its source-row markup. `SeederPanel`'s `StatCard` + (lines 769-785) is the data-summary tile to reuse on the Knowledge page. + critical: | + - admin.tsx keeps all sub-components in ONE file (RagSourcesPanel, + AliasesPanel, SeederPanel, StatCard helpers). Mirror that: knowledge.tsx + and guide.tsx each hold their own internal function components — do NOT + create a components/knowledge/ directory. + - The Knowledge page is READ-ONLY. Copy the source LIST markup but DROP the + "Index Document" dialog and the per-row delete AlertDialog — those are + management actions that stay in Admin. + - Reuse loading/error states: `` and + ``. + +- file: frontend/src/hooks/use-rag-sources.ts + why: Existing RAG hooks. `useRagSources()` (GET /rag/sources) is reused as-is. + ADD a new `useRetrieve()` mutation hook here for POST /rag/retrieve. + critical: | + useRagSources already returns SourceListResponse. The new useRetrieve wraps + `api('/rag/retrieve', { method: 'POST', body })`. It is a + useMutation (no cache invalidation needed — search is ephemeral). 
+ +- file: frontend/src/lib/api.ts + why: The `api()` fetch wrapper + `ApiError` (carries the RFC 7807 + ProblemDetail) + `getErrorMessage()`. + critical: | + `api('/rag/retrieve', { method: 'POST', body: {...} })` JSON-encodes `body`. + On non-2xx it throws `ApiError` with `.status` and `.detail`. The Knowledge + search must catch this: `502` → "search unavailable, configure an embedding + provider"; other → `getErrorMessage(err)`. + +- file: app/features/rag/routes.py + why: The RAG endpoints the Knowledge page consumes. + critical: | + - GET /rag/sources → SourceListResponse (no embeddings needed — always works) + - POST /rag/retrieve → RetrieveResponse (needs an embedding provider; + returns 502 application/problem+json if embedding generation fails — see + routes.py:214-224). The page must degrade gracefully on 502. + +- file: app/features/rag/schemas.py + why: AUTHORITATIVE wire shapes. Mirror these field-for-field into types/api.ts. + critical: | + RetrieveRequest (model_config = ConfigDict(extra="forbid") — send NOTHING + extra): query:str(1..2000), top_k:int(1..50, default 5), + similarity_threshold:float|null(0..1, default from settings — OMIT to use + the server default), filters:dict|null. + ChunkResult: chunk_id, source_id, source_path, source_type, content, + relevance_score:float(0..1), metadata:dict|null. + RetrieveResponse: results:ChunkResult[], query_embedding_time_ms:float, + search_time_ms:float, total_chunks_searched:int. + SourceResponse (already typed as `RagSource` in types/api.ts:157): source_id, + source_type, source_path, chunk_count, content_hash, indexed_at, metadata. + +- file: frontend/src/types/api.ts + why: TS type surface. `RagSource` + `SourceListResponse` (lines 157-171), + `AgentType` (line 199), `AIModelConfig`/`ProviderHealth` (lines 360-415) + already exist. ADD `RetrieveRequest`, `ChunkResult`, `RetrieveResponse` + near the `// === RAG ===` block (line 156). 
+ critical: snake_case field names on the wire — match the Pydantic models exactly. + +- file: app/features/agents/agents/experiment.py + why: The experiment agent's EXACT tool names + behavior for the Guide page. + critical: | + Tools (use these EXACT names on the Guide page): tool_list_runs, + tool_get_run, tool_run_backtest, tool_compare_backtest_results, + tool_compare_runs, tool_create_alias (REQUIRES APPROVAL), + tool_archive_run (REQUIRES APPROVAL). The system prompt (lines 45-72) + describes the workflow — paraphrase it, do not invent capabilities. + +- file: app/features/agents/agents/rag_assistant.py + why: The RAG assistant's EXACT tool names + behavior for the Guide page. + critical: | + Tools: tool_retrieve_context, tool_format_citations, tool_check_evidence, + tool_list_sources. It answers ONLY from retrieved evidence, cites + source_path:chunk_id, and says "I don't have enough information" when the + knowledge base lacks coverage (system prompt lines 38-67). + +- file: app/features/agents/agents/base.py + why: Shared agent behavior + the approval helper for the Guide page. + critical: | + `requires_approval(name)` checks `settings.agent_require_approval`. + SYSTEM_PROMPT_HEADER / SAFETY_INSTRUCTIONS (lines 269-294) state the safety + contract — the Guide's "approval" section paraphrases SAFETY_INSTRUCTIONS. + +- file: app/core/config.py + why: The agent session limits to state on the Guide page (lines 147-172). + critical: | + Defaults to quote on the Guide (label them "default"): agent_max_tokens=4096, + agent_max_tool_calls=10, agent_timeout_seconds=120, agent_retry_attempts=3, + agent_require_approval=["create_alias","archive_run"], + agent_session_ttl_minutes=120, agent_default_model="anthropic:claude-sonnet-4-5". + The LIVE model is shown via /config/ai (useAIConfig) — the static numbers + above are config defaults; phrase them as "default" since an operator can + change them in Admin → AI Models. 
+ +- file: frontend/src/hooks/use-config.ts + why: `useAIConfig()` (GET /config/ai) — the Guide page uses it to show the + currently-configured agent model AND the (now live) session limits. + critical: Reuse the hook as-is; do NOT add a config hook. The hook's response + type `AIModelConfig` in types/api.ts gains the five new agent-limit fields. + +- file: app/features/config/schemas.py + why: `AIModelConfig` (GET /config/ai response, lines 65-83). Extend it with + read-only agent-limit fields so the Guide renders limits live. + critical: | + ADD to AIModelConfig (NOT to AIModelConfigUpdate — these stay read-only, + not operator-settable here): agent_max_tool_calls:int, + agent_timeout_seconds:int, agent_retry_attempts:int, + agent_session_ttl_minutes:int, agent_require_approval:list[str]. + agent_max_tokens is ALREADY present — do not re-add it. + +- file: app/features/config/service.py + why: `get_effective_config` (line 129) builds AIModelConfig from the Settings + singleton. Populate the five new fields from `settings.*`. + critical: The new fields are sourced from Settings exactly like the existing + agent_* fields (app/core/config.py lines 147-172) — pure read, no DB, no + migration. Mirror the existing `agent_max_tokens=settings.agent_max_tokens` + line. + +- file: app/features/config/tests/ + why: test_schemas.py / test_service.py / test_routes.py — extend each so the + five new fields are covered (construction, service mapping from Settings, + and the GET /config/ai route response). Required by test-requirements.md. + +- file: frontend/src/pages/chat.tsx + why: The actual chat flow the Guide page describes — keep the Guide accurate + to it: pick agent in a Select → "Start Session" → type → stream → approval + prompt → "New Session". + critical: | + Client → server WS frame is `{ session_id, message }`. Server → client + events: text_delta, tool_call_start, tool_call_end, approval_required, + complete, error (see types/api.ts:185-197 AgentEventType). 
Describe these + accurately; do not invent event names. + +- file: docs/_base/API_CONTRACTS.md + why: Cross-check the /rag and /agents endpoint contracts + WS event list. + critical: The "WebSocket Events (/agents/stream)" section is the source of + truth for the Guide's streaming description. + +- file: frontend/src/hooks/use-demo-pipeline.test.ts + why: The vitest pattern — test PURE exported helpers (applyEvent, + createInitialSteps), not the React component. `knowledge-utils.test.ts` + mirrors this. + +- file: frontend/src/lib/date-utils.ts & frontend/src/lib/status-utils.ts + why: Precedent for a `lib/*.ts` pure-helper module. `knowledge-utils.ts` joins + them — pure functions, no React, easy to unit-test. + +- file: frontend/src/hooks/use-runs.ts & frontend/src/hooks/use-seeder.ts + why: The Live System State section reuses these. use-seeder.ts exports + `useSeederStatus()` (GET /seeder/status → SeederStatus). use-runs.ts exports + the runs + aliases hooks used by admin.tsx (`useAliases`) and + explorer/runs.tsx. + critical: VERIFY the exact export names in use-runs.ts before wiring — reuse + whatever it exports for runs (paginated) + aliases; do not add new hooks. + +- file: .claude/rules/ui-design.md + why: UI built/dogfooded via frontend-design + shadcn-ui + webapp-testing. +- file: .claude/rules/output-formatting.md + why: If the Guide uses status glyphs, reuse the ✅/⚠️/⏭️ vocabulary. +- file: .claude/rules/test-requirements.md + why: New TS component owning non-trivial state SHOULD have a vitest — satisfied + by extracting pure helpers into knowledge-utils.ts and testing them. +- file: .claude/rules/commit-format.md + why: `type(scope): description (#issue)`; scope `ui` for frontend/**, `docs` + for README/docs. Open the tracking issue FIRST. +- file: .claude/rules/branch-naming.md + why: `/` off dev → `feat/knowledge-and-guide-pages`. 
+``` + +### Current Codebase tree (relevant) +```bash +frontend/src/ +├── App.tsx # MOD — add /knowledge + /guide lazy routes +├── lib/ +│ ├── api.ts # reuse api() + ApiError + getErrorMessage +│ ├── constants.ts # MOD — ROUTES + NAV_ITEMS +│ ├── date-utils.ts # precedent: pure lib helper module +│ ├── status-utils.ts # precedent: pure lib helper module +│ └── knowledge-utils.ts # NEW — pure helpers for the Knowledge page +├── types/api.ts # MOD — +RetrieveRequest, ChunkResult, RetrieveResponse +├── hooks/ +│ ├── use-rag-sources.ts # MOD — +useRetrieve mutation +│ ├── use-seeder.ts # reuse useSeederStatus +│ ├── use-runs.ts # reuse runs + aliases hooks +│ └── use-config.ts # reuse useAIConfig +├── pages/ +│ ├── admin.tsx # reference (RagSourcesPanel, StatCard) — UNCHANGED +│ ├── chat.tsx # reference for the Guide's accuracy — UNCHANGED +│ ├── showcase.tsx # reference page registration (PRP-17) +│ ├── knowledge.tsx # NEW — the Knowledge page +│ └── guide.tsx # NEW — the Agent Guide page +└── components/ + ├── ui/ # reuse Card, Badge, Input, Button, Tabs, Separator + └── common/ # reuse LoadingState, ErrorDisplay +``` + +### Desired Codebase tree (files added / changed) +```bash +NEW frontend/src/pages/knowledge.tsx # Knowledge page (KB + live state) +NEW frontend/src/pages/guide.tsx # Agent Guide page +NEW frontend/src/lib/knowledge-utils.ts # pure helpers (testable, no React) +NEW frontend/src/lib/knowledge-utils.test.ts # vitest — pure-helper coverage +MOD frontend/src/types/api.ts # +RetrieveRequest/ChunkResult/RetrieveResponse; +5 AIModelConfig fields +MOD frontend/src/hooks/use-rag-sources.ts # +useRetrieve mutation hook +MOD frontend/src/lib/constants.ts # +KNOWLEDGE/GUIDE routes, +2 NAV_ITEMS +MOD frontend/src/App.tsx # +2 lazy imports, +2 s +MOD frontend/src/pages/chat.tsx # + help link to /guide +MOD app/features/config/schemas.py # +5 read-only agent-limit fields on AIModelConfig +MOD app/features/config/service.py # populate the 5 fields in 
get_effective_config +MOD app/features/config/tests/test_schemas.py # cover the new fields +MOD app/features/config/tests/test_service.py # cover get_effective_config mapping +MOD app/features/config/tests/test_routes.py # cover GET /config/ai response +MOD README.md # mention the two new pages in the feature list +MOD docs/_base/REPO_MAP_INDEX.md # +rows for knowledge.tsx + guide.tsx +KEEP frontend/src/pages/admin.tsx # UNCHANGED — management stays here +KEEP all other app/** (backend) # UNCHANGED — only the config slice changes +``` + +### Known Gotchas & Library Quirks +```typescript +// CRITICAL: FRONTEND-LED PRP with ONE additive backend change — the config +// slice only (schemas.py + service.py + tests). No new slice, no Alembic +// migration, no .env var. Because .py files DO change, the repo-wide +// ruff/mypy/pyright/pytest gates genuinely apply — run them (see Validation +// Level 4), do not assume they pass trivially. The three pnpm gates still +// gate the frontend half. + +// CRITICAL: /rag/retrieve needs an embedding provider (OpenAI key or Ollama). +// With none configured it returns 502 application/problem+json. The Knowledge +// page MUST degrade gracefully: the source LIST (GET /rag/sources) needs NO +// embeddings and always works; only the SEARCH box can 502 — catch ApiError, +// show "Semantic search unavailable — configure an embedding provider in +// Admin → AI Models", keep the rest of the page functional. + +// CRITICAL: RetrieveRequest is ConfigDict(extra="forbid"). Send ONLY +// { query, top_k } (+ optional similarity_threshold/filters). Any stray field +// → 422. OMIT similarity_threshold entirely to use the server-side default. + +// CRITICAL: search is a useMutation, NOT a useQuery. The query string is +// user-typed and submitted on click/Enter — it is an imperative action with +// ephemeral results, exactly the useMutation shape. (useQuery would re-fire +// on every keystroke / refetch.) 
+ +// CRITICAL: the Knowledge page is READ-ONLY. Do NOT add index/delete actions — +// they already live in Admin → RAG Sources (admin.tsx RagSourcesPanel). The +// Knowledge page COPIES the source-row display markup but DROPS the dialog +// and the delete AlertDialog. Duplicating management UI is the anti-pattern. + +// CRITICAL: the Guide page must use the EXACT agent tool names from the agent +// definitions (experiment.py / rag_assistant.py). Do not paraphrase tool +// names. A user copying "tool_run_backtest" into chat must match reality. + +// GOTCHA: agent limit numbers (4096 tokens, 10 tool calls, 120s, TTL 120 min) +// are config DEFAULTS — an operator can change them. Label them "default" on +// the Guide. The LIVE agent model comes from /config/ai (useAIConfig); render +// that dynamically, not a hardcoded model string. + +// GOTCHA: empty knowledge base — a fresh DB has zero RAG sources. The Knowledge +// Base section must show a friendly empty state ("No documents indexed yet — +// add some in Admin → RAG Sources, or run the RAG seeder scenario"), not a +// blank card and not a crash. + +// GOTCHA: NAV_ITEMS is declared `as const`. Adding two flat entries is fine; +// keep the object shape `{ label, href }` identical to the existing flat +// items (Dashboard/Showcase/Chat/Admin) so top-nav.tsx's `'items' in item` +// discriminator still works. + +// GOTCHA: react-router lazy route — the page file MUST `export default` the +// component (App.tsx does `lazy(() => import('@/pages/knowledge'))`). Named +// helper exports from the SAME file are allowed, but the Knowledge page's +// pure helpers live in lib/knowledge-utils.ts so they are import-cheap to +// unit-test (mirrors use-demo-pipeline.ts exporting applyEvent et al.). + +// GOTCHA: new frontend files use LF line endings (the repo's CRLF note in +// memory applies to .py files only). Match the surrounding .tsx files — they +// are LF. eslint.config.js + tsc are the enforcers. 
+ +// GOTCHA: every commit needs an open issue (commit-format.md). Open the +// tracking issue BEFORE the first commit. No AI co-author trailer, ever. +``` + +### Known Tradeoffs (decided — do not re-litigate) +```yaml +interpretation: + decision: "ForecastLab's current knowledge" = the RAG knowledge base (what the + rag_assistant answers from) PLUS the live system state (what the experiment + agent acts on: seeded data, runs, aliases). The Knowledge page shows both. + why: The agentic layer has two agents with two distinct knowledge surfaces. + Showing only the RAG corpus would under-represent "what the system knows" + and would also thinly duplicate Admin's RAG tab. Showing both makes the page + a genuine "knowledge dashboard" and a true counterpart to the Agent Guide. + status: confirmed — Resolved Decision 1 keeps both the RAG corpus and the + Live System State section; not scoped down to RAG-only. +minimal-backend: + decision: no NEW backend slice and no /knowledge or /guide API. The only + server-side change is additive: read-only agent-limit fields on the existing + AIModelConfig (GET /config/ai) response. + why: Every page datum except the live session limits is already served + (/rag/sources, /rag/retrieve, /seeder/status, /registry/runs, + /registry/aliases, /config/ai). The maintainer chose live limits over static + text (Resolved Decision 3), and /config/ai is the natural, already-existing + home for them — extending it beats a new endpoint. +guide-content-plus-live-config: + decision: the Guide page is hand-authored content + live /config/ai data (the + configured model AND the session limits). + why: It is documentation; the prose (agents, tools, approval flow, example + prompts) is stable. The two things that legitimately drift — the model and + the limits — are both fetched live from /config/ai. +search-is-mutation: + decision: semantic search uses useMutation, not useQuery. + why: it is a user-initiated imperative action with throwaway results. 
+``` + +--- + +## Implementation Blueprint + +### Data models / types (`frontend/src/types/api.ts`, add near line 156 `// === RAG ===`) +```typescript +// Append to the existing RAG block — mirror app/features/rag/schemas.py exactly. + +export interface RetrieveRequest { + query: string + top_k?: number // 1..50, server default 5 + similarity_threshold?: number // 0..1 — OMIT to use the server default + filters?: Record<string, unknown> | null +} + +export interface ChunkResult { + chunk_id: string + source_id: string + source_path: string + source_type: string + content: string + relevance_score: number // 0..1 + metadata: Record<string, unknown> | null +} + +export interface RetrieveResponse { + results: ChunkResult[] + query_embedding_time_ms: number + search_time_ms: number + total_chunks_searched: number +} +``` + +### Backend change (`app/features/config/schemas.py` + `service.py`) +```python +# schemas.py — append to AIModelConfig (the GET /config/ai response model), +# NOT to AIModelConfigUpdate (these are read-only, not operator-settable here): +agent_max_tool_calls: int = Field(description="Per-session tool-call cap") +agent_timeout_seconds: int = Field(description="Per-run agent timeout (seconds)") +agent_retry_attempts: int = Field(description="Agent retry attempts on failure") +agent_session_ttl_minutes: int = Field(description="Session time-to-live (minutes)") +agent_require_approval: list[str] = Field( + description="Tool names gated by human-in-the-loop approval" +) +# agent_max_tokens is ALREADY on AIModelConfig — do not re-add it. + +# service.py — get_effective_config(): populate each from the Settings singleton, +# mirroring the existing `agent_max_tokens=settings.agent_max_tokens` line.
+``` + +### Frontend type extension (`frontend/src/types/api.ts`, existing `AIModelConfig`) +```typescript +// EXTEND the existing AIModelConfig interface (~line 360) with the five fields +// the backend now returns — snake_case, matching the Pydantic model: +// agent_max_tool_calls: number +// agent_timeout_seconds: number +// agent_retry_attempts: number +// agent_session_ttl_minutes: number +// agent_require_approval: string[] +// agent_max_tokens already exists on AIModelConfig — do not duplicate it. +``` + +### Hook (`frontend/src/hooks/use-rag-sources.ts`, append) +```typescript +// Pseudocode — mirror the existing useIndexDocument mutation shape. +import type { RetrieveRequest, RetrieveResponse } from '@/types/api' + +export function useRetrieve() { + return useMutation({ + mutationFn: (body: RetrieveRequest) => + api<RetrieveResponse>('/rag/retrieve', { method: 'POST', body }), + // no onSuccess cache invalidation — search results are ephemeral + }) +} +``` + +### Pure helpers (`frontend/src/lib/knowledge-utils.ts`) +```typescript +// Pure, React-free, unit-testable. Exact helper set is implementer's choice; +// at minimum provide these two so knowledge-utils.test.ts has real coverage: + +import type { RagSource, ChunkResult } from '@/types/api' + +/** Relevance score (0..1) → a display percentage string, e.g. 0.873 -> "87%". */ +export function formatRelevance(score: number): string { /* clamp 0..1, round */ } + +/** Group indexed sources by source_type for the "by type" summary. */ +export function groupSourcesByType(sources: RagSource[]): Record<string, RagSource[]> { /* ... */ } + +/** Optional: short, single-line excerpt of a chunk for the result card. */ +export function chunkExcerpt(chunk: ChunkResult, maxChars?: number): string { /* ... */ } +``` + +### Knowledge page (`frontend/src/pages/knowledge.tsx`) +```text +export default function KnowledgePage() +Layout (build with frontend-design + shadcn-ui; mirror admin.tsx structure): + +- Header:

<h1>Knowledge</h1>

+ one sentence: "Everything ForecastLabAI can + currently draw on — the RAG knowledge base its assistant answers from, and the + live data its experiment agent acts on." + +- SECTION 1 — Knowledge Base (Card): + * useRagSources() → SourceListResponse. + * CardDescription: "{total_sources} sources • {total_chunks} chunks". + * Source list: read-only rows (path, <Badge>{source_type}</Badge>, + "{chunk_count} chunks", "Indexed {date}"). COPY the row markup from + admin.tsx RagSourcesPanel lines 209-243 MINUS the delete AlertDialog. + * Empty state when sources.length === 0 → friendly message + link to + ROUTES.ADMIN ("Index documents in Admin → RAG Sources"). + * isLoading → <LoadingState />; error → <ErrorDisplay />. + +- SECTION 2 — Semantic Search (Card, inside or below Section 1): + * Controlled <Input> for the query + a "Search"