w7-mgfcode · w7-mgfcode · May 18, 2026 · May 18, 2026 · May 18, 2026 · May 18, 2026
@@ -13,6 +13,8 @@ Portfolio-grade end-to-end retail demand forecasting system.
 - **Agentic Layer**: PydanticAI agents for autonomous experimentation and evidence-grounded Q&A with human-in-the-loop approval
 - **Data Seeder (The Forge)**: Reproducible synthetic data generator with realistic time-series patterns, scenario presets, and retail effects
 - **AI Models Console**: `/admin` → AI Models tab — swap the agent LLM (incl. fully-local Ollama), the RAG embedding model, and provider API keys at runtime; changes apply live with no restart
+- **Knowledge Page**: `/knowledge` — browse the indexed RAG corpus, run a live semantic search, and see the live system state (seeded data, model runs, deployment aliases) the agents draw on
+- **Agent Guide**: `/guide` — in-product reference for the two chat agents: their tools, the human-in-the-loop approval gate, live session limits, and copy-paste example prompts
 
 ## Quick Start
 

@@ -72,6 +72,13 @@ class AIModelConfig(BaseModel):
     agent_thinking_budget: int | None = Field(
         description="Extended-reasoning token budget (Gemini 2.5+); None disables it"
     )
+    agent_max_tool_calls: int = Field(description="Per-session tool-call cap")
+    agent_timeout_seconds: int = Field(description="Per-run agent timeout (seconds)")
+    agent_retry_attempts: int = Field(description="Agent retry attempts on failure")
+    agent_session_ttl_minutes: int = Field(description="Session time-to-live (minutes)")
+    agent_require_approval: list[str] = Field(
+        description="Tool names gated by human-in-the-loop approval"
+    )
     rag_embedding_provider: str = Field(description="RAG embedding provider: 'openai' | 'ollama'")
     rag_embedding_model: str = Field(description="OpenAI embedding model name")
     rag_embedding_dimension: int = Field(description="Embedding vector dimension")

@@ -144,6 +144,11 @@ async def get_effective_config(db: AsyncSession) -> AIModelConfig:
         agent_temperature=settings.agent_temperature,
         agent_max_tokens=settings.agent_max_tokens,
         agent_thinking_budget=settings.agent_thinking_budget,
+        agent_max_tool_calls=settings.agent_max_tool_calls,
+        agent_timeout_seconds=settings.agent_timeout_seconds,
+        agent_retry_attempts=settings.agent_retry_attempts,
+        agent_session_ttl_minutes=settings.agent_session_ttl_minutes,
+        agent_require_approval=list(settings.agent_require_approval),
         rag_embedding_provider=settings.rag_embedding_provider,
         rag_embedding_model=settings.rag_embedding_model,
         rag_embedding_dimension=settings.rag_embedding_dimension,

@@ -27,6 +27,11 @@ def _sample_config(
         agent_temperature=agent_temperature,
         agent_max_tokens=4096,
         agent_thinking_budget=None,
+        agent_max_tool_calls=10,
+        agent_timeout_seconds=120,
+        agent_retry_attempts=3,
+        agent_session_ttl_minutes=120,
+        agent_require_approval=["create_alias", "archive_run"],
         rag_embedding_provider="openai",
         rag_embedding_model="text-embedding-3-small",
         rag_embedding_dimension=1536,
@@ -65,6 +70,22 @@ def test_returns_effective_config(self, client):
         assert data["agent_default_model"] == "anthropic:claude-sonnet-4-5"
         assert data["api_keys"][0]["masked"] == "sk-ant-…1234"
 
+    def test_returns_agent_session_limits(self, client):
+        """GET /config/ai includes every agent session-limit field in its JSON body.
+
+        The service call is stubbed to return ``_sample_config()``, so the
+        expected values below are the limits that helper is built with.
+        """
+        expected_limits = {
+            "agent_max_tool_calls": 10,
+            "agent_timeout_seconds": 120,
+            "agent_retry_attempts": 3,
+            "agent_session_ttl_minutes": 120,
+            "agent_require_approval": ["create_alias", "archive_run"],
+        }
+        stubbed_service = AsyncMock(return_value=_sample_config())
+        with patch(
+            "app.features.config.routes.service.get_effective_config",
+            new=stubbed_service,
+        ):
+            response = client.get("/config/ai")
+
+        assert response.status_code == 200
+        payload = response.json()
+        # One comparison per field so a failure names the offending key.
+        for field_name, expected_value in expected_limits.items():
+            assert payload[field_name] == expected_value
+
 
 class TestUpdateAIConfig:
     """Tests for PATCH /config/ai."""

@@ -105,6 +105,11 @@ def test_ai_model_config_constructs(self):
             agent_temperature=0.1,
             agent_max_tokens=4096,
             agent_thinking_budget=None,
+            agent_max_tool_calls=10,
+            agent_timeout_seconds=120,
+            agent_retry_attempts=3,
+            agent_session_ttl_minutes=120,
+            agent_require_approval=["create_alias", "archive_run"],
             rag_embedding_provider="openai",
             rag_embedding_model="text-embedding-3-small",
             rag_embedding_dimension=1536,
@@ -115,6 +120,33 @@ def test_ai_model_config_constructs(self):
         )
         assert cfg.agent_thinking_budget is None
 
+    def test_ai_model_config_carries_agent_limits(self):
+        """The five agent session-limit fields survive AIModelConfig construction."""
+        agent_limits = {
+            "agent_max_tool_calls": 10,
+            "agent_timeout_seconds": 120,
+            "agent_retry_attempts": 3,
+            "agent_session_ttl_minutes": 120,
+            "agent_require_approval": ["create_alias", "archive_run"],
+        }
+        cfg = AIModelConfig(
+            agent_default_model="anthropic:claude-sonnet-4-5",
+            agent_fallback_model="openai:gpt-4o",
+            agent_temperature=0.1,
+            agent_max_tokens=4096,
+            agent_thinking_budget=None,
+            rag_embedding_provider="openai",
+            rag_embedding_model="text-embedding-3-small",
+            rag_embedding_dimension=1536,
+            ollama_base_url="http://localhost:11434",
+            ollama_embedding_model="nomic-embed-text",
+            api_keys=[],
+            overridden_keys=[],
+            **agent_limits,
+        )
+        # Read each limit back off the model and compare with what went in.
+        for field_name, expected_value in agent_limits.items():
+            assert getattr(cfg, field_name) == expected_value
+
     def test_api_key_status(self):
         """ApiKeyStatus carries presence + a masked preview."""
         status = ApiKeyStatus(provider="anthropic", is_set=True, masked="sk-ant-…3f9a")

@@ -97,6 +97,24 @@ async def test_get_effective_config_masks_secrets(self):
         assert anthropic.masked is not None
         assert "supersecretvalue" not in config.model_dump_json()
 
+    @pytest.mark.asyncio
+    async def test_get_effective_config_maps_agent_limits(self, monkeypatch):
+        """The agent session-limit fields are sourced from the Settings singleton.
+
+        The overrides are applied through ``monkeypatch`` rather than by plain
+        assignment: the object returned by ``get_settings()`` is shared, so
+        un-restored mutations would leak these limits into later tests.
+        ``monkeypatch`` puts the original values back at teardown.
+        """
+        overrides = {
+            "agent_max_tool_calls": 7,
+            "agent_timeout_seconds": 99,
+            "agent_retry_attempts": 2,
+            "agent_session_ttl_minutes": 45,
+            "agent_require_approval": ["create_alias"],
+        }
+        settings = get_settings()
+        for field_name, value in overrides.items():
+            monkeypatch.setattr(settings, field_name, value)
+
+        config = await service.get_effective_config(_mock_db())
+
+        assert config.agent_max_tool_calls == 7
+        assert config.agent_timeout_seconds == 99
+        assert config.agent_retry_attempts == 2
+        assert config.agent_session_ttl_minutes == 45
+        assert config.agent_require_approval == ["create_alias"]
+
 
 # =============================================================================
 # Unit tests — update_config

@@ -23,6 +23,8 @@ ForecastLabAI is a portfolio-grade, single-host retail-demand-forecasting system
 | [`scripts/run_demo.py`](../../scripts/run_demo.py) | End-to-end pipeline driver — seed → features → train ×3 → backtest → register → alias → agent | First-run demonstrability, integration debugging |
 | [`app/features/demo/`](../../app/features/demo/) | In-process e2e demo slice — `POST /demo/run` + `WS /demo/stream` drive the pipeline via `ASGITransport` (no cross-slice imports) | Showcase page, in-product demo |
 | [`frontend/src/pages/showcase.tsx`](../../frontend/src/pages/showcase.tsx) | The Showcase page — streams the live pipeline into the dashboard as status cards | Demoing the system in-browser |
+| [`frontend/src/pages/knowledge.tsx`](../../frontend/src/pages/knowledge.tsx) | The Knowledge page — indexed RAG corpus, live semantic search, and live system state | Surfacing what the agents can draw on |
+| [`frontend/src/pages/guide.tsx`](../../frontend/src/pages/guide.tsx) | The Agent Guide page — agent tools, approval gate, live session limits, example prompts | Explaining how to use the chat agents |
 | [`alembic/versions/`](../../alembic/versions/) | Six migrations through `d6e0f2g3h456_create_agent_session_table.py` | DB-schema questions, migration drift |
 | [`docs/ARCHITECTURE.md`](../ARCHITECTURE.md) | Phase-by-phase architecture narrative | High-level component reasoning |
 | [`docs/PHASE-index.md`](../PHASE-index.md) | Index of all 11 phase docs | Locating per-phase deep-dive |

@@ -18,6 +18,8 @@ const JobsMonitorPage = lazy(() => import('@/pages/explorer/jobs'))
 const ForecastPage = lazy(() => import('@/pages/visualize/forecast'))
 const BacktestPage = lazy(() => import('@/pages/visualize/backtest'))
 const ChatPage = lazy(() => import('@/pages/chat'))
+const KnowledgePage = lazy(() => import('@/pages/knowledge'))
+const GuidePage = lazy(() => import('@/pages/guide'))
 const AdminPage = lazy(() => import('@/pages/admin'))
 
 function PageLoader() {
@@ -103,6 +105,14 @@ function App() {
                   </Suspense>
                 }
               />
+              <Route
+                path={ROUTES.KNOWLEDGE}
+                element={
+                  <Suspense fallback={<PageLoader />}>
+                    <KnowledgePage />
+                  </Suspense>
+                }
+              />
               <Route
                 path={ROUTES.CHAT}
                 element={
@@ -111,6 +121,14 @@ function App() {
                   </Suspense>
                 }
               />
+              <Route
+                path={ROUTES.GUIDE}
+                element={
+                  <Suspense fallback={<PageLoader />}>
+                    <GuidePage />
+                  </Suspense>
+                }
+              />
               <Route
                 path={ROUTES.ADMIN}
                 element={

@@ -1,6 +1,12 @@
 import { useQuery, useMutation, useQueryClient } from '@tanstack/react-query'
 import { api } from '@/lib/api'
-import type { SourceListResponse, IndexDocumentRequest, IndexDocumentResponse } from '@/types/api'
+import type {
+  SourceListResponse,
+  IndexDocumentRequest,
+  IndexDocumentResponse,
+  RetrieveRequest,
+  RetrieveResponse,
+} from '@/types/api'
 
 export function useRagSources() {
   return useQuery({
@@ -33,3 +39,13 @@ export function useIndexDocument() {
     },
   })
 }
+
+// Semantic search against the knowledge base (POST /rag/retrieve), exposed as a
+// mutation. Results are ephemeral, so nothing is cached or invalidated. A 502
+// (no embedding provider configured) reaches the caller as an ApiError, which
+// the caller is expected to degrade gracefully.
+export function useRetrieve() {
+  const retrieve = (body: RetrieveRequest) =>
+    api<RetrieveResponse>('/rag/retrieve', { method: 'POST', body })
+  return useMutation({ mutationFn: retrieve })
+}
@@ -13,7 +13,9 @@ export const ROUTES = {
     FORECAST: '/visualize/forecast',
     BACKTEST: '/visualize/backtest',
   },
+  KNOWLEDGE: '/knowledge',
   CHAT: '/chat',
+  GUIDE: '/guide',
   ADMIN: '/admin',
 } as const
 
@@ -38,7 +40,9 @@ export const NAV_ITEMS = [
       { label: 'Backtest Results', href: ROUTES.VISUALIZE.BACKTEST },
     ],
   },
+  { label: 'Knowledge', href: ROUTES.KNOWLEDGE },
   { label: 'Chat', href: ROUTES.CHAT },
+  { label: 'Agent Guide', href: ROUTES.GUIDE },
   { label: 'Admin', href: ROUTES.ADMIN },
 ] as const
 

@@ -0,0 +1,91 @@
+import { describe, it, expect } from 'vitest'
+import { formatRelevance, groupSourcesByType, chunkExcerpt } from './knowledge-utils'
+import type { RagSource, ChunkResult } from '@/types/api'
+
+/**
+ * Test factory for RagSource: `source_id` is mandatory, every other field
+ * falls back to a fixed placeholder when the caller leaves it out.
+ */
+function makeSource(partial: Partial<RagSource> & Pick<RagSource, 'source_id'>): RagSource {
+  const { source_id, source_type, source_path, chunk_count, content_hash, indexed_at, metadata } =
+    partial
+  return {
+    source_id,
+    source_type: source_type ?? 'markdown',
+    source_path: source_path ?? 'docs/example.md',
+    chunk_count: chunk_count ?? 3,
+    content_hash: content_hash ?? 'hash',
+    indexed_at: indexed_at ?? '2026-05-18T00:00:00Z',
+    metadata: metadata ?? null,
+  }
+}
+
+/**
+ * Test factory for ChunkResult: `content` is mandatory, every other field
+ * falls back to a fixed placeholder when the caller leaves it out.
+ */
+function makeChunk(partial: Partial<ChunkResult> & Pick<ChunkResult, 'content'>): ChunkResult {
+  const { chunk_id, source_id, source_path, source_type, content, relevance_score, metadata } =
+    partial
+  return {
+    chunk_id: chunk_id ?? 'chunk-1',
+    source_id: source_id ?? 'src-1',
+    source_path: source_path ?? 'docs/example.md',
+    source_type: source_type ?? 'markdown',
+    content,
+    relevance_score: relevance_score ?? 0.8,
+    metadata: metadata ?? null,
+  }
+}
+
+// formatRelevance: rounding, clamping to 0..1, and non-finite input.
+describe('formatRelevance', () => {
+  it('renders a score as a rounded percentage', () => {
+    const label = formatRelevance(0.873)
+    expect(label).toBe('87%')
+  })
+
+  it('clamps the endpoints', () => {
+    const lowest = formatRelevance(0)
+    const highest = formatRelevance(1)
+    expect(lowest).toBe('0%')
+    expect(highest).toBe('100%')
+  })
+
+  it('clamps out-of-range values', () => {
+    const aboveRange = formatRelevance(1.4)
+    const belowRange = formatRelevance(-0.2)
+    expect(aboveRange).toBe('100%')
+    expect(belowRange).toBe('0%')
+  })
+
+  it('treats a non-finite score as zero', () => {
+    const fromNaN = formatRelevance(Number.NaN)
+    const fromInfinity = formatRelevance(Number.POSITIVE_INFINITY)
+    expect(fromNaN).toBe('0%')
+    expect(fromInfinity).toBe('0%')
+  })
+})
+
+// groupSourcesByType: bucketing, empty input, and the blank-type fallback.
+describe('groupSourcesByType', () => {
+  it('groups a mixed source list into per-type buckets', () => {
+    const mixed = [
+      makeSource({ source_id: 'a', source_type: 'markdown' }),
+      makeSource({ source_id: 'b', source_type: 'openapi' }),
+      makeSource({ source_id: 'c', source_type: 'markdown' }),
+    ]
+    const groups = groupSourcesByType(mixed)
+    const types = Object.keys(groups).sort()
+    expect(types).toEqual(['markdown', 'openapi'])
+    expect(groups.markdown).toHaveLength(2)
+    expect(groups.openapi).toHaveLength(1)
+  })
+
+  it('returns an empty object for an empty array', () => {
+    const groups = groupSourcesByType([])
+    expect(groups).toEqual({})
+  })
+
+  it('falls back to "unknown" for a blank source_type', () => {
+    const untyped = makeSource({ source_id: 'a', source_type: '' })
+    const groups = groupSourcesByType([untyped])
+    expect(groups.unknown).toHaveLength(1)
+  })
+})
+
+// chunkExcerpt: whitespace collapsing, default truncation, custom maxChars.
+describe('chunkExcerpt', () => {
+  it('returns short content intact with collapsed whitespace', () => {
+    const messy = makeChunk({ content: '  hello   world\n\tagain  ' })
+    const excerpt = chunkExcerpt(messy)
+    expect(excerpt).toBe('hello world again')
+  })
+
+  it('truncates long content with an ellipsis', () => {
+    const longChunk = makeChunk({ content: 'x'.repeat(500) })
+    const excerpt = chunkExcerpt(longChunk)
+    const endsWithEllipsis = excerpt.endsWith('…')
+    expect(endsWithEllipsis).toBe(true)
+    // 240 kept characters (the default) plus the ellipsis itself.
+    expect(excerpt.length).toBeLessThanOrEqual(241)
+  })
+
+  it('honours a custom maxChars', () => {
+    const shortChunk = makeChunk({ content: 'abcdefghij' })
+    const excerpt = chunkExcerpt(shortChunk, 5)
+    expect(excerpt).toBe('abcde…')
+  })
+})
@@ -0,0 +1,43 @@
+// Pure, React-free helpers for the Knowledge page. Kept separate from the page
+// component so they are cheap to unit-test (see knowledge-utils.test.ts) —
+// mirrors the use-demo-pipeline.ts / status-utils.ts precedent.
+import type { RagSource, ChunkResult } from '@/types/api'
+
+/**
+ * Render a relevance score as a whole-number percentage label.
+ *
+ * Finite scores are clamped to the 0..1 range before rounding, so 0.873 gives
+ * "87%", 1.4 gives "100%" and -0.2 gives "0%". Any non-finite score (NaN or
+ * either infinity) is treated as zero and gives "0%".
+ *
+ * @param score - Relevance score, nominally between 0 and 1.
+ * @returns The percentage followed by "%".
+ */
+export function formatRelevance(score: number): string {
+  if (!Number.isFinite(score)) {
+    return '0%'
+  }
+  let fraction = score
+  if (fraction < 0) {
+    fraction = 0
+  } else if (fraction > 1) {
+    fraction = 1
+  }
+  return `${Math.round(fraction * 100)}%`
+}
+
+/**
+ * Group indexed sources by their source_type (e.g. "markdown", "openapi").
+ *
+ * Buckets are collected in a Map and converted to a plain object only at the
+ * end. Looking keys up on a plain `{}` would also find inherited members, so a
+ * source_type such as "constructor", "toString" or "__proto__" would either
+ * throw on `.push` or end up outside the result's own keys. With the Map each
+ * of those becomes an ordinary own-property bucket.
+ *
+ * @param sources - Indexed sources to bucket; the array is not mutated.
+ * @returns An object keyed by source type. An empty array yields `{}`; a blank
+ *   source_type is filed under "unknown".
+ */
+export function groupSourcesByType(sources: RagSource[]): Record<string, RagSource[]> {
+  const buckets = new Map<string, RagSource[]>()
+  for (const source of sources) {
+    const key = source.source_type || 'unknown'
+    const bucket = buckets.get(key)
+    if (bucket) {
+      bucket.push(source)
+    } else {
+      buckets.set(key, [source])
+    }
+  }
+  // Object.fromEntries defines own data properties, so "__proto__" is a key
+  // like any other rather than a prototype assignment.
+  return Object.fromEntries(buckets)
+}
+
+/**
+ * Single-line excerpt of a chunk's content for a result card.
+ *
+ * Runs of whitespace are collapsed to one space and the ends trimmed. Content
+ * longer than `maxChars` is cut there and suffixed with an ellipsis. The cut is
+ * made on UTF-16 code units, so it can land inside a surrogate pair (e.g. an
+ * emoji); the dangling first half is dropped so the excerpt never ends in a
+ * broken character.
+ *
+ * @param chunk - Retrieved chunk whose `content` is excerpted.
+ * @param maxChars - Maximum number of content code units kept (default 240);
+ *   the ellipsis is added on top of this.
+ * @returns The collapsed content, truncated with "…" when it exceeds `maxChars`.
+ */
+export function chunkExcerpt(chunk: ChunkResult, maxChars = 240): string {
+  const collapsed = chunk.content.replace(/\s+/g, ' ').trim()
+  if (collapsed.length <= maxChars) {
+    return collapsed
+  }
+  let head = collapsed.slice(0, maxChars)
+  // A trailing high surrogate (0xD800-0xDBFF) means its partner was cut off.
+  // charCodeAt yields NaN for an empty head, which fails both comparisons.
+  const lastUnit = head.charCodeAt(head.length - 1)
+  if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) {
+    head = head.slice(0, -1)
+  }
+  return `${head.trimEnd()}…`
+}