Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
952 changes: 952 additions & 0 deletions PRPs/PRP-19-knowledge-and-agent-guide-pages.md

Large diffs are not rendered by default.

2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@ Portfolio-grade end-to-end retail demand forecasting system.
- **Agentic Layer**: PydanticAI agents for autonomous experimentation and evidence-grounded Q&A with human-in-the-loop approval
- **Data Seeder (The Forge)**: Reproducible synthetic data generator with realistic time-series patterns, scenario presets, and retail effects
- **AI Models Console**: `/admin` → AI Models tab — swap the agent LLM (incl. fully-local Ollama), the RAG embedding model, and provider API keys at runtime; changes apply live with no restart
- **Knowledge Page**: `/knowledge` — browse the indexed RAG corpus, run a live semantic search, and see the live system state (seeded data, model runs, deployment aliases) the agents draw on
- **Agent Guide**: `/guide` — in-product reference for the two chat agents: their tools, the human-in-the-loop approval gate, live session limits, and copy-paste example prompts

## Quick Start

Expand Down
7 changes: 7 additions & 0 deletions app/features/config/schemas.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,13 @@ class AIModelConfig(BaseModel):
agent_thinking_budget: int | None = Field(
description="Extended-reasoning token budget (Gemini 2.5+); None disables it"
)
agent_max_tool_calls: int = Field(description="Per-session tool-call cap")
agent_timeout_seconds: int = Field(description="Per-run agent timeout (seconds)")
agent_retry_attempts: int = Field(description="Agent retry attempts on failure")
agent_session_ttl_minutes: int = Field(description="Session time-to-live (minutes)")
agent_require_approval: list[str] = Field(
description="Tool names gated by human-in-the-loop approval"
)
rag_embedding_provider: str = Field(description="RAG embedding provider: 'openai' | 'ollama'")
rag_embedding_model: str = Field(description="OpenAI embedding model name")
rag_embedding_dimension: int = Field(description="Embedding vector dimension")
Expand Down
5 changes: 5 additions & 0 deletions app/features/config/service.py
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,11 @@ async def get_effective_config(db: AsyncSession) -> AIModelConfig:
agent_temperature=settings.agent_temperature,
agent_max_tokens=settings.agent_max_tokens,
agent_thinking_budget=settings.agent_thinking_budget,
agent_max_tool_calls=settings.agent_max_tool_calls,
agent_timeout_seconds=settings.agent_timeout_seconds,
agent_retry_attempts=settings.agent_retry_attempts,
agent_session_ttl_minutes=settings.agent_session_ttl_minutes,
agent_require_approval=list(settings.agent_require_approval),
rag_embedding_provider=settings.rag_embedding_provider,
rag_embedding_model=settings.rag_embedding_model,
rag_embedding_dimension=settings.rag_embedding_dimension,
Expand Down
21 changes: 21 additions & 0 deletions app/features/config/tests/test_routes.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,11 @@ def _sample_config(
agent_temperature=agent_temperature,
agent_max_tokens=4096,
agent_thinking_budget=None,
agent_max_tool_calls=10,
agent_timeout_seconds=120,
agent_retry_attempts=3,
agent_session_ttl_minutes=120,
agent_require_approval=["create_alias", "archive_run"],
rag_embedding_provider="openai",
rag_embedding_model="text-embedding-3-small",
rag_embedding_dimension=1536,
Expand Down Expand Up @@ -65,6 +70,22 @@ def test_returns_effective_config(self, client):
assert data["agent_default_model"] == "anthropic:claude-sonnet-4-5"
assert data["api_keys"][0]["masked"] == "sk-ant-…1234"

def test_returns_agent_session_limits(self, client):
    """GET /config/ai returns every read-only agent session-limit field.

    The service layer is stubbed with the sample config, so this only checks
    that the route serialises those fields into the JSON response.
    """
    expected_limits = {
        "agent_max_tool_calls": 10,
        "agent_timeout_seconds": 120,
        "agent_retry_attempts": 3,
        "agent_session_ttl_minutes": 120,
        "agent_require_approval": ["create_alias", "archive_run"],
    }
    stubbed_service = AsyncMock(return_value=_sample_config())

    with patch(
        "app.features.config.routes.service.get_effective_config",
        new=stubbed_service,
    ):
        response = client.get("/config/ai")

    assert response.status_code == 200
    payload = response.json()
    for field_name, expected_value in expected_limits.items():
        assert payload[field_name] == expected_value


class TestUpdateAIConfig:
"""Tests for PATCH /config/ai."""
Expand Down
32 changes: 32 additions & 0 deletions app/features/config/tests/test_schemas.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,11 @@ def test_ai_model_config_constructs(self):
agent_temperature=0.1,
agent_max_tokens=4096,
agent_thinking_budget=None,
agent_max_tool_calls=10,
agent_timeout_seconds=120,
agent_retry_attempts=3,
agent_session_ttl_minutes=120,
agent_require_approval=["create_alias", "archive_run"],
rag_embedding_provider="openai",
rag_embedding_model="text-embedding-3-small",
rag_embedding_dimension=1536,
Expand All @@ -115,6 +120,33 @@ def test_ai_model_config_constructs(self):
)
assert cfg.agent_thinking_budget is None

def test_ai_model_config_carries_agent_limits(self):
    """AIModelConfig stores the agent session-limit fields it is constructed with."""
    agent_limits = {
        "agent_max_tool_calls": 10,
        "agent_timeout_seconds": 120,
        "agent_retry_attempts": 3,
        "agent_session_ttl_minutes": 120,
        "agent_require_approval": ["create_alias", "archive_run"],
    }
    # Remaining constructor arguments; they are not what this test checks.
    other_fields = {
        "agent_default_model": "anthropic:claude-sonnet-4-5",
        "agent_fallback_model": "openai:gpt-4o",
        "agent_temperature": 0.1,
        "agent_max_tokens": 4096,
        "agent_thinking_budget": None,
        "rag_embedding_provider": "openai",
        "rag_embedding_model": "text-embedding-3-small",
        "rag_embedding_dimension": 1536,
        "ollama_base_url": "http://localhost:11434",
        "ollama_embedding_model": "nomic-embed-text",
        "api_keys": [],
        "overridden_keys": [],
    }

    cfg = AIModelConfig(**other_fields, **agent_limits)

    for field_name, expected_value in agent_limits.items():
        assert getattr(cfg, field_name) == expected_value

def test_api_key_status(self):
"""ApiKeyStatus carries presence + a masked preview."""
status = ApiKeyStatus(provider="anthropic", is_set=True, masked="sk-ant-…3f9a")
Expand Down
18 changes: 18 additions & 0 deletions app/features/config/tests/test_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,24 @@ async def test_get_effective_config_masks_secrets(self):
assert anthropic.masked is not None
assert "supersecretvalue" not in config.model_dump_json()

@pytest.mark.asyncio
async def test_get_effective_config_maps_agent_limits(self):
    """The agent session-limit fields are sourced from the Settings singleton.

    Overrides the five agent-limit attributes on the object returned by
    ``get_settings()``, calls ``service.get_effective_config`` and checks the
    values come back unchanged on the resulting config.
    """
    settings = get_settings()
    overrides = {
        "agent_max_tool_calls": 7,
        "agent_timeout_seconds": 99,
        "agent_retry_attempts": 2,
        "agent_session_ttl_minutes": 45,
        "agent_require_approval": ["create_alias"],
    }
    # Snapshot the current values first: the settings object is shared, so
    # without a restore these overrides could leak into later tests.
    originals = {name: getattr(settings, name) for name in overrides}

    try:
        for name, value in overrides.items():
            setattr(settings, name, value)
        config = await service.get_effective_config(_mock_db())
    finally:
        # Always put the original values back, even if the call above raised.
        for name, value in originals.items():
            setattr(settings, name, value)

    assert config.agent_max_tool_calls == 7
    assert config.agent_timeout_seconds == 99
    assert config.agent_retry_attempts == 2
    assert config.agent_session_ttl_minutes == 45
    assert config.agent_require_approval == ["create_alias"]


# =============================================================================
# Unit tests — update_config
Expand Down
2 changes: 2 additions & 0 deletions docs/_base/REPO_MAP_INDEX.md
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@ ForecastLabAI is a portfolio-grade, single-host retail-demand-forecasting system
| [`scripts/run_demo.py`](../../scripts/run_demo.py) | End-to-end pipeline driver — seed → features → train ×3 → backtest → register → alias → agent | First-run demonstrability, integration debugging |
| [`app/features/demo/`](../../app/features/demo/) | In-process e2e demo slice — `POST /demo/run` + `WS /demo/stream` drive the pipeline via `ASGITransport` (no cross-slice imports) | Showcase page, in-product demo |
| [`frontend/src/pages/showcase.tsx`](../../frontend/src/pages/showcase.tsx) | The Showcase page — streams the live pipeline into the dashboard as status cards | Demoing the system in-browser |
| [`frontend/src/pages/knowledge.tsx`](../../frontend/src/pages/knowledge.tsx) | The Knowledge page — indexed RAG corpus, live semantic search, and live system state | Surfacing what the agents can draw on |
| [`frontend/src/pages/guide.tsx`](../../frontend/src/pages/guide.tsx) | The Agent Guide page — agent tools, approval gate, live session limits, example prompts | Explaining how to use the chat agents |
| [`alembic/versions/`](../../alembic/versions/) | Six migrations through `d6e0f2g3h456_create_agent_session_table.py` | DB-schema questions, migration drift |
| [`docs/ARCHITECTURE.md`](../ARCHITECTURE.md) | Phase-by-phase architecture narrative | High-level component reasoning |
| [`docs/PHASE-index.md`](../PHASE-index.md) | Index of all 11 phase docs | Locating per-phase deep-dive |
Expand Down
18 changes: 18 additions & 0 deletions frontend/src/App.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@ const JobsMonitorPage = lazy(() => import('@/pages/explorer/jobs'))
const ForecastPage = lazy(() => import('@/pages/visualize/forecast'))
const BacktestPage = lazy(() => import('@/pages/visualize/backtest'))
const ChatPage = lazy(() => import('@/pages/chat'))
const KnowledgePage = lazy(() => import('@/pages/knowledge'))
const GuidePage = lazy(() => import('@/pages/guide'))
const AdminPage = lazy(() => import('@/pages/admin'))

function PageLoader() {
Expand Down Expand Up @@ -103,6 +105,14 @@ function App() {
</Suspense>
}
/>
<Route
path={ROUTES.KNOWLEDGE}
element={
<Suspense fallback={<PageLoader />}>
<KnowledgePage />
</Suspense>
}
/>
<Route
path={ROUTES.CHAT}
element={
Expand All @@ -111,6 +121,14 @@ function App() {
</Suspense>
}
/>
<Route
path={ROUTES.GUIDE}
element={
<Suspense fallback={<PageLoader />}>
<GuidePage />
</Suspense>
}
/>
<Route
path={ROUTES.ADMIN}
element={
Expand Down
18 changes: 17 additions & 1 deletion frontend/src/hooks/use-rag-sources.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,12 @@
import { useQuery, useMutation, useQueryClient } from '@tanstack/react-query'
import { api } from '@/lib/api'
import type { SourceListResponse, IndexDocumentRequest, IndexDocumentResponse } from '@/types/api'
import type {
SourceListResponse,
IndexDocumentRequest,
IndexDocumentResponse,
RetrieveRequest,
RetrieveResponse,
} from '@/types/api'

export function useRagSources() {
return useQuery({
Expand Down Expand Up @@ -33,3 +39,13 @@ export function useIndexDocument() {
},
})
}

// Mutation hook: semantic search over the knowledge base.
// Sends the request body to POST /rag/retrieve and resolves to the
// RetrieveResponse. No onSuccess / cache invalidation is registered because
// search results are ephemeral. Per the endpoint contract noted here, a 502
// (no embedding provider configured) is expected to surface as an ApiError
// that the caller should handle gracefully.
export function useRetrieve() {
  const retrieve = (body: RetrieveRequest) =>
    api<RetrieveResponse>('/rag/retrieve', { method: 'POST', body })

  return useMutation({ mutationFn: retrieve })
}
4 changes: 4 additions & 0 deletions frontend/src/lib/constants.ts
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,9 @@ export const ROUTES = {
FORECAST: '/visualize/forecast',
BACKTEST: '/visualize/backtest',
},
KNOWLEDGE: '/knowledge',
CHAT: '/chat',
GUIDE: '/guide',
ADMIN: '/admin',
} as const

Expand All @@ -38,7 +40,9 @@ export const NAV_ITEMS = [
{ label: 'Backtest Results', href: ROUTES.VISUALIZE.BACKTEST },
],
},
{ label: 'Knowledge', href: ROUTES.KNOWLEDGE },
{ label: 'Chat', href: ROUTES.CHAT },
{ label: 'Agent Guide', href: ROUTES.GUIDE },
{ label: 'Admin', href: ROUTES.ADMIN },
] as const

Expand Down
91 changes: 91 additions & 0 deletions frontend/src/lib/knowledge-utils.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
import { describe, it, expect } from 'vitest'
import { formatRelevance, groupSourcesByType, chunkExcerpt } from './knowledge-utils'
import type { RagSource, ChunkResult } from '@/types/api'

/**
 * Test fixture factory: returns a complete RagSource built from the required
 * `source_id` plus any overrides. Every other field that is omitted (or
 * nullish) receives a fixed placeholder value.
 */
function makeSource(partial: Partial<RagSource> & Pick<RagSource, 'source_id'>): RagSource {
  const {
    source_id,
    source_type,
    source_path,
    chunk_count,
    content_hash,
    indexed_at,
    metadata,
  } = partial

  const built: RagSource = {
    source_id,
    source_type: source_type ?? 'markdown',
    source_path: source_path ?? 'docs/example.md',
    chunk_count: chunk_count ?? 3,
    content_hash: content_hash ?? 'hash',
    indexed_at: indexed_at ?? '2026-05-18T00:00:00Z',
    metadata: metadata ?? null,
  }
  return built
}

/**
 * Test fixture factory: returns a complete ChunkResult built from the required
 * `content` plus any overrides. Every other field that is omitted (or nullish)
 * receives a fixed placeholder value.
 */
function makeChunk(partial: Partial<ChunkResult> & Pick<ChunkResult, 'content'>): ChunkResult {
  const { chunk_id, source_id, source_path, source_type, content, relevance_score, metadata } =
    partial

  const built: ChunkResult = {
    chunk_id: chunk_id ?? 'chunk-1',
    source_id: source_id ?? 'src-1',
    source_path: source_path ?? 'docs/example.md',
    source_type: source_type ?? 'markdown',
    content,
    relevance_score: relevance_score ?? 0.8,
    metadata: metadata ?? null,
  }
  return built
}

// formatRelevance: relevance score -> display percentage string.
describe('formatRelevance', () => {
  it('renders a score as a rounded percentage', () => {
    const rendered = formatRelevance(0.873)
    expect(rendered).toBe('87%')
  })

  it('clamps the endpoints', () => {
    const atZero = formatRelevance(0)
    const atOne = formatRelevance(1)
    expect(atZero).toBe('0%')
    expect(atOne).toBe('100%')
  })

  it('clamps out-of-range values', () => {
    const aboveRange = formatRelevance(1.4)
    const belowRange = formatRelevance(-0.2)
    expect(aboveRange).toBe('100%')
    expect(belowRange).toBe('0%')
  })

  it('treats a non-finite score as zero', () => {
    // Both NaN and +Infinity are expected to render as 0%.
    const fromNaN = formatRelevance(Number.NaN)
    const fromInfinity = formatRelevance(Number.POSITIVE_INFINITY)
    expect(fromNaN).toBe('0%')
    expect(fromInfinity).toBe('0%')
  })
})

// groupSourcesByType: flat source list -> buckets keyed by source_type.
describe('groupSourcesByType', () => {
  it('groups a mixed source list into per-type buckets', () => {
    const mixedSources = [
      makeSource({ source_id: 'a', source_type: 'markdown' }),
      makeSource({ source_id: 'b', source_type: 'openapi' }),
      makeSource({ source_id: 'c', source_type: 'markdown' }),
    ]

    const groups = groupSourcesByType(mixedSources)
    const typeNames = Object.keys(groups).sort()

    expect(typeNames).toEqual(['markdown', 'openapi'])
    expect(groups.markdown).toHaveLength(2)
    expect(groups.openapi).toHaveLength(1)
  })

  it('returns an empty object for an empty array', () => {
    const groups = groupSourcesByType([])
    expect(groups).toEqual({})
  })

  it('falls back to "unknown" for a blank source_type', () => {
    const untyped = makeSource({ source_id: 'a', source_type: '' })
    const groups = groupSourcesByType([untyped])
    expect(groups.unknown).toHaveLength(1)
  })
})

// chunkExcerpt: chunk content -> single-line, length-limited excerpt.
describe('chunkExcerpt', () => {
  it('returns short content intact with collapsed whitespace', () => {
    const messy = makeChunk({ content: ' hello world\n\tagain ' })
    const excerpt = chunkExcerpt(messy)
    expect(excerpt).toBe('hello world again')
  })

  it('truncates long content with an ellipsis', () => {
    const oversized = makeChunk({ content: 'x'.repeat(500) })
    const excerpt = chunkExcerpt(oversized)
    // Default limit of 240 characters plus the one-character ellipsis.
    expect(excerpt.endsWith('…')).toBe(true)
    expect(excerpt.length).toBeLessThanOrEqual(241)
  })

  it('honours a custom maxChars', () => {
    const tenLetters = makeChunk({ content: 'abcdefghij' })
    const excerpt = chunkExcerpt(tenLetters, 5)
    expect(excerpt).toBe('abcde…')
  })
})
43 changes: 43 additions & 0 deletions frontend/src/lib/knowledge-utils.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
// Pure, React-free helpers for the Knowledge page. Kept separate from the page
// component so they are cheap to unit-test (see knowledge-utils.test.ts) —
// mirrors the use-demo-pipeline.ts / status-utils.ts precedent.
import type { RagSource, ChunkResult } from '@/types/api'

/**
 * Render a relevance score as a whole-number percentage string.
 *
 * The score is expected in 0..1 and is rounded to the nearest percent
 * (0.873 -> "87%"). Finite values outside that range are clamped
 * (1.4 -> "100%", -0.2 -> "0%"). Any non-finite value (NaN, +/-Infinity)
 * is treated as zero and renders as "0%".
 */
export function formatRelevance(score: number): string {
  // Non-finite input and everything at or below zero collapse to the floor.
  if (!Number.isFinite(score) || score <= 0) {
    return '0%'
  }
  // Everything at or above one collapses to the ceiling.
  if (score >= 1) {
    return '100%'
  }
  const percent = Math.round(score * 100)
  return `${percent}%`
}

/**
 * Group indexed sources by their source_type (e.g. "markdown", "openapi").
 *
 * @param sources - Sources to bucket; relative order is preserved inside each bucket.
 * @returns A plain object mapping each type name to its sources. An empty
 *   array yields an empty object; a blank/missing type falls back to "unknown".
 */
export function groupSourcesByType(sources: RagSource[]): Record<string, RagSource[]> {
  // Bucket through a Map rather than a plain object: type names come from
  // data, and a name such as "constructor", "toString" or "__proto__" would
  // otherwise resolve to an inherited Object.prototype member and break `.push`.
  const buckets = new Map<string, RagSource[]>()
  for (const source of sources) {
    const key = source.source_type || 'unknown'
    const bucket = buckets.get(key)
    if (bucket) {
      bucket.push(source)
    } else {
      buckets.set(key, [source])
    }
  }
  // Object.fromEntries defines own data properties, so every key (including
  // "__proto__") ends up as a regular enumerable entry on the result.
  return Object.fromEntries(buckets)
}

/**
 * Single-line excerpt of a chunk's content for a result card.
 *
 * Collapses every run of whitespace to one space, trims the ends, and, when
 * the result is longer than `maxChars` UTF-16 code units, truncates it and
 * appends an ellipsis. The cut never lands in the middle of a surrogate pair,
 * so an emoji or other astral character at the boundary is dropped whole
 * rather than being left as a broken half.
 *
 * @param chunk - Retrieved chunk whose `content` is excerpted.
 * @param maxChars - Maximum excerpt length before the ellipsis (default 240).
 * @returns The collapsed content, truncated with a trailing "…" if needed.
 */
export function chunkExcerpt(chunk: ChunkResult, maxChars = 240): string {
  const collapsed = chunk.content.replace(/\s+/g, ' ').trim()
  if (collapsed.length <= maxChars) {
    return collapsed
  }
  let end = maxChars
  // If the last kept code unit is a high surrogate, its low surrogate would be
  // cut off; back up one unit so the pair is removed together.
  const lastKept = collapsed.charCodeAt(end - 1)
  if (lastKept >= 0xd800 && lastKept <= 0xdbff) {
    end -= 1
  }
  return `${collapsed.slice(0, end).trimEnd()}…`
}
Loading