diff --git a/PRPs/PRP-23-rag-corpus-manager.md b/PRPs/PRP-23-rag-corpus-manager.md new file mode 100644 index 00000000..4eeca2e2 --- /dev/null +++ b/PRPs/PRP-23-rag-corpus-manager.md @@ -0,0 +1,869 @@ +name: "PRP-23 — RAG Corpus Manager: one-click bulk-index of bundled project docs" +description: | + Promote the MVP of `docs/optional-features/01-rag-corpus-manager.md` into code. + + A fresh ForecastLabAI install has an **empty RAG corpus** (`0 sources / 0 chunks`), + so the RAG Assistant agent can cite nothing and the Knowledge page is a permanent + empty state — despite the repo bundling ~115 markdown files under `docs/`, `PRPs/`, + and the root. + + This PRP adds **one new orchestration endpoint** — `POST /rag/index/project-docs` — + that discovers the bundled markdown and indexes each file through the existing + `RAGService.index_document` path (reusing its chunking, embedding, SHA-256 + content-hash idempotency, and upsert). The Admin → "RAG Sources" tab gets an + **"Index Project Docs"** button that calls it and toasts the summary. + + Everything else the feature doc lists is **already done** or **out of scope**: + source listing / deletion / provider-health / Knowledge empty-state all exist; + stale-detection, re-index, chunk-preview, and the Knowledge source-type filter are + explicitly deferred to a follow-up ("Full Version" — see Anti-Patterns / NOTES). + +> **PRP numbering:** `PRP-16` is reserved (Phase-2 LightGBM). `PRP-17`–`PRP-22` are +> used. This is `PRP-23`. Source plan: `.agents/plans/rag-index-project-docs.md`. + +## Purpose + +Close the "the RAG corpus starts empty and there is no operator-facing way to fill +it" gap. Today the only ways to populate the corpus are (a) `POST /rag/index` once +per file (~115 calls, requires pasting each path) or (b) the seeder's synthetic +3-document scenario, which indexes throwaway test prose rather than the real project +documentation. 
An operator or demo reviewer needs **one click** that turns +`0 sources` into a populated, citable corpus drawn from the repo's own docs. + +## Core Principles + +1. **Context is King** — every endpoint shape, schema field, service method, hook + name, and pattern below is linked to a real source file with verified line + numbers. +2. **Reuse, don't reinvent** — `index_project_docs` is a thin orchestrator over the + existing `RAGService.index_document`; it does NOT re-implement hashing, chunking, + embedding, or upsert. The route mirrors the existing `index_document` route's + exception handling; the hook mirrors `useIndexDocument`. +3. **Additive only** — NO Alembic migration (no schema change — `category` rides in + the existing `DocumentSource.metadata_` JSONB), NO new slice, NO `.env` var, NO + `app/main.py` change (the `rag` router is already wired). +4. **Strict gates honored** — `.py` files in the `rag` slice change, so the repo-wide + `ruff` / `mypy --strict` / `pyright --strict` / `pytest` CI jobs genuinely apply; + the new endpoint ships with unit + integration tests. +5. **UI through the running app** — the Admin button is verified in a real browser + via `webapp-testing` per `.claude/rules/ui-design.md`. A green `tsc` is NOT proof + the UI works. + +--- + +## Goal + +**Backend (additive, no migration, no `main.py` change):** + +- `POST /rag/index/project-docs` — discovers markdown under `docs/**`, `PRPs/**`, + and a fixed root-file allow-list (`README.md`, `AGENTS.md`, `CHANGELOG.md`), + indexes each through `RAGService.index_document`, and returns a per-file + + aggregate summary. Request body is three optional booleans (`include_docs`, + `include_prps`, `include_root`, all default `true`). Idempotent — re-runs return + every file `unchanged` via the existing SHA-256 short-circuit. 
A single + unreadable / non-UTF-8 file is reported `failed` without aborting the batch; + `EmbeddingError` / `SQLAlchemyError` are batch-fatal and surface as `502` / + `application/problem+json`. + +**Frontend:** + +- New TanStack mutation hook `useIndexProjectDocs` in `use-rag-sources.ts`. +- Three new TS types (`IndexProjectDocsRequest`, `ProjectDocResult`, + `IndexProjectDocsResponse`) in `types/api.ts`. +- An **"Index Project Docs"** button in the Admin → "RAG Sources" tab + (`RagSourcesPanel`) — spinner while running, a `toast` summary on completion, + and a `['rag-sources']` query invalidation so the list + counts refresh. + +## Why + +- **Portfolio identity.** `.claude/rules/product-vision.md` principle 1 — + "portfolio-grade, end-to-end … every phase ships working code". The RAG slice + exists end-to-end but is invisible: a reviewer opening a fresh system sees an + empty Knowledge page and an agent that can cite nothing. This makes the existing + RAG investment demonstrable. +- **Demo narrative.** `docs/optional-features/README.md` § "Promotion Criteria" — + a feature should "improve the demo narrative without breaking the local-first + setup". Bulk-indexing the repo's own docs is the most direct way to show the RAG + Assistant working off real evidence. +- **Operator workflow.** The feature doc's user value: "Demo reviewers can index + project docs without CLI setup"; "the Knowledge page becomes a real corpus + browser instead of mostly an empty-state page". + +## What + +A logged-in operator opens **Admin → RAG Sources**, sees `0 sources • 0 chunks`, +clicks **Index Project Docs**, watches a spinner for up to ~1–3 minutes (first run, +real embedding provider), then sees a toast — e.g. *"Indexed 112, updated 0, +unchanged 0, 3 failed — 1 480 chunks"* — and the source list populates. Opening +**Knowledge** now shows the corpus and semantic search returns cited chunks. 
+Clicking **Index Project Docs** again completes near-instantly with every file +`unchanged`. + +### Success Criteria + +- [ ] `POST /rag/index/project-docs` indexes `docs/**/*.md`, `PRPs/**/*.md`, and the + root allow-list; returns `IndexProjectDocsResponse` with per-file results + + aggregate counts. +- [ ] Idempotent — a second call with unchanged files returns every result + `unchanged` and creates no new chunks. +- [ ] `include_docs` / `include_prps` / `include_root` toggles select roots + independently; an empty `{}` body indexes all three. +- [ ] A single unreadable / non-UTF-8 file is reported `status="failed"` with an + `error` string and does not abort the batch. +- [ ] `EmbeddingError` → `502`, `SQLAlchemyError` → `DatabaseError` / + `application/problem+json` (no partial commit — the request rolls back). +- [ ] Admin → "RAG Sources" has a working "Index Project Docs" button: spinner, + toast summary (`toast.warning` when `failed > 0`, else `toast.success`), and + a live source-list refresh. +- [ ] All validation gates (ruff, mypy --strict, pyright --strict, pytest unit + + integration, frontend tsc/lint/test) pass; integration tests leave no + `test-` rows in `document_source`. +- [ ] `docs/_base/API_CONTRACTS.md` lists the new endpoint. +- [ ] No regression in existing RAG tests or `app/core/tests/test_strict_mode_policy.py`. + +## All Needed Context + +### Documentation & References + +```yaml +# ---- External docs ---- +- url: https://docs.python.org/3/library/pathlib.html#pathlib.Path.rglob + why: Path.rglob("*.md") for recursive discovery. CRITICAL — rglob on a + NON-EXISTENT directory yields nothing (no exception); relied on so an + absent docs/ or PRPs/ root simply contributes 0 files. +- url: https://fastapi.tiangolo.com/tutorial/body/ + why: A Pydantic model as a request body whose fields ALL have defaults + validates an empty `{}` payload — the frontend always posts `{}`. 
+- url: https://docs.pydantic.dev/latest/concepts/models/#extra-fields + why: ConfigDict(extra="forbid") on the request → an unknown body field 422s. + Mirrors the existing IndexRequest. + +# ---- Source feature spec ---- +- file: docs/optional-features/01-rag-corpus-manager.md + why: The spec. Implement ONLY the "MVP Scope" section. "Full Version" (stale + detection, re-index, chunk preview, Knowledge filters) is OUT OF SCOPE. +- file: .agents/plans/rag-index-project-docs.md + why: The source implementation plan this PRP refines (notably: the unit test + now targets a new pure _discover_project_doc_files helper, not a mocked + index_document — see "Resolved Decisions"). + +# ---- Backend: the rag slice (all changes land here) ---- +- file: app/features/rag/routes.py + why: lines 61-133 — `index_document` route: the EXACT exception-handling + shape to mirror (EmbeddingError→502, SQLAlchemyError→DatabaseError) and + the structured-logging style. Lines 12-19 — the schema import block to + extend. Lines 1-24 — router, `logger`, `RAGService` imports. +- file: app/features/rag/service.py + why: lines 130-251 — `index_document`, the method `index_project_docs` + orchestrates per file. lines 159-163 — the `if request.content:` branch + (see the empty-file GOTCHA). lines 173-191 — the SHA-256 idempotency + short-circuit. lines 61-81 — `__init__` + the `base_dir` test override. + lines 94-128 — `_read_content_from_path` (path-traversal pattern). + lines 29-38 — the schema import block to extend. +- file: app/features/rag/schemas.py + why: lines 17-43 `IndexRequest`, 46-65 `IndexResponse` — the schema style to + mirror: `ConfigDict(extra="forbid")` on the request, `Literal` status + field, `Args:` docstrings. NOTE: `IndexRequest` is NOT `strict=True`, so + the new request model needs no `Field(strict=False)` overrides and + `app/core/tests/test_strict_mode_policy.py` is unaffected. 
+- file: app/features/rag/models.py + why: lines 35-66 `DocumentSource` — confirms `source_type` is free-form + `String(50)` (we keep `"markdown"`), `metadata_` is JSONB (we store + `{"category": ...}`), and `uq_source_type_path` drives idempotency. +- file: app/features/rag/chunkers.py + why: `get_chunker("markdown")` → `MarkdownChunker`. Confirms `"markdown"` is a + valid `source_type` for every project doc. +- file: app/core/exceptions.py + why: `DatabaseError` — re-raised on `SQLAlchemyError`; already imported in + `routes.py:9`. + +# ---- Backend: tests ---- +- file: app/features/rag/tests/conftest.py + why: `db_session` + `client` integration fixtures, `mock_embedding_service` + unit fixture, and the cleanup at LINE 46 + (`DocumentSource.source_path.like("test-%")`) — this PRP widens it to + `"%test-%"` so nested fixture paths (`docs/test-*.md`) are cleaned up. +- file: app/features/rag/tests/test_routes.py + why: lines 22-37 `create_mock_embedding_service()` and the + `patch("app.features.rag.service.get_embedding_service", ...)` pattern; + `TestIndexEndpoint` (45-167) class layout to mirror. +- file: app/features/rag/tests/test_service.py + why: `TestRAGServiceUnit` — pure-unit class layout (`RAGService()` with no DB, + no mocks); the home for the new `_discover_project_doc_files` unit test. + +# ---- Frontend ---- +- file: frontend/src/hooks/use-rag-sources.ts + why: lines 29-41 `useIndexDocument` — the EXACT mutation-hook shape + (`useMutation` + `api(...)` + `invalidateQueries(['rag-sources'])`). +- file: frontend/src/pages/admin.tsx + why: lines 116-253 `RagSourcesPanel` — where the button goes; the `CardHeader` + actions area (148-205); the lucide import block (4-21 — `Library` must be + ADDED); `toast` already imported (line 68); the `handleGenerate` toast + pattern (470-488); the `Loader2` spinner-in-button pattern (line 199). 
+- file: frontend/src/types/api.ts + why: lines 258-313 — the `// === RAG ===` block to extend; `RagSource`, + `IndexDocumentResponse`, `RetrieveResponse` naming convention. +- file: frontend/src/lib/api.ts + why: lines 23-44 — `api(endpoint, {method, body})`; a truthy `{}` body is + JSON-stringified to `"{}"`. + +# ---- Rules ---- +- file: .claude/rules/security-patterns.md + why: § "File operations" — `pathlib.Path.resolve()`, allow-listed roots, no + `..`. Discovery globs only fixed roots under `base_dir` (no user input) → + inherently allow-listed; keep it that way. +- file: .claude/rules/test-requirements.md + why: new endpoint ⇒ route test with 2xx happy path + ≥1 error path. +- file: .claude/rules/commit-format.md + why: commit `type(scope): description (#issue)`; `rag,ui` comma-pair scope is + allowed; every commit references an open issue; NO AI co-author trailer. +``` + +### Current Codebase tree (relevant) + +``` +app/features/rag/ +├── __init__.py +├── chunkers.py # MarkdownChunker / OpenAPIChunker — UNCHANGED +├── embeddings.py # OpenAI / Ollama providers — UNCHANGED +├── models.py # DocumentSource / DocumentChunk — UNCHANGED (no migration) +├── routes.py # /rag/index, /retrieve, /sources — ADD one route +├── schemas.py # IndexRequest, …, DeleteResponse — ADD three models +├── service.py # RAGService — ADD _discover_project_doc_files + index_project_docs +└── tests/ + ├── conftest.py # MODIFY line 46 cleanup glob + ├── test_chunkers.py # UNCHANGED + ├── test_embeddings.py # UNCHANGED + ├── test_routes.py # ADD TestIndexProjectDocsEndpoint + ├── test_schemas.py # ADD new-schema cases + └── test_service.py # ADD _discover_project_doc_files unit test + +frontend/src/ +├── hooks/use-rag-sources.ts # ADD useIndexProjectDocs +├── pages/admin.tsx # ADD button in RagSourcesPanel +└── types/api.ts # ADD 3 interfaces + +docs/_base/API_CONTRACTS.md # ADD one table row +``` + +### Desired Codebase tree (files added / changed) + +No new files. 
Eleven existing files are modified: + +``` +MODIFY app/features/rag/schemas.py + IndexProjectDocsRequest / ProjectDocResult / IndexProjectDocsResponse +MODIFY app/features/rag/service.py + _discover_project_doc_files() + index_project_docs() + 1 module constant (_PROJECT_ROOT_FILES) +MODIFY app/features/rag/routes.py + POST /rag/index/project-docs route +MODIFY app/features/rag/tests/conftest.py ~ cleanup glob "test-%" -> "%test-%" +MODIFY app/features/rag/tests/test_schemas.py + new-schema validation cases +MODIFY app/features/rag/tests/test_service.py + _discover_project_doc_files unit test +MODIFY app/features/rag/tests/test_routes.py + TestIndexProjectDocsEndpoint (integration) +MODIFY frontend/src/types/api.ts + 3 interfaces +MODIFY frontend/src/hooks/use-rag-sources.ts + useIndexProjectDocs hook +MODIFY frontend/src/pages/admin.tsx + "Index Project Docs" button (+ Library icon import) +MODIFY docs/_base/API_CONTRACTS.md + endpoint-table row +``` + +### Known Gotchas & Library Quirks + +```python +# CRITICAL: CRLF line endings. Every existing app/**/*.py file in this repo is +# CRLF-terminated (no .gitattributes — project memory). A FULL-FILE rewrite +# (the Write tool, or a text-mode dump) silently flips them to LF and produces +# a whole-file diff. Use the Edit tool (exact string replacement — it preserves +# the surrounding line endings) for every .py change. After EACH edit run +# `git diff --stat`: the changed-line count must be small. If you see a +# whole-file churn, the EOLs flipped — restore CRLF before continuing. New +# files: none here. Frontend .ts/.tsx files are LF — safe. + +# CRITICAL: NO app/main.py change. The `rag` router is already wired +# (main.py:27 import, main.py:142 include_router). The new route attaches to +# the existing `router = APIRouter(prefix="/rag", ...)` in routes.py. + +# CRITICAL: NO Alembic migration. DocumentSource / DocumentChunk are unchanged. 
+# The per-source `category` ("docs" | "prp" | "root") rides inside the EXISTING +# `DocumentSource.metadata_` JSONB column. `.claude/rules` require a migration +# only when the SCHEMA changes — adding one here would be wrong. + +# CRITICAL: index_document's content branch (service.py:160) is `if request.content:` +# — an EMPTY string is FALSY, so an empty .md file passed as content="" falls +# through to `_read_content_from_path(rel)`, which resolves the relative path +# against CWD. In production CWD == base_dir (uvicorn runs from the repo root), +# so the redundant re-read succeeds and the file indexes to 0 chunks. In a +# base_dir-OVERRIDE test (CWD != base_dir) it raises FileNotFoundError — which +# is a subclass of OSError and is therefore caught by the per-file +# `except (OSError, ValueError)` and reported `status="failed"`. NEVER fatal. +# Mitigation: make every test fixture file NON-EMPTY (`"# Test\n\nContent."`). +# Do NOT "fix" index_document — it is shared with POST /rag/index. + +# CRITICAL: pass BOTH source_path (the clean RELATIVE posix path — the DB id) AND +# content (the file text) to IndexRequest. source_path drives the +# `(source_type, source_path)` idempotency lookup + is stored; content (when +# truthy) is hashed/chunked. NEVER store an absolute path — it is +# machine-specific and breaks idempotency across machines/CI. + +# CRITICAL: route-test base_dir injection. The route does `RAGService()` with no +# args (→ base_dir = Path.cwd()). To point an integration test at a tmp_path, +# patch the class symbol in the routes module: +# patch("app.features.rag.routes.RAGService", +# functools.partial(RAGService, base_dir=str(tmp_path))) +# `partial(Cls, kw=v)()` constructs `Cls(kw=v)`. Patch +# `app.features.rag.service.get_embedding_service` SEPARATELY (the existing +# test pattern) so __init__ picks up the mock provider. + +# CRITICAL: integration-test cleanup. conftest.py:46 deletes +# `source_path LIKE 'test-%'`. 
Project-doc source paths are NESTED +# (`docs/test-proj-1.md`) and do NOT start with `test-`. Widen the glob to +# `"%test-%"` (Task 1). Real corpus paths (`docs/ARCHITECTURE.md`, `PRPs/PRP-1-…`) +# never contain `test-`, so the wider LIKE is safe; existing `test-`-prefixed +# fixtures still match. Name every new fixture file with a `test-` token. + +# GOTCHA: synchronous by design. Indexing runs in-request. ~115 bundled markdown +# files ⇒ the first run with a real embedding provider takes ~1-3 min (one +# batched embedding call per file). `fetch` has no default timeout and the +# TanStack mutation waits, so this is acceptable for an admin action; re-runs +# are fast (all `unchanged`). The jobs-layer upgrade is the deferred +# "Full Version" — OUT OF SCOPE here. + +# GOTCHA: status-Literal widening. IndexResponse.status is +# Literal["indexed","updated","unchanged"]; ProjectDocResult.status is +# Literal["indexed","updated","unchanged","failed"]. Assigning the narrower +# into the wider is fine for mypy/pyright (subtype). "failed" is only ever set +# in the per-file except branch. + +# GOTCHA: EmbeddingError is NOT an OSError/ValueError (it extends Exception), so +# it is NOT caught by the per-file `except (OSError, ValueError)` — it +# propagates out of the loop, the request rolls back, and the route maps it to +# 502. Same for SQLAlchemyError. This is intentional: a dead embedding provider +# makes the whole batch pointless. + +# GOTCHA: `RAGService()` is safe to construct in a pure unit test with no mocks — +# __init__ only builds the (lazy) embedding client + a tiktoken encoder, no +# network. test_service.py::TestRAGServiceUnit already relies on this. + +# GOTCHA: admin.tsx does NOT currently import `Library` from lucide-react. Add it +# to the existing import block (admin.tsx:4-21). `toast` IS already imported +# (admin.tsx:68). 
+``` + +### Resolved Decisions (carried from `.agents/plans/rag-index-project-docs.md`) + +- **Scope = MVP only.** Out of scope: `GET /rag/sources/{id}/chunks` + chunk + preview, `POST /rag/sources/{id}/reindex` + stale detection, the Knowledge-page + source-type filter, per-source embedding-metadata columns (would need a + migration). Keeping the PR small matches the maintainer preference in + `CLAUDE.local.md` ("prefer a smaller PR over a bundled one"). +- **Root file allow-list = `("README.md", "AGENTS.md", "CHANGELOG.md")`.** `CLAUDE.md` + is excluded — it is mostly an operating index and `@import`s `AGENTS.md` (whose + substance is already indexed). +- **`source_type` stays `"markdown"` for every project doc.** The `docs|prp|root` + distinction is stored as `metadata.category`, which powers the existing + `RetrieveRequest.filters.category` path (`service.py:585-589`) for free, with no + schema change. +- **Refinement vs the plan:** discovery is extracted into a pure, sync + `_discover_project_doc_files` helper so it can be unit-tested with no DB and no + mocks (the plan's "mock `index_document`" approach would pass a `MagicMock` + where `mypy --strict` expects an `AsyncSession`). The full `index_project_docs` + loop/aggregate path is covered by the route integration test. +- **Synchronous in-request** (not the jobs layer) — see the GOTCHA above. + +## Implementation Blueprint + +### Data models — backend schemas (`app/features/rag/schemas.py`) + +Append after `DeleteResponse`. Mirror `IndexRequest` / `IndexResponse` style. + +```python +# Pseudocode — do not copy verbatim; add full `Args:` docstrings per file style. 
+ +class IndexProjectDocsRequest(BaseModel): + """Request to bulk-index bundled project documentation.""" + model_config = ConfigDict(extra="forbid") # NOT strict=True (mirror IndexRequest) + include_docs: bool = Field(default=True, description="Index docs/**/*.md") + include_prps: bool = Field(default=True, description="Index PRPs/**/*.md") + include_root: bool = Field(default=True, description="Index README/AGENTS/CHANGELOG") + +class ProjectDocResult(BaseModel): + """Per-file outcome of a project-docs index run.""" + source_path: str + status: Literal["indexed", "updated", "unchanged", "failed"] + chunks_created: int + error: str | None = None + +class IndexProjectDocsResponse(BaseModel): + """Aggregate result of POST /rag/index/project-docs.""" + results: list[ProjectDocResult] + total_files: int + indexed: int + updated: int + unchanged: int + failed: int + total_chunks: int + duration_ms: float +``` + +`Literal`, `BaseModel`, `ConfigDict`, `Field` are already imported (`schemas.py:11-14`). + +### Data models — frontend types (`frontend/src/types/api.ts`, in the `// === RAG ===` block, after `RetrieveResponse` ~line 313) + +```ts +export interface IndexProjectDocsRequest { + include_docs?: boolean + include_prps?: boolean + include_root?: boolean +} +export interface ProjectDocResult { + source_path: string + status: 'indexed' | 'updated' | 'unchanged' | 'failed' + chunks_created: number + error: string | null +} +export interface IndexProjectDocsResponse { + results: ProjectDocResult[] + total_files: number + indexed: number + updated: number + unchanged: number + failed: number + total_chunks: number + duration_ms: number +} +``` + +### Backend service (`app/features/rag/service.py`) + +Add one module-level constant (after the imports, before `class RAGService`) and +two methods on `RAGService`. + +```python +# Module-level — the allow-listed root-level project-doc files. +_PROJECT_ROOT_FILES: tuple[str, ...] 
= ("README.md", "AGENTS.md", "CHANGELOG.md") + +class RAGService: + ... + def _discover_project_doc_files( + self, request: IndexProjectDocsRequest + ) -> list[tuple[Path, str]]: + """Discover bundled markdown under allow-listed roots. Pure + sync. + + Returns a deterministically sorted list of (absolute_path, category) + where category is "docs" | "prp" | "root". + """ + found: list[tuple[Path, str]] = [] + if request.include_docs: + found += [(p, "docs") for p in (self._base_dir / "docs").rglob("*.md")] + if request.include_prps: + found += [(p, "prp") for p in (self._base_dir / "PRPs").rglob("*.md")] + if request.include_root: + for name in _PROJECT_ROOT_FILES: + candidate = self._base_dir / name + if candidate.is_file(): + found.append((candidate, "root")) + # GOTCHA: rglob order is filesystem-dependent — sort for stable results. + return sorted(found, key=lambda pair: str(pair[0])) + + async def index_project_docs( + self, db: AsyncSession, request: IndexProjectDocsRequest + ) -> IndexProjectDocsResponse: + """Bulk-index discovered project docs via index_document. Idempotent.""" + start = time.time() + logger.info("rag.index_project_docs_started", + include_docs=request.include_docs, + include_prps=request.include_prps, + include_root=request.include_root) + + results: list[ProjectDocResult] = [] + for abs_path, category in self._discover_project_doc_files(request): + # abs_path came from globbing UNDER self._base_dir → relative_to is safe. 
+ rel = abs_path.relative_to(self._base_dir).as_posix() + try: + content = abs_path.read_text(encoding="utf-8") + index_response = await self.index_document( + db, + IndexRequest( + source_type="markdown", + source_path=rel, # clean relative DB id + content=content, + metadata={"category": category}, + ), + ) + results.append(ProjectDocResult( + source_path=rel, + status=index_response.status, # narrower Literal → wider: OK + chunks_created=index_response.chunks_created, + error=None, + )) + except (OSError, ValueError) as exc: + # FileNotFoundError ⊂ OSError ; UnicodeDecodeError ⊂ ValueError. + # EmbeddingError / SQLAlchemyError are NOT caught → batch-fatal. + logger.warning("rag.index_project_docs_file_failed", + source_path=rel, error=str(exc), + error_type=type(exc).__name__) + results.append(ProjectDocResult( + source_path=rel, status="failed", + chunks_created=0, error=str(exc))) + + duration_ms = (time.time() - start) * 1000 + summary = IndexProjectDocsResponse( + results=results, + total_files=len(results), + indexed=sum(r.status == "indexed" for r in results), + updated=sum(r.status == "updated" for r in results), + unchanged=sum(r.status == "unchanged" for r in results), + failed=sum(r.status == "failed" for r in results), + total_chunks=sum(r.chunks_created for r in results), + duration_ms=duration_ms, + ) + logger.info("rag.index_project_docs_completed", + total_files=summary.total_files, indexed=summary.indexed, + updated=summary.updated, unchanged=summary.unchanged, + failed=summary.failed, total_chunks=summary.total_chunks, + duration_ms=duration_ms) + return summary +``` + +IMPORTS to add to `service.py`: extend the existing +`from app.features.rag.schemas import (...)` block (lines 29-38) with +`IndexProjectDocsRequest`, `IndexProjectDocsResponse`, `ProjectDocResult`. `time`, +`Path`, `IndexRequest`, `AsyncSession`, `logger` are already imported. 
+ +### Backend route (`app/features/rag/routes.py`) + +Add `IndexProjectDocsRequest, IndexProjectDocsResponse` to the schema import block +(lines 12-19), then append after the `index_document` route: + +```python +@router.post( + "/index/project-docs", + response_model=IndexProjectDocsResponse, + summary="Index bundled project documentation", + description="Discover and index docs/**, PRPs/**, and selected root markdown. " + "Idempotent via content hash; per-file + aggregate summary.", +) +async def index_project_docs( + request: IndexProjectDocsRequest, + db: AsyncSession = Depends(get_db), +) -> IndexProjectDocsResponse: + logger.info("rag.index_project_docs_request_received", + include_docs=request.include_docs, + include_prps=request.include_prps, + include_root=request.include_root) + service = RAGService() + try: + response = await service.index_project_docs(db=db, request=request) + logger.info("rag.index_project_docs_request_completed", + total_files=response.total_files, + total_chunks=response.total_chunks, failed=response.failed) + return response + except EmbeddingError as e: # mirror index_document route + logger.error("rag.index_project_docs_request_failed", error=str(e), + error_type=type(e).__name__, exc_info=True) + raise HTTPException(status_code=status.HTTP_502_BAD_GATEWAY, + detail=f"Embedding generation failed: {e}") from e + except SQLAlchemyError as e: + logger.error("rag.index_project_docs_request_failed", error=str(e), + error_type=type(e).__name__, exc_info=True) + raise DatabaseError(message="Failed to index project docs", + details={"error": str(e)}) from e +``` + +NO explicit `status_code` → default `200` (this is a mixed, idempotent batch — not a +single-resource create). `/index/project-docs` and `/index` are distinct static +paths — no route-ordering conflict. Do NOT add a `FileNotFoundError` handler: the +service swallows per-file read errors as `status="failed"` and never raises it. 
+ +### Frontend hook (`frontend/src/hooks/use-rag-sources.ts`) — mirror `useIndexDocument` + +```ts +// extend the type import with IndexProjectDocsRequest, IndexProjectDocsResponse +export function useIndexProjectDocs() { + const queryClient = useQueryClient() + return useMutation({ + mutationFn: (body: IndexProjectDocsRequest) => + api<IndexProjectDocsResponse>('/rag/index/project-docs', { method: 'POST', body }), + onSuccess: () => { + void queryClient.invalidateQueries({ queryKey: ['rag-sources'] }) + }, + }) +} +``` + +### Admin button (`frontend/src/pages/admin.tsx` → `RagSourcesPanel`) + +- Add `Library` to the lucide-react import (admin.tsx:4-21). +- In `RagSourcesPanel`, call `const indexProjectDocs = useIndexProjectDocs()`. +- Add a handler: + +```tsx +const handleIndexProjectDocs = async () => { + try { + const r = await indexProjectDocs.mutateAsync({}) // {} → all roots + const summary = + `Indexed ${r.indexed}, updated ${r.updated}, unchanged ${r.unchanged}, ` + + `${r.failed} failed — ${r.total_chunks} chunks` + if (r.failed > 0) toast.warning(summary) + else toast.success(summary) + } catch (err) { + toast.error(err instanceof Error ? err.message : 'Project-docs indexing failed') + } +} +``` + +- In the `CardHeader`, wrap the existing "Index Document" `<Dialog>` and a new + `<Button>` in a flex container (sketch — match the existing button styling): + +```tsx +<div className="flex items-center gap-2"> + <Button + variant="outline" + onClick={() => void handleIndexProjectDocs()} + disabled={indexProjectDocs.isPending} + > + {indexProjectDocs.isPending ? ( + <Loader2 className="mr-2 h-4 w-4 animate-spin" /> + ) : ( + <Library className="mr-2 h-4 w-4" /> + )} + Index Project Docs + </Button> + {/* existing "Index Document" <Dialog> … </Dialog> — unchanged */} +</div> +``` + +Do NOT restructure the existing "Index Document" dialog — only wrap + add beside it. +No confirm dialog — indexing is additive and idempotent. + +### list of tasks (in execution order) + +```yaml +Task 0 — PRECONDITION: + - Find or open a GitHub issue (promote docs/optional-features/01-rag-corpus-manager.md). + - VERIFY: gh issue view <issue-number> --json state → "OPEN". + - git switch -c feat/rag-index-project-docs (off an up-to-date dev). + +Task 1 — MODIFY app/features/rag/tests/conftest.py: + - FIND: DocumentSource.source_path.like("test-%") # line 46 + - REPLACE: DocumentSource.source_path.like("%test-%") + - Use the Edit tool (preserve CRLF). git diff --stat → 1 line changed. 
+ +Task 2 — MODIFY app/features/rag/schemas.py: + - APPEND IndexProjectDocsRequest, ProjectDocResult, IndexProjectDocsResponse + after DeleteResponse. MIRROR IndexRequest/IndexResponse style. + +Task 3 — MODIFY app/features/rag/service.py: + - ADD module constant _PROJECT_ROOT_FILES after the imports. + - EXTEND the rag.schemas import block with the 3 new names. + - ADD RAGService._discover_project_doc_files (pure/sync) and + RAGService.index_project_docs (async orchestrator). + +Task 4 — MODIFY app/features/rag/routes.py: + - EXTEND the rag.schemas import block with IndexProjectDocsRequest/Response. + - ADD the POST /rag/index/project-docs route after index_document. + +Task 5 — MODIFY app/features/rag/tests/test_schemas.py: + - ADD cases: empty IndexProjectDocsRequest() defaults all True; + model_validate({}) ok; unknown field → ValidationError (extra="forbid"); + ProjectDocResult rejects an out-of-Literal status; IndexProjectDocsResponse + round-trips a populated payload. + +Task 6 — MODIFY app/features/rag/tests/test_service.py: + - ADD a UNIT test for _discover_project_doc_files: build a tmp_path tree + (docs/test-a.md, docs/sub/test-b.md, PRPs/test-c.md, README.md, notes.txt), + RAGService(base_dir=str(tmp_path)), assert discovery counts, category tags, + .md-only filtering, root allow-list, and include_* toggles. No DB, no mocks. + +Task 7 — MODIFY app/features/rag/tests/test_routes.py: + - ADD @pytest.mark.integration TestIndexProjectDocsEndpoint (see pseudocode). + +Task 8 — MODIFY frontend/src/types/api.ts: + - ADD the 3 interfaces in the // === RAG === block. + +Task 9 — MODIFY frontend/src/hooks/use-rag-sources.ts: + - ADD useIndexProjectDocs (extend the type import). + +Task 10 — MODIFY frontend/src/pages/admin.tsx: + - ADD Library to the lucide import; ADD the button + handler in RagSourcesPanel. 
+ +Task 11 — MODIFY docs/_base/API_CONTRACTS.md: + - ADD a rag row: + | rag | POST | /rag/index/project-docs | Bulk-index bundled docs/, PRPs/, and root markdown; per-file + aggregate summary; idempotent via content hash | + +Task 12 — Run the full Validation Loop (Levels 1-4); fix until green. +``` + +### Per-task pseudocode (highest-risk task) + +```python +# Task 7 — app/features/rag/tests/test_routes.py — the integration test. +# IMPORTS to add: `from functools import partial`, +# `from app.features.rag.service import RAGService`, +# `from app.features.rag.embeddings import EmbeddingError` (EmbeddingService already imported). + +@pytest.mark.integration +class TestIndexProjectDocsEndpoint: + @pytest.mark.asyncio + async def test_indexes_discovered_docs(self, client, tmp_path): + # fixture files — NON-EMPTY, names contain `test-` so conftest cleanup catches them + (tmp_path / "docs").mkdir() + (tmp_path / "PRPs").mkdir() + (tmp_path / "docs" / "test-proj-1.md").write_text("# A\n\nAlpha content.") + (tmp_path / "PRPs" / "test-proj-2.md").write_text("# B\n\nBeta content.") + mock = create_mock_embedding_service() + with patch("app.features.rag.routes.RAGService", + partial(RAGService, base_dir=str(tmp_path))), \ + patch("app.features.rag.service.get_embedding_service", return_value=mock): + r1 = await client.post("/rag/index/project-docs", json={}) + assert r1.status_code == 200 + d1 = r1.json() + assert d1["total_files"] == 2 and d1["indexed"] == 2 + assert d1["total_chunks"] >= 2 and d1["failed"] == 0 + # idempotent re-run + r2 = await client.post("/rag/index/project-docs", json={}) + assert r2.json()["unchanged"] == 2 + + @pytest.mark.asyncio + async def test_empty_roots_returns_zero(self, client, tmp_path): + mock = create_mock_embedding_service() + with patch("app.features.rag.routes.RAGService", + partial(RAGService, base_dir=str(tmp_path))), \ + patch("app.features.rag.service.get_embedding_service", return_value=mock): + r = await 
client.post("/rag/index/project-docs", json={}) + assert r.status_code == 200 and r.json()["total_files"] == 0 + + @pytest.mark.asyncio + async def test_unknown_field_rejected(self, client): + r = await client.post("/rag/index/project-docs", json={"bogus": True}) + assert r.status_code == 422 # extra="forbid" + + @pytest.mark.asyncio + async def test_embedding_failure_returns_502(self, client, tmp_path): + (tmp_path / "docs").mkdir() + (tmp_path / "docs" / "test-proj-3.md").write_text("# C\n\nGamma content.") + mock = create_mock_embedding_service() + mock.embed_texts = AsyncMock(side_effect=EmbeddingError("no key")) + with patch("app.features.rag.routes.RAGService", + partial(RAGService, base_dir=str(tmp_path))), \ + patch("app.features.rag.service.get_embedding_service", return_value=mock): + r = await client.post("/rag/index/project-docs", json={}) + assert r.status_code == 502 +``` + +### Integration Points + +```yaml +DATABASE: + - migration: NONE — no schema change (category rides in DocumentSource.metadata_ JSONB). +ROUTES: + - app/features/rag/routes.py — new route on the EXISTING `/rag` APIRouter. + - app/main.py — NO change (rag router already wired at main.py:142). +CONFIG: + - NONE — `_PROJECT_ROOT_FILES` is a code constant, not a Settings field. No .env.example change. +FRONTEND: + - frontend/src/hooks/use-rag-sources.ts — new hook beside useIndexDocument. + - frontend/src/pages/admin.tsx — button in RagSourcesPanel; invalidates ['rag-sources']. +DOCS: + - docs/_base/API_CONTRACTS.md — one new rag endpoint row. +``` + +## Validation Loop + +### Level 1: Syntax & Style + +```bash +# Run from the repo root. Fix every error before proceeding. +uv run ruff check . --fix +uv run ruff format . 
+git diff --stat # CRLF guard: confirm NO whole-file churn on the .py edits +``` + +### Level 2: Type Checks + Unit Tests + +```bash +uv run mypy app/ && uv run pyright app/ # both --strict — both gate merge +uv run pytest app/features/rag/ -v -m "not integration" +uv run pytest app/core/tests/test_strict_mode_policy.py -v # must still pass +``` + +Watch: the `index_response.status` (3-Literal) → `ProjectDocResult.status` +(4-Literal) assignment, and `sum(r.status == "..." for r in results)` returning +`int`, are the most likely strict-mode snags — both are fine, but verify. + +### Level 3: Integration Tests + +```bash +docker compose up -d +uv run alembic upgrade head +uv run pytest app/features/rag/tests/test_routes.py -v -m integration +# If they fail on a stale local Postgres: +# docker compose down -v && docker compose up -d && uv run alembic upgrade head +``` + +### Level 4: Frontend + Manual / Browser QA + +```bash +cd frontend && pnpm tsc --noEmit && pnpm lint && pnpm test --run +``` + +Manual dogfood (per `.claude/rules/ui-design.md` — use the `webapp-testing` skill): + +```bash +# Backend MUST run from the repo root so Path.cwd() == repo root. +uv run uvicorn app.main:app --reload --port 8123 +cd frontend && ./node_modules/.bin/vite --host 0.0.0.0 +``` + +1. `curl -s -X POST localhost:8123/rag/index/project-docs -H 'content-type: application/json' -d '{}' | head -c 400` + → a `200` response whose JSON summary reports `total_files` of roughly 110–115 (one result per bundled markdown file). +2. Open `/admin` → "RAG Sources" tab → on a fresh DB it shows `0 sources • 0 chunks` (if step 1 already populated the corpus, expect the populated counts here, and step 3 will report every file `unchanged`). +3. Click **Index Project Docs** → spinner → toast summary → the source list + + counts populate (the `['rag-sources']` invalidation). +4. Open `/knowledge` → the empty state is gone; "N sources • M chunks" reflects + the corpus; a semantic search ("How does backtesting prevent leakage?") returns + cited chunks. +5. Click **Index Project Docs** again → toast shows all `unchanged` (idempotency). 
+ +## Final validation Checklist + +- [ ] `uv run ruff check .` and `uv run ruff format --check .` — clean. +- [ ] `uv run mypy app/` and `uv run pyright app/` — clean (`--strict`). +- [ ] `uv run pytest app/features/rag/ -v -m "not integration"` — green. +- [ ] `uv run pytest app/features/rag/tests/test_routes.py -v -m integration` — green; + no `document_source` row with `source_path` containing `test-` remains. +- [ ] `uv run pytest app/core/tests/test_strict_mode_policy.py -v` — still green. +- [ ] `cd frontend && pnpm tsc --noEmit && pnpm lint && pnpm test --run` — green. +- [ ] `git diff --stat` shows small, line-level diffs on the `.py` files — NO + whole-file CRLF→LF churn. +- [ ] Manual: Index Project Docs populates the corpus; a re-run is all `unchanged`; + Knowledge search returns cited chunks. +- [ ] `docs/_base/API_CONTRACTS.md` lists `POST /rag/index/project-docs`. +- [ ] Commit `feat(rag,ui): index bundled project docs into the RAG corpus (#)` + — references the open issue, NO AI co-author / "Generated with" trailer; PR + into `dev`. + +--- + +## Anti-Patterns to Avoid + +- ❌ Don't re-implement chunking / embedding / hashing — orchestrate + `index_document`. +- ❌ Don't add an Alembic migration — there is no schema change. +- ❌ Don't touch `app/main.py` — the `rag` router is already wired. +- ❌ Don't "fix" `index_document`'s `if request.content:` branch — it is shared + with `POST /rag/index`; the empty-file edge is already handled by the per-file + `OSError` catch. +- ❌ Don't store absolute paths as `source_path` — use the clean relative POSIX id. +- ❌ Don't rewrite existing `.py` files with the Write tool — CRLF will flip to LF. + Use Edit; verify with `git diff --stat`. +- ❌ Don't widen scope into the "Full Version" (chunk preview, re-index, stale + detection, Knowledge filters) — that is a separate, deferred PR. 
+- ❌ Don't catch `EmbeddingError` / `SQLAlchemyError` per file — they are + batch-fatal and must reach the route's `502` / `problem+json` handlers. +- ❌ Don't claim the UI works on a green `tsc` alone — dogfood it in a browser. + +## Confidence Score + +**8.5 / 10** for one-pass implementation success. + +The feature is almost entirely additive on a mature, well-tested slice; every +endpoint shape, schema field, and pattern is pinned to a verified source line. The +residual risks are all identified and mitigated in-PRP: (1) CRLF EOL churn on the +`.py` edits — mitigated by the explicit Edit-tool + `git diff --stat` gotcha; +(2) integration-test DB cleanup of nested fixture paths — mitigated by the Task-1 +`LIKE` widening + `test-`-token fixture names; (3) the `RAGService` `base_dir` +injection in the route test — mitigated by the documented `partial(...)` patch +point; (4) the empty-file / falsy-`content` interaction — mitigated by non-empty +fixtures + the `OSError` safety net. The half-point deduction is for the manual +browser-QA step, which depends on a live embedding provider being configured and +reachable in the implementer's environment. diff --git a/app/features/rag/routes.py b/app/features/rag/routes.py index 4daf0106..e4474fb2 100644 --- a/app/features/rag/routes.py +++ b/app/features/rag/routes.py @@ -11,6 +11,8 @@ from app.features.rag.embeddings import EmbeddingError from app.features.rag.schemas import ( DeleteResponse, + IndexProjectDocsRequest, + IndexProjectDocsResponse, IndexRequest, IndexResponse, RetrieveRequest, @@ -133,6 +135,91 @@ async def index_document( ) from e +@router.post( + "/index/project-docs", + response_model=IndexProjectDocsResponse, + summary="Index bundled project documentation", + description=""" +Discover and bulk-index the repository's own bundled markdown. 
+ +**Discovery roots (all toggleable, all default on):** +- `include_docs`: every `docs/**/*.md` +- `include_prps`: every `PRPs/**/*.md` +- `include_root`: `README.md`, `AGENTS.md`, `CHANGELOG.md` + +Each file is indexed through the same path as `POST /rag/index`, so chunking, +embedding, the SHA-256 content-hash idempotency short-circuit, and upsert are +all reused. Re-runs return every unchanged file as `status: "unchanged"`. + +**Returns:** per-file results plus aggregate counts (indexed / updated / +unchanged / failed / total_chunks). A single unreadable file is reported +`status: "failed"` without aborting the batch; an embedding-provider or +database failure is batch-fatal and surfaces as `502` / problem+json. +""", +) +async def index_project_docs( + request: IndexProjectDocsRequest, + db: AsyncSession = Depends(get_db), +) -> IndexProjectDocsResponse: + """Bulk-index bundled project documentation into the knowledge base. + + Args: + request: Toggles selecting which doc roots to index. + db: Async database session from dependency. + + Returns: + Per-file results plus aggregate indexing statistics. + + Raises: + HTTPException: If embedding generation fails (502). + DatabaseError: If a database operation fails. 
+ """ + logger.info( + "rag.index_project_docs_request_received", + include_docs=request.include_docs, + include_prps=request.include_prps, + include_root=request.include_root, + ) + + service = RAGService() + + try: + response = await service.index_project_docs(db=db, request=request) + + logger.info( + "rag.index_project_docs_request_completed", + total_files=response.total_files, + total_chunks=response.total_chunks, + failed=response.failed, + ) + + return response + + except EmbeddingError as e: + logger.error( + "rag.index_project_docs_request_failed", + error=str(e), + error_type=type(e).__name__, + exc_info=True, + ) + raise HTTPException( + status_code=status.HTTP_502_BAD_GATEWAY, + detail=f"Embedding generation failed: {e}", + ) from e + + except SQLAlchemyError as e: + logger.error( + "rag.index_project_docs_request_failed", + error=str(e), + error_type=type(e).__name__, + exc_info=True, + ) + raise DatabaseError( + message="Failed to index project docs", + details={"error": str(e)}, + ) from e + + # ============================================================================= # Retrieve Endpoint # ============================================================================= diff --git a/app/features/rag/schemas.py b/app/features/rag/schemas.py index 41a31d1b..44dbac2d 100644 --- a/app/features/rag/schemas.py +++ b/app/features/rag/schemas.py @@ -179,3 +179,63 @@ class DeleteResponse(BaseModel): source_id: str chunks_deleted: int status: Literal["deleted"] + + +class IndexProjectDocsRequest(BaseModel): + """Request to bulk-index bundled project documentation. + + All fields default to True so an empty ``{}`` body indexes every root. + + Args: + include_docs: Index markdown discovered under docs/**. + include_prps: Index markdown discovered under PRPs/**. + include_root: Index the root allow-list (README/AGENTS/CHANGELOG). 
+ """ + + model_config = ConfigDict(extra="forbid") + + include_docs: bool = Field(default=True, description="Index docs/**/*.md") + include_prps: bool = Field(default=True, description="Index PRPs/**/*.md") + include_root: bool = Field( + default=True, description="Index README.md / AGENTS.md / CHANGELOG.md" + ) + + +class ProjectDocResult(BaseModel): + """Per-file outcome of a project-docs index run. + + Args: + source_path: Relative POSIX path of the file (the source identifier). + status: Outcome — indexed, updated, unchanged, or failed. + chunks_created: Number of chunks created (0 when unchanged or failed). + error: Error message when status is "failed", otherwise None. + """ + + source_path: str + status: Literal["indexed", "updated", "unchanged", "failed"] + chunks_created: int + error: str | None = None + + +class IndexProjectDocsResponse(BaseModel): + """Aggregate result of POST /rag/index/project-docs. + + Args: + results: Per-file outcomes. + total_files: Total files discovered and processed. + indexed: Count of newly indexed files. + updated: Count of re-indexed (changed) files. + unchanged: Count of files skipped by the content-hash short-circuit. + failed: Count of files that could not be read. + total_chunks: Total chunks created across all files. + duration_ms: Wall-clock time taken for the batch. 
+ """ + + results: list[ProjectDocResult] + total_files: int + indexed: int + updated: int + unchanged: int + failed: int + total_chunks: int + duration_ms: float diff --git a/app/features/rag/service.py b/app/features/rag/service.py index d77f6f7e..8229bb33 100644 --- a/app/features/rag/service.py +++ b/app/features/rag/service.py @@ -29,8 +29,11 @@ from app.features.rag.schemas import ( ChunkResult, DeleteResponse, + IndexProjectDocsRequest, + IndexProjectDocsResponse, IndexRequest, IndexResponse, + ProjectDocResult, RetrieveRequest, RetrieveResponse, SourceListResponse, @@ -39,6 +42,10 @@ logger = structlog.get_logger() +# Allow-listed root markdown files indexed by index_project_docs. CLAUDE.md is +# deliberately excluded — it is an operating index that @imports AGENTS.md. +_PROJECT_ROOT_FILES: tuple[str, ...] = ("README.md", "AGENTS.md", "CHANGELOG.md") + class SourceNotFoundError(ValueError): """Source not found in the knowledge base.""" @@ -250,6 +257,135 @@ async def index_document( status=status, ) + def _discover_project_doc_files( + self, request: IndexProjectDocsRequest + ) -> list[tuple[Path, str]]: + """Discover bundled markdown under the allow-listed project-doc roots. + + Pure and synchronous — no DB, no network. ``rglob`` on a non-existent + directory yields nothing (no exception), so an absent docs/ or PRPs/ + root simply contributes 0 files. + + Args: + request: Toggles selecting which roots to discover. + + Returns: + A deterministically sorted list of (absolute_path, category) pairs + where category is "docs", "prp", or "root". 
+ """ + found: list[tuple[Path, str]] = [] + + if request.include_docs: + found += [(p, "docs") for p in (self._base_dir / "docs").rglob("*.md")] + + if request.include_prps: + found += [(p, "prp") for p in (self._base_dir / "PRPs").rglob("*.md")] + + if request.include_root: + for name in _PROJECT_ROOT_FILES: + candidate = self._base_dir / name + if candidate.is_file(): + found.append((candidate, "root")) + + # rglob order is filesystem-dependent — sort for stable, reproducible runs. + return sorted(found, key=lambda pair: str(pair[0])) + + async def index_project_docs( + self, + db: AsyncSession, + request: IndexProjectDocsRequest, + ) -> IndexProjectDocsResponse: + """Bulk-index discovered project docs via index_document. Idempotent. + + Each file is indexed through index_document, reusing its chunking, + embedding, SHA-256 content-hash idempotency, and upsert. A single + unreadable / non-UTF-8 file is reported status="failed" and does NOT + abort the batch. EmbeddingError / SQLAlchemyError are NOT caught here — + they are batch-fatal and propagate to the route's error handlers. + + Args: + db: Database session. + request: Toggles selecting which roots to index. + + Returns: + Per-file results plus aggregate counts. + """ + start_time = time.time() + + logger.info( + "rag.index_project_docs_started", + include_docs=request.include_docs, + include_prps=request.include_prps, + include_root=request.include_root, + ) + + results: list[ProjectDocResult] = [] + + for abs_path, category in self._discover_project_doc_files(request): + # abs_path was globbed under self._base_dir, so relative_to is safe. 
+ rel = abs_path.relative_to(self._base_dir).as_posix() + try: + content = abs_path.read_text(encoding="utf-8") + index_response = await self.index_document( + db, + IndexRequest( + source_type="markdown", + source_path=rel, + content=content, + metadata={"category": category}, + ), + ) + results.append( + ProjectDocResult( + source_path=rel, + status=index_response.status, + chunks_created=index_response.chunks_created, + error=None, + ) + ) + except (OSError, ValueError) as exc: + # FileNotFoundError ⊂ OSError; UnicodeDecodeError ⊂ ValueError. + logger.warning( + "rag.index_project_docs_file_failed", + source_path=rel, + error=str(exc), + error_type=type(exc).__name__, + ) + results.append( + ProjectDocResult( + source_path=rel, + status="failed", + chunks_created=0, + error=str(exc), + ) + ) + + duration_ms = (time.time() - start_time) * 1000 + + summary = IndexProjectDocsResponse( + results=results, + total_files=len(results), + indexed=sum(r.status == "indexed" for r in results), + updated=sum(r.status == "updated" for r in results), + unchanged=sum(r.status == "unchanged" for r in results), + failed=sum(r.status == "failed" for r in results), + total_chunks=sum(r.chunks_created for r in results), + duration_ms=duration_ms, + ) + + logger.info( + "rag.index_project_docs_completed", + total_files=summary.total_files, + indexed=summary.indexed, + updated=summary.updated, + unchanged=summary.unchanged, + failed=summary.failed, + total_chunks=summary.total_chunks, + duration_ms=duration_ms, + ) + + return summary + async def retrieve( self, db: AsyncSession, diff --git a/app/features/rag/tests/conftest.py b/app/features/rag/tests/conftest.py index 3bf7f318..30d5fe6a 100644 --- a/app/features/rag/tests/conftest.py +++ b/app/features/rag/tests/conftest.py @@ -41,9 +41,10 @@ async def db_session() -> AsyncGenerator[AsyncSession, None]: try: yield session finally: - # Clean up test data (delete sources with test- prefix) + # Clean up test data (delete sources whose 
path contains a test- token, + # including nested project-doc fixture paths like docs/test-*.md) test_source_ids = delete(DocumentSource).where( - DocumentSource.source_path.like("test-%") + DocumentSource.source_path.like("%test-%") ) await session.execute(test_source_ids) await session.commit() diff --git a/app/features/rag/tests/test_routes.py b/app/features/rag/tests/test_routes.py index ce09a05a..f898a9f1 100644 --- a/app/features/rag/tests/test_routes.py +++ b/app/features/rag/tests/test_routes.py @@ -7,12 +7,14 @@ Note: These tests mock the OpenAI embedding service to avoid API calls. """ +from functools import partial from unittest.mock import AsyncMock, MagicMock, patch import pytest from httpx import AsyncClient -from app.features.rag.embeddings import EmbeddingService +from app.features.rag.embeddings import EmbeddingError, EmbeddingService +from app.features.rag.service import RAGService # ============================================================================= # Mock Embedding Service for Integration Tests @@ -431,3 +433,135 @@ async def test_index_openapi_creates_endpoint_chunks(self, client: AsyncClient): data = response.json() # Should have at least: info chunk + 2 endpoint chunks assert data["chunks_created"] >= 3 + + +# ============================================================================= +# Index Project Docs Endpoint Tests +# ============================================================================= + + +@pytest.mark.integration +class TestIndexProjectDocsEndpoint: + """Integration tests for POST /rag/index/project-docs endpoint.""" + + @pytest.mark.asyncio + async def test_indexes_discovered_docs(self, client: AsyncClient, tmp_path): + """Test that discovered docs are indexed and re-runs are idempotent.""" + (tmp_path / "docs").mkdir() + (tmp_path / "PRPs").mkdir() + # Non-empty content; `test-` token so conftest cleanup catches the rows. 
+ (tmp_path / "docs" / "test-proj-1.md").write_text( + "# Alpha\n\nAlpha content.", encoding="utf-8" + ) + (tmp_path / "PRPs" / "test-proj-2.md").write_text( + "# Beta\n\nBeta content.", encoding="utf-8" + ) + mock_service = create_mock_embedding_service() + + with ( + patch( + "app.features.rag.routes.RAGService", + partial(RAGService, base_dir=str(tmp_path)), + ), + patch( + "app.features.rag.service.get_embedding_service", + return_value=mock_service, + ), + ): + response1 = await client.post("/rag/index/project-docs", json={}) + assert response1.status_code == 200 + data1 = response1.json() + assert data1["total_files"] == 2 + assert data1["indexed"] == 2 + assert data1["failed"] == 0 + assert data1["total_chunks"] >= 2 + + # Idempotent re-run — every file unchanged, no new chunks. + response2 = await client.post("/rag/index/project-docs", json={}) + assert response2.status_code == 200 + assert response2.json()["unchanged"] == 2 + + @pytest.mark.asyncio + async def test_empty_roots_returns_zero(self, client: AsyncClient, tmp_path): + """Test that an empty doc tree returns zero files without error.""" + mock_service = create_mock_embedding_service() + + with ( + patch( + "app.features.rag.routes.RAGService", + partial(RAGService, base_dir=str(tmp_path)), + ), + patch( + "app.features.rag.service.get_embedding_service", + return_value=mock_service, + ), + ): + response = await client.post("/rag/index/project-docs", json={}) + + assert response.status_code == 200 + assert response.json()["total_files"] == 0 + + @pytest.mark.asyncio + async def test_toggles_select_roots(self, client: AsyncClient, tmp_path): + """Test that include_* toggles restrict discovery.""" + (tmp_path / "docs").mkdir() + (tmp_path / "PRPs").mkdir() + (tmp_path / "docs" / "test-toggle-1.md").write_text( + "# Docs\n\nDocs content.", encoding="utf-8" + ) + (tmp_path / "PRPs" / "test-toggle-2.md").write_text( + "# Prp\n\nPrp content.", encoding="utf-8" + ) + mock_service = 
create_mock_embedding_service() + + with ( + patch( + "app.features.rag.routes.RAGService", + partial(RAGService, base_dir=str(tmp_path)), + ), + patch( + "app.features.rag.service.get_embedding_service", + return_value=mock_service, + ), + ): + response = await client.post( + "/rag/index/project-docs", + json={"include_prps": False, "include_root": False}, + ) + + assert response.status_code == 200 + data = response.json() + assert data["total_files"] == 1 + assert data["results"][0]["source_path"] == "docs/test-toggle-1.md" + + @pytest.mark.asyncio + async def test_unknown_field_rejected(self, client: AsyncClient): + """Test that an unknown body field is rejected (extra='forbid').""" + response = await client.post("/rag/index/project-docs", json={"bogus": True}) + assert response.status_code == 422 + + @pytest.mark.asyncio + async def test_embedding_failure_returns_502(self, client: AsyncClient, tmp_path): + """Test that an embedding-provider failure is batch-fatal (502).""" + (tmp_path / "docs").mkdir() + (tmp_path / "docs" / "test-proj-3.md").write_text( + "# Gamma\n\nGamma content.", encoding="utf-8" + ) + # Build a mock whose embed_texts raises — a MagicMock var (not the + # EmbeddingService-typed factory return) so mypy permits the assignment. 
+ mock_service = MagicMock(spec=EmbeddingService) + mock_service.embed_texts = AsyncMock(side_effect=EmbeddingError("no key")) + + with ( + patch( + "app.features.rag.routes.RAGService", + partial(RAGService, base_dir=str(tmp_path)), + ), + patch( + "app.features.rag.service.get_embedding_service", + return_value=mock_service, + ), + ): + response = await client.post("/rag/index/project-docs", json={}) + + assert response.status_code == 502 diff --git a/app/features/rag/tests/test_schemas.py b/app/features/rag/tests/test_schemas.py index 3a1881e7..479a95a2 100644 --- a/app/features/rag/tests/test_schemas.py +++ b/app/features/rag/tests/test_schemas.py @@ -6,8 +6,11 @@ from app.features.rag.schemas import ( ChunkResult, DeleteResponse, + IndexProjectDocsRequest, + IndexProjectDocsResponse, IndexRequest, IndexResponse, + ProjectDocResult, RetrieveRequest, RetrieveResponse, SourceListResponse, @@ -344,3 +347,115 @@ def test_valid_delete_response(self): ) assert response.status == "deleted" assert response.chunks_deleted == 10 + + +class TestIndexProjectDocsRequest: + """Tests for IndexProjectDocsRequest schema.""" + + def test_defaults_all_true(self): + """Test that an empty request defaults every root to True.""" + request = IndexProjectDocsRequest() + assert request.include_docs is True + assert request.include_prps is True + assert request.include_root is True + + def test_model_validate_empty_dict(self): + """Test that an empty {} body validates (the frontend always posts {}).""" + request = IndexProjectDocsRequest.model_validate({}) + assert request.include_docs is True + assert request.include_prps is True + assert request.include_root is True + + def test_toggles_select_roots_independently(self): + """Test that each include_* toggle is honored independently.""" + request = IndexProjectDocsRequest(include_docs=True, include_prps=False, include_root=False) + assert request.include_docs is True + assert request.include_prps is False + assert request.include_root 
is False + + def test_extra_fields_rejected(self): + """Test that an unknown body field is rejected (extra='forbid').""" + with pytest.raises(ValidationError) as exc_info: + IndexProjectDocsRequest(bogus=True) # type: ignore[call-arg] + assert "bogus" in str(exc_info.value) + + +class TestProjectDocResult: + """Tests for ProjectDocResult schema.""" + + def test_valid_result(self): + """Test a valid per-file result.""" + result = ProjectDocResult( + source_path="docs/ARCHITECTURE.md", + status="indexed", + chunks_created=7, + ) + assert result.status == "indexed" + assert result.chunks_created == 7 + assert result.error is None + + def test_failed_result_carries_error(self): + """Test a failed result carries an error string.""" + result = ProjectDocResult( + source_path="docs/bad.md", + status="failed", + chunks_created=0, + error="not valid UTF-8", + ) + assert result.status == "failed" + assert result.error == "not valid UTF-8" + + def test_invalid_status_rejected(self): + """Test that an out-of-Literal status is rejected.""" + with pytest.raises(ValidationError) as exc_info: + ProjectDocResult( + source_path="docs/x.md", + status="bogus", # type: ignore[arg-type] + chunks_created=0, + ) + assert "status" in str(exc_info.value) + + +class TestIndexProjectDocsResponse: + """Tests for IndexProjectDocsResponse schema.""" + + def test_valid_response_round_trips(self): + """Test a populated aggregate response round-trips through validation.""" + response = IndexProjectDocsResponse( + results=[ + ProjectDocResult(source_path="docs/a.md", status="indexed", chunks_created=3), + ProjectDocResult( + source_path="PRPs/b.md", + status="failed", + chunks_created=0, + error="boom", + ), + ], + total_files=2, + indexed=1, + updated=0, + unchanged=0, + failed=1, + total_chunks=3, + duration_ms=42.5, + ) + assert response.total_files == 2 + assert response.indexed == 1 + assert response.failed == 1 + assert response.total_chunks == 3 + assert len(response.results) == 2 + + def 
test_empty_response(self): + """Test an aggregate response with no discovered files.""" + response = IndexProjectDocsResponse( + results=[], + total_files=0, + indexed=0, + updated=0, + unchanged=0, + failed=0, + total_chunks=0, + duration_ms=1.0, + ) + assert response.total_files == 0 + assert len(response.results) == 0 diff --git a/app/features/rag/tests/test_service.py b/app/features/rag/tests/test_service.py index 52a7afc2..836bc84b 100644 --- a/app/features/rag/tests/test_service.py +++ b/app/features/rag/tests/test_service.py @@ -1,11 +1,16 @@ """Unit tests for RAG service.""" import hashlib +from pathlib import Path from unittest.mock import AsyncMock, MagicMock, patch import pytest -from app.features.rag.schemas import IndexRequest, RetrieveRequest +from app.features.rag.schemas import ( + IndexProjectDocsRequest, + IndexRequest, + RetrieveRequest, +) from app.features.rag.service import RAGService, SourceNotFoundError @@ -70,6 +75,88 @@ def test_read_content_from_path_traversal_blocked(self, tmp_path): service._read_content_from_path("/etc/passwd") +class TestRAGServiceDiscoverProjectDocFiles: + """Unit tests for RAGService._discover_project_doc_files (pure, no DB).""" + + @staticmethod + def _build_tree(tmp_path: Path) -> None: + """Create a fixture doc tree under tmp_path.""" + (tmp_path / "docs" / "sub").mkdir(parents=True) + (tmp_path / "PRPs").mkdir() + (tmp_path / "docs" / "test-a.md").write_text("# A", encoding="utf-8") + (tmp_path / "docs" / "sub" / "test-b.md").write_text("# B", encoding="utf-8") + (tmp_path / "docs" / "notes.txt").write_text("not markdown", encoding="utf-8") + (tmp_path / "PRPs" / "test-c.md").write_text("# C", encoding="utf-8") + (tmp_path / "README.md").write_text("# Readme", encoding="utf-8") + + def test_discovers_all_roots(self, tmp_path): + """Test discovery across docs/, PRPs/, and the root allow-list.""" + self._build_tree(tmp_path) + service = RAGService(base_dir=str(tmp_path)) + + found = 
service._discover_project_doc_files(IndexProjectDocsRequest()) + + rel = {p.relative_to(tmp_path).as_posix(): cat for p, cat in found} + assert rel == { + "docs/test-a.md": "docs", + "docs/sub/test-b.md": "docs", + "PRPs/test-c.md": "prp", + "README.md": "root", + } + + def test_filters_non_markdown(self, tmp_path): + """Test that non-.md files (notes.txt) are excluded.""" + self._build_tree(tmp_path) + service = RAGService(base_dir=str(tmp_path)) + + found = service._discover_project_doc_files(IndexProjectDocsRequest()) + + assert all(p.suffix == ".md" for p, _ in found) + + def test_result_is_sorted(self, tmp_path): + """Test that discovery returns a deterministically sorted list.""" + self._build_tree(tmp_path) + service = RAGService(base_dir=str(tmp_path)) + + found = service._discover_project_doc_files(IndexProjectDocsRequest()) + + paths = [str(p) for p, _ in found] + assert paths == sorted(paths) + + def test_toggles_select_roots(self, tmp_path): + """Test that include_* toggles select roots independently.""" + self._build_tree(tmp_path) + service = RAGService(base_dir=str(tmp_path)) + + docs_only = service._discover_project_doc_files( + IndexProjectDocsRequest(include_prps=False, include_root=False) + ) + assert {cat for _, cat in docs_only} == {"docs"} + + no_root = service._discover_project_doc_files(IndexProjectDocsRequest(include_root=False)) + assert "root" not in {cat for _, cat in no_root} + + def test_missing_root_directory_yields_nothing(self, tmp_path): + """Test that an absent docs/ or PRPs/ root contributes 0 files.""" + # tmp_path is empty — no docs/, no PRPs/, no root markdown. 
+ service = RAGService(base_dir=str(tmp_path)) + + found = service._discover_project_doc_files(IndexProjectDocsRequest()) + + assert found == [] + + def test_root_allow_list_only(self, tmp_path): + """Test that only allow-listed root files are discovered.""" + (tmp_path / "README.md").write_text("# Readme", encoding="utf-8") + (tmp_path / "NOTES.md").write_text("# Notes", encoding="utf-8") + service = RAGService(base_dir=str(tmp_path)) + + found = service._discover_project_doc_files(IndexProjectDocsRequest()) + + names = {p.name for p, _ in found} + assert names == {"README.md"} + + class TestRAGServiceIndexDocument: """Tests for index_document method.""" diff --git a/docs/_base/API_CONTRACTS.md b/docs/_base/API_CONTRACTS.md index 00e93fb4..d46e99ea 100644 --- a/docs/_base/API_CONTRACTS.md +++ b/docs/_base/API_CONTRACTS.md @@ -37,6 +37,7 @@ All endpoints serve JSON; error responses use `application/problem+json` (RFC 78 | jobs | GET | `/jobs/{job_id}` | Status + result JSON | | jobs | DELETE | `/jobs/{job_id}` | Cancel pending | | rag | POST | `/rag/index` | Index a markdown/openapi document; idempotent via content hash | +| rag | POST | `/rag/index/project-docs` | Bulk-index bundled `docs/`, `PRPs/`, and root markdown; per-file + aggregate summary; idempotent via content hash; `502` if the embedding provider fails | | rag | POST | `/rag/retrieve` | Semantic search (HNSW), top-k with similarity threshold | | rag | GET | `/rag/sources` | List indexed sources | | rag | DELETE | `/rag/sources/{source_id}` | Delete source + cascaded chunks | diff --git a/docs/user-guide/agents-and-rag-guide.md b/docs/user-guide/agents-and-rag-guide.md new file mode 100644 index 00000000..052d8b33 --- /dev/null +++ b/docs/user-guide/agents-and-rag-guide.md @@ -0,0 +1,121 @@ +# Agents and RAG Guide + +ForecastLab includes a conversational AI layer — chat agents — backed by a +**RAG knowledge base** (retrieval-augmented generation). 
This guide explains how both +work and how to use them safely. + +## The RAG Knowledge Base + +RAG lets the system answer questions using a body of indexed documents rather than +only the language model's general training. ForecastLab uses it to ground answers in +**project documentation**. + +### How indexing works + +When you index a document: + +1. The document is split into overlapping **chunks** (markdown is split by heading, + OpenAPI specs by endpoint). +2. Each chunk is converted into an **embedding** — a numeric vector capturing its meaning. +3. Chunks and embeddings are stored in PostgreSQL using the `pgvector` extension. + +Indexing is **idempotent**: each document is identified by its path and a content +hash, so re-indexing unchanged content does nothing, and changed content replaces the +old chunks cleanly. + +### How retrieval works + +A search query is embedded the same way, then matched against the stored chunks by +**cosine similarity** using an HNSW index. The closest chunks above a similarity threshold are returned, +each with a relevance score and a citation back to its source document. Retrieval +returns evidence — passages — not a generated answer; the agent decides what to do +with them. + +### Using it + +- **Knowledge page** (`/knowledge`) — browse the indexed corpus and run live semantic + searches. +- **Admin → RAG Sources** — index a single document, bulk-index the bundled project docs with the **Index Project Docs** button, list sources, or delete one. +- **API** — `POST /rag/index`, `POST /rag/index/project-docs`, `POST /rag/retrieve`, `GET /rag/sources`, + `DELETE /rag/sources/{id}`. + +### Embedding providers + +Embeddings come from either **OpenAI** or a local **Ollama** server. The active +provider, model, and vector dimension are shown and changed under **Admin → AI models** +(`GET` / `PATCH /config/ai`). Local Ollama keeps document content off external services. + +## The Chat Agents + +The agents are conversational assistants built with PydanticAI. Two agent types exist: + +- **`rag_assistant`** — answers questions using the RAG knowledge base. 
+- **`experiment`** — can run forecasting experiments (training, backtesting, registry + actions) on your behalf. + +### Talking to an agent + +Use the **Chat** page (`/chat`) or the API: + +1. `POST /agents/sessions` — open a session, choosing the agent type. +2. `POST /agents/sessions/{id}/chat` — send a message and get the full response, or + connect to `WS /agents/stream` for token-by-token streaming. +3. `DELETE /agents/sessions/{id}` — close the session. + +A session keeps its message history, so the agent remembers earlier turns in the +conversation. + +### Tools + +Agents can call **tools** — typed functions that fetch data or perform actions +(retrieve documentation, list model runs, start a backtest, and so on). When an agent +uses a tool, the chat UI shows the call and its result, so you can see exactly how an +answer was produced. + +## The Human-in-the-Loop Approval Gate + +Most tools are read-only and run immediately. Tools that **change state** — for +example creating a registry alias or archiving a run — are different: they **pause and +wait for your approval**. + +When an agent wants to run one of these tools: + +1. The session enters an `awaiting_approval` state and an `approval_required` event is + emitted. +2. Nothing happens until you respond. +3. You approve or reject via `POST /agents/sessions/{id}/approve` (the Chat page + surfaces this as a prompt). +4. On approval the tool runs; on rejection it is skipped. + +This gate means an agent can never silently mutate the model registry — a person is +always in the loop for consequential actions. The set of approval-gated tools is a +deliberate, fixed list. + +### Other safety limits + +Each session is bounded so an agent cannot run away: + +- a **token budget** per session, +- a **maximum number of tool calls** per session, +- a **timeout** wrapping each agent run. + +The **Agent Guide** page (`/guide`) shows these limits live, along with the available +tools and example prompts. 
+ +## Putting It Together + +A typical RAG-assisted exchange: you ask a question on the Chat page → the +`rag_assistant` agent calls its retrieval tool → the tool runs a semantic search over +the indexed corpus → the agent reads the returned passages → it answers, grounded in +real documentation, and you can see the citations. For experiments, the `experiment` +agent can additionally trigger training or backtesting — pausing for your approval +before anything that writes to the registry. + +## Tips + +- The agents need an LLM API key (`OPENAI_API_KEY` or `ANTHROPIC_API_KEY`) in `.env`. + Without one, the chat features are unavailable but the rest of the system still works. +- For useful RAG answers, index relevant documentation first — an empty corpus means + the assistant has nothing to cite. +- Watch the tool-call display in the chat: it is the simplest way to understand how + the agent reached its answer. diff --git a/docs/user-guide/dashboard-guide.md b/docs/user-guide/dashboard-guide.md new file mode 100644 index 00000000..83aec0bd --- /dev/null +++ b/docs/user-guide/dashboard-guide.md @@ -0,0 +1,93 @@ +# Dashboard Guide + +The ForecastLab dashboard is a React web app at **http://localhost:5173**. This guide +walks through every page. The top navigation bar groups pages as: Dashboard, +Showcase, **Explorer** (menu), **Visualize** (menu), Knowledge, Chat, Agent Guide, +and Admin. A light/dark theme toggle sits on the right. + +## Dashboard (`/`) + +The landing page. It shows headline **KPI cards** — total revenue, units sold, +transactions, average unit price, average basket — plus a revenue-over-time chart. +Use it for a quick health check of the seeded dataset. If the database is empty, +the cards read zero; seed data first (see Admin, or run `make demo`). + +## Showcase (`/showcase`) + +Runs the **end-to-end demo pipeline live in your browser**. 
Click to start, and the +page streams one status card per step: seed → features → train three models → +backtest → register the winner → alias → agent check. Each card flips to a +pass / fail / skip state, and a summary banner reports the winning model and its +accuracy. This is the best page for a guided demo of the whole system. + +Tip: tick **Re-seed first** if the database is empty or stale. Only one pipeline can +run at a time. + +## Explorer + +The Explorer menu contains read-only pages for browsing the underlying data and +model history. Tables support pagination, filtering, search, and sorting; clicking a +row opens a detail page. + +- **Sales** (`/explorer/sales`) — browse daily sales records. +- **Stores** (`/explorer/stores`) — list of retail stores. Click a store to open its + **detail page**: an entity profile, date-scoped KPIs, a revenue-over-time chart, + and a top-products drilldown. +- **Products** (`/explorer/products`) — list of products (SKUs). Click a product for + its **detail page**: profile, KPIs, revenue and lifecycle-demand curves, and a + top-stores drilldown. +- **Model Runs** (`/explorer/runs`) — every trained model tracked in the registry. + A run **detail page** shows its configuration, metrics, and runtime info as JSON, + cross-links to the store/product, an artifact-integrity check, and a compare link. + Two runs can be compared side by side (config diff + metrics diff with deltas). +- **Jobs** (`/explorer/jobs`) — submitted train/predict/backtest jobs. A job + **detail page** shows parameters, result JSON, error details, the linked run, a + cancel action, and live status polling. + +## Visualize + +The Visualize menu holds the analytical, chart-heavy pages. + +- **Demand Planner** (`/visualize/demand`) — rolls completed `predict` jobs into a + multi-SKU table showing tomorrow / next-week / next-month demand and the + inventory required to cover it. Includes a lead-time selector and a single-SKU + drill-in. 
Answers "how much will this SKU sell, and do I have enough stock?" +- **Forecast** (`/visualize/forecast`) — visualizes a model's horizon predictions. +- **Backtest Results** (`/visualize/backtest`) — charts backtest folds and the + accuracy metrics (MAE, sMAPE, WAPE, bias, stability) for a model run. + +## Knowledge (`/knowledge`) + +Surfaces the **RAG knowledge base**: the indexed document corpus, a live semantic +search box, and current system state. Type a question to retrieve the most relevant +documentation passages with similarity scores. If the corpus is empty, the page +shows an empty state until documents are indexed (see **Admin → RAG Sources** below). + +## Chat (`/chat`) + +The **AI agent chat**. Ask questions in natural language; the assistant streams its +answer token by token and shows any tools it calls. Some actions pause for your +approval before they run. See the Agents and RAG Guide for details. + +## Agent Guide (`/guide`) + +An in-app reference for the chat agents: the tools they can use, the human-in-the-loop +approval gate, live session limits, and example prompts to try. + +## Admin (`/admin`) + +Operational controls, organized into tabs: + +- **Data seeding** — generate synthetic retail data from named scenarios, append more, + verify integrity, or clear the dataset. +- **RAG Sources** — list indexed knowledge documents, bulk-index the bundled project + docs with **Index Project Docs**, index a single document, and delete sources. +- **Aliases** — manage model registry aliases (e.g. promote a run to `production`). +- **AI models** — view and change the agent LLM and RAG embedding configuration + live, with per-provider health indicators. + +## Notes + +- Pages fetch data from the backend API; if everything shows "Loading…", confirm the + backend is running and `VITE_API_BASE_URL` points at it. +- Explorer detail pages are reached by clicking table rows — they are not in the nav. 
diff --git a/docs/user-guide/feature-reference.md b/docs/user-guide/feature-reference.md new file mode 100644 index 00000000..91e1ba51 --- /dev/null +++ b/docs/user-guide/feature-reference.md @@ -0,0 +1,142 @@ +# Feature Reference + +This is a capability-by-capability reference for ForecastLab's backend. Every feature +is a REST API served at **http://localhost:8123**; the interactive Swagger UI at +**/docs** is the authoritative, always-current contract. All errors use the RFC 7807 +`application/problem+json` format. + +## Health + +- `GET /health` — liveness probe; returns `{"status": "ok"}`. + +## Data Platform and Ingest + +The data platform owns seven retail tables: `store`, `product`, `calendar`, +`sales_daily`, `price_history`, `promotion`, and `inventory_snapshot_daily`. + +- `POST /ingest/sales-daily` — batch-load daily sales. Resolves natural keys + (store code, SKU) to IDs and upserts idempotently, so re-sending the same batch is + safe. + +## Dimensions + +Reference data — the "who" and "what" behind the sales facts. + +- `GET /dimensions/stores` — list stores (pagination, region / store-type filters, + case-insensitive search, optional sorting). +- `GET /dimensions/stores/{store_id}` — one store by ID. +- `GET /dimensions/products` — list products (category / brand filters, SKU / name + search, optional sorting). +- `GET /dimensions/products/{product_id}` — one product by ID. + +## Analytics + +Read-only aggregates computed over the sales data. + +- `GET /analytics/kpis` — headline KPIs: revenue, units, transactions, average unit + price, average basket. +- `GET /analytics/drilldowns` — group sales by store, product, category, region, or date. +- `GET /analytics/timeseries` — period-bucketed sales series (day / week / month / + quarter) for revenue-over-time charts. +- `GET /analytics/inventory-status` — latest inventory snapshot per store-product pair. 
+ +## Feature Engineering + +Turns raw sales into model-ready features while strictly preventing **data leakage** — +features never use information from the future. + +- `POST /featuresets/compute` — compute time-safe features (lags, rolling-window + statistics, calendar effects) up to a cutoff date. +- `POST /featuresets/preview` — preview computed features with sample rows. + +## Forecasting + +Trains demand-forecasting models and generates predictions. + +- `POST /forecasting/train` — train a model. Supported model types: `naive`, + `seasonal_naive`, `moving_average` (baselines) and `lightgbm` (machine learning). +- `POST /forecasting/predict` — generate horizon predictions from a trained model. + +The three baselines exist as honest comparison points — a machine-learning model is +only worth using if it beats them. + +## Backtesting + +Measures how accurate a model would have been, using time-series cross-validation. + +- `POST /backtesting/run` — run rolling or expanding train/test splits and report + accuracy metrics: **MAE**, **sMAPE**, **WAPE**, **bias**, and **stability**. + +## Model Registry + +Tracks every trained model so runs are reproducible and comparable. + +- `POST /registry/runs` — create a model run record (starts `pending`). +- `GET /registry/runs` — list runs with filters, pagination, and sorting. +- `GET /registry/runs/{run_id}` — run details, including metrics and runtime info. +- `PATCH /registry/runs/{run_id}` — update a run's status, metrics, or artifact location. +- `GET /registry/runs/{run_id}/verify` — verify the model artifact's SHA-256 integrity. +- `GET /registry/compare/{run_id_a}/{run_id_b}` — diff two runs. +- `POST /registry/aliases` — create or move an alias (e.g. `production`); aliases may + point only to a successful run. +- `GET /registry/aliases`, `GET /registry/aliases/{name}`, `DELETE /registry/aliases/{name}` + — manage aliases. 
+ +A run moves through `pending → running → success` (or `failed`), and an alias is a +human-friendly pointer (like `production` or `champion`) to a chosen successful run. + +## Jobs + +Long-running work — training, prediction, backtesting — submitted as jobs. + +- `POST /jobs` — submit a `train`, `predict`, or `backtest` job; returns a `job_id`. +- `GET /jobs` — list jobs with filters and sorting. +- `GET /jobs/{job_id}` — job status and result JSON. +- `DELETE /jobs/{job_id}` — cancel a pending job. + +## RAG Knowledge Base + +Semantic search over indexed documents. See the Agents and RAG Guide for the full +picture. + +- `POST /rag/index` — index a markdown or OpenAPI document; `POST /rag/index/project-docs` — bulk-index the bundled `docs/`, `PRPs/`, and root markdown with a per-file + aggregate summary. Both are idempotent via content hash. +- `POST /rag/retrieve` — semantic search; returns the top-k most relevant passages. +- `GET /rag/sources` — list indexed sources. +- `DELETE /rag/sources/{source_id}` — delete a source and its chunks. + +## Agents + +The conversational AI layer. See the Agents and RAG Guide. + +- `POST /agents/sessions` — open a chat session (`experiment` or `rag_assistant`). +- `GET /agents/sessions/{id}` — session status and message history. +- `POST /agents/sessions/{id}/chat` — send a message; returns the full response. +- `POST /agents/sessions/{id}/approve` — approve or reject a pending tool call. +- `DELETE /agents/sessions/{id}` — close a session. +- `WS /agents/stream` — token-by-token streaming with tool-call events. + +## Seeder ("The Forge") + +Generates realistic synthetic retail data so you have something to forecast. + +- `GET /seeder/status` — current dataset state. +- `GET /seeder/scenarios` — available named scenarios. +- `GET /seeder/channels` — available sales channels. +- `POST /seeder/generate` — generate a dataset from a scenario. +- `POST /seeder/append` — append more data to an existing dataset. +- `DELETE /seeder/data` — clear the generated data. +- `GET /seeder/exogenous` — exogenous signal data. +- `POST /seeder/verify` — verify dataset integrity. 
+ +## Demo Pipeline + +- `POST /demo/run` — run the full end-to-end pipeline in one call. +- `WS /demo/stream` — stream per-step events for the live Showcase page. + +## Configuration + +- `GET /config/ai` — effective AI-model configuration (agent LLM + RAG embeddings); + API keys are always masked. +- `PATCH /config/ai` — change AI-model settings live, with no restart. +- `GET /config/providers/health` — per-provider connectivity status. +- `GET /config/ollama/models` — models available on the configured Ollama host. diff --git a/docs/user-guide/getting-started.md b/docs/user-guide/getting-started.md new file mode 100644 index 00000000..f3109d4b --- /dev/null +++ b/docs/user-guide/getting-started.md @@ -0,0 +1,100 @@ +# Getting Started with ForecastLab + +This guide takes you from a fresh clone to a running ForecastLab system with data, +trained models, and a working dashboard — in about ten minutes. + +## What ForecastLab Is + +ForecastLab is a **retail demand-forecasting system** you run on a single machine. It +covers the whole forecasting lifecycle end to end: + +1. **Data platform** — stores, products, calendar, daily sales, prices, promotions, inventory. +2. **Ingest** — load sales data through a batch API. +3. **Feature engineering** — build time-safe features (lags, rolling windows, calendar effects). +4. **Forecasting** — train baseline and machine-learning models. +5. **Backtesting** — measure accuracy with time-series cross-validation. +6. **Model registry** — track every trained model, compare runs, promote a champion. +7. **RAG knowledge base** — semantic search over project documentation. +8. **AI agents** — a chat assistant that can run experiments and answer questions. +9. **Dashboard** — a React web app that surfaces all of the above. + +It is built for learning, demos, and portfolio use. It is **not** a multi-tenant SaaS, +not a real-time streaming system, and needs no cloud account — everything runs locally. 
+ +## Prerequisites + +- **Docker** (for the PostgreSQL database) +- **Python 3.12** with [`uv`](https://docs.astral.sh/uv/) (the Python package manager) +- **Node.js** with `pnpm` (enabled through `corepack`) + +## Install and Run + +Run these from the repository root. + +```bash +# 1. Configure environment — add your OpenAI / Anthropic API keys to .env +cp .env.example .env + +# 2. Start PostgreSQL + pgvector (listens on host port 5433) +docker compose up -d + +# 3. Install backend dependencies +uv sync --extra dev + +# 4. Apply database migrations +uv run alembic upgrade head + +# 5. Start the backend API (http://localhost:8123) +uv run uvicorn app.main:app --reload --port 8123 +``` + +In a second terminal, start the web dashboard: + +```bash +cd frontend +corepack enable pnpm +pnpm install +pnpm dev # dashboard at http://localhost:5173 +``` + +Open **http://localhost:5173** in your browser. The interactive API documentation +(Swagger UI) is available at **http://localhost:8123/docs**. + +## Load Data and See It Work + +A fresh database is empty. The fastest way to see the whole system in action is the +**end-to-end demo**, which seeds data, computes features, trains three models, +backtests them, registers the winner, and exercises the agent: + +```bash +make demo +``` + +You can also watch the same pipeline run live in the browser on the **Showcase** page +(see the Dashboard Guide). To generate data without the full pipeline, use the +**Admin** page or the seeder API directly. + +## Key Ports and URLs + +| Service | URL | +|----------------|------------------------------| +| Dashboard | http://localhost:5173 | +| Backend API | http://localhost:8123 | +| API docs | http://localhost:8123/docs | +| PostgreSQL | localhost:5433 | + +## If Something Goes Wrong + +- **Dashboard shows "Loading…" everywhere** — the frontend cannot reach the backend. 
+ Check that the API is running (`curl http://localhost:8123/health`) and that + `frontend/.env` has `VITE_API_BASE_URL=http://localhost:8123`. +- **Database connection refused** — make sure `docker compose up -d` succeeded and + migrations are applied (`uv run alembic upgrade head`). +- **API keys** — the AI agent and RAG features need `OPENAI_API_KEY` and/or + `ANTHROPIC_API_KEY` set in `.env`. Forecasting and the dashboard work without them. + +## Next Steps + +- **Dashboard Guide** — a tour of every page in the web app. +- **Feature Reference** — what each part of the system does and its API endpoints. +- **Agents and RAG Guide** — how the chat assistant and knowledge base work. diff --git a/frontend/src/hooks/use-rag-sources.ts b/frontend/src/hooks/use-rag-sources.ts index ee80cc93..55902285 100644 --- a/frontend/src/hooks/use-rag-sources.ts +++ b/frontend/src/hooks/use-rag-sources.ts @@ -4,6 +4,8 @@ import type { SourceListResponse, IndexDocumentRequest, IndexDocumentResponse, + IndexProjectDocsRequest, + IndexProjectDocsResponse, RetrieveRequest, RetrieveResponse, } from '@/types/api' @@ -40,6 +42,23 @@ export function useIndexDocument() { }) } +// Mutation: bulk-index the repo's bundled project docs (POST /rag/index/project-docs). +// Synchronous server-side — the first run can take ~1-3 min with a real embedding +// provider. Invalidates ['rag-sources'] so the list + counts refresh on completion. +export function useIndexProjectDocs() { + const queryClient = useQueryClient() + return useMutation({ + mutationFn: (body: IndexProjectDocsRequest) => + api('/rag/index/project-docs', { + method: 'POST', + body, + }), + onSuccess: () => { + void queryClient.invalidateQueries({ queryKey: ['rag-sources'] }) + }, + }) +} + // Mutation: semantic search over the knowledge base (POST /rag/retrieve). // Search results are ephemeral — no cache invalidation. A 502 (no embedding // provider configured) surfaces as an ApiError the caller degrades gracefully. 
diff --git a/frontend/src/pages/admin.tsx b/frontend/src/pages/admin.tsx index b933238c..a5de6024 100644 --- a/frontend/src/pages/admin.tsx +++ b/frontend/src/pages/admin.tsx @@ -18,8 +18,14 @@ import { History, Percent, Bot, + Library, } from 'lucide-react' -import { useRagSources, useDeleteRagSource, useIndexDocument } from '@/hooks/use-rag-sources' +import { + useRagSources, + useDeleteRagSource, + useIndexDocument, + useIndexProjectDocs, +} from '@/hooks/use-rag-sources' import { useAliases, useDeleteAlias, useCreateAlias } from '@/hooks/use-runs' import { useSeederStatus, @@ -117,6 +123,7 @@ function RagSourcesPanel() { const { data, isLoading, error, refetch } = useRagSources() const deleteSource = useDeleteRagSource() const indexDocument = useIndexDocument() + const indexProjectDocs = useIndexProjectDocs() const [newSource, setNewSource] = useState({ type: 'markdown', path: '' }) const [isDialogOpen, setIsDialogOpen] = useState(false) @@ -131,6 +138,19 @@ function RagSourcesPanel() { setIsDialogOpen(false) } + const handleIndexProjectDocs = async () => { + try { + const r = await indexProjectDocs.mutateAsync({}) + const summary = + `Indexed ${r.indexed}, updated ${r.updated}, unchanged ${r.unchanged}, ` + + `${r.failed} failed — ${r.total_chunks} chunks` + if (r.failed > 0) toast.warning(summary) + else toast.success(summary) + } catch (err) { + toast.error(err instanceof Error ? err.message : 'Project-docs indexing failed') + } + } + const handleDelete = async (sourceId: string) => { await deleteSource.mutateAsync(sourceId) } @@ -152,6 +172,20 @@ function RagSourcesPanel() { {data?.total_sources ?? 0} sources • {data?.total_chunks ?? 0} chunks +
+ +
{data?.sources.length ? ( diff --git a/frontend/src/types/api.ts b/frontend/src/types/api.ts index d2a16e42..e37539ad 100644 --- a/frontend/src/types/api.ts +++ b/frontend/src/types/api.ts @@ -312,6 +312,34 @@ export interface RetrieveResponse { total_chunks_searched: number } +// Request for POST /rag/index/project-docs. Every flag is optional and defaults to true +// server-side, so the UI posts an empty {}. Unknown keys are rejected (extra="forbid"). +export interface IndexProjectDocsRequest { + include_docs?: boolean + include_prps?: boolean + include_root?: boolean +} + +// One file's outcome in a project-docs index run. +export interface ProjectDocResult { + source_path: string + status: 'indexed' | 'updated' | 'unchanged' | 'failed' + chunks_created: number + error: string | null +} + +// Aggregate result of POST /rag/index/project-docs. +export interface IndexProjectDocsResponse { + results: ProjectDocResult[] + total_files: number + indexed: number + updated: number + unchanged: number + failed: number + total_chunks: number + duration_ms: number +} + // === Agents WebSocket === export type AgentEventType = | 'text_delta'