w7-mgfcode · w7-mgfcode · May 19, 2026 · May 19, 2026 · May 19, 2026
@@ -11,6 +11,8 @@
 from app.features.rag.embeddings import EmbeddingError
 from app.features.rag.schemas import (
     DeleteResponse,
+    IndexProjectDocsRequest,
+    IndexProjectDocsResponse,
     IndexRequest,
     IndexResponse,
     RetrieveRequest,
@@ -133,6 +135,91 @@ async def index_document(
         ) from e
 
 
+@router.post(
+    "/index/project-docs",
+    response_model=IndexProjectDocsResponse,
+    summary="Index bundled project documentation",
+    description="""
+Discover and bulk-index the repository's own bundled markdown.
+
+**Discovery roots (all toggleable, all default on):**
+- `include_docs`: every `docs/**/*.md`
+- `include_prps`: every `PRPs/**/*.md`
+- `include_root`: `README.md`, `AGENTS.md`, `CHANGELOG.md`
+
+Each file is indexed through the same path as `POST /rag/index`, so chunking,
+embedding, the SHA-256 content-hash idempotency short-circuit, and upsert are
+all reused. Re-runs return every unchanged file as `status: "unchanged"`.
+
+**Returns:** per-file results plus aggregate counts (indexed / updated /
+unchanged / failed / total_chunks). A single unreadable file is reported
+`status: "failed"` without aborting the batch; an embedding-provider or
+database failure is batch-fatal and surfaces as `502` / problem+json.
+""",
+)
+async def index_project_docs(
+    request: IndexProjectDocsRequest,
+    db: AsyncSession = Depends(get_db),
+) -> IndexProjectDocsResponse:
+    """Bulk-index bundled project documentation into the knowledge base.
+
+    Args:
+        request: Toggles selecting which doc roots to index.
+        db: Async database session from dependency.
+
+    Returns:
+        Per-file results plus aggregate indexing statistics.
+
+    Raises:
+        HTTPException: If embedding generation fails (502).
+        DatabaseError: If a database operation fails.
+    """
+    logger.info(
+        "rag.index_project_docs_request_received",
+        include_docs=request.include_docs,
+        include_prps=request.include_prps,
+        include_root=request.include_root,
+    )
+
+    service = RAGService()
+
+    try:
+        response = await service.index_project_docs(db=db, request=request)
+
+        logger.info(
+            "rag.index_project_docs_request_completed",
+            total_files=response.total_files,
+            total_chunks=response.total_chunks,
+            failed=response.failed,
+        )
+
+        return response
+
+    except EmbeddingError as e:
+        logger.error(
+            "rag.index_project_docs_request_failed",
+            error=str(e),
+            error_type=type(e).__name__,
+            exc_info=True,
+        )
+        raise HTTPException(
+            status_code=status.HTTP_502_BAD_GATEWAY,
+            detail=f"Embedding generation failed: {e}",
+        ) from e
+
+    except SQLAlchemyError as e:
+        logger.error(
+            "rag.index_project_docs_request_failed",
+            error=str(e),
+            error_type=type(e).__name__,
+            exc_info=True,
+        )
+        raise DatabaseError(
+            message="Failed to index project docs",
+            details={"error": str(e)},
+        ) from e
+
+
 # =============================================================================
 # Retrieve Endpoint
 # =============================================================================

@@ -179,3 +179,63 @@ class DeleteResponse(BaseModel):
     source_id: str
     chunks_deleted: int
     status: Literal["deleted"]
+
+
+class IndexProjectDocsRequest(BaseModel):
+    """Request to bulk-index bundled project documentation.
+
+    All fields default to True so an empty ``{}`` body indexes every root.
+
+    Args:
+        include_docs: Index markdown discovered under docs/**.
+        include_prps: Index markdown discovered under PRPs/**.
+        include_root: Index the root allow-list (README/AGENTS/CHANGELOG).
+    """
+
+    model_config = ConfigDict(extra="forbid")
+
+    include_docs: bool = Field(default=True, description="Index docs/**/*.md")
+    include_prps: bool = Field(default=True, description="Index PRPs/**/*.md")
+    include_root: bool = Field(
+        default=True, description="Index README.md / AGENTS.md / CHANGELOG.md"
+    )
+
+
+class ProjectDocResult(BaseModel):
+    """Per-file outcome of a project-docs index run.
+
+    Args:
+        source_path: Relative POSIX path of the file (the source identifier).
+        status: Outcome — indexed, updated, unchanged, or failed.
+        chunks_created: Number of chunks created (0 when unchanged or failed).
+        error: Error message when status is "failed", otherwise None.
+    """
+
+    source_path: str
+    status: Literal["indexed", "updated", "unchanged", "failed"]
+    chunks_created: int
+    error: str | None = None
+
+
+class IndexProjectDocsResponse(BaseModel):
+    """Aggregate result of POST /rag/index/project-docs.
+
+    Args:
+        results: Per-file outcomes.
+        total_files: Total files discovered and processed.
+        indexed: Count of newly indexed files.
+        updated: Count of re-indexed (changed) files.
+        unchanged: Count of files skipped by the content-hash short-circuit.
+        failed: Count of files reported as failed (unreadable or non-UTF-8).
+        total_chunks: Total chunks created across all files.
+        duration_ms: Wall-clock time taken for the batch.
+    """
+
+    results: list[ProjectDocResult]
+    total_files: int
+    indexed: int
+    updated: int
+    unchanged: int
+    failed: int
+    total_chunks: int
+    duration_ms: float
@@ -29,8 +29,11 @@
 from app.features.rag.schemas import (
     ChunkResult,
     DeleteResponse,
+    IndexProjectDocsRequest,
+    IndexProjectDocsResponse,
     IndexRequest,
     IndexResponse,
+    ProjectDocResult,
     RetrieveRequest,
     RetrieveResponse,
     SourceListResponse,
@@ -39,6 +42,10 @@
 
 logger = structlog.get_logger()
 
+# Allow-listed root markdown files indexed by index_project_docs. CLAUDE.md is
+# deliberately excluded — it is an operating index that @imports AGENTS.md.
+_PROJECT_ROOT_FILES: tuple[str, ...] = ("README.md", "AGENTS.md", "CHANGELOG.md")
+
 
 class SourceNotFoundError(ValueError):
     """Source not found in the knowledge base."""
@@ -250,6 +257,135 @@ async def index_document(
             status=status,
         )
 
+    def _discover_project_doc_files(
+        self, request: IndexProjectDocsRequest
+    ) -> list[tuple[Path, str]]:
+        """Discover bundled markdown under the allow-listed project-doc roots.
+
+        Synchronous, filesystem-only — no DB, no network. ``rglob`` on a non-existent
+        directory yields nothing (no exception), so an absent docs/ or PRPs/
+        root simply contributes 0 files.
+
+        Args:
+            request: Toggles selecting which roots to discover.
+
+        Returns:
+            A deterministically sorted list of (absolute_path, category) pairs
+            where category is "docs", "prp", or "root".
+        """
+        found: list[tuple[Path, str]] = []
+
+        if request.include_docs:
+            found += [(p, "docs") for p in (self._base_dir / "docs").rglob("*.md")]
+
+        if request.include_prps:
+            found += [(p, "prp") for p in (self._base_dir / "PRPs").rglob("*.md")]
+
+        if request.include_root:
+            for name in _PROJECT_ROOT_FILES:
+                candidate = self._base_dir / name
+                if candidate.is_file():
+                    found.append((candidate, "root"))
+
+        # rglob order is filesystem-dependent — sort for stable, reproducible runs.
+        return sorted(found, key=lambda pair: str(pair[0]))
+
+    async def index_project_docs(
+        self,
+        db: AsyncSession,
+        request: IndexProjectDocsRequest,
+    ) -> IndexProjectDocsResponse:
+        """Bulk-index discovered project docs via index_document. Idempotent.
+
+        Each file is indexed through index_document, reusing its chunking,
+        embedding, SHA-256 content-hash idempotency, and upsert. A single
+        unreadable / non-UTF-8 file is reported status="failed" and does NOT
+        abort the batch. EmbeddingError / SQLAlchemyError are NOT caught here —
+        they are batch-fatal and propagate to the route's error handlers.
+
+        Args:
+            db: Database session.
+            request: Toggles selecting which roots to index.
+
+        Returns:
+            Per-file results plus aggregate counts.
+        """
+        start_time = time.time()
+
+        logger.info(
+            "rag.index_project_docs_started",
+            include_docs=request.include_docs,
+            include_prps=request.include_prps,
+            include_root=request.include_root,
+        )
+
+        results: list[ProjectDocResult] = []
+
+        for abs_path, category in self._discover_project_doc_files(request):
+            # Every abs_path is rooted at self._base_dir, so relative_to is safe.
+            rel = abs_path.relative_to(self._base_dir).as_posix()
+            try:
+                content = abs_path.read_text(encoding="utf-8")
+                index_response = await self.index_document(
+                    db,
+                    IndexRequest(
+                        source_type="markdown",
+                        source_path=rel,
+                        content=content,
+                        metadata={"category": category},
+                    ),
+                )
+                results.append(
+                    ProjectDocResult(
+                        source_path=rel,
+                        status=index_response.status,
+                        chunks_created=index_response.chunks_created,
+                        error=None,
+                    )
+                )
+            except (OSError, ValueError) as exc:
+                # FileNotFoundError ⊂ OSError; UnicodeDecodeError ⊂ ValueError.
+                logger.warning(
+                    "rag.index_project_docs_file_failed",
+                    source_path=rel,
+                    error=str(exc),
+                    error_type=type(exc).__name__,
+                )
+                results.append(
+                    ProjectDocResult(
+                        source_path=rel,
+                        status="failed",
+                        chunks_created=0,
+                        error=str(exc),
+                    )
+                )
+
+        duration_ms = (time.time() - start_time) * 1000
+
+        summary = IndexProjectDocsResponse(
+            results=results,
+            total_files=len(results),
+            indexed=sum(r.status == "indexed" for r in results),
+            updated=sum(r.status == "updated" for r in results),
+            unchanged=sum(r.status == "unchanged" for r in results),
+            failed=sum(r.status == "failed" for r in results),
+            total_chunks=sum(r.chunks_created for r in results),
+            duration_ms=duration_ms,
+        )
+
+        logger.info(
+            "rag.index_project_docs_completed",
+            total_files=summary.total_files,
+            indexed=summary.indexed,
+            updated=summary.updated,
+            unchanged=summary.unchanged,
+            failed=summary.failed,
+            total_chunks=summary.total_chunks,
+            duration_ms=duration_ms,
+        )
+
+        return summary
+
     async def retrieve(
         self,
         db: AsyncSession,

@@ -41,9 +41,10 @@ async def db_session() -> AsyncGenerator[AsyncSession, None]:
         try:
             yield session
         finally:
-            # Clean up test data (delete sources with test- prefix)
+            # Clean up test data (delete sources whose path contains the substring
+            # "test-", which covers nested fixture paths like docs/test-*.md)
             test_source_ids = delete(DocumentSource).where(
-                DocumentSource.source_path.like("test-%")
+                DocumentSource.source_path.like("%test-%")
             )
             await session.execute(test_source_ids)
             await session.commit()