diff --git a/.env.example b/.env.example
index 442da0c0..7c4e121b 100644
--- a/.env.example
+++ b/.env.example
@@ -22,5 +22,36 @@ FORECAST_MAX_HORIZON=90
 FORECAST_MODEL_ARTIFACTS_DIR=./artifacts/models
 FORECAST_ENABLE_LIGHTGBM=false
 
+# RAG Configuration
+# Embedding Provider: "openai" or "ollama"
+RAG_EMBEDDING_PROVIDER=openai
+
+# OpenAI Configuration (when RAG_EMBEDDING_PROVIDER=openai)
+OPENAI_API_KEY=sk-your-openai-api-key-here
+RAG_EMBEDDING_MODEL=text-embedding-3-small
+
+# Ollama Configuration (when RAG_EMBEDDING_PROVIDER=ollama)
+# OLLAMA_BASE_URL=http://localhost:11434
+# OLLAMA_EMBEDDING_MODEL=nomic-embed-text
+
+# Embedding dimension (must match your model: OpenAI=1536, nomic-embed-text=768, etc.)
+RAG_EMBEDDING_DIMENSION=1536
+RAG_EMBEDDING_BATCH_SIZE=100
+
+# Chunking settings
+RAG_CHUNK_SIZE=512
+RAG_CHUNK_OVERLAP=50
+RAG_MIN_CHUNK_SIZE=100
+
+# Retrieval settings
+RAG_TOP_K=5
+RAG_SIMILARITY_THRESHOLD=0.7
+RAG_MAX_CONTEXT_TOKENS=4000
+
+# pgvector index settings
+RAG_INDEX_TYPE=hnsw
+RAG_HNSW_M=16
+RAG_HNSW_EF_CONSTRUCTION=64
+
 # Frontend (Vite)
 VITE_API_BASE_URL=http://localhost:8123
diff --git a/INITIAL-10.md b/INITIAL-10.md
new file mode 100644
index 00000000..4e6eb4d3
--- /dev/null
+++ b/INITIAL-10.md
@@ -0,0 +1,430 @@
+# INITIAL-10.md — Agentic Layer (The Brain)
+
+## Architectural Role
+
+**"The Brain"** - Autonomous decision-making, tool orchestration, and structured outputs using PydanticAI.
+
+This phase provides intelligent orchestration capabilities:
+- Experiment automation (config generation → backtest → deploy)
+- RAG-powered Q&A with evidence-grounded answers and citations
+- Human-in-the-loop approval for sensitive operations
+- Structured, schema-enforced outputs
+
+---
+
+## Tech Stack
+
+| Component | Technology | Purpose |
+| --------- | ---------- | ------- |
+| Agent Framework | [PydanticAI](https://ai.pydantic.dev/) | Type-safe agent orchestration |
+| Tool System | [Function Tools](https://ai.pydantic.dev/tools/) | API binding |
+| Tool Groups | [Toolsets](https://ai.pydantic.dev/toolsets/) | Grouped tool management |
+| LLM Provider | Anthropic Claude / OpenAI GPT-4 | Configurable provider |
+| Streaming | [PydanticAI Streaming](https://ai.pydantic.dev/results/#streamed-results) | Real-time responses |
+
+---
+
+## FEATURE
+
+### Experiment Orchestrator Agent
+Autonomous experiment workflow management:
+- **Tools**: `list_models`, `run_backtest`, `compare_runs`, `create_alias`, `archive_run`
+- **Workflow**: Generate configs → Run backtests → Analyze metrics → Select best → Deploy alias
+- **Output**: Structured `ExperimentReport` with methodology, results, and recommendations
+
+### RAG Assistant Agent
+Evidence-grounded question answering:
+- **Tools**: `retrieve_context` (from INITIAL-9), `format_citation`
+- **Workflow**: Parse query → Retrieve chunks → Synthesize answer → Format citations
+- **Output**: Structured `RAGResponse` with answer, citations, and confidence score
+
+### Agent Session Management
+- Session state persistence for multi-turn conversations
+- Tool call logging with correlation IDs
+- Human-in-the-loop approval for sensitive actions
+- Graceful LLM API failure handling with retries
+
+---
+
+## ENDPOINTS
+
+### POST /agents/experiment/run
+Execute an experiment workflow with the Orchestrator Agent.
+
+**Request**:
+
+```json
+{
+  "objective": "Find the best model configuration for store S001, product P001",
+  "constraints": {
+    "model_types": ["moving_average", "seasonal_naive"],
+    "min_train_size": 60,
+    "max_splits": 5
+  },
+  "auto_deploy": false,
+  "session_id": "optional-session-id"
+}
+```
+
+**Response**:
+
+```json
+{
+  "session_id": "sess_abc123",
+  "status": "completed",
+  "report": {
+    "objective": "Find the best model configuration for store S001, product P001",
+    "methodology": "Evaluated 6 configurations using 5-fold expanding window CV",
+    "experiments_run": 6,
+    "best_run": {
+      "run_id": "run_xyz789",
+      "model_type": "moving_average",
+      "config": {"window": 14},
+      "metrics": {
+        "mae": 12.5,
+        "smape": 15.2,
+        "wape": 0.08
+      }
+    },
+    "baseline_comparison": {
+      "vs_naive": {
+        "mae_improvement_pct": 23.5,
+        "smape_improvement_pct": 18.2
+      }
+    },
+    "recommendation": "Deploy moving_average with window=14",
+    "approval_required": true,
+    "pending_action": "create_alias"
+  },
+  "tool_calls": [
+    {
+      "tool": "list_models",
+      "args": {},
+      "result_summary": "Found 4 model types"
+    },
+    {
+      "tool": "run_backtest",
+      "args": {"model_type": "moving_average", "window": 7},
+      "result_summary": "MAE: 15.2"
+    }
+  ],
+  "tokens_used": 2450,
+  "duration_ms": 45000
+}
+```
+
+### POST /agents/experiment/approve
+Approve a pending action from an experiment session.
+
+**Request**:
+
+```json
+{
+  "session_id": "sess_abc123",
+  "action": "create_alias",
+  "approved": true,
+  "comment": "Approved for staging deployment"
+}
+```
+
+**Response**:
+
+```json
+{
+  "session_id": "sess_abc123",
+  "action": "create_alias",
+  "status": "executed",
+  "result": {
+    "alias_name": "production",
+    "run_id": "run_xyz789"
+  }
+}
+```
+
+### POST /agents/rag/query
+Query with answer generation using the RAG Assistant Agent.
+
+**Request**:
+
+```json
+{
+  "query": "How does the backtesting module prevent data leakage?",
+  "session_id": "optional-session-id",
+  "include_sources": true
+}
+```
+
+**Response**:
+
+```json
+{
+  "session_id": "sess_def456",
+  "answer": "The backtesting module prevents data leakage through several mechanisms:\n\n1. **Time-based splits only**: The TimeSeriesSplitter uses expanding or sliding window strategies, never random splits.\n\n2. **Gap parameter**: Configurable gap between train and test sets simulates operational latency.\n\n3. **Lag feature validation**: Features are computed with explicit cutoff dates to prevent future data access.",
+  "confidence": 0.92,
+  "citations": [
+    {
+      "source_type": "markdown",
+      "source_path": "docs/PHASE/5-BACKTESTING.md",
+      "chunk_id": "chunk_abc123",
+      "snippet": "TimeSeriesSplitter uses time-based splits (expanding/sliding window)...",
+      "relevance_score": 0.94
+    },
+    {
+      "source_type": "markdown",
+      "source_path": "CLAUDE.md",
+      "chunk_id": "chunk_def456",
+      "snippet": "Backtesting uses time-based splits (rolling/expanding), never random split...",
+      "relevance_score": 0.89
+    }
+  ],
+  "tokens_used": 1250,
+  "duration_ms": 3200
+}
+```
+
+### GET /agents/status/{session_id}
+Check agent session status.
+
+**Response**:
+
+```json
+{
+  "session_id": "sess_abc123",
+  "agent_type": "experiment_orchestrator",
+  "status": "awaiting_approval",
+  "created_at": "2026-02-01T10:30:00Z",
+  "last_activity": "2026-02-01T10:35:00Z",
+  "pending_action": {
+    "action": "create_alias",
+    "details": {
+      "alias_name": "production",
+      "run_id": "run_xyz789"
+    }
+  },
+  "tool_calls_count": 8,
+  "tokens_used": 2450
+}
+```
+
+### WS /agents/stream
+WebSocket endpoint for streaming responses.
+
+**Client → Server**:
+
+```json
+{
+  "type": "query",
+  "agent": "rag_assistant",
+  "payload": {
+    "query": "Explain the model registry workflow"
+  }
+}
+```
+
+**Server → Client (streaming)**:
+
+```json
+{"type": "token", "content": "The"}
+{"type": "token", "content": " model"}
+{"type": "token", "content": " registry"}
+{"type": "tool_call", "tool": "retrieve_context", "status": "started"}
+{"type": "tool_call", "tool": "retrieve_context", "status": "completed", "summary": "Found 5 relevant chunks"}
+{"type": "token", "content": " tracks..."}
+{"type": "complete", "session_id": "sess_xyz", "tokens_used": 850}
+```
+
+---
+
+## AGENT DEFINITIONS
+
+### Experiment Orchestrator Agent
+
+```python
+from pydantic_ai import Agent
+from pydantic import BaseModel
+
+class ExperimentReport(BaseModel):
+    """Structured output for experiment results."""
+    objective: str
+    methodology: str
+    experiments_run: int
+    best_run: RunSummary
+    baseline_comparison: BaselineComparison
+    recommendation: str
+    approval_required: bool
+    pending_action: str | None
+
+experiment_agent = Agent(
+    model="anthropic:claude-sonnet-4-20250514",
+    result_type=ExperimentReport,
+    system_prompt="""You are an ML experiment orchestrator for retail demand forecasting.
+
+Your goal is to find the best model configuration through systematic experimentation.
+Always:
+1. Start with baseline models (naive, seasonal_naive)
+2. Compare against baselines with improvement percentages
+3. Use time-based backtesting with appropriate train/test splits
+4. Recommend the best configuration with justification
+5. Request approval before deployment actions""",
+    tools=[list_models, run_backtest, compare_runs, create_alias, archive_run]
+)
+```
+
+### RAG Assistant Agent
+
+```python
+class RAGResponse(BaseModel):
+    """Structured output for RAG queries."""
+    answer: str
+    confidence: float  # 0.0 - 1.0
+    citations: list[Citation]
+    insufficient_context: bool = False
+
+rag_agent = Agent(
+    model="anthropic:claude-sonnet-4-20250514",
+    result_type=RAGResponse,
+    system_prompt="""You are a documentation assistant for ForecastLabAI.
+
+Your responses must be evidence-grounded:
+- Only answer based on retrieved context
+- Include citations for all claims
+- If context is insufficient, set insufficient_context=True and explain what's missing
+- Never hallucinate information not in the retrieved chunks""",
+    tools=[retrieve_context, format_citation]
+)
+```
+
+---
+
+## TOOL DEFINITIONS
+
+### list_models
+```python
+@tool
+async def list_models(ctx: RunContext[AgentDeps]) -> list[ModelInfo]:
+    """List available forecasting models with their configurations.
+
+    Use this to discover what model types can be experimented with.
+    Returns model_type, default_config, and description.
+    """
+    ...
+```
+
+### run_backtest
+```python
+@tool
+async def run_backtest(
+    ctx: RunContext[AgentDeps],
+    model_type: str,
+    config: dict[str, Any],
+    store_id: str,
+    product_id: str,
+    n_splits: int = 5
+) -> BacktestResult:
+    """Run a backtest for a model configuration.
+
+    Use this to evaluate model performance with time-series CV.
+    Returns per-fold and aggregated metrics (MAE, sMAPE, WAPE).
+    """
+    ...
+```
+
+### retrieve_context
+```python
+@tool
+async def retrieve_context(
+    ctx: RunContext[AgentDeps],
+    query: str,
+    top_k: int = 5
+) -> list[RetrievedChunk]:
+    """Retrieve relevant documentation chunks for a query.
+
+    Use this before answering any question about the system.
+    Returns chunks with content, source_path, and relevance_score.
+    """
+    ...
+```
+
+---
+
+## CONFIGURATION (Settings)
+
+```python
+# app/core/config.py additions
+
+# Agent LLM Configuration
+agent_default_model: str = "anthropic:claude-sonnet-4-20250514"
+agent_fallback_model: str = "openai:gpt-4o"
+agent_temperature: float = 0.1
+agent_max_tokens: int = 4096
+
+# Agent Execution Configuration
+agent_max_tool_calls: int = 10
+agent_timeout_seconds: int = 120
+agent_retry_attempts: int = 3
+agent_retry_delay_seconds: float = 1.0
+
+# Human-in-the-Loop Configuration
+agent_require_approval: list[str] = ["create_alias", "archive_run"]
+agent_approval_timeout_minutes: int = 60
+
+# Streaming Configuration
+agent_enable_streaming: bool = True
+agent_stream_chunk_size: int = 10  # tokens per chunk
+
+# Session Configuration
+agent_session_ttl_minutes: int = 120
+agent_max_sessions_per_user: int = 5
+```
+
+---
+
+## SUCCESS CRITERIA
+
+- [ ] Agents produce schema-enforced structured outputs
+- [ ] Tool calls are logged with correlation IDs and timing
+- [ ] Human-in-the-loop approval blocks sensitive actions
+- [ ] Graceful handling of LLM API failures with retries
+- [ ] WebSocket streaming delivers tokens in real-time
+- [ ] Session state persists across multiple requests
+- [ ] Unit tests with mocked LLM responses
+- [ ] Integration tests with real LLM calls (rate-limited)
+- [ ] Structured logging for all agent operations
+- [ ] Token usage tracked per session for cost monitoring
+
+---
+
+## CROSS-MODULE INTEGRATION
+
+| Direction | Module | Integration Point |
+| --------- | ------ | ----------------- |
+| **← RAG Layer** | INITIAL-9 | Uses `retrieve_context` tool |
+| **← Registry** | Phase 6 | Uses `list_runs`, `compare_runs`, `create_alias` tools |
+| **← Backtesting** | Phase 5 | Uses `run_backtest` tool |
+| **← Forecasting** | Phase 4 | Uses `list_models`, `train_model` tools |
+| **→ Dashboard** | INITIAL-11 | Provides chat interface backend |
+| **→ Jobs** | Phase 7 | Creates job records for audit trail |
+
+---
+
+## DOCUMENTATION LINKS
+
+- [PydanticAI Documentation](https://ai.pydantic.dev/)
+- [PydanticAI Agents](https://ai.pydantic.dev/agents/)
+- [PydanticAI Tools](https://ai.pydantic.dev/tools/)
+- [PydanticAI Toolsets](https://ai.pydantic.dev/toolsets/)
+- [PydanticAI Built-in Tools](https://ai.pydantic.dev/builtin-tools/)
+- [PydanticAI Streaming Results](https://ai.pydantic.dev/results/#streamed-results)
+- [PydanticAI GitHub](https://github.com/pydantic/pydantic-ai)
+- [Anthropic Claude API](https://docs.anthropic.com/en/api)
+
+---
+
+## OTHER CONSIDERATIONS
+
+- **Structured Outputs**: All agent responses are Pydantic models, never raw text
+- **Tool Docstrings**: Follow guidance in CLAUDE.md for agent-optimized tool documentation
+- **Cost Control**: Track and limit token usage per session
+- **Audit Trail**: All tool calls logged with request correlation for debugging
+- **Fallback Provider**: Automatic failover to fallback model on primary failure
+- **Approval Workflow**: Pending actions expire after `agent_approval_timeout_minutes`
diff --git a/INITIAL-11.md b/INITIAL-11.md
new file mode 100644
index 00000000..3138f3c6
--- /dev/null
+++ b/INITIAL-11.md
@@ -0,0 +1,417 @@
+# INITIAL-11.md — ForecastLab Dashboard (The Face)
+
+## Architectural Role
+
+**"The Face"** - User interface, data visualization, and agent interaction using React 19 + shadcn/ui.
+
+This phase provides the visual layer for:
+- Data exploration with server-side pagination and filtering
+- Time series visualization with interactive charts
+- Agent chat interface with streaming responses
+- Admin panel for system management
+
+---
+
+## Tech Stack
+
+| Component | Technology | Purpose |
+|-----------|------------|---------|
+| Framework | React 19 + [Vite](https://vite.dev/) | Fast build, HMR |
+| Components | [shadcn/ui](https://ui.shadcn.com/) | Accessible, customizable UI |
+| Data Tables | [TanStack Table](https://tanstack.com/table/latest) | Server-side data grids |
+| Data Fetching | [TanStack Query](https://tanstack.com/query/latest) | Caching, invalidation |
+| Charts | [Recharts](https://recharts.org/) | Time series visualization |
+| Styling | Tailwind CSS 4 | Utility-first CSS |
+| State | React 19 `use()` + TanStack Query | Server state management |
+
+---
+
+## FEATURE
+
+### Data Explorer
+Interactive data tables with full server-side capabilities:
+- **Tables**: Sales, Stores, Products, Model Runs, Jobs
+- **Features**: Pagination, sorting, filtering, column visibility
+- **Export**: CSV download for selected/all rows
+- **Pattern**: [shadcn/ui Data Table](https://ui.shadcn.com/docs/components/data-table)
+
+### Time Series Visualizers
+Charts for forecasting analysis:
+- **Actual vs Predicted**: Line chart with confidence intervals
+- **Backtest Folds**: Train/test split visualization
+- **Metric Comparison**: Bar charts for model comparison
+- **Interactive**: Tooltips, zoom, pan, brush selection
+
+### Agent Chat Interface
+Real-time interaction with AI agents:
+- **Streaming**: WebSocket-based token streaming
+- **Citations**: Rendered with source links
+- **Tool Calls**: Collapsible visualization of agent actions
+- **History**: Session sidebar with conversation threads
+
+### Admin Panel
+System management and monitoring:
+- **RAG Sources**: Index/delete documentation sources
+- **Model Aliases**: Manage deployment aliases
+- **Health Dashboard**: Service status, recent errors
+- **Job Monitor**: Active and historical job status
+
+---
+
+## PAGE STRUCTURE
+
+### /dashboard
+Main dashboard with KPI summary cards and quick actions.
+
+### /explorer/sales
+Sales data explorer with date range filtering.
+
+```
+┌─────────────────────────────────────────────────────────────┐
+│  Sales Explorer                                    [Export] │
+├─────────────────────────────────────────────────────────────┤
+│  Filters: [Date Range] [Store ▼] [Product ▼] [Search...]    │
+├─────────────────────────────────────────────────────────────┤
+│  Date        │ Store   │ Product │ Quantity │ Revenue       │
+│  2026-01-15  │ S001    │ P001    │ 150      │ $2,250.00     │
+│  2026-01-15  │ S001    │ P002    │ 75       │ $1,125.00     │
+│  ...         │ ...     │ ...     │ ...      │ ...           │
+├─────────────────────────────────────────────────────────────┤
+│  Page 1 of 50  │  [< Prev]  [1] [2] [3] ... [50]  [Next >]  │
+└─────────────────────────────────────────────────────────────┘
+```
+
+### /explorer/runs
+Model run explorer with comparison capabilities.
+
+```
+┌─────────────────────────────────────────────────────────────┐
+│  Model Runs                              [Compare Selected] │
+├─────────────────────────────────────────────────────────────┤
+│  [☐] │ Run ID    │ Model    │ Status  │ MAE   │ Created     │
+│  [☐] │ run_abc   │ MA(14)   │ SUCCESS │ 12.5  │ 2h ago      │
+│  [☐] │ run_def   │ SN(7)    │ SUCCESS │ 15.2  │ 3h ago      │
+│  [☐] │ run_ghi   │ Naive    │ SUCCESS │ 18.9  │ 5h ago      │
+├─────────────────────────────────────────────────────────────┤
+│  Showing 3 of 127 runs                                      │
+└─────────────────────────────────────────────────────────────┘
+```
+
+### /visualize/forecast
+Forecast visualization with actual vs predicted overlay.
+
+```
+┌─────────────────────────────────────────────────────────────┐
+│  Forecast: Store S001, Product P001                         │
+├─────────────────────────────────────────────────────────────┤
+│  [Store ▼] [Product ▼] [Model Run ▼] [Date Range]          │
+├─────────────────────────────────────────────────────────────┤
+│                                                             │
+│  200 ─┤                               ╭──────              │
+│       │                          ╭────╯    Predicted       │
+│  150 ─┤                     ╭────╯                         │
+│       │                ╭────╯      ───── Actual            │
+│  100 ─┤           ╭────╯           - - - Confidence        │
+│       │      ╭────╯                                        │
+│   50 ─┤ ╭────╯                                             │
+│       │─╯                                                   │
+│    0 ─┼────────────────────────────────────────────────    │
+│       Jan 1     Jan 15    Feb 1     Feb 15    Mar 1        │
+│                                                             │
+├─────────────────────────────────────────────────────────────┤
+│  MAE: 12.5  │  sMAPE: 15.2%  │  WAPE: 8.1%  │  Bias: -2.3  │
+└─────────────────────────────────────────────────────────────┘
+```
+
+### /visualize/backtest
+Backtest fold visualization.
+
+```
+┌─────────────────────────────────────────────────────────────┐
+│  Backtest: run_abc123 (5-fold Expanding Window)             │
+├─────────────────────────────────────────────────────────────┤
+│                                                             │
+│  Fold 1: ████████████░░░░  MAE: 14.2  sMAPE: 16.8%         │
+│  Fold 2: █████████████████░░░░  MAE: 13.1  sMAPE: 15.4%    │
+│  Fold 3: ███████████████████████░░░░  MAE: 12.8  sMAPE: 14.9│
+│  Fold 4: █████████████████████████████░░░░  MAE: 11.9      │
+│  Fold 5: ███████████████████████████████████░░░░  MAE: 11.2│
+│                                                             │
+│  █ Train   ░ Test                                          │
+├─────────────────────────────────────────────────────────────┤
+│  Aggregated: MAE: 12.6 ± 1.1  │  Stability: 0.91           │
+└─────────────────────────────────────────────────────────────┘
+```
+
+### /chat
+Agent chat interface with streaming.
+
+```
+┌─────────────────────────────────────────────────────────────┐
+│  ForecastLab Assistant                                      │
+├────────────┬────────────────────────────────────────────────┤
+│  Sessions  │                                                │
+│  ─────────│  How does backtesting prevent data leakage?    │
+│  Today     │                                                │
+│  ◉ Current │  The backtesting module prevents data leakage │
+│  ○ 10:30am │  through several mechanisms:                   │
+│  ○ 9:15am  │                                                │
+│  Yesterday │  1. **Time-based splits**: Uses expanding...   │
+│  ○ 4:45pm  │                                                │
+│            │  📚 Citations:                                  │
+│            │  [1] docs/PHASE/5-BACKTESTING.md               │
+│            │  [2] CLAUDE.md                                 │
+│            │                                                │
+│            │  ──────────────────────────────────────────    │
+│            │  🔧 Tool: retrieve_context (5 chunks found)    │
+│            │  ──────────────────────────────────────────    │
+├────────────┴────────────────────────────────────────────────┤
+│  [Type your question...]                          [Send ➤] │
+└─────────────────────────────────────────────────────────────┘
+```
+
+### /admin
+Admin panel for system management.
+
+---
+
+## COMPONENTS
+
+### DataTable (shadcn/ui pattern)
+
+```tsx
+// components/data-table/data-table.tsx
+import {
+  ColumnDef,
+  flexRender,
+  getCoreRowModel,
+  useReactTable,
+} from "@tanstack/react-table"
+
+interface DataTableProps<TData, TValue> {
+  columns: ColumnDef<TData, TValue>[]
+  data: TData[]
+  pageCount: number
+  pageIndex: number
+  pageSize: number
+  onPaginationChange: (pagination: PaginationState) => void
+  onSortingChange: (sorting: SortingState) => void
+  onFilterChange: (filters: ColumnFiltersState) => void
+}
+
+export function DataTable<TData, TValue>({
+  columns,
+  data,
+  pageCount,
+  ...props
+}: DataTableProps<TData, TValue>) {
+  const table = useReactTable({
+    data,
+    columns,
+    pageCount,
+    manualPagination: true,
+    manualSorting: true,
+    manualFiltering: true,
+    getCoreRowModel: getCoreRowModel(),
+    // ...
+  })
+
+  return (
+    <Table>
+      <TableHeader>...</TableHeader>
+      <TableBody>...</TableBody>
+    </Table>
+  )
+}
+```
+
+### TimeSeriesChart
+
+```tsx
+// components/charts/time-series-chart.tsx
+import { LineChart, Line, XAxis, YAxis, Tooltip, Legend } from 'recharts'
+
+interface TimeSeriesChartProps {
+  data: { date: string; actual: number; predicted?: number }[]
+  showConfidence?: boolean
+  height?: number
+}
+
+export function TimeSeriesChart({ data, showConfidence, height = 400 }: TimeSeriesChartProps) {
+  return (
+    <LineChart data={data} height={height}>
+      <XAxis dataKey="date" />
+      <YAxis />
+      <Tooltip />
+      <Legend />
+      <Line type="monotone" dataKey="actual" stroke="#2563eb" name="Actual" />
+      {data[0]?.predicted !== undefined && (
+        <Line type="monotone" dataKey="predicted" stroke="#16a34a" name="Predicted" strokeDasharray="5 5" />
+      )}
+    </LineChart>
+  )
+}
+```
+
+### ChatMessage
+
+```tsx
+// components/chat/chat-message.tsx
+interface ChatMessageProps {
+  role: 'user' | 'assistant'
+  content: string
+  citations?: Citation[]
+  toolCalls?: ToolCall[]
+  isStreaming?: boolean
+}
+
+export function ChatMessage({ role, content, citations, toolCalls, isStreaming }: ChatMessageProps) {
+  return (
+    <div className={cn("flex", role === 'user' ? 'justify-end' : 'justify-start')}>
+      <div className="max-w-[80%] rounded-lg p-4 bg-muted">
+        <Markdown>{content}</Markdown>
+        {isStreaming && <span className="animate-pulse">▋</span>}
+        {citations && <CitationList citations={citations} />}
+        {toolCalls && <ToolCallList toolCalls={toolCalls} />}
+      </div>
+    </div>
+  )
+}
+```
+
+---
+
+## API HOOKS (TanStack Query)
+
+```tsx
+// hooks/use-sales.ts
+export function useSales(params: SalesQueryParams) {
+  return useQuery({
+    queryKey: ['sales', params],
+    queryFn: () => api.get('/analytics/drilldowns', { params }),
+    placeholderData: keepPreviousData,
+  })
+}
+
+// hooks/use-runs.ts
+export function useRuns(params: RunsQueryParams) {
+  return useQuery({
+    queryKey: ['runs', params],
+    queryFn: () => api.get('/registry/runs', { params }),
+  })
+}
+
+// hooks/use-chat.ts
+export function useChat(sessionId?: string) {
+  const [messages, setMessages] = useState<Message[]>([])
+  const ws = useWebSocket(`${WS_URL}/agents/stream`)
+
+  const sendMessage = useCallback((content: string) => {
+    ws.send(JSON.stringify({ type: 'query', agent: 'rag_assistant', payload: { query: content } }))
+  }, [ws])
+
+  return { messages, sendMessage, isConnected: ws.readyState === WebSocket.OPEN }
+}
+```
+
+---
+
+## CONFIGURATION (Environment)
+
+```env
+# .env.example for frontend
+
+# API Configuration
+VITE_API_BASE_URL=http://localhost:8123
+VITE_WS_URL=ws://localhost:8123/agents/stream
+
+# Feature Flags
+VITE_ENABLE_AGENT_CHAT=true
+VITE_ENABLE_ADMIN_PANEL=true
+
+# Visualization
+VITE_DEFAULT_PAGE_SIZE=25
+VITE_MAX_CHART_POINTS=365
+```
+
+---
+
+## EXAMPLES
+
+### examples/ui/README.md
+```markdown
+# Dashboard Page Map
+
+| Page | API Endpoints | Description |
+|------|---------------|-------------|
+| /dashboard | GET /analytics/kpis | KPI summary cards |
+| /explorer/sales | GET /analytics/drilldowns | Sales data table |
+| /explorer/runs | GET /registry/runs | Model run table |
+| /visualize/forecast | GET /forecasting/predict | Forecast chart |
+| /visualize/backtest | GET /backtesting/results/{run_id} | Fold visualization |
+| /chat | WS /agents/stream | Agent chat |
+| /admin | GET /rag/sources, GET /registry/aliases | Admin panel |
+
+## Running the Dashboard
+
+\`\`\`bash
+cd frontend
+pnpm install
+pnpm dev
+\`\`\`
+
+Open http://localhost:5173
+```
+
+---
+
+## SUCCESS CRITERIA
+
+- [ ] Data tables handle 10k+ rows with virtual scrolling
+- [ ] Server-side pagination, sorting, filtering all functional
+- [ ] Charts render smoothly with 365+ data points
+- [ ] WebSocket chat shows streaming tokens in real-time
+- [ ] Citations render as clickable source links
+- [ ] Tool calls displayed in collapsible sections
+- [ ] Responsive design works on tablet and mobile
+- [ ] Lighthouse performance score > 90
+- [ ] Accessibility: keyboard navigation, screen reader support
+- [ ] Dark/light theme toggle
+
+---
+
+## CROSS-MODULE INTEGRATION
+
+| Direction | Module | Integration Point |
+|-----------|--------|-------------------|
+| **← RAG Layer** | INITIAL-9 | Displays indexed sources, allows re-indexing |
+| **← Agentic Layer** | INITIAL-10 | Chat interface, experiment status display |
+| **← Registry** | Phase 6 | Run leaderboard, comparison views |
+| **← Analytics** | Phase 7 | KPI dashboard, drilldown charts |
+| **← Jobs** | Phase 7 | Job status monitoring |
+| **← Dimensions** | Phase 7 | Store/product selectors |
+
+---
+
+## DOCUMENTATION LINKS
+
+- [shadcn/ui Documentation](https://ui.shadcn.com/)
+- [shadcn/ui Data Table](https://ui.shadcn.com/docs/components/data-table)
+- [shadcn/ui Table](https://ui.shadcn.com/docs/components/table)
+- [TanStack Table](https://tanstack.com/table/latest)
+- [TanStack Query](https://tanstack.com/query/latest)
+- [Recharts](https://recharts.org/)
+- [Vite Documentation](https://vite.dev/)
+- [React 19 Documentation](https://react.dev/)
+- [Tailwind CSS 4](https://tailwindcss.com/)
+
+---
+
+## OTHER CONSIDERATIONS
+
+- **No Hardcoded URLs**: API base URL from environment variable only
+- **Error Boundaries**: Graceful error handling with retry options
+- **Loading States**: Skeleton components for all async data
+- **Optimistic Updates**: Instant UI feedback for mutations
+- **Caching**: TanStack Query manages cache invalidation
+- **Bundle Size**: Code splitting per route for fast initial load
diff --git a/INITIAL-9.md b/INITIAL-9.md
index e82c4453..da491760 100644
--- a/INITIAL-9.md
+++ b/INITIAL-9.md
@@ -1,33 +1,319 @@
-# INITIAL-9.md — Dashboard + RAG + Agentic Layer (PydanticAI)
-
-## FEATURE:
-- Dashboard (React + Vite + shadcn/ui Data Table):
-  - Data Explorer (tables, filters, export)
-  - Model Runs (leaderboard, compare)
-  - Train & Predict (forms, status)
-  - Predictions (tabular view)
-- RAG assistant (pgvector):
-  - indexed sources: README.md, /docs/*, OpenAPI export, run reports
-  - retrieve top-k → answer with citations
-- Optional PydanticAI:
-  - agent with tools:
-    - experiment orchestrator (generate configs → backtest → select best → report)
-    - rag assistant (query → retrieve → structured answer)
-  - enforced structured outputs
-
-## EXAMPLES:
-- `examples/ui/README.md` — page map + API mapping (no hardcoded base URL).
-- `examples/rag/index_docs.py` — chunk+embed+store (Settings-driven).
-- `examples/rag/query.http` — Q&A returning a citations schema.
-- `examples/agent/` — best-practice agent setup (providers, tools, dependencies).
-
-## DOCUMENTATION:
-- shadcn/ui Data Table pattern + TanStack Table
-- pgvector similarity search + indexing strategies
-- PydanticAI docs (include link in README as a code block)
-
-## OTHER CONSIDERATIONS:
-- Required: `.env.example` for frontend (`VITE_API_BASE_URL`).
-- RAG must be evidence-grounded: if no support, return “not found” (no hallucinations).
-- Stable citation schema: source_type, source_id/path, chunk_id, snippet/span.
-- Embedding model + dimension must come from Settings (never hardcoded).
+# INITIAL-9.md — RAG Knowledge Base (The Memory)
+
+## Architectural Role
+
+**"The Memory"** - Vector storage, document ingestion, and semantic retrieval infrastructure.
+
+This phase provides the foundational knowledge layer that enables:
+- Indexed documentation and run reports for AI-assisted search
+- Semantic retrieval with relevance scoring
+- Evidence-grounded context for the Agentic Layer (INITIAL-10)
+
+---
+
+## Tech Stack
+
+| Component | Technology | Purpose |
+|-----------|------------|---------|
+| Vector Store | PostgreSQL 16 + [pgvector](https://github.com/pgvector/pgvector) | Similarity search |
+| Embeddings | [OpenAI text-embedding-3-small](https://platform.openai.com/docs/models/text-embedding-3-small) | 1536-dim vectors (configurable) |
+| Chunking | Markdown-aware, OpenAPI endpoint-aware | Semantic boundaries |
+| Index Type | HNSW (default) or IVFFlat | Approximate nearest neighbor |
+
+---
+
+## FEATURE
+
+### Database Layer
+- `document_chunk` table with vector column (`embedding VECTOR(1536)`)
+- HNSW index for cosine similarity search
+- Unique constraint `(source_id, chunk_index)` for idempotent re-indexing
+- Metadata JSONB for source type, heading hierarchy, timestamps
+
+### Ingestion Pipeline
+- **Markdown Chunker**: Heading-aware splitting (configurable size/overlap)
+- **OpenAPI Chunker**: Endpoint-based granularity (one chunk per operation)
+- **Embedding Service**: Async batch processing with rate limiting
+- **Source Registry**: Track indexed sources with version/hash for change detection
+
+### Retrieval Engine
+- Top-k semantic search with configurable similarity threshold
+- Metadata filtering (source_type, date_range, tags)
+- Relevance score normalization (0.0 - 1.0)
+- Context window assembly for downstream consumption
+
+---
+
+## ENDPOINTS
+
+### POST /rag/index
+Index documents from various sources.
+
+**Request**:
+```json
+{
+  "source_type": "markdown",
+  "source_path": "docs/ARCHITECTURE.md",
+  "metadata": {
+    "category": "documentation",
+    "version": "1.0.0"
+  }
+}
+```
+
+**Response**:
+```json
+{
+  "source_id": "src_abc123",
+  "chunks_created": 15,
+  "tokens_processed": 4250,
+  "duration_ms": 1234.56,
+  "status": "indexed"
+}
+```
+
+### POST /rag/retrieve
+Semantic search across indexed documents.
+
+**Request**:
+```json
+{
+  "query": "How does backtesting prevent data leakage?",
+  "top_k": 5,
+  "similarity_threshold": 0.7,
+  "filters": {
+    "source_type": ["markdown", "openapi"],
+    "category": "documentation"
+  }
+}
+```
+
+**Response**:
+```json
+{
+  "results": [
+    {
+      "chunk_id": "chunk_xyz789",
+      "source_id": "src_abc123",
+      "source_path": "docs/PHASE/5-BACKTESTING.md",
+      "content": "TimeSeriesSplitter uses time-based splits (expanding/sliding window) to prevent leakage...",
+      "relevance_score": 0.92,
+      "metadata": {
+        "heading": "Leakage Prevention",
+        "section_path": ["Phase 5: Backtesting", "Implementation", "Leakage Prevention"]
+      }
+    }
+  ],
+  "query_embedding_time_ms": 45.2,
+  "search_time_ms": 12.8,
+  "total_chunks_searched": 1250
+}
+```
+
+### GET /rag/sources
+List all indexed sources with metadata.
+
+**Response**:
+```json
+{
+  "sources": [
+    {
+      "source_id": "src_abc123",
+      "source_type": "markdown",
+      "source_path": "docs/ARCHITECTURE.md",
+      "chunk_count": 15,
+      "indexed_at": "2026-02-01T10:30:00Z",
+      "content_hash": "sha256:abc123..."
+    }
+  ],
+  "total_sources": 12,
+  "total_chunks": 450
+}
+```
+
+### DELETE /rag/sources/{source_id}
+Remove an indexed source and all its chunks.
+
+**Response**:
+```json
+{
+  "source_id": "src_abc123",
+  "chunks_deleted": 15,
+  "status": "deleted"
+}
+```
+
+---
+
+## DATABASE SCHEMA
+
+```sql
+-- Enable pgvector extension
+CREATE EXTENSION IF NOT EXISTS vector;
+
+-- Document source registry
+CREATE TABLE document_source (
+    source_id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
+    source_type VARCHAR(50) NOT NULL,  -- 'markdown', 'openapi', 'run_report'
+    source_path TEXT NOT NULL,
+    content_hash VARCHAR(64) NOT NULL,  -- SHA-256 for change detection
+    metadata JSONB DEFAULT '{}',
+    indexed_at TIMESTAMPTZ DEFAULT NOW(),
+    updated_at TIMESTAMPTZ DEFAULT NOW(),
+    UNIQUE (source_type, source_path)
+);
+
+-- Document chunks with embeddings
+CREATE TABLE document_chunk (
+    chunk_id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
+    source_id UUID NOT NULL REFERENCES document_source(source_id) ON DELETE CASCADE,
+    chunk_index INTEGER NOT NULL,
+    content TEXT NOT NULL,
+    embedding VECTOR(1536),  -- Configurable dimension
+    token_count INTEGER NOT NULL,
+    metadata JSONB DEFAULT '{}',  -- heading, section_path, etc.
+    created_at TIMESTAMPTZ DEFAULT NOW(),
+    UNIQUE (source_id, chunk_index)
+);
+
+-- HNSW index for cosine similarity
+CREATE INDEX idx_chunk_embedding_hnsw
+ON document_chunk
+USING hnsw (embedding vector_cosine_ops)
+WITH (m = 16, ef_construction = 64);
+
+-- Metadata filtering index
+CREATE INDEX idx_chunk_metadata ON document_chunk USING gin (metadata);
+```
+
+---
+
+## EXAMPLES
+
+### examples/rag/index_docs.py
+```python
+"""Index documentation into RAG knowledge base."""
+import asyncio
+from pathlib import Path
+import httpx
+
+async def index_markdown_docs():
+    """Index all markdown docs from docs/ directory."""
+    async with httpx.AsyncClient(base_url="http://localhost:8123") as client:
+        docs_dir = Path("docs")
+        for md_file in docs_dir.rglob("*.md"):
+            response = await client.post(
+                "/rag/index",
+                json={
+                    "source_type": "markdown",
+                    "source_path": str(md_file),
+                    "metadata": {"category": "documentation"}
+                }
+            )
+            result = response.json()
+            print(f"Indexed {md_file}: {result['chunks_created']} chunks")
+
+if __name__ == "__main__":
+    asyncio.run(index_markdown_docs())
+```
+
+### examples/rag/query.http
+```http
+### Semantic search query
+POST http://localhost:8123/rag/retrieve
+Content-Type: application/json
+
+{
+  "query": "How do I configure backtesting splits?",
+  "top_k": 5,
+  "similarity_threshold": 0.7
+}
+
+### List all indexed sources
+GET http://localhost:8123/rag/sources
+
+### Re-index after documentation update
+POST http://localhost:8123/rag/index
+Content-Type: application/json
+
+{
+  "source_type": "markdown",
+  "source_path": "README.md",
+  "metadata": {"category": "overview"}
+}
+```
+
+---
+
+## CONFIGURATION (Settings)
+
+```python
+# app/core/config.py additions
+
+# RAG Embedding Configuration
+rag_embedding_model: str = "text-embedding-3-small"
+rag_embedding_dimension: int = 1536
+rag_embedding_batch_size: int = 100
+
+# RAG Chunking Configuration
+rag_chunk_size: int = 512  # tokens
+rag_chunk_overlap: int = 50  # tokens
+rag_min_chunk_size: int = 100  # minimum tokens per chunk
+
+# RAG Retrieval Configuration
+rag_top_k: int = 5
+rag_similarity_threshold: float = 0.7
+rag_max_context_tokens: int = 4000
+
+# RAG Index Configuration
+rag_index_type: Literal["hnsw", "ivfflat"] = "hnsw"
+rag_hnsw_m: int = 16
+rag_hnsw_ef_construction: int = 64
+```
+
+---
+
+## SUCCESS CRITERIA
+
+- [ ] pgvector extension enabled and tested in docker-compose
+- [ ] Markdown chunker respects heading boundaries
+- [ ] OpenAPI chunker produces one chunk per endpoint
+- [ ] Embeddings generated via async batch processing
+- [ ] Retrieval returns top-k with normalized relevance scores
+- [ ] Re-indexing is idempotent (content_hash change detection)
+- [ ] Unique constraint prevents duplicate chunks
+- [ ] HNSW index provides sub-100ms search latency
+- [ ] Integration tests with real embeddings (mocked in unit tests)
+- [ ] Structured logging for all index/retrieve operations
+
+---
+
+## CROSS-MODULE INTEGRATION
+
+| Direction | Module | Integration Point |
+|-----------|--------|-------------------|
+| **→ Agentic Layer** | INITIAL-10 | Provides `retrieve_context` tool for RAG Assistant agent |
+| **→ Dashboard** | INITIAL-11 | Sources list displayed in Admin panel |
+| **← Registry** | Phase 6 | Run reports indexed as knowledge sources |
+| **← Jobs** | Phase 7 | Indexing operations tracked as jobs |
+
+---
+
+## DOCUMENTATION LINKS
+
+- [pgvector GitHub](https://github.com/pgvector/pgvector)
+- [pgvector Tutorial (DataCamp)](https://www.datacamp.com/tutorial/pgvector-tutorial)
+- [OpenAI Embeddings Guide](https://platform.openai.com/docs/guides/embeddings)
+- [OpenAI API Reference](https://platform.openai.com/docs/api-reference/embeddings)
+- [Neon pgvector Docs](https://neon.com/docs/extensions/pgvector)
+- [HNSW Algorithm Paper](https://arxiv.org/abs/1603.09320)
+
+---
+
+## OTHER CONSIDERATIONS
+
+- **Evidence-Grounded**: Retrieval returns raw chunks only; no answer generation in this layer
+- **Idempotency**: Content hash comparison prevents unnecessary re-embedding
+- **Rate Limiting**: Respect OpenAI API rate limits during batch embedding
+- **Cost Tracking**: Log token counts for embedding cost monitoring
+- **Dimension Flexibility**: Support for other embedding models (e.g., 3072-dim text-embedding-3-large)
diff --git a/PRPs/PRP-10-agentic-layer.md b/PRPs/PRP-10-agentic-layer.md
new file mode 100644
index 00000000..3c1a97ea
--- /dev/null
+++ b/PRPs/PRP-10-agentic-layer.md
@@ -0,0 +1,920 @@
+# PRP-10: Agentic Layer ("The Brain")
+
+**Feature**: INITIAL-10.md — Agentic Layer
+**Status**: Ready for Implementation
+**Confidence Score**: 7.5/10
+
+---
+
+## Goal
+
+Build the Agentic Layer using PydanticAI providing:
+1. **Experiment Orchestrator Agent** - Autonomous model experimentation workflow
+2. **RAG Assistant Agent** - Evidence-grounded Q&A with citations
+3. **Human-in-the-Loop Approval** - Blocking sensitive actions until approved
+4. **WebSocket Streaming** - Real-time token delivery to clients
+5. **Session Management** - Persistent state across multi-turn conversations
+
+This is the "Brain" layer that orchestrates tools from INITIAL-9 (RAG), Phase 5 (Backtesting), and Phase 6 (Registry).
+
+---
+
+## Why
+
+- **Autonomous Experimentation**: Agent runs backtests, compares results, deploys winners
+- **Evidence-Grounded Answers**: RAG-powered Q&A prevents hallucination
+- **Safety Controls**: Human approval for deployment actions
+- **Real-Time UX**: Streaming responses for responsive chat interface
+- **Portfolio Value**: Demonstrates modern AI agent architecture
+
+---
+
+## What
+
+### Endpoints
+
+| Method | Path | Description |
+| ------ | ---- | ----------- |
+| `POST` | `/agents/experiment/run` | Execute experiment workflow |
+| `POST` | `/agents/experiment/approve` | Approve pending action |
+| `POST` | `/agents/rag/query` | Query with answer generation |
+| `GET` | `/agents/status/{session_id}` | Check session status |
+| `WS` | `/agents/stream` | WebSocket for streaming |
+
+### Success Criteria
+
+- [ ] Agents produce schema-enforced structured outputs
+- [ ] Tool calls logged with correlation IDs and timing
+- [ ] Human-in-the-loop blocks sensitive actions
+- [ ] WebSocket streaming delivers tokens in real-time
+- [ ] Session state persists across requests
+- [ ] Graceful LLM API failure handling with retries
+- [ ] 60+ unit tests with mocked LLM responses
+- [ ] 15+ integration tests (rate-limited real LLM calls)
+- [ ] All validation gates green
+
+---
+
+## All Needed Context
+
+### Documentation & References
+
+```yaml
+# CRITICAL - PydanticAI Documentation
+- url: https://ai.pydantic.dev/
+  why: "Official PydanticAI docs - main reference"
+
+- url: https://ai.pydantic.dev/agents/
+  why: "Agent constructor, result_type, system_prompt, run/run_stream methods"
+
+- url: https://ai.pydantic.dev/tools/
+  why: "@agent.tool decorator, RunContext, deps_type, tool parameters"
+
+- url: https://ai.pydantic.dev/output/
+  why: "AgentRunResult, StreamedRunResult, token usage tracking"
+
+- url: https://ai.pydantic.dev/examples/chat-app/
+  why: "FastAPI + streaming integration example"
+
+- url: https://github.com/pydantic/pydantic-ai
+  why: "Source code for edge cases"
+
+# Anthropic API (fallback reference)
+- url: https://docs.anthropic.com/en/api
+  why: "Claude model IDs, rate limits, error codes"
+
+# Codebase Patterns (CRITICAL)
+- file: app/features/registry/service.py
+  why: "Service pattern - __init__, get_settings(), structured logging"
+
+- file: app/features/jobs/service.py
+  why: "Job execution pattern - state machine, error handling, audit trail"
+
+- file: app/features/backtesting/service.py
+  why: "BacktestingService - the agent will call this via tools"
+
+- file: app/features/registry/routes.py
+  why: "Route patterns - APIRouter, response_model, HTTPException"
+
+- file: app/features/registry/tests/conftest.py
+  why: "Test fixtures - db_session, client, async patterns"
+
+# RAG Integration (INITIAL-9 dependency)
+- file: PRPs/PRP-9-rag-knowledge-base.md
+  why: "RAG layer the agent will consume via retrieve_context tool"
+```
+
+### Current Codebase Tree (Relevant Parts)
+
+```text
+app/
+├── core/
+│   ├── config.py          # Settings - ADD agent settings
+│   ├── database.py         # get_db dependency
+│   ├── logging.py          # get_logger
+│   └── exceptions.py       # ForecastLabError base
+├── features/
+│   ├── backtesting/        # Agent tool: run_backtest
+│   ├── registry/           # Agent tools: list_runs, compare_runs, create_alias
+│   ├── forecasting/        # Agent tool: list_models
+│   ├── rag/                # INITIAL-9 - Agent tool: retrieve_context
+│   └── agents/             # NEW: Create this vertical slice
+├── main.py                  # Include agents router + WebSocket
+```
+
+### Desired Codebase Tree (Files to Create)
+
+```text
+app/features/agents/
+├── __init__.py              # Export router
+├── models.py                # AgentSession ORM model
+├── schemas.py               # Request/Response Pydantic schemas
+├── routes.py                # REST endpoints
+├── websocket.py             # WebSocket endpoint handler
+├── service.py               # AgentService orchestration
+├── agents/
+│   ├── __init__.py
+│   ├── base.py              # Base agent configuration
+│   ├── experiment.py        # Experiment Orchestrator Agent
+│   └── rag_assistant.py     # RAG Assistant Agent
+├── tools/
+│   ├── __init__.py
+│   ├── registry_tools.py    # list_runs, compare_runs, create_alias
+│   ├── backtesting_tools.py # run_backtest
+│   ├── forecasting_tools.py # list_models
+│   └── rag_tools.py         # retrieve_context, format_citation
+├── deps.py                  # AgentDeps dataclass for dependency injection
+├── tests/
+│   ├── __init__.py
+│   ├── conftest.py          # Fixtures with mocked LLM
+│   ├── test_schemas.py
+│   ├── test_tools.py
+│   ├── test_agents.py
+│   ├── test_service.py
+│   └── test_routes.py
+
+alembic/versions/
+└── xxxx_create_agent_sessions_table.py
+
+examples/agents/
+├── experiment_demo.py
+├── rag_query.http
+└── websocket_client.py
+```
+
+### Known Gotchas & Library Quirks
+
+```python
+# CRITICAL: PydanticAI model identifier format
+# Use "anthropic:claude-sonnet-4-20250514" NOT "claude-sonnet-4-20250514"
+agent = Agent(model="anthropic:claude-sonnet-4-20250514")
+
+# CRITICAL: deps_type must match RunContext generic parameter
+agent = Agent(
+    model="anthropic:claude-sonnet-4-20250514",
+    deps_type=AgentDeps,  # Your dependency dataclass
+)
+
+@agent.tool
+def my_tool(ctx: RunContext[AgentDeps], param: str) -> str:
+    # ctx.deps is typed as AgentDeps
+    db = ctx.deps.db
+    ...
+
+# CRITICAL: Use @agent.tool for context access, @agent.tool_plain without
+@agent.tool_plain
+def roll_dice() -> str:
+    """No RunContext needed here."""
+    return str(random.randint(1, 6))
+
+# CRITICAL: output_type (not result_type) for structured outputs
+agent = Agent(
+    model="...",
+    output_type=ExperimentReport,  # NOT result_type
+)
+
+# CRITICAL: run() is async, run_sync() is sync wrapper
+result = await agent.run(prompt, deps=deps)  # Async
+result = agent.run_sync(prompt, deps=deps)   # Sync
+
+# CRITICAL: Streaming requires async context manager
+async with agent.run_stream(prompt, deps=deps) as result:
+    async for text in result.stream_text():
+        yield text
+
+# CRITICAL: Access token usage after run completes
+print(result.usage())  # RunUsage(input_tokens=X, output_tokens=Y)
+
+# CRITICAL: Message history for multi-turn
+result2 = await agent.run(
+    "follow-up question",
+    deps=deps,
+    message_history=result.messages,  # Previous messages
+)
+
+# CRITICAL: Tool docstrings become schema descriptions
+@agent.tool
+async def run_backtest(
+    ctx: RunContext[AgentDeps],
+    model_type: str,
+    config: dict[str, Any],
+) -> BacktestResult:
+    """Run a backtest for a model configuration.
+
+    Use this to evaluate model performance with time-series CV.
+    Returns per-fold and aggregated metrics (MAE, sMAPE, WAPE).
+
+    Args:
+        model_type: Type of model (naive, seasonal_naive, moving_average)
+        config: Model-specific configuration
+    """
+    ...
+
+# CRITICAL: FastAPI WebSocket pattern
+from fastapi import WebSocket, WebSocketDisconnect
+
+@router.websocket("/agents/stream")
+async def websocket_stream(websocket: WebSocket):
+    await websocket.accept()
+    try:
+        while True:
+            data = await websocket.receive_json()
+            # Process and stream response
+            async for chunk in stream_agent_response(data):
+                await websocket.send_json(chunk)
+    except WebSocketDisconnect:
+        pass
+
+# CRITICAL: PydanticAI retry mechanism
+from pydantic_ai import ModelRetry
+
+@agent.tool
+async def risky_tool(ctx: RunContext[AgentDeps]) -> str:
+    try:
+        return await external_api()
+    except APIError as e:
+        raise ModelRetry(f"API failed: {e}. Please try again.") from e
+```
+
+---
+
+## Implementation Blueprint
+
+### Data Models
+
+#### ORM Model (models.py)
+
+```python
+"""Agent session persistence."""
+from __future__ import annotations
+from datetime import datetime
+from enum import Enum
+from typing import Any
+from sqlalchemy import DateTime, Integer, String, Text
+from sqlalchemy.dialects.postgresql import JSONB
+from sqlalchemy.orm import Mapped, mapped_column
+from app.core.database import Base
+from app.shared.models import TimestampMixin
+
+
+class SessionStatus(str, Enum):
+    """Agent session states."""
+    ACTIVE = "active"
+    AWAITING_APPROVAL = "awaiting_approval"
+    COMPLETED = "completed"
+    EXPIRED = "expired"
+    FAILED = "failed"
+
+
+class AgentType(str, Enum):
+    """Available agent types."""
+    EXPERIMENT_ORCHESTRATOR = "experiment_orchestrator"
+    RAG_ASSISTANT = "rag_assistant"
+
+
+class AgentSession(TimestampMixin, Base):
+    """Persistent agent session for multi-turn conversations."""
+    __tablename__ = "agent_session"
+
+    id: Mapped[int] = mapped_column(Integer, primary_key=True)
+    session_id: Mapped[str] = mapped_column(String(32), unique=True, index=True)
+    agent_type: Mapped[str] = mapped_column(String(50), index=True)
+    status: Mapped[str] = mapped_column(String(30), default=SessionStatus.ACTIVE.value)
+
+    # Message history for multi-turn
+    message_history: Mapped[list[dict[str, Any]]] = mapped_column(JSONB, default=list)
+
+    # Pending approval
+    pending_action: Mapped[dict[str, Any] | None] = mapped_column(JSONB, nullable=True)
+
+    # Usage tracking
+    total_tokens_used: Mapped[int] = mapped_column(Integer, default=0)
+    tool_calls_count: Mapped[int] = mapped_column(Integer, default=0)
+
+    # Timing
+    last_activity: Mapped[datetime] = mapped_column(DateTime(timezone=True))
+    expires_at: Mapped[datetime] = mapped_column(DateTime(timezone=True))
+```
+
+#### Dependencies (deps.py)
+
+```python
+"""Agent dependencies for tool access."""
+from dataclasses import dataclass
+from sqlalchemy.ext.asyncio import AsyncSession
+
+
+@dataclass
+class AgentDeps:
+    """Dependencies passed to agent tools via RunContext."""
+    db: AsyncSession
+    session_id: str
+    request_id: str | None = None
+```
+
+#### Pydantic Schemas (schemas.py)
+
+```python
+"""Agent API schemas."""
+from datetime import datetime
+from typing import Any, Literal
+from pydantic import BaseModel, ConfigDict, Field
+
+
+# === Experiment Agent ===
+
+class ExperimentConstraints(BaseModel):
+    """Constraints for experiment search."""
+    model_config = ConfigDict(extra="forbid")
+
+    model_types: list[str] = Field(default_factory=lambda: ["naive", "seasonal_naive"])
+    min_train_size: int = Field(default=60, ge=30)
+    max_splits: int = Field(default=5, ge=1, le=20)
+
+
+class ExperimentRequest(BaseModel):
+    """Request to run experiment workflow."""
+    model_config = ConfigDict(extra="forbid")
+
+    objective: str = Field(..., min_length=10, max_length=500)
+    store_id: int = Field(..., ge=1)
+    product_id: int = Field(..., ge=1)
+    constraints: ExperimentConstraints = Field(default_factory=ExperimentConstraints)
+    auto_deploy: bool = False
+    session_id: str | None = None
+
+
+class RunSummary(BaseModel):
+    """Summary of a model run."""
+    run_id: str
+    model_type: str
+    config: dict[str, Any]
+    metrics: dict[str, float]
+
+
+class BaselineComparison(BaseModel):
+    """Comparison against baseline models."""
+    vs_naive: dict[str, float] | None = None
+    vs_seasonal_naive: dict[str, float] | None = None
+
+
+class ExperimentReport(BaseModel):
+    """Structured output from Experiment Agent."""
+    objective: str
+    methodology: str
+    experiments_run: int
+    best_run: RunSummary | None
+    baseline_comparison: BaselineComparison | None
+    recommendation: str
+    approval_required: bool
+    pending_action: str | None = None
+
+
+class ToolCallSummary(BaseModel):
+    """Summary of a tool call."""
+    tool: str
+    args: dict[str, Any]
+    result_summary: str
+    duration_ms: float
+
+
+class ExperimentResponse(BaseModel):
+    """Response from experiment workflow."""
+    session_id: str
+    status: Literal["completed", "awaiting_approval", "failed"]
+    report: ExperimentReport | None = None
+    tool_calls: list[ToolCallSummary] = Field(default_factory=list)
+    tokens_used: int = 0
+    duration_ms: float = 0
+
+
+# === Approval ===
+
+class ApprovalRequest(BaseModel):
+    """Request to approve/reject pending action."""
+    model_config = ConfigDict(extra="forbid")
+
+    session_id: str
+    action: str
+    approved: bool
+    comment: str | None = Field(None, max_length=500)
+
+
+class ApprovalResponse(BaseModel):
+    """Response from approval action."""
+    session_id: str
+    action: str
+    status: Literal["executed", "rejected"]
+    result: dict[str, Any] | None = None
+
+
+# === RAG Agent ===
+
+class RAGQueryRequest(BaseModel):
+    """Request for RAG-powered Q&A."""
+    model_config = ConfigDict(extra="forbid")
+
+    query: str = Field(..., min_length=5, max_length=2000)
+    session_id: str | None = None
+    include_sources: bool = True
+
+
+class Citation(BaseModel):
+    """Citation from RAG retrieval."""
+    source_type: str
+    source_path: str
+    chunk_id: str
+    snippet: str
+    relevance_score: float
+
+
+class RAGQueryResponse(BaseModel):
+    """Response from RAG query."""
+    session_id: str
+    answer: str
+    confidence: float = Field(..., ge=0.0, le=1.0)
+    citations: list[Citation] = Field(default_factory=list)
+    insufficient_context: bool = False
+    tokens_used: int = 0
+    duration_ms: float = 0
+
+
+# === Session Status ===
+
+class SessionStatusResponse(BaseModel):
+    """Session status details."""
+    session_id: str
+    agent_type: str
+    status: str
+    created_at: datetime
+    last_activity: datetime
+    pending_action: dict[str, Any] | None = None
+    tool_calls_count: int
+    tokens_used: int
+
+
+# === WebSocket Messages ===
+
+class WSMessage(BaseModel):
+    """WebSocket message from client."""
+    type: Literal["query", "approve", "cancel"]
+    agent: Literal["rag_assistant", "experiment_orchestrator"]
+    payload: dict[str, Any]
+
+
+class WSEvent(BaseModel):
+    """WebSocket event to client."""
+    type: Literal["token", "tool_call", "complete", "error"]
+    content: str | None = None
+    tool: str | None = None
+    status: str | None = None
+    summary: str | None = None
+    session_id: str | None = None
+    tokens_used: int | None = None
+```
+
+---
+
+## Task List
+
+### Task 1: Add Dependencies to pyproject.toml
+
+```yaml
+MODIFY: pyproject.toml
+ADD to dependencies:
+  - "pydantic-ai>=0.1.0"    # PydanticAI agent framework
+  - "anthropic>=0.40.0"      # Anthropic SDK for Claude
+  - "websockets>=13.0"       # WebSocket support (already in uvicorn[standard])
+```
+
+### Task 2: Add Agent Settings to config.py
+
+```yaml
+MODIFY: app/core/config.py
+ADD after RAG settings:
+
+  # Agent LLM Configuration
+  agent_default_model: str = "anthropic:claude-sonnet-4-20250514"
+  agent_fallback_model: str = "openai:gpt-4o"
+  agent_temperature: float = 0.1
+  agent_max_tokens: int = 4096
+  anthropic_api_key: str = ""  # Required
+
+  # Agent Execution Configuration
+  agent_max_tool_calls: int = 10
+  agent_timeout_seconds: int = 120
+  agent_retry_attempts: int = 3
+  agent_retry_delay_seconds: float = 1.0
+
+  # Human-in-the-Loop Configuration
+  agent_require_approval: list[str] = ["create_alias", "archive_run"]
+  agent_approval_timeout_minutes: int = 60
+
+  # Session Configuration
+  agent_session_ttl_minutes: int = 120
+  agent_max_sessions_per_user: int = 5
+
+  # Streaming Configuration
+  agent_enable_streaming: bool = True
+```
+
+### Task 3: Create Alembic Migration
+
+```yaml
+CREATE: alembic/versions/xxxx_create_agent_sessions_table.py
+PATTERN: Follow existing migration patterns
+
+Key columns:
+  - session_id (String 32, unique, indexed)
+  - agent_type (String 50, indexed)
+  - status (String 30)
+  - message_history (JSONB)
+  - pending_action (JSONB, nullable)
+  - total_tokens_used (Integer)
+  - tool_calls_count (Integer)
+  - last_activity (DateTime TZ)
+  - expires_at (DateTime TZ)
+  - created_at, updated_at (TimestampMixin)
+```
+
+### Task 4: Create ORM Models
+
+```yaml
+CREATE: app/features/agents/models.py
+MIRROR: app/features/registry/models.py pattern
+INCLUDE:
+  - SessionStatus enum
+  - AgentType enum
+  - AgentSession model with JSONB columns
+```
+
+### Task 5: Create Dependencies Dataclass
+
+```yaml
+CREATE: app/features/agents/deps.py
+CONTENT:
+  - AgentDeps dataclass
+  - Fields: db (AsyncSession), session_id, request_id
+```
+
+### Task 6: Create Pydantic Schemas
+
+```yaml
+CREATE: app/features/agents/schemas.py
+MIRROR: app/features/registry/schemas.py pattern
+INCLUDE:
+  - ExperimentRequest, ExperimentResponse, ExperimentReport
+  - ApprovalRequest, ApprovalResponse
+  - RAGQueryRequest, RAGQueryResponse, Citation
+  - SessionStatusResponse
+  - WSMessage, WSEvent
+```
+
+### Task 7: Create Tool Modules
+
+```yaml
+CREATE: app/features/agents/tools/registry_tools.py
+TOOLS:
+  - list_runs(ctx, filters) -> list[RunSummary]
+  - compare_runs(ctx, run_id_a, run_id_b) -> CompareResult
+  - create_alias(ctx, alias_name, run_id) -> AliasResult
+  - archive_run(ctx, run_id) -> ArchiveResult
+
+CREATE: app/features/agents/tools/backtesting_tools.py
+TOOLS:
+  - run_backtest(ctx, model_type, config, store_id, product_id, n_splits) -> BacktestResult
+
+CREATE: app/features/agents/tools/forecasting_tools.py
+TOOLS:
+  - list_models(ctx) -> list[ModelInfo]
+
+CREATE: app/features/agents/tools/rag_tools.py
+TOOLS:
+  - retrieve_context(ctx, query, top_k) -> list[RetrievedChunk]
+  - format_citation(ctx, chunk) -> Citation
+
+CRITICAL for all tools:
+  - Use @agent.tool decorator (not @agent.tool_plain) for db access
+  - First param is RunContext[AgentDeps]
+  - Detailed docstrings for LLM schema
+  - Structured logging with timing
+```
+
+### Task 8: Create Agent Definitions
+
+```yaml
+CREATE: app/features/agents/agents/base.py
+CONTENT:
+  - get_agent_settings() helper
+  - Common model configuration
+
+CREATE: app/features/agents/agents/experiment.py
+CONTENT:
+  - ExperimentReport output schema
+  - experiment_agent = Agent(...)
+  - System prompt for experiment orchestration
+  - Tools: list_models, run_backtest, compare_runs, create_alias
+
+CREATE: app/features/agents/agents/rag_assistant.py
+CONTENT:
+  - RAGResponse output schema
+  - rag_agent = Agent(...)
+  - System prompt for evidence-grounded answers
+  - Tools: retrieve_context, format_citation
+```
+
+### Task 9: Create Agent Service
+
+```yaml
+CREATE: app/features/agents/service.py
+MIRROR: app/features/jobs/service.py pattern
+
+Class AgentService:
+  async def run_experiment(self, db, request) -> ExperimentResponse:
+    - Create/resume session
+    - Build AgentDeps
+    - Run experiment_agent with tools
+    - Capture tool calls and timing
+    - Handle approval_required check
+    - Update session state
+    - Return structured response
+
+  async def run_rag_query(self, db, request) -> RAGQueryResponse:
+    - Create/resume session
+    - Run rag_agent with tools
+    - Extract citations from tool results
+    - Return structured response
+
+  async def approve_action(self, db, request) -> ApprovalResponse:
+    - Load session
+    - Validate pending_action matches
+    - Execute action if approved
+    - Update session status
+    - Return result
+
+  async def get_session_status(self, db, session_id) -> SessionStatusResponse:
+    - Load session
+    - Return status details
+
+  async def stream_response(self, db, message) -> AsyncGenerator[WSEvent]:
+    - Route to appropriate agent
+    - Use run_stream for token-by-token delivery
+    - Yield WSEvent for each chunk
+```
+
+### Task 10: Create REST Routes
+
+```yaml
+CREATE: app/features/agents/routes.py
+MIRROR: app/features/registry/routes.py pattern
+
+Routes:
+  POST /agents/experiment/run -> ExperimentResponse
+  POST /agents/experiment/approve -> ApprovalResponse
+  POST /agents/rag/query -> RAGQueryResponse
+  GET /agents/status/{session_id} -> SessionStatusResponse
+
+CRITICAL:
+  - Structured logging with agents.* prefix
+  - Handle LLM API errors gracefully
+  - Timeout handling
+```
+
+### Task 11: Create WebSocket Handler
+
+```yaml
+CREATE: app/features/agents/websocket.py
+PATTERN: FastAPI WebSocket with async iteration
+
+Key functions:
+  websocket_stream(websocket: WebSocket):
+    - Accept connection
+    - Receive JSON messages
+    - Parse WSMessage
+    - Call service.stream_response()
+    - Send WSEvent for each chunk
+    - Handle disconnect gracefully
+
+CRITICAL:
+  - Use asyncio.wait_for for timeout
+  - Catch WebSocketDisconnect
+  - Log all events with correlation ID
+```
+
+### Task 12: Register Router in main.py
+
+```yaml
+MODIFY: app/main.py
+ADD import: from app.features.agents.routes import router as agents_router
+ADD import: from app.features.agents.websocket import websocket_stream
+ADD router: app.include_router(agents_router)
+ADD websocket: app.add_api_websocket_route("/agents/stream", websocket_stream)
+```
+
+### Task 13: Create Test Fixtures
+
+```yaml
+CREATE: app/features/agents/tests/conftest.py
+FIXTURES:
+  - db_session: Async session with cleanup
+  - client: AsyncClient with db override
+  - mock_anthropic: Mock Anthropic API responses
+  - sample_experiment_request: Test request
+  - sample_rag_request: Test request
+```
+
+### Task 14: Create Unit Tests
+
+```yaml
+CREATE: app/features/agents/tests/test_schemas.py
+  - Test all request/response validation
+
+CREATE: app/features/agents/tests/test_tools.py
+  - Test each tool function with mocked deps
+  - Test tool return types
+  - Test error handling
+
+CREATE: app/features/agents/tests/test_agents.py
+  - Test agent with mocked LLM
+  - Test structured output parsing
+  - Test tool call ordering
+```
+
+### Task 15: Create Integration Tests
+
+```yaml
+CREATE: app/features/agents/tests/test_routes.py
+@pytest.mark.integration:
+  - test_experiment_run_creates_session
+  - test_experiment_approval_workflow
+  - test_rag_query_returns_citations
+  - test_session_status_returns_details
+  - test_websocket_streaming (with TestClient)
+```
+
+### Task 16: Create Examples
+
+```yaml
+CREATE: examples/agents/experiment_demo.py
+  - Full experiment workflow demo
+
+CREATE: examples/agents/rag_query.http
+  - HTTP client examples
+
+CREATE: examples/agents/websocket_client.py
+  - Python WebSocket client example
+```
+
+### Task 17: Update .env.example
+
+```yaml
+MODIFY: .env.example
+ADD:
+  # Agent Configuration
+  ANTHROPIC_API_KEY=sk-ant-...
+  AGENT_DEFAULT_MODEL=anthropic:claude-sonnet-4-20250514
+  AGENT_MAX_TOOL_CALLS=10
+  AGENT_TIMEOUT_SECONDS=120
+```
+
+---
+
+## Validation Loop
+
+### Level 1: Syntax & Style
+
+```bash
+# Run FIRST
+uv run ruff check app/features/agents/ --fix
+uv run ruff format app/features/agents/
+
+# Expected: No errors
+```
+
+### Level 2: Type Checking
+
+```bash
+# MUST be green
+uv run mypy app/features/agents/
+uv run pyright app/features/agents/
+
+# Expected: 0 errors
+```
+
+### Level 3: Unit Tests
+
+```bash
+# No LLM calls required (mocked)
+uv run pytest app/features/agents/tests/ -v -m "not integration"
+
+# Expected: All pass
+```
+
+### Level 4: Integration Tests
+
+```bash
+# Requires PostgreSQL + API keys
+docker-compose up -d
+uv run alembic upgrade head
+uv run pytest app/features/agents/tests/ -v -m integration
+
+# Expected: All pass (rate-limited)
+```
+
+### Level 5: Manual Smoke Test
+
+```bash
+# Start API
+uv run uvicorn app.main:app --reload --port 8123
+
+# RAG Query
+curl -X POST http://localhost:8123/agents/rag/query \
+  -H "Content-Type: application/json" \
+  -d '{"query": "How does backtesting prevent data leakage?"}'
+
+# Expected: {"session_id": "...", "answer": "...", "citations": [...]}
+
+# Experiment (requires indexed RAG data)
+curl -X POST http://localhost:8123/agents/experiment/run \
+  -H "Content-Type: application/json" \
+  -d '{
+    "objective": "Find best model for store 1, product 1",
+    "store_id": 1,
+    "product_id": 1
+  }'
+
+# Expected: {"session_id": "...", "status": "completed", "report": {...}}
+
+# WebSocket test
+python examples/agents/websocket_client.py
+```
+
+---
+
+## Final Validation Checklist
+
+- [ ] All tests pass: `uv run pytest app/features/agents/tests/ -v`
+- [ ] No linting errors: `uv run ruff check app/features/agents/`
+- [ ] No type errors: `uv run mypy && pyright`
+- [ ] Migration applies: `uv run alembic upgrade head`
+- [ ] Manual smoke tests pass
+- [ ] Structured logging with `agents.*` prefix
+- [ ] Tool calls logged with timing
+- [ ] Session state persists across requests
+- [ ] Approval workflow blocks sensitive actions
+- [ ] WebSocket streaming works
+
+---
+
+## Anti-Patterns to Avoid
+
+- ❌ Don't use `result_type` - use `output_type` in PydanticAI
+- ❌ Don't forget `deps_type` when using `RunContext[AgentDeps]`
+- ❌ Don't use `@agent.tool_plain` when db access needed
+- ❌ Don't forget to handle `WebSocketDisconnect`
+- ❌ Don't block on LLM calls without timeout
+- ❌ Don't store raw message_history as strings - use JSONB
+- ❌ Don't skip structured logging for tool calls
+- ❌ Don't hardcode model names - use settings
+
+---
+
+## Confidence Score: 7.5/10
+
+**Strengths:**
+- PydanticAI has excellent documentation
+- Clear FastAPI integration patterns
+- Existing service patterns to follow
+- Tool integrations with existing modules
+
+**Risks:**
+- PydanticAI is relatively new (versioning may change)
+- WebSocket streaming with tools is complex
+- LLM rate limits may affect tests
+- Message history serialization edge cases
+
+**Mitigations:**
+- Pin PydanticAI version in pyproject.toml
+- Comprehensive mocking for unit tests
+- Rate-limited integration tests
+- JSONB for flexible message storage
diff --git a/PRPs/PRP-11-forecastlab-dashboard.md b/PRPs/PRP-11-forecastlab-dashboard.md
new file mode 100644
index 00000000..fd98f21f
--- /dev/null
+++ b/PRPs/PRP-11-forecastlab-dashboard.md
@@ -0,0 +1,2147 @@
+# PRP-11: ForecastLab Dashboard ("The Face")
+
+**Feature**: INITIAL-11.md — ForecastLab Dashboard
+**Status**: Ready for Implementation
+**Confidence Score**: 7.5/10
+
+---
+
+## Goal
+
+Build the ForecastLab Dashboard providing:
+1. **Data Explorer** with server-side pagination, sorting, and filtering using TanStack Table
+2. **Time Series Visualization** for forecasts and backtest results using Recharts
+3. **Agent Chat Interface** with WebSocket streaming (depends on INITIAL-10 completion)
+4. **Admin Panel** for RAG sources and deployment alias management
+
+This is the "Face" layer that consumes the backend API (Phases 1-10) and provides a user-friendly interface.
+
+---
+
+## Why
+
+- **User Experience**: No CLI required for data exploration and visualization
+- **Agent Interaction**: Chat interface for RAG queries and experiment orchestration
+- **Portfolio Value**: Demonstrates full-stack React 19 + FastAPI integration
+- **Operational**: Admin panel for system management without API calls
+
+---
+
+## What
+
+### Page Structure
+
+| Route | Purpose | Backend Endpoints |
+|-------|---------|-------------------|
+| `/dashboard` | KPI summary cards | `GET /analytics/kpis` |
+| `/explorer/sales` | Sales data table | `GET /analytics/drilldowns` |
+| `/explorer/stores` | Store dimension table | `GET /dimensions/stores` |
+| `/explorer/products` | Product dimension table | `GET /dimensions/products` |
+| `/explorer/runs` | Model run leaderboard | `GET /registry/runs` |
+| `/explorer/jobs` | Job monitor | `GET /jobs` |
+| `/visualize/forecast` | Forecast chart | (via job results) |
+| `/visualize/backtest` | Backtest fold visualization | (via job results) |
+| `/chat` | Agent chat interface | `WS /agents/stream` |
+| `/admin` | RAG sources + aliases | `GET /rag/sources`, `GET /registry/aliases` |
+
+### Success Criteria
+
+- [ ] Vite + React 19 project scaffolded with TypeScript strict mode
+- [ ] shadcn/ui components installed and configured (Table, Card, Button, Dialog, etc.)
+- [ ] TanStack Table with server-side pagination/sorting/filtering
+- [ ] TanStack Query for all API calls with proper caching
+- [ ] Recharts time series chart with actual/predicted lines
+- [ ] WebSocket hook for agent chat streaming
+- [ ] Dark/light theme toggle via shadcn/ui
+- [ ] Responsive design (mobile-friendly)
+- [ ] Error boundaries with retry functionality
+- [ ] Lighthouse performance score > 90
+- [ ] All TypeScript strict checks pass
+
+---
+
+## All Needed Context
+
+### Documentation & References
+
+```yaml
+# React 19 + Vite Setup
+- url: https://vite.dev/guide/
+  why: "Vite project scaffolding, environment variables (import.meta.env)"
+  section: "Getting Started, Env Variables"
+
+- url: https://react.dev/
+  why: "React 19 hooks (use(), useState, useEffect), Suspense, ErrorBoundary"
+
+# shadcn/ui (CRITICAL - Primary Component Library)
+- url: https://ui.shadcn.com/docs/installation/vite
+  why: "Vite + React installation steps, tailwind.config.js setup"
+  critical: "Must use 'npx shadcn@latest init' NOT 'shadcn-ui'"
+
+- url: https://ui.shadcn.com/docs/components/data-table
+  why: "TanStack Table integration pattern - the core pattern for all data tables"
+  critical: "Uses @tanstack/react-table, manualPagination=true for server-side"
+
+- url: https://ui.shadcn.com/docs/components/table
+  why: "Base Table component used by Data Table"
+
+- url: https://ui.shadcn.com/docs/dark-mode/vite
+  why: "Dark mode setup with ThemeProvider"
+
+# TanStack Table (Server-Side Pattern)
+- url: https://tanstack.com/table/latest/docs/guide/pagination
+  why: "Server-side pagination with manualPagination=true"
+  critical: "pageCount must be passed, onPaginationChange callback"
+
+- url: https://tanstack.com/table/latest/docs/guide/sorting
+  why: "Server-side sorting with manualSorting=true"
+
+- url: https://tanstack.com/table/latest/docs/guide/column-filtering
+  why: "Server-side filtering with manualFiltering=true"
+
+# TanStack Query (Data Fetching)
+- url: https://tanstack.com/query/latest/docs/framework/react/guides/queries
+  why: "useQuery pattern with queryKey and queryFn"
+
+- url: https://tanstack.com/query/latest/docs/framework/react/guides/paginated-queries
+  why: "keepPreviousData for smooth pagination transitions"
+
+- url: https://tanstack.com/query/latest/docs/framework/react/guides/mutations
+  why: "useMutation for POST/DELETE/PATCH operations"
+
+# Recharts
+- url: https://recharts.org/en-US/api/LineChart
+  why: "Time series visualization with LineChart, XAxis, YAxis"
+
+- url: https://recharts.org/en-US/api/Tooltip
+  why: "Interactive tooltips"
+
+- url: https://recharts.org/en-US/examples/SimpleLineChart
+  why: "Basic example to follow"
+
+# Tailwind CSS 4
+- url: https://tailwindcss.com/docs/installation/using-vite
+  why: "Tailwind 4 setup with Vite"
+
+# WebSocket (for Agent Chat)
+- url: https://developer.mozilla.org/en-US/docs/Web/API/WebSocket
+  why: "Native WebSocket API - useWebSocket custom hook pattern"
+```
+
+### Backend API Contract Summary
+
+```typescript
+// ALL LIST ENDPOINTS USE THIS PAGINATION PATTERN:
+// Query params: page (1-indexed), page_size (max 100)
+// Response: { items[], total, page, page_size }
+
+// Dimensions
+GET /dimensions/stores?page=1&page_size=20&region=&store_type=&search=
+// Response: StoreListResponse { stores[], total, page, page_size }
+
+GET /dimensions/products?page=1&page_size=20&category=&brand=&search=
+// Response: ProductListResponse { products[], total, page, page_size }
+
+// Analytics
+GET /analytics/kpis?start_date=&end_date=&store_id=&product_id=&category=
+// Response: KPIResponse { metrics: KPIMetrics, start_date, end_date, ... }
+
+GET /analytics/drilldowns?dimension=store&start_date=&end_date=&max_items=20
+// Response: DrilldownResponse { dimension, items[], total_items, ... }
+
+// Registry
+GET /registry/runs?page=1&page_size=20&model_type=&status=&store_id=&product_id=
+// Response: RunListResponse { runs[], total, page, page_size }
+
+GET /registry/compare/{run_id_a}/{run_id_b}
+// Response: RunCompareResponse { run_a, run_b, config_diff, metrics_diff }
+
+POST /registry/aliases
+// Body: { alias_name, run_id, description }
+// Response: AliasResponse
+
+GET /registry/aliases
+// Response: AliasResponse[]
+
+// Jobs
+GET /jobs?page=1&page_size=20&job_type=&status=
+// Response: JobListResponse { jobs[], total, page, page_size }
+
+POST /jobs
+// Body: { job_type: 'train'|'predict'|'backtest', params: {...} }
+// Response: JobResponse (202 ACCEPTED)
+
+DELETE /jobs/{job_id}
+// Response: 204 NO CONTENT (only for pending jobs)
+
+// Error Responses (RFC 7807)
+// All errors return: { type, title, status, detail, instance, errors?, code, request_id }
+```
+
+### Current Codebase Tree
+
+```
+.
+├── alembic/
+├── app/
+│   ├── core/
+│   ├── features/
+│   │   ├── analytics/     # GET /analytics/kpis, /drilldowns
+│   │   ├── backtesting/   # POST /backtesting/run
+│   │   ├── dimensions/    # GET /dimensions/stores, /products
+│   │   ├── forecasting/   # POST /forecasting/train, /predict
+│   │   ├── jobs/          # POST/GET/DELETE /jobs
+│   │   └── registry/      # CRUD /registry/runs, /aliases
+│   └── main.py
+├── docs/
+├── examples/
+├── PRPs/
+├── docker-compose.yml
+├── pyproject.toml
+└── README.md
+```
+
+### Desired Codebase Tree (Files to Create)
+
+```
+frontend/                        # NEW: React 19 + Vite project
+├── public/
+│   └── favicon.ico
+├── src/
+│   ├── components/
+│   │   ├── ui/                  # shadcn/ui components (auto-generated)
+│   │   │   ├── button.tsx
+│   │   │   ├── card.tsx
+│   │   │   ├── dialog.tsx
+│   │   │   ├── dropdown-menu.tsx
+│   │   │   ├── input.tsx
+│   │   │   ├── label.tsx
+│   │   │   ├── select.tsx
+│   │   │   ├── skeleton.tsx
+│   │   │   ├── table.tsx
+│   │   │   └── toast.tsx
+│   │   ├── data-table/          # Reusable data table components
+│   │   │   ├── data-table.tsx   # Main DataTable component
+│   │   │   ├── data-table-pagination.tsx
+│   │   │   ├── data-table-column-header.tsx
+│   │   │   └── data-table-toolbar.tsx
+│   │   ├── charts/
+│   │   │   ├── time-series-chart.tsx
+│   │   │   ├── kpi-card.tsx
+│   │   │   └── metric-bar-chart.tsx
+│   │   ├── chat/                # Agent chat (Phase 2 - after INITIAL-10)
+│   │   │   ├── chat-message.tsx
+│   │   │   ├── chat-input.tsx
+│   │   │   └── citation-list.tsx
+│   │   ├── layout/
+│   │   │   ├── app-layout.tsx   # Main layout with sidebar
+│   │   │   ├── sidebar.tsx
+│   │   │   ├── header.tsx
+│   │   │   └── theme-toggle.tsx
+│   │   └── error-boundary.tsx
+│   ├── hooks/
+│   │   ├── use-stores.ts        # TanStack Query hooks for /dimensions/stores
+│   │   ├── use-products.ts      # TanStack Query hooks for /dimensions/products
+│   │   ├── use-kpis.ts          # TanStack Query hook for /analytics/kpis
+│   │   ├── use-drilldowns.ts    # TanStack Query hook for /analytics/drilldowns
+│   │   ├── use-runs.ts          # TanStack Query hooks for /registry/runs
+│   │   ├── use-aliases.ts       # TanStack Query hooks for /registry/aliases
+│   │   ├── use-jobs.ts          # TanStack Query hooks for /jobs
+│   │   └── use-websocket.ts     # WebSocket hook for agent streaming
+│   ├── lib/
+│   │   ├── api.ts               # Axios/fetch client with base URL
+│   │   ├── query-client.ts      # TanStack Query client config
+│   │   └── utils.ts             # cn() for class merging (shadcn pattern)
+│   ├── pages/
+│   │   ├── dashboard.tsx
+│   │   ├── explorer/
+│   │   │   ├── sales.tsx
+│   │   │   ├── stores.tsx
+│   │   │   ├── products.tsx
+│   │   │   ├── runs.tsx
+│   │   │   └── jobs.tsx
+│   │   ├── visualize/
+│   │   │   ├── forecast.tsx
+│   │   │   └── backtest.tsx
+│   │   ├── chat.tsx             # Phase 2 - after INITIAL-10
+│   │   └── admin.tsx
+│   ├── types/
+│   │   ├── api.ts               # TypeScript types matching backend schemas
+│   │   └── index.ts
+│   ├── App.tsx                  # Main app with router
+│   ├── main.tsx                 # Entry point
+│   └── index.css                # Tailwind imports
+├── .env.example                 # VITE_API_BASE_URL, VITE_WS_URL
+├── .gitignore
+├── components.json              # shadcn/ui config
+├── eslint.config.js
+├── index.html
+├── package.json
+├── postcss.config.js
+├── tailwind.config.ts
+├── tsconfig.json
+├── tsconfig.node.json
+└── vite.config.ts
+
+examples/ui/
+└── README.md                    # Dashboard page map and setup instructions
+```
+
+### Known Gotchas & Library Quirks
+
+```typescript
+// CRITICAL: shadcn/ui installation command
+// Use: npx shadcn@latest init
+// NOT: npx shadcn-ui init (deprecated)
+
+// CRITICAL: TanStack Table v8 breaking changes
+// - useReactTable (NOT useTable)
+// - getCoreRowModel() required
+// - manualPagination, manualSorting, manualFiltering for server-side
+
+// CRITICAL: Vite environment variables
+// - Must prefix with VITE_ (e.g., VITE_API_BASE_URL)
+// - Access via import.meta.env.VITE_API_BASE_URL
+// - NOT process.env (that's Node.js)
+
+// CRITICAL: TanStack Query v5
+// - queryKey is now an array: ['runs', params]
+// - useQuery returns object with { data, isLoading, error }
+// - placeholderData replaces keepPreviousData option
+
+// CRITICAL: Recharts responsive container
+// - ResponsiveContainer requires explicit parent height
+// - Use CSS: min-height: 400px on parent
+
+// CRITICAL: WebSocket reconnection
+// - Browser WebSocket API has no auto-reconnect
+// - Must implement exponential backoff manually
+
+// CRITICAL: shadcn/ui dark mode
+// - Requires ThemeProvider wrapper
+// - Uses localStorage for persistence
+// - HTML class="dark" toggling
+
+// CRITICAL: Decimal handling from backend
+// - Backend sends Decimal as string (e.g., "1234.56")
+// - Parse with parseFloat() or use library like decimal.js
+// - Format with Intl.NumberFormat for currency display
+```
+
+---
+
+## Implementation Blueprint
+
+### Phase 1: Project Scaffolding (Tasks 1-5)
+
+#### Task 1: Initialize Vite + React 19 + TypeScript Project
+
+```bash
+# Commands to run (in project root)
+cd /path/to/ForecastLabAI
+pnpm create vite@latest frontend -- --template react-ts
+cd frontend
+pnpm install
+```
+
+Configure TypeScript strict mode in `tsconfig.json`:
+```json
+{
+  "compilerOptions": {
+    "strict": true,
+    "noUncheckedIndexedAccess": true,
+    "noImplicitReturns": true,
+    "strictNullChecks": true
+  }
+}
+```
+
+#### Task 2: Install Tailwind CSS 4
+
+```bash
+pnpm add -D tailwindcss @tailwindcss/vite
+```
+
+Update `vite.config.ts`:
+```typescript
+import tailwindcss from '@tailwindcss/vite'
+
+export default defineConfig({
+  plugins: [react(), tailwindcss()],
+})
+```
+
+Create `src/index.css`:
+```css
+@import "tailwindcss";
+```
+
+#### Task 3: Initialize shadcn/ui
+
+```bash
+npx shadcn@latest init
+# Choose:
+# - Style: Default
+# - Base color: Neutral
+# - CSS variables: Yes
+```
+
+Install required components:
+```bash
+npx shadcn@latest add button card dialog dropdown-menu input label select skeleton table toast
+```
+
+#### Task 4: Install TanStack Libraries
+
+```bash
+pnpm add @tanstack/react-table @tanstack/react-query
+```
+
+Create `src/lib/query-client.ts`:
+```typescript
+import { QueryClient } from '@tanstack/react-query'
+
+export const queryClient = new QueryClient({
+  defaultOptions: {
+    queries: {
+      staleTime: 5 * 60 * 1000, // 5 minutes
+      retry: 1,
+      refetchOnWindowFocus: false,
+    },
+  },
+})
+```
+
+#### Task 5: Install Recharts and React Router
+
+```bash
+pnpm add recharts react-router-dom
+```
+
+---
+
+### Phase 2: Core Infrastructure (Tasks 6-10)
+
+#### Task 6: Create API Client
+
+File: `src/lib/api.ts`
+
+```typescript
+const API_BASE_URL = import.meta.env.VITE_API_BASE_URL || 'http://localhost:8123'
+
+interface RequestConfig {
+  method?: 'GET' | 'POST' | 'PATCH' | 'DELETE'
+  body?: unknown
+  params?: Record<string, string | number | undefined>
+}
+
+interface ProblemDetail {
+  type: string
+  title: string
+  status: number
+  detail: string
+  instance?: string
+  errors?: Array<{ field: string; message: string; type: string }>
+  code?: string
+  request_id?: string
+}
+
+export class ApiError extends Error {
+  constructor(
+    message: string,
+    public status: number,
+    public detail?: ProblemDetail
+  ) {
+    super(message)
+    this.name = 'ApiError'
+  }
+}
+
+export async function api<T>(endpoint: string, config: RequestConfig = {}): Promise<T> {
+  const { method = 'GET', body, params } = config
+
+  const url = new URL(`${API_BASE_URL}${endpoint}`)
+  if (params) {
+    Object.entries(params).forEach(([key, value]) => {
+      if (value !== undefined) {
+        url.searchParams.set(key, String(value))
+      }
+    })
+  }
+
+  const response = await fetch(url.toString(), {
+    method,
+    headers: {
+      'Content-Type': 'application/json',
+    },
+    body: body ? JSON.stringify(body) : undefined,
+  })
+
+  if (!response.ok) {
+    const detail = await response.json() as ProblemDetail
+    throw new ApiError(detail.detail || response.statusText, response.status, detail)
+  }
+
+  return response.json() as Promise<T>
+}
+```
+
+#### Task 7: Create TypeScript Types (Match Backend Schemas)
+
+File: `src/types/api.ts`
+
+```typescript
+// Pagination
+export interface PaginationParams {
+  page: number
+  page_size: number
+}
+
+export interface PaginatedResponse<T> {
+  total: number
+  page: number
+  page_size: number
+}
+
+// Dimensions
+export interface Store {
+  id: number
+  code: string
+  name: string
+  region: string | null
+  city: string | null
+  store_type: string | null
+  created_at: string
+  updated_at: string
+}
+
+export interface StoreListResponse extends PaginatedResponse<Store> {
+  stores: Store[]
+}
+
+export interface Product {
+  id: number
+  sku: string
+  name: string
+  category: string | null
+  brand: string | null
+  base_price: string | null  // Decimal as string
+  base_cost: string | null   // Decimal as string
+  created_at: string
+  updated_at: string
+}
+
+export interface ProductListResponse extends PaginatedResponse<Product> {
+  products: Product[]
+}
+
+// Analytics
+export interface KPIMetrics {
+  total_revenue: string    // Decimal as string
+  total_units: number
+  total_transactions: number
+  avg_unit_price: string | null
+  avg_basket_value: string | null
+}
+
+export interface KPIResponse {
+  metrics: KPIMetrics
+  start_date: string
+  end_date: string
+  store_id: number | null
+  product_id: number | null
+  category: string | null
+}
+
+export interface DrilldownItem {
+  dimension_value: string
+  dimension_id: number | null
+  metrics: KPIMetrics
+  rank: number
+  revenue_share_pct: string  // Decimal as string
+}
+
+export type DrilldownDimension = 'store' | 'product' | 'category' | 'region' | 'date'
+
+export interface DrilldownResponse {
+  dimension: DrilldownDimension
+  items: DrilldownItem[]
+  total_items: number
+  start_date: string
+  end_date: string
+  store_id: number | null
+  product_id: number | null
+}
+
+// Registry
+export type RunStatus = 'pending' | 'running' | 'success' | 'failed' | 'archived'
+
+export interface ModelRun {
+  run_id: string
+  status: RunStatus
+  model_type: string
+  model_config: Record<string, unknown>
+  feature_config: Record<string, unknown> | null
+  config_hash: string
+  data_window_start: string
+  data_window_end: string
+  store_id: number
+  product_id: number
+  metrics: Record<string, number> | null
+  artifact_uri: string | null
+  artifact_hash: string | null
+  artifact_size_bytes: number | null
+  runtime_info: Record<string, unknown> | null
+  agent_context: Record<string, unknown> | null
+  git_sha: string | null
+  error_message: string | null
+  started_at: string | null
+  completed_at: string | null
+  created_at: string
+  updated_at: string
+}
+
+export interface RunListResponse extends PaginatedResponse<ModelRun> {
+  runs: ModelRun[]
+}
+
+export interface Alias {
+  alias_name: string
+  run_id: string
+  run_status: RunStatus
+  model_type: string
+  description: string | null
+  created_at: string
+  updated_at: string
+}
+
+export interface RunCompareResponse {
+  run_a: ModelRun
+  run_b: ModelRun
+  config_diff: Record<string, unknown>
+  metrics_diff: Record<string, { a: number | null; b: number | null; diff: number | null }>
+}
+
+// Jobs
+export type JobType = 'train' | 'predict' | 'backtest'
+export type JobStatus = 'pending' | 'running' | 'completed' | 'failed' | 'cancelled'
+
+export interface Job {
+  job_id: string
+  job_type: JobType
+  status: JobStatus
+  params: Record<string, unknown>
+  result: Record<string, unknown> | null
+  error_message: string | null
+  error_type: string | null
+  run_id: string | null
+  started_at: string | null
+  completed_at: string | null
+  created_at: string
+  updated_at: string
+}
+
+export interface JobListResponse extends PaginatedResponse<Job> {
+  jobs: Job[]
+}
+
+export interface JobCreate {
+  job_type: JobType
+  params: Record<string, unknown>
+}
+```
+
+#### Task 8: Create TanStack Query Hooks
+
+File: `src/hooks/use-stores.ts`
+
+```typescript
+import { useQuery } from '@tanstack/react-query'
+import { api } from '@/lib/api'
+import type { StoreListResponse } from '@/types/api'
+
+interface UseStoresParams {
+  page: number
+  pageSize: number
+  region?: string
+  storeType?: string
+  search?: string
+}
+
+export function useStores({ page, pageSize, region, storeType, search }: UseStoresParams) {
+  return useQuery({
+    queryKey: ['stores', { page, pageSize, region, storeType, search }],
+    queryFn: () => api<StoreListResponse>('/dimensions/stores', {
+      params: {
+        page,
+        page_size: pageSize,
+        region,
+        store_type: storeType,
+        search,
+      },
+    }),
+    placeholderData: (previousData) => previousData,
+  })
+}
+```
+
+File: `src/hooks/use-runs.ts`
+
+```typescript
+import { useQuery, useMutation, useQueryClient } from '@tanstack/react-query'
+import { api } from '@/lib/api'
+import type { RunListResponse, RunCompareResponse, Alias } from '@/types/api'
+
+interface UseRunsParams {
+  page: number
+  pageSize: number
+  modelType?: string
+  status?: string
+  storeId?: number
+  productId?: number
+}
+
+export function useRuns({ page, pageSize, modelType, status, storeId, productId }: UseRunsParams) {
+  return useQuery({
+    queryKey: ['runs', { page, pageSize, modelType, status, storeId, productId }],
+    queryFn: () => api<RunListResponse>('/registry/runs', {
+      params: {
+        page,
+        page_size: pageSize,
+        model_type: modelType,
+        status,
+        store_id: storeId,
+        product_id: productId,
+      },
+    }),
+    placeholderData: (previousData) => previousData,
+  })
+}
+
+export function useCompareRuns(runIdA: string, runIdB: string, enabled = false) {
+  return useQuery({
+    queryKey: ['runs', 'compare', runIdA, runIdB],
+    queryFn: () => api<RunCompareResponse>(`/registry/compare/${runIdA}/${runIdB}`),
+    enabled,
+  })
+}
+
+export function useAliases() {
+  return useQuery({
+    queryKey: ['aliases'],
+    queryFn: () => api<Alias[]>('/registry/aliases'),
+  })
+}
+
+export function useCreateAlias() {
+  const queryClient = useQueryClient()
+  return useMutation({
+    mutationFn: (data: { alias_name: string; run_id: string; description?: string }) =>
+      api<Alias>('/registry/aliases', { method: 'POST', body: data }),
+    onSuccess: () => {
+      queryClient.invalidateQueries({ queryKey: ['aliases'] })
+    },
+  })
+}
+```
+
+File: `src/hooks/use-jobs.ts`
+
+```typescript
+import { useQuery, useMutation, useQueryClient } from '@tanstack/react-query'
+import { api } from '@/lib/api'
+import type { JobListResponse, Job, JobCreate } from '@/types/api'
+
+interface UseJobsParams {
+  page: number
+  pageSize: number
+  jobType?: string
+  status?: string
+}
+
+export function useJobs({ page, pageSize, jobType, status }: UseJobsParams) {
+  return useQuery({
+    queryKey: ['jobs', { page, pageSize, jobType, status }],
+    queryFn: () => api<JobListResponse>('/jobs', {
+      params: {
+        page,
+        page_size: pageSize,
+        job_type: jobType,
+        status,
+      },
+    }),
+    placeholderData: (previousData) => previousData,
+    refetchInterval: 5000, // Poll every 5 seconds for status updates
+  })
+}
+
+export function useJob(jobId: string, enabled = true) {
+  return useQuery({
+    queryKey: ['jobs', jobId],
+    queryFn: () => api<Job>(`/jobs/${jobId}`),
+    enabled,
+    refetchInterval: (query) => {
+      // Stop polling when job is complete
+      const status = query.state.data?.status
+      return status === 'pending' || status === 'running' ? 2000 : false
+    },
+  })
+}
+
+export function useCreateJob() {
+  const queryClient = useQueryClient()
+  return useMutation({
+    mutationFn: (data: JobCreate) =>
+      api<Job>('/jobs', { method: 'POST', body: data }),
+    onSuccess: () => {
+      queryClient.invalidateQueries({ queryKey: ['jobs'] })
+    },
+  })
+}
+
+export function useCancelJob() {
+  const queryClient = useQueryClient()
+  return useMutation({
+    mutationFn: (jobId: string) =>
+      api<void>(`/jobs/${jobId}`, { method: 'DELETE' }),
+    onSuccess: () => {
+      queryClient.invalidateQueries({ queryKey: ['jobs'] })
+    },
+  })
+}
+```
+
+#### Task 9: Create Layout Components
+
+File: `src/components/layout/app-layout.tsx`
+
+```typescript
+import { Outlet } from 'react-router-dom'
+import { Sidebar } from './sidebar'
+import { Header } from './header'
+
+export function AppLayout() {
+  return (
+    <div className="flex h-screen bg-background">
+      <Sidebar />
+      <div className="flex flex-1 flex-col overflow-hidden">
+        <Header />
+        <main className="flex-1 overflow-y-auto p-6">
+          <Outlet />
+        </main>
+      </div>
+    </div>
+  )
+}
+```
+
+File: `src/components/layout/sidebar.tsx`
+
+```typescript
+import { NavLink } from 'react-router-dom'
+import { cn } from '@/lib/utils'
+import {
+  LayoutDashboard,
+  Table2,
+  LineChart,
+  MessageSquare,
+  Settings,
+  Store,
+  Package,
+  FlaskConical,
+  ListTodo,
+} from 'lucide-react'
+
+const navigation = [
+  { name: 'Dashboard', href: '/', icon: LayoutDashboard },
+  { name: 'Sales', href: '/explorer/sales', icon: Table2 },
+  { name: 'Stores', href: '/explorer/stores', icon: Store },
+  { name: 'Products', href: '/explorer/products', icon: Package },
+  { name: 'Model Runs', href: '/explorer/runs', icon: FlaskConical },
+  { name: 'Jobs', href: '/explorer/jobs', icon: ListTodo },
+  { name: 'Forecast', href: '/visualize/forecast', icon: LineChart },
+  { name: 'Chat', href: '/chat', icon: MessageSquare },
+  { name: 'Admin', href: '/admin', icon: Settings },
+]
+
+export function Sidebar() {
+  return (
+    <aside className="w-64 border-r bg-card">
+      <div className="flex h-14 items-center border-b px-4">
+        <span className="font-semibold">ForecastLab</span>
+      </div>
+      <nav className="space-y-1 p-2">
+        {navigation.map((item) => (
+          <NavLink
+            key={item.href}
+            to={item.href}
+            className={({ isActive }) =>
+              cn(
+                'flex items-center gap-3 rounded-md px-3 py-2 text-sm font-medium',
+                isActive
+                  ? 'bg-primary text-primary-foreground'
+                  : 'text-muted-foreground hover:bg-muted hover:text-foreground'
+              )
+            }
+          >
+            <item.icon className="h-4 w-4" />
+            {item.name}
+          </NavLink>
+        ))}
+      </nav>
+    </aside>
+  )
+}
+```
+
+#### Task 10: Create Error Boundary
+
+File: `src/components/error-boundary.tsx`
+
+```typescript
+import { Component, type ReactNode } from 'react'
+import { Button } from '@/components/ui/button'
+import { Card, CardHeader, CardTitle, CardContent, CardFooter } from '@/components/ui/card'
+
+interface Props {
+  children: ReactNode
+}
+
+interface State {
+  hasError: boolean
+  error: Error | null
+}
+
+export class ErrorBoundary extends Component<Props, State> {
+  constructor(props: Props) {
+    super(props)
+    this.state = { hasError: false, error: null }
+  }
+
+  static getDerivedStateFromError(error: Error): State {
+    return { hasError: true, error }
+  }
+
+  render() {
+    if (this.state.hasError) {
+      return (
+        <Card className="max-w-md mx-auto mt-8">
+          <CardHeader>
+            <CardTitle className="text-destructive">Something went wrong</CardTitle>
+          </CardHeader>
+          <CardContent>
+            <p className="text-sm text-muted-foreground">
+              {this.state.error?.message || 'An unexpected error occurred'}
+            </p>
+          </CardContent>
+          <CardFooter>
+            <Button onClick={() => window.location.reload()}>
+              Reload Page
+            </Button>
+          </CardFooter>
+        </Card>
+      )
+    }
+
+    return this.props.children
+  }
+}
+```
+
+---
+
+### Phase 3: Data Table Components (Tasks 11-15)
+
+#### Task 11: Create Reusable DataTable Component
+
+File: `src/components/data-table/data-table.tsx`
+
+```typescript
+import {
+  type ColumnDef,
+  type PaginationState,
+  type SortingState,
+  type ColumnFiltersState,
+  flexRender,
+  getCoreRowModel,
+  useReactTable,
+} from '@tanstack/react-table'
+import {
+  Table,
+  TableBody,
+  TableCell,
+  TableHead,
+  TableHeader,
+  TableRow,
+} from '@/components/ui/table'
+import { DataTablePagination } from './data-table-pagination'
+import { Skeleton } from '@/components/ui/skeleton'
+
+interface DataTableProps<TData, TValue> {
+  columns: ColumnDef<TData, TValue>[]
+  data: TData[]
+  pageCount: number
+  pagination: PaginationState
+  onPaginationChange: (updater: PaginationState | ((old: PaginationState) => PaginationState)) => void
+  sorting?: SortingState
+  onSortingChange?: (updater: SortingState | ((old: SortingState) => SortingState)) => void
+  isLoading?: boolean
+}
+
+export function DataTable<TData, TValue>({
+  columns,
+  data,
+  pageCount,
+  pagination,
+  onPaginationChange,
+  sorting,
+  onSortingChange,
+  isLoading = false,
+}: DataTableProps<TData, TValue>) {
+  const table = useReactTable({
+    data,
+    columns,
+    pageCount,
+    state: {
+      pagination,
+      sorting,
+    },
+    onPaginationChange,
+    onSortingChange,
+    getCoreRowModel: getCoreRowModel(),
+    manualPagination: true,
+    manualSorting: true,
+  })
+
+  return (
+    <div className="space-y-4">
+      <div className="rounded-md border">
+        <Table>
+          <TableHeader>
+            {table.getHeaderGroups().map((headerGroup) => (
+              <TableRow key={headerGroup.id}>
+                {headerGroup.headers.map((header) => (
+                  <TableHead key={header.id}>
+                    {header.isPlaceholder
+                      ? null
+                      : flexRender(header.column.columnDef.header, header.getContext())}
+                  </TableHead>
+                ))}
+              </TableRow>
+            ))}
+          </TableHeader>
+          <TableBody>
+            {isLoading ? (
+              Array.from({ length: pagination.pageSize }).map((_, i) => (
+                <TableRow key={i}>
+                  {columns.map((_, j) => (
+                    <TableCell key={j}>
+                      <Skeleton className="h-4 w-full" />
+                    </TableCell>
+                  ))}
+                </TableRow>
+              ))
+            ) : table.getRowModel().rows?.length ? (
+              table.getRowModel().rows.map((row) => (
+                <TableRow key={row.id}>
+                  {row.getVisibleCells().map((cell) => (
+                    <TableCell key={cell.id}>
+                      {flexRender(cell.column.columnDef.cell, cell.getContext())}
+                    </TableCell>
+                  ))}
+                </TableRow>
+              ))
+            ) : (
+              <TableRow>
+                <TableCell colSpan={columns.length} className="h-24 text-center">
+                  No results.
+                </TableCell>
+              </TableRow>
+            )}
+          </TableBody>
+        </Table>
+      </div>
+      <DataTablePagination table={table} />
+    </div>
+  )
+}
+```
+
+#### Task 12: Create Stores Explorer Page
+
+File: `src/pages/explorer/stores.tsx`
+
+```typescript
+import { useState } from 'react'
+import { type ColumnDef, type PaginationState } from '@tanstack/react-table'
+import { DataTable } from '@/components/data-table/data-table'
+import { useStores } from '@/hooks/use-stores'
+import { Input } from '@/components/ui/input'
+import type { Store } from '@/types/api'
+
+const columns: ColumnDef<Store>[] = [
+  { accessorKey: 'id', header: 'ID' },
+  { accessorKey: 'code', header: 'Code' },
+  { accessorKey: 'name', header: 'Name' },
+  { accessorKey: 'region', header: 'Region' },
+  { accessorKey: 'city', header: 'City' },
+  { accessorKey: 'store_type', header: 'Type' },
+]
+
+export default function StoresPage() {
+  const [pagination, setPagination] = useState<PaginationState>({
+    pageIndex: 0,
+    pageSize: 20,
+  })
+  const [search, setSearch] = useState('')
+
+  const { data, isLoading } = useStores({
+    page: pagination.pageIndex + 1,
+    pageSize: pagination.pageSize,
+    search: search || undefined,
+  })
+
+  return (
+    <div className="space-y-4">
+      <div className="flex items-center justify-between">
+        <h1 className="text-2xl font-bold">Stores</h1>
+        <Input
+          placeholder="Search stores..."
+          value={search}
+          onChange={(e) => setSearch(e.target.value)}
+          className="max-w-sm"
+        />
+      </div>
+      <DataTable
+        columns={columns}
+        data={data?.stores ?? []}
+        pageCount={data ? Math.ceil(data.total / data.page_size) : 0}
+        pagination={pagination}
+        onPaginationChange={setPagination}
+        isLoading={isLoading}
+      />
+    </div>
+  )
+}
+```
+
+#### Task 13: Create Runs Explorer Page
+
+File: `src/pages/explorer/runs.tsx`
+
+```typescript
+import { useState } from 'react'
+import { type ColumnDef, type PaginationState } from '@tanstack/react-table'
+import { DataTable } from '@/components/data-table/data-table'
+import { useRuns, useCompareRuns } from '@/hooks/use-runs'
+import { Button } from '@/components/ui/button'
+import { Badge } from '@/components/ui/badge'
+import { Checkbox } from '@/components/ui/checkbox'
+import type { ModelRun, RunStatus } from '@/types/api'
+
+const statusColors: Record<RunStatus, string> = {
+  pending: 'bg-yellow-100 text-yellow-800',
+  running: 'bg-blue-100 text-blue-800',
+  success: 'bg-green-100 text-green-800',
+  failed: 'bg-red-100 text-red-800',
+  archived: 'bg-gray-100 text-gray-800',
+}
+
+const columns: ColumnDef<ModelRun>[] = [
+  {
+    id: 'select',
+    header: ({ table }) => (
+      <Checkbox
+        checked={table.getIsAllPageRowsSelected()}
+        onCheckedChange={(value) => table.toggleAllPageRowsSelected(!!value)}
+      />
+    ),
+    cell: ({ row }) => (
+      <Checkbox
+        checked={row.getIsSelected()}
+        onCheckedChange={(value) => row.toggleSelected(!!value)}
+      />
+    ),
+  },
+  { accessorKey: 'run_id', header: 'Run ID', cell: ({ row }) => row.original.run_id.slice(0, 8) },
+  { accessorKey: 'model_type', header: 'Model' },
+  {
+    accessorKey: 'status',
+    header: 'Status',
+    cell: ({ row }) => (
+      <Badge className={statusColors[row.original.status]}>{row.original.status}</Badge>
+    ),
+  },
+  {
+    accessorKey: 'metrics.mae',
+    header: 'MAE',
+    cell: ({ row }) => row.original.metrics?.mae?.toFixed(2) ?? '-',
+  },
+  {
+    accessorKey: 'metrics.smape',
+    header: 'sMAPE',
+    cell: ({ row }) => row.original.metrics?.smape ? `${row.original.metrics.smape.toFixed(1)}%` : '-',
+  },
+  {
+    accessorKey: 'created_at',
+    header: 'Created',
+    cell: ({ row }) => new Date(row.original.created_at).toLocaleDateString(),
+  },
+]
+
+export default function RunsPage() {
+  const [pagination, setPagination] = useState<PaginationState>({
+    pageIndex: 0,
+    pageSize: 20,
+  })
+  const [selectedRuns, setSelectedRuns] = useState<string[]>([])
+
+  const { data, isLoading } = useRuns({
+    page: pagination.pageIndex + 1,
+    pageSize: pagination.pageSize,
+  })
+
+  const canCompare = selectedRuns.length === 2
+  const { data: comparison, refetch: compare } = useCompareRuns(
+    selectedRuns[0] || '',
+    selectedRuns[1] || '',
+    canCompare
+  )
+
+  return (
+    <div className="space-y-4">
+      <div className="flex items-center justify-between">
+        <h1 className="text-2xl font-bold">Model Runs</h1>
+        <Button
+          disabled={!canCompare}
+          onClick={() => compare()}
+        >
+          Compare Selected ({selectedRuns.length}/2)
+        </Button>
+      </div>
+      <DataTable
+        columns={columns}
+        data={data?.runs ?? []}
+        pageCount={data ? Math.ceil(data.total / data.page_size) : 0}
+        pagination={pagination}
+        onPaginationChange={setPagination}
+        isLoading={isLoading}
+      />
+    </div>
+  )
+}
+```
+
+#### Task 14: Create Jobs Monitor Page
+
+File: `src/pages/explorer/jobs.tsx`
+
+```typescript
+import { useState } from 'react'
+import { type ColumnDef, type PaginationState } from '@tanstack/react-table'
+import { DataTable } from '@/components/data-table/data-table'
+import { useJobs, useCancelJob } from '@/hooks/use-jobs'
+import { Button } from '@/components/ui/button'
+import { Badge } from '@/components/ui/badge'
+import type { Job, JobStatus } from '@/types/api'
+
+const statusColors: Record<JobStatus, string> = {
+  pending: 'bg-yellow-100 text-yellow-800',
+  running: 'bg-blue-100 text-blue-800',
+  completed: 'bg-green-100 text-green-800',
+  failed: 'bg-red-100 text-red-800',
+  cancelled: 'bg-gray-100 text-gray-800',
+}
+
+export default function JobsPage() {
+  const [pagination, setPagination] = useState<PaginationState>({
+    pageIndex: 0,
+    pageSize: 20,
+  })
+
+  const { data, isLoading } = useJobs({
+    page: pagination.pageIndex + 1,
+    pageSize: pagination.pageSize,
+  })
+
+  const cancelJob = useCancelJob()
+
+  const columns: ColumnDef<Job>[] = [
+    { accessorKey: 'job_id', header: 'Job ID', cell: ({ row }) => row.original.job_id.slice(0, 8) },
+    { accessorKey: 'job_type', header: 'Type' },
+    {
+      accessorKey: 'status',
+      header: 'Status',
+      cell: ({ row }) => (
+        <Badge className={statusColors[row.original.status]}>{row.original.status}</Badge>
+      ),
+    },
+    {
+      accessorKey: 'created_at',
+      header: 'Created',
+      cell: ({ row }) => new Date(row.original.created_at).toLocaleString(),
+    },
+    {
+      id: 'actions',
+      cell: ({ row }) => {
+        if (row.original.status !== 'pending') return null
+        return (
+          <Button
+            variant="destructive"
+            size="sm"
+            onClick={() => cancelJob.mutate(row.original.job_id)}
+          >
+            Cancel
+          </Button>
+        )
+      },
+    },
+  ]
+
+  return (
+    <div className="space-y-4">
+      <h1 className="text-2xl font-bold">Jobs</h1>
+      <DataTable
+        columns={columns}
+        data={data?.jobs ?? []}
+        pageCount={data ? Math.ceil(data.total / data.page_size) : 0}
+        pagination={pagination}
+        onPaginationChange={setPagination}
+        isLoading={isLoading}
+      />
+    </div>
+  )
+}
+```
+
+#### Task 15: Create Dashboard Page with KPI Cards
+
+File: `src/pages/dashboard.tsx`
+
+```typescript
+import { useState } from 'react'
+import { Card, CardHeader, CardTitle, CardContent } from '@/components/ui/card'
+import { useKPIs } from '@/hooks/use-kpis'
+import { Skeleton } from '@/components/ui/skeleton'
+
+function formatCurrency(value: string | null): string {
+  if (!value) return '-'
+  return new Intl.NumberFormat('en-US', {
+    style: 'currency',
+    currency: 'USD',
+  }).format(parseFloat(value))
+}
+
+function formatNumber(value: number | null): string {
+  if (value === null) return '-'
+  return new Intl.NumberFormat('en-US').format(value)
+}
+
+export default function DashboardPage() {
+  const [dateRange] = useState({
+    startDate: new Date(Date.now() - 30 * 24 * 60 * 60 * 1000).toISOString().split('T')[0],
+    endDate: new Date().toISOString().split('T')[0],
+  })
+
+  const { data, isLoading } = useKPIs({
+    startDate: dateRange.startDate,
+    endDate: dateRange.endDate,
+  })
+
+  const kpiCards = [
+    { title: 'Total Revenue', value: formatCurrency(data?.metrics.total_revenue ?? null) },
+    { title: 'Total Units', value: formatNumber(data?.metrics.total_units ?? null) },
+    { title: 'Transactions', value: formatNumber(data?.metrics.total_transactions ?? null) },
+    { title: 'Avg Unit Price', value: formatCurrency(data?.metrics.avg_unit_price ?? null) },
+  ]
+
+  return (
+    <div className="space-y-6">
+      <h1 className="text-2xl font-bold">Dashboard</h1>
+      <div className="grid gap-4 md:grid-cols-2 lg:grid-cols-4">
+        {kpiCards.map((card) => (
+          <Card key={card.title}>
+            <CardHeader className="pb-2">
+              <CardTitle className="text-sm font-medium text-muted-foreground">
+                {card.title}
+              </CardTitle>
+            </CardHeader>
+            <CardContent>
+              {isLoading ? (
+                <Skeleton className="h-8 w-24" />
+              ) : (
+                <div className="text-2xl font-bold">{card.value}</div>
+              )}
+            </CardContent>
+          </Card>
+        ))}
+      </div>
+    </div>
+  )
+}
+```
+
+---
+
+### Phase 4: Visualization Components (Tasks 16-18)
+
+#### Task 16: Create Time Series Chart Component
+
+File: `src/components/charts/time-series-chart.tsx`
+
+```typescript
+import {
+  LineChart,
+  Line,
+  XAxis,
+  YAxis,
+  CartesianGrid,
+  Tooltip,
+  Legend,
+  ResponsiveContainer,
+  Area,
+  ComposedChart,
+} from 'recharts'
+
+interface DataPoint {
+  date: string
+  actual?: number
+  predicted?: number
+  lower_bound?: number
+  upper_bound?: number
+}
+
+interface TimeSeriesChartProps {
+  data: DataPoint[]
+  showConfidence?: boolean
+  height?: number
+}
+
+export function TimeSeriesChart({
+  data,
+  showConfidence = false,
+  height = 400,
+}: TimeSeriesChartProps) {
+  return (
+    <ResponsiveContainer width="100%" height={height}>
+      <ComposedChart data={data} margin={{ top: 5, right: 30, left: 20, bottom: 5 }}>
+        <CartesianGrid strokeDasharray="3 3" className="stroke-muted" />
+        <XAxis
+          dataKey="date"
+          tick={{ fontSize: 12 }}
+          tickFormatter={(value) => new Date(value).toLocaleDateString('en-US', { month: 'short', day: 'numeric' })}
+        />
+        <YAxis tick={{ fontSize: 12 }} />
+        <Tooltip
+          contentStyle={{ backgroundColor: 'hsl(var(--card))', borderColor: 'hsl(var(--border))' }}
+          labelFormatter={(value) => new Date(value).toLocaleDateString()}
+        />
+        <Legend />
+
+        {showConfidence && (
+          <Area
+            type="monotone"
+            dataKey="upper_bound"
+            stroke="none"
+            fill="hsl(var(--primary))"
+            fillOpacity={0.1}
+            name="Confidence"
+          />
+        )}
+
+        <Line
+          type="monotone"
+          dataKey="actual"
+          stroke="hsl(var(--primary))"
+          strokeWidth={2}
+          dot={false}
+          name="Actual"
+        />
+
+        <Line
+          type="monotone"
+          dataKey="predicted"
+          stroke="hsl(var(--chart-2))"
+          strokeWidth={2}
+          strokeDasharray="5 5"
+          dot={false}
+          name="Predicted"
+        />
+      </ComposedChart>
+    </ResponsiveContainer>
+  )
+}
+```
+
+#### Task 17: Create Forecast Visualization Page
+
+File: `src/pages/visualize/forecast.tsx`
+
+```typescript
+import { useState } from 'react'
+import { Card, CardHeader, CardTitle, CardContent } from '@/components/ui/card'
+import { TimeSeriesChart } from '@/components/charts/time-series-chart'
+import { Select, SelectContent, SelectItem, SelectTrigger, SelectValue } from '@/components/ui/select'
+import { useStores } from '@/hooks/use-stores'
+import { useProducts } from '@/hooks/use-products'
+
+export default function ForecastPage() {
+  const [storeId, setStoreId] = useState<string>('')
+  const [productId, setProductId] = useState<string>('')
+
+  const { data: stores } = useStores({ page: 1, pageSize: 100 })
+  const { data: products } = useProducts({ page: 1, pageSize: 100 })
+
+  // Placeholder data - in production, fetch from job results
+  const chartData = [
+    { date: '2026-01-01', actual: 100, predicted: 98 },
+    { date: '2026-01-02', actual: 120, predicted: 115 },
+    { date: '2026-01-03', actual: 110, predicted: 112 },
+    { date: '2026-01-04', actual: 140, predicted: 135 },
+    { date: '2026-01-05', actual: 130, predicted: 128 },
+  ]
+
+  return (
+    <div className="space-y-6">
+      <h1 className="text-2xl font-bold">Forecast Visualization</h1>
+
+      <div className="flex gap-4">
+        <Select value={storeId} onValueChange={setStoreId}>
+          <SelectTrigger className="w-48">
+            <SelectValue placeholder="Select store" />
+          </SelectTrigger>
+          <SelectContent>
+            {stores?.stores.map((store) => (
+              <SelectItem key={store.id} value={String(store.id)}>
+                {store.code} - {store.name}
+              </SelectItem>
+            ))}
+          </SelectContent>
+        </Select>
+
+        <Select value={productId} onValueChange={setProductId}>
+          <SelectTrigger className="w-48">
+            <SelectValue placeholder="Select product" />
+          </SelectTrigger>
+          <SelectContent>
+            {products?.products.map((product) => (
+              <SelectItem key={product.id} value={String(product.id)}>
+                {product.sku} - {product.name}
+              </SelectItem>
+            ))}
+          </SelectContent>
+        </Select>
+      </div>
+
+      <Card>
+        <CardHeader>
+          <CardTitle>Actual vs Predicted</CardTitle>
+        </CardHeader>
+        <CardContent className="h-[400px]">
+          <TimeSeriesChart data={chartData} showConfidence />
+        </CardContent>
+      </Card>
+    </div>
+  )
+}
+```
+
+#### Task 18: Create Main App Router
+
+File: `src/App.tsx`
+
+```typescript
+import { BrowserRouter, Routes, Route } from 'react-router-dom'
+import { QueryClientProvider } from '@tanstack/react-query'
+import { queryClient } from '@/lib/query-client'
+import { ThemeProvider } from '@/components/theme-provider'
+import { AppLayout } from '@/components/layout/app-layout'
+import { ErrorBoundary } from '@/components/error-boundary'
+
+// Pages
+import DashboardPage from '@/pages/dashboard'
+import StoresPage from '@/pages/explorer/stores'
+import ProductsPage from '@/pages/explorer/products'
+import RunsPage from '@/pages/explorer/runs'
+import JobsPage from '@/pages/explorer/jobs'
+import ForecastPage from '@/pages/visualize/forecast'
+
+export default function App() {
+  return (
+    <ErrorBoundary>
+      <QueryClientProvider client={queryClient}>
+        <ThemeProvider defaultTheme="system" storageKey="forecastlab-theme">
+          <BrowserRouter>
+            <Routes>
+              <Route element={<AppLayout />}>
+                <Route path="/" element={<DashboardPage />} />
+                <Route path="/explorer/stores" element={<StoresPage />} />
+                <Route path="/explorer/products" element={<ProductsPage />} />
+                <Route path="/explorer/runs" element={<RunsPage />} />
+                <Route path="/explorer/jobs" element={<JobsPage />} />
+                <Route path="/visualize/forecast" element={<ForecastPage />} />
+              </Route>
+            </Routes>
+          </BrowserRouter>
+        </ThemeProvider>
+      </QueryClientProvider>
+    </ErrorBoundary>
+  )
+}
+```
+
+---
+
+### Phase 5: Agent Chat (Tasks 19-21) - DEPENDS ON INITIAL-10
+
+#### Task 19: Create WebSocket Hook
+
+File: `src/hooks/use-websocket.ts`
+
+```typescript
+import { useEffect, useRef, useState, useCallback } from 'react'
+
+type ConnectionStatus = 'connecting' | 'connected' | 'disconnected' | 'error'
+
+interface UseWebSocketOptions {
+  onMessage?: (data: unknown) => void
+  onError?: (error: Event) => void
+  reconnectAttempts?: number
+  reconnectInterval?: number
+}
+
+export function useWebSocket(url: string | null, options: UseWebSocketOptions = {}) {
+  const {
+    onMessage,
+    onError,
+    reconnectAttempts = 5,
+    reconnectInterval = 3000,
+  } = options
+
+  const [status, setStatus] = useState<ConnectionStatus>('disconnected')
+  const wsRef = useRef<WebSocket | null>(null)
+  const reconnectCountRef = useRef(0)
+
+  const connect = useCallback(() => {
+    if (!url) return
+
+    setStatus('connecting')
+    const ws = new WebSocket(url)
+
+    ws.onopen = () => {
+      setStatus('connected')
+      reconnectCountRef.current = 0
+    }
+
+    ws.onmessage = (event) => {
+      try {
+        const data = JSON.parse(event.data)
+        onMessage?.(data)
+      } catch {
+        onMessage?.(event.data)
+      }
+    }
+
+    ws.onerror = (error) => {
+      setStatus('error')
+      onError?.(error)
+    }
+
+    ws.onclose = () => {
+      setStatus('disconnected')
+      if (reconnectCountRef.current < reconnectAttempts) {
+        reconnectCountRef.current++
+        setTimeout(connect, reconnectInterval)
+      }
+    }
+
+    wsRef.current = ws
+  }, [url, onMessage, onError, reconnectAttempts, reconnectInterval])
+
+  const disconnect = useCallback(() => {
+    reconnectCountRef.current = reconnectAttempts // Prevent auto-reconnect
+    wsRef.current?.close()
+    wsRef.current = null
+  }, [reconnectAttempts])
+
+  const send = useCallback((data: unknown) => {
+    if (wsRef.current?.readyState === WebSocket.OPEN) {
+      wsRef.current.send(typeof data === 'string' ? data : JSON.stringify(data))
+    }
+  }, [])
+
+  useEffect(() => {
+    connect()
+    return () => disconnect()
+  }, [connect, disconnect])
+
+  return { status, send, disconnect, reconnect: connect }
+}
+```
+
+#### Task 20: Create Chat Message Component
+
+File: `src/components/chat/chat-message.tsx`
+
+```typescript
+import { cn } from '@/lib/utils'
+import { Card } from '@/components/ui/card'
+
+interface Citation {
+  source_type: string
+  source_id: string
+  chunk_id: string
+  snippet: string
+}
+
+interface ToolCall {
+  name: string
+  arguments: Record<string, unknown>
+  result?: unknown
+}
+
+interface ChatMessageProps {
+  role: 'user' | 'assistant'
+  content: string
+  citations?: Citation[]
+  toolCalls?: ToolCall[]
+  isStreaming?: boolean
+}
+
+export function ChatMessage({
+  role,
+  content,
+  citations,
+  toolCalls,
+  isStreaming,
+}: ChatMessageProps) {
+  return (
+    <div className={cn('flex', role === 'user' ? 'justify-end' : 'justify-start')}>
+      <Card className={cn(
+        'max-w-[80%] p-4',
+        role === 'user' ? 'bg-primary text-primary-foreground' : 'bg-muted'
+      )}>
+        <div className="prose prose-sm dark:prose-invert">
+          {content}
+          {isStreaming && <span className="animate-pulse ml-1">|</span>}
+        </div>
+
+        {citations && citations.length > 0 && (
+          <div className="mt-4 pt-4 border-t">
+            <p className="text-xs font-medium mb-2">Sources:</p>
+            <ul className="text-xs space-y-1">
+              {citations.map((citation, i) => (
+                <li key={i} className="text-muted-foreground">
+                  [{i + 1}] {citation.source_id}
+                </li>
+              ))}
+            </ul>
+          </div>
+        )}
+
+        {toolCalls && toolCalls.length > 0 && (
+          <details className="mt-4 pt-4 border-t text-xs">
+            <summary className="cursor-pointer font-medium">Tool Calls ({toolCalls.length})</summary>
+            <div className="mt-2 space-y-2">
+              {toolCalls.map((call, i) => (
+                <div key={i} className="bg-background/50 rounded p-2">
+                  <code>{call.name}</code>
+                </div>
+              ))}
+            </div>
+          </details>
+        )}
+      </Card>
+    </div>
+  )
+}
+```
+
+#### Task 21: Create Chat Page
+
+File: `src/pages/chat.tsx`
+
+```typescript
+import { useState, useCallback } from 'react'
+import { useWebSocket } from '@/hooks/use-websocket'
+import { ChatMessage } from '@/components/chat/chat-message'
+import { Button } from '@/components/ui/button'
+import { Input } from '@/components/ui/input'
+import { Card } from '@/components/ui/card'
+import { Send } from 'lucide-react'
+
+interface Message {
+  id: string
+  role: 'user' | 'assistant'
+  content: string
+  citations?: Array<{ source_type: string; source_id: string; chunk_id: string; snippet: string }>
+  toolCalls?: Array<{ name: string; arguments: Record<string, unknown>; result?: unknown }>
+  isStreaming?: boolean
+}
+
+const WS_URL = import.meta.env.VITE_WS_URL || 'ws://localhost:8123/agents/stream'
+
+export default function ChatPage() {
+  const [messages, setMessages] = useState<Message[]>([])
+  const [input, setInput] = useState('')
+  const [streamingContent, setStreamingContent] = useState('')
+
+  const handleMessage = useCallback((data: unknown) => {
+    const msg = data as { type: string; content?: string; done?: boolean; citations?: Message['citations']; tool_calls?: Message['toolCalls'] }
+
+    if (msg.type === 'token') {
+      setStreamingContent((prev) => prev + (msg.content || ''))
+    } else if (msg.type === 'done') {
+      setMessages((prev) => [
+        ...prev,
+        {
+          id: crypto.randomUUID(),
+          role: 'assistant',
+          content: streamingContent,
+          citations: msg.citations,
+          toolCalls: msg.tool_calls,
+        },
+      ])
+      setStreamingContent('')
+    }
+  }, [streamingContent])
+
+  const { status, send } = useWebSocket(WS_URL, { onMessage: handleMessage })
+
+  const handleSubmit = (e: React.FormEvent) => {
+    e.preventDefault()
+    if (!input.trim()) return
+
+    setMessages((prev) => [
+      ...prev,
+      { id: crypto.randomUUID(), role: 'user', content: input },
+    ])
+
+    send({
+      type: 'query',
+      agent: 'rag_assistant',
+      payload: { query: input },
+    })
+
+    setInput('')
+  }
+
+  return (
+    <div className="flex h-[calc(100vh-8rem)] flex-col">
+      <h1 className="text-2xl font-bold mb-4">ForecastLab Assistant</h1>
+
+      <Card className="flex-1 overflow-y-auto p-4 space-y-4">
+        {messages.map((msg) => (
+          <ChatMessage
+            key={msg.id}
+            role={msg.role}
+            content={msg.content}
+            citations={msg.citations}
+            toolCalls={msg.toolCalls}
+          />
+        ))}
+        {streamingContent && (
+          <ChatMessage role="assistant" content={streamingContent} isStreaming />
+        )}
+      </Card>
+
+      <form onSubmit={handleSubmit} className="mt-4 flex gap-2">
+        <Input
+          value={input}
+          onChange={(e) => setInput(e.target.value)}
+          placeholder="Ask about forecasting, backtesting, or data..."
+          disabled={status !== 'connected'}
+        />
+        <Button type="submit" disabled={status !== 'connected'}>
+          <Send className="h-4 w-4" />
+        </Button>
+      </form>
+
+      <p className="text-xs text-muted-foreground mt-2">
+        Status: {status}
+      </p>
+    </div>
+  )
+}
+```
+
+---
+
+### Phase 6: Admin Panel & Polish (Tasks 22-24)
+
+#### Task 22: Create Admin Page
+
+File: `src/pages/admin.tsx`
+
+```typescript
+import { useAliases, useCreateAlias } from '@/hooks/use-runs'
+import { Card, CardHeader, CardTitle, CardContent } from '@/components/ui/card'
+import { Button } from '@/components/ui/button'
+import { Badge } from '@/components/ui/badge'
+import {
+  Table,
+  TableBody,
+  TableCell,
+  TableHead,
+  TableHeader,
+  TableRow,
+} from '@/components/ui/table'
+
+export default function AdminPage() {
+  const { data: aliases, isLoading } = useAliases()
+
+  return (
+    <div className="space-y-6">
+      <h1 className="text-2xl font-bold">Admin Panel</h1>
+
+      <Card>
+        <CardHeader>
+          <CardTitle>Deployment Aliases</CardTitle>
+        </CardHeader>
+        <CardContent>
+          <Table>
+            <TableHeader>
+              <TableRow>
+                <TableHead>Alias Name</TableHead>
+                <TableHead>Run ID</TableHead>
+                <TableHead>Model Type</TableHead>
+                <TableHead>Status</TableHead>
+                <TableHead>Created</TableHead>
+              </TableRow>
+            </TableHeader>
+            <TableBody>
+              {isLoading ? (
+                <TableRow>
+                  <TableCell colSpan={5} className="text-center">Loading...</TableCell>
+                </TableRow>
+              ) : aliases?.length === 0 ? (
+                <TableRow>
+                  <TableCell colSpan={5} className="text-center">No aliases configured</TableCell>
+                </TableRow>
+              ) : (
+                aliases?.map((alias) => (
+                  <TableRow key={alias.alias_name}>
+                    <TableCell className="font-mono">{alias.alias_name}</TableCell>
+                    <TableCell className="font-mono">{alias.run_id.slice(0, 8)}</TableCell>
+                    <TableCell>{alias.model_type}</TableCell>
+                    <TableCell>
+                      <Badge variant={alias.run_status === 'success' ? 'default' : 'secondary'}>
+                        {alias.run_status}
+                      </Badge>
+                    </TableCell>
+                    <TableCell>{new Date(alias.created_at).toLocaleDateString()}</TableCell>
+                  </TableRow>
+                ))
+              )}
+            </TableBody>
+          </Table>
+        </CardContent>
+      </Card>
+    </div>
+  )
+}
+```
+
+#### Task 23: Create Environment Configuration
+
+File: `frontend/.env.example`
+
+```env
+# API Configuration
+VITE_API_BASE_URL=http://localhost:8123
+VITE_WS_URL=ws://localhost:8123/agents/stream
+
+# Feature Flags
+VITE_ENABLE_AGENT_CHAT=true
+VITE_ENABLE_ADMIN_PANEL=true
+
+# Visualization
+VITE_DEFAULT_PAGE_SIZE=25
+VITE_MAX_CHART_POINTS=365
+```
+
+#### Task 24: Create Examples Documentation
+
+File: `examples/ui/README.md`
+
+```markdown
+# ForecastLab Dashboard
+
+## Page Map
+
+| Page | Route | API Endpoints | Description |
+|------|-------|---------------|-------------|
+| Dashboard | `/` | `GET /analytics/kpis` | KPI summary cards |
+| Stores | `/explorer/stores` | `GET /dimensions/stores` | Store dimension table |
+| Products | `/explorer/products` | `GET /dimensions/products` | Product dimension table |
+| Model Runs | `/explorer/runs` | `GET /registry/runs` | Model run leaderboard |
+| Jobs | `/explorer/jobs` | `GET /jobs` | Job status monitor |
+| Forecast | `/visualize/forecast` | Job results | Forecast visualization |
+| Chat | `/chat` | `WS /agents/stream` | Agent chat interface |
+| Admin | `/admin` | `GET /registry/aliases` | Admin panel |
+
+## Running the Dashboard
+
+### Prerequisites
+- Node.js 20+
+- pnpm (recommended) or npm
+- Backend running on port 8123
+
+### Development
+
+```bash
+cd frontend
+pnpm install
+pnpm dev
+```
+
+Open http://localhost:5173
+
+### Production Build
+
+```bash
+cd frontend
+pnpm build
+pnpm preview
+```
+
+## Environment Variables
+
+Copy `.env.example` to `.env` and configure:
+
+| Variable | Default | Description |
+|----------|---------|-------------|
+| `VITE_API_BASE_URL` | `http://localhost:8123` | Backend API base URL |
+| `VITE_WS_URL` | `ws://localhost:8123/agents/stream` | WebSocket URL for chat |
+
+## Tech Stack
+
+- React 19 + TypeScript
+- Vite for bundling
+- shadcn/ui components
+- TanStack Table for data grids
+- TanStack Query for data fetching
+- Recharts for visualization
+- Tailwind CSS 4 for styling
+```
+
+---
+
+## Validation Loop
+
+### Level 1: Syntax & Style
+
+```bash
+cd frontend
+
+# TypeScript compilation
+pnpm tsc --noEmit
+
+# ESLint
+pnpm eslint src/
+
+# Expected: No errors
+```
+
+### Level 2: Build Validation
+
+```bash
+cd frontend
+
+# Development build
+pnpm dev  # Should start without errors
+
+# Production build
+pnpm build
+
+# Expected: Build completes, outputs to dist/
+```
+
+### Level 3: Integration Test
+
+```bash
+# 1. Start backend
+docker-compose up -d
+uv run uvicorn app.main:app --port 8123
+
+# 2. Start frontend
+cd frontend && pnpm dev
+
+# 3. Manual verification:
+# - Open http://localhost:5173
+# - Navigate to /explorer/stores
+# - Verify data loads from API
+# - Check pagination works
+# - Verify dark mode toggle
+
+# 4. Lighthouse audit (Chrome DevTools)
+# - Performance > 90
+# - Accessibility > 90
+```
+
+---
+
+## Final Validation Checklist
+
+- [ ] Vite project scaffolded with React 19 + TypeScript strict
+- [ ] shadcn/ui components installed and working
+- [ ] TanStack Table with server-side pagination
+- [ ] TanStack Query hooks for all API endpoints
+- [ ] Recharts time series visualization
+- [ ] WebSocket hook for agent chat (placeholder if INITIAL-10 not ready)
+- [ ] Dark/light theme toggle
+- [ ] Responsive sidebar navigation
+- [ ] Error boundary with retry
+- [ ] All TypeScript strict checks pass
+- [ ] ESLint passes
+- [ ] Production build succeeds
+- [ ] Lighthouse performance > 90
+
+---
+
+## Integration Points
+
+```yaml
+BACKEND_DEPENDENCY:
+  - Requires backend running on VITE_API_BASE_URL
+  - Uses /dimensions/*, /analytics/*, /registry/*, /jobs/* endpoints
+  - WebSocket requires INITIAL-10 completion for full chat functionality
+
+PHASE_DEPENDENCIES:
+  - INITIAL-9 (RAG): Admin panel shows /rag/sources (placeholder if not ready)
+  - INITIAL-10 (Agentic): Chat interface uses WS /agents/stream
+  - Phase 7 (Serving): All data tables consume serving layer endpoints
+
+FEATURE_FLAGS:
+  - VITE_ENABLE_AGENT_CHAT: Gate chat interface until INITIAL-10 ready
+  - VITE_ENABLE_ADMIN_PANEL: Gate admin features
+```
+
+---
+
+## Anti-Patterns to Avoid
+
+- Do NOT hardcode API URLs - always use `import.meta.env.VITE_API_BASE_URL`
+- Do NOT use `process.env` - that's Node.js, use `import.meta.env` for Vite
+- Do NOT install `shadcn-ui` package - use `npx shadcn@latest` CLI
+- Do NOT use `useTable` - TanStack Table v8 uses `useReactTable`
+- Do NOT forget `manualPagination: true` for server-side tables
+- Do NOT skip error boundaries - API errors should be caught gracefully
+- Do NOT create custom fetch wrappers with Promise.race timeout - use AbortController
+
+---
+
+## Confidence Score Breakdown
+
+| Area | Score | Rationale |
+|------|-------|-----------|
+| Project Scaffolding | 9/10 | Vite + React well documented |
+| shadcn/ui Integration | 8/10 | CLI-based, clear patterns |
+| TanStack Table | 8/10 | Server-side examples available |
+| TanStack Query | 9/10 | Mature library, clear docs |
+| Recharts | 8/10 | Straightforward API |
+| WebSocket Chat | 6/10 | Custom implementation needed, depends on INITIAL-10 |
+| TypeScript Types | 8/10 | Backend schemas well-defined |
+| Overall | **7.5/10** | Chat dependency on INITIAL-10 lowers confidence |
+
+**Note**: Full chat functionality requires INITIAL-10 (Agentic Layer) WebSocket endpoint. Implement chat page with placeholder/disabled state if INITIAL-10 not ready.
diff --git a/PRPs/PRP-9-rag-knowledge-base.md b/PRPs/PRP-9-rag-knowledge-base.md
new file mode 100644
index 00000000..011ef88b
--- /dev/null
+++ b/PRPs/PRP-9-rag-knowledge-base.md
@@ -0,0 +1,776 @@
+# PRP-9: RAG Knowledge Base ("The Memory")
+
+**Feature**: INITIAL-9.md — RAG Knowledge Base
+**Status**: Ready for Implementation
+**Confidence Score**: 8.5/10
+
+---
+
+## Goal
+
+Build the RAG Knowledge Base layer providing:
+1. **Document ingestion** with markdown-aware and OpenAPI-aware chunking
+2. **Vector storage** using PostgreSQL + pgvector for embeddings
+3. **Semantic retrieval** with configurable top-k and similarity thresholds
+4. **Idempotent re-indexing** via content hash comparison
+
+This is the foundational "Memory" layer that INITIAL-10 (Agentic Layer) will consume via the `retrieve_context` tool.
+
+---
+
+## Why
+
+- **Agent-Ready**: Provides `retrieve_context` tool for INITIAL-10 RAG Assistant
+- **Evidence-Grounded**: Returns raw chunks with citations (no hallucination)
+- **Cost-Effective**: Uses existing PostgreSQL (no new infrastructure)
+- **Portfolio Value**: Demonstrates full-stack RAG implementation
+
+---
+
+## What
+
+### Endpoints
+
+| Method | Path | Description |
+|--------|------|-------------|
+| `POST` | `/rag/index` | Index document (markdown/openapi) |
+| `POST` | `/rag/retrieve` | Semantic search with filters |
+| `GET` | `/rag/sources` | List indexed sources |
+| `DELETE` | `/rag/sources/{source_id}` | Remove source and chunks |
+
+### Success Criteria
+
+- [ ] pgvector extension enabled via migration
+- [ ] Markdown chunker respects heading boundaries
+- [ ] OpenAPI chunker produces one chunk per endpoint
+- [ ] Async batch embedding with OpenAI API
+- [ ] HNSW index for sub-100ms retrieval
+- [ ] Idempotent re-indexing (content_hash change detection)
+- [ ] 80+ unit tests, 15+ integration tests
+- [ ] All validation gates green (ruff, mypy, pyright, pytest)
+
+---
+
+## All Needed Context
+
+### Documentation & References
+
+```yaml
+# CRITICAL - pgvector SQLAlchemy Integration
+- url: https://github.com/pgvector/pgvector-python
+  why: "Official pgvector Python library - Vector column, HNSW index, cosine_distance"
+
+- url: https://github.com/pgvector/pgvector-python/blob/master/README.md
+  why: "SQLAlchemy 2.0 patterns, Index creation with postgresql_ops"
+
+# pgvector Indexing
+- url: https://neon.com/blog/understanding-vector-search-and-hnsw-index-with-pgvector
+  why: "HNSW vs IVFFlat tradeoffs, index tuning parameters"
+
+# OpenAI Embeddings
+- url: https://platform.openai.com/docs/api-reference/embeddings
+  why: "Embeddings API reference - batch processing, input limits (8192 tokens)"
+
+- url: https://platform.openai.com/docs/guides/embeddings
+  why: "Best practices, token counting with tiktoken cl100k_base"
+
+# Markdown Chunking
+- url: https://python.langchain.com/docs/how_to/markdown_header_metadata_splitter/
+  why: "MarkdownHeaderTextSplitter pattern for heading-aware splitting"
+
+# Codebase Patterns (CRITICAL)
+- file: app/features/registry/models.py
+  why: "ORM pattern with JSONB, TimestampMixin, Index creation"
+
+- file: app/features/registry/schemas.py
+  why: "Pydantic v2 patterns - ConfigDict, field_validator, from_attributes"
+
+- file: app/features/registry/routes.py
+  why: "FastAPI patterns - APIRouter, response_model, HTTPException"
+
+- file: app/features/registry/service.py
+  why: "Async service pattern - get_settings(), structured logging"
+
+- file: app/features/registry/tests/conftest.py
+  why: "Test fixtures - db_session, client, cleanup patterns"
+
+# ADR
+- file: docs/ADR/ADR-0003-vector-storage-pgvector-in-postgres.md
+  why: "Architectural decision for pgvector over dedicated vector DB"
+```
+
+### Current Codebase Tree (Relevant Parts)
+
+```
+app/
+├── core/
+│   ├── config.py          # Settings singleton - ADD RAG settings here
+│   ├── database.py         # Base, get_db, get_engine
+│   ├── logging.py          # get_logger, structured logging
+│   └── exceptions.py       # ForecastLabError base class
+├── shared/
+│   └── models.py           # TimestampMixin
+├── features/
+│   ├── registry/           # REFERENCE: Follow this pattern exactly
+│   │   ├── models.py
+│   │   ├── schemas.py
+│   │   ├── routes.py
+│   │   ├── service.py
+│   │   ├── storage.py
+│   │   └── tests/
+│   └── rag/                # NEW: Create this vertical slice
+├── main.py                  # Include rag router here
+docker-compose.yml           # Already uses pgvector/pgvector:pg16
+alembic/versions/            # Add migration for pgvector extension + tables
+```
+
+### Desired Codebase Tree (Files to Create)
+
+```
+app/features/rag/
+├── __init__.py              # Export router
+├── models.py                # DocumentSource, DocumentChunk ORM models
+├── schemas.py               # IndexRequest/Response, RetrieveRequest/Response, etc.
+├── routes.py                # FastAPI router with /rag/* endpoints
+├── service.py               # RAGService - indexing and retrieval logic
+├── chunkers.py              # MarkdownChunker, OpenAPIChunker classes
+├── embeddings.py            # EmbeddingService - async OpenAI API calls
+├── tests/
+│   ├── __init__.py
+│   ├── conftest.py          # db_session, client fixtures
+│   ├── test_schemas.py      # Schema validation tests
+│   ├── test_chunkers.py     # Chunking logic tests (unit, no DB)
+│   ├── test_embeddings.py   # Embedding tests with mocked API
+│   ├── test_service.py      # Service tests (unit + integration)
+│   └── test_routes.py       # Route integration tests
+
+alembic/versions/
+└── xxxx_create_rag_tables.py  # Migration with CREATE EXTENSION vector
+
+examples/rag/
+├── index_docs.py            # Example: index docs/ directory
+└── query.http               # HTTP client examples
+```
+
+### Known Gotchas & Library Quirks
+
+```python
+# CRITICAL: pgvector SQLAlchemy requires explicit import
+from pgvector.sqlalchemy import Vector  # NOT from sqlalchemy
+
+# CRITICAL: HNSW index requires vector_cosine_ops for cosine distance
+Index(
+    "ix_embedding_hnsw",
+    DocumentChunk.embedding,
+    postgresql_using="hnsw",
+    postgresql_with={"m": 16, "ef_construction": 64},
+    postgresql_ops={"embedding": "vector_cosine_ops"},  # MUST match query distance
+)
+
+# CRITICAL: Cosine distance query uses cosine_distance method
+from pgvector.sqlalchemy import Vector
+stmt = select(DocumentChunk).order_by(
+    DocumentChunk.embedding.cosine_distance(query_embedding)  # NOT <=> operator
+).limit(top_k)
+
+# CRITICAL: OpenAI embeddings input limit is 8192 tokens per text
+# Use tiktoken to count tokens before sending to API
+import tiktoken
+enc = tiktoken.get_encoding("cl100k_base")
+tokens = enc.encode(text)
+if len(tokens) > 8191:
+    # Truncate or split
+
+# CRITICAL: OpenAI API returns embeddings in same order as input
+# But batch requests should be <= 2048 inputs per call
+
+# CRITICAL: Pydantic v2 uses ConfigDict, not class Config
+from pydantic import BaseModel, ConfigDict
+class MySchema(BaseModel):
+    model_config = ConfigDict(from_attributes=True, extra="forbid")
+
+# CRITICAL: SQLAlchemy 2.0 uses Mapped[] and mapped_column()
+from sqlalchemy.orm import Mapped, mapped_column
+embedding = mapped_column(Vector(1536))  # Vector column
+
+# CRITICAL: Alembic migration needs op.execute for CREATE EXTENSION
+op.execute("CREATE EXTENSION IF NOT EXISTS vector")
+```
+
+---
+
+## Implementation Blueprint
+
+### Data Models
+
+#### ORM Models (models.py)
+
+```python
+"""RAG knowledge base ORM models."""
+from __future__ import annotations
+import uuid
+from datetime import datetime
+from typing import Any
+from sqlalchemy import (
+    DateTime, Index, Integer, String, Text, UniqueConstraint, ForeignKey,
+)
+from sqlalchemy.dialects.postgresql import JSONB
+from sqlalchemy.orm import Mapped, mapped_column, relationship
+from pgvector.sqlalchemy import Vector
+from app.core.database import Base
+from app.shared.models import TimestampMixin
+
+
+class DocumentSource(TimestampMixin, Base):
+    """Registered document source for indexing."""
+    __tablename__ = "document_source"
+
+    id: Mapped[int] = mapped_column(Integer, primary_key=True)
+    source_id: Mapped[str] = mapped_column(String(32), unique=True, index=True)
+    source_type: Mapped[str] = mapped_column(String(50), index=True)  # markdown, openapi
+    source_path: Mapped[str] = mapped_column(Text, nullable=False)
+    content_hash: Mapped[str] = mapped_column(String(64), nullable=False)  # SHA-256
+    metadata_: Mapped[dict[str, Any] | None] = mapped_column("metadata", JSONB, nullable=True)
+    indexed_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), nullable=False)
+
+    # Relationship
+    chunks: Mapped[list[DocumentChunk]] = relationship(
+        back_populates="source", cascade="all, delete-orphan"
+    )
+
+    __table_args__ = (
+        UniqueConstraint("source_type", "source_path", name="uq_source_type_path"),
+    )
+
+
+class DocumentChunk(TimestampMixin, Base):
+    """Indexed document chunk with embedding."""
+    __tablename__ = "document_chunk"
+
+    id: Mapped[int] = mapped_column(Integer, primary_key=True)
+    chunk_id: Mapped[str] = mapped_column(String(32), unique=True, index=True)
+    source_id: Mapped[int] = mapped_column(
+        Integer, ForeignKey("document_source.id", ondelete="CASCADE"), index=True
+    )
+    chunk_index: Mapped[int] = mapped_column(Integer, nullable=False)
+    content: Mapped[str] = mapped_column(Text, nullable=False)
+    embedding = mapped_column(Vector(1536), nullable=True)  # Dimension from settings
+    token_count: Mapped[int] = mapped_column(Integer, nullable=False)
+    metadata_: Mapped[dict[str, Any] | None] = mapped_column("metadata", JSONB, nullable=True)
+
+    # Relationship
+    source: Mapped[DocumentSource] = relationship(back_populates="chunks")
+
+    __table_args__ = (
+        UniqueConstraint("source_id", "chunk_index", name="uq_source_chunk_index"),
+        Index(
+            "ix_chunk_embedding_hnsw",
+            "embedding",
+            postgresql_using="hnsw",
+            postgresql_with={"m": 16, "ef_construction": 64},
+            postgresql_ops={"embedding": "vector_cosine_ops"},
+        ),
+        Index("ix_chunk_metadata_gin", "metadata", postgresql_using="gin"),
+    )
+```
+
+#### Pydantic Schemas (schemas.py)
+
+```python
+"""Pydantic schemas for RAG API contracts."""
+from datetime import datetime
+from typing import Any, Literal
+from pydantic import BaseModel, ConfigDict, Field, field_validator
+
+
+class IndexRequest(BaseModel):
+    """Request to index a document."""
+    model_config = ConfigDict(extra="forbid")
+
+    source_type: Literal["markdown", "openapi"] = Field(
+        ..., description="Type of document to index"
+    )
+    source_path: str = Field(..., min_length=1, max_length=500)
+    content: str | None = Field(None, description="Optional content override")
+    metadata: dict[str, Any] | None = Field(None, description="Custom metadata")
+
+
+class IndexResponse(BaseModel):
+    """Response from indexing operation."""
+    model_config = ConfigDict(from_attributes=True)
+
+    source_id: str
+    source_path: str
+    chunks_created: int
+    tokens_processed: int
+    duration_ms: float
+    status: Literal["indexed", "updated", "unchanged"]
+
+
+class RetrieveRequest(BaseModel):
+    """Request for semantic search."""
+    model_config = ConfigDict(extra="forbid")
+
+    query: str = Field(..., min_length=1, max_length=2000)
+    top_k: int = Field(default=5, ge=1, le=50)
+    similarity_threshold: float = Field(default=0.7, ge=0.0, le=1.0)
+    filters: dict[str, Any] | None = Field(None, description="Metadata filters")
+
+
+class ChunkResult(BaseModel):
+    """Single chunk in retrieval results."""
+    model_config = ConfigDict(from_attributes=True)
+
+    chunk_id: str
+    source_id: str
+    source_path: str
+    source_type: str
+    content: str
+    relevance_score: float
+    metadata: dict[str, Any] | None = None
+
+
+class RetrieveResponse(BaseModel):
+    """Response from retrieval operation."""
+    results: list[ChunkResult]
+    query_embedding_time_ms: float
+    search_time_ms: float
+    total_chunks_searched: int
+
+
+class SourceResponse(BaseModel):
+    """Source details response."""
+    model_config = ConfigDict(from_attributes=True)
+
+    source_id: str
+    source_type: str
+    source_path: str
+    chunk_count: int
+    content_hash: str
+    indexed_at: datetime
+    metadata: dict[str, Any] | None = None
+
+
+class SourceListResponse(BaseModel):
+    """List of indexed sources."""
+    sources: list[SourceResponse]
+    total_sources: int
+    total_chunks: int
+
+
+class DeleteResponse(BaseModel):
+    """Response from delete operation."""
+    source_id: str
+    chunks_deleted: int
+    status: Literal["deleted"]
+```
+
+---
+
+## Task List
+
+### Task 1: Add Dependencies to pyproject.toml
+
+```yaml
+MODIFY: pyproject.toml
+ADD to dependencies:
+  - "pgvector>=0.3.0"      # pgvector SQLAlchemy support
+  - "openai>=1.40.0"       # OpenAI API client (async)
+  - "tiktoken>=0.7.0"      # Token counting for chunk size
+  - "httpx>=0.28.0"        # Already in dev, may need in main for async HTTP
+```
+
+### Task 2: Add RAG Settings to config.py
+
+```yaml
+MODIFY: app/core/config.py
+ADD after "jobs_retention_days" (~line 65):
+  # RAG Embedding Configuration
+  rag_embedding_model: str = "text-embedding-3-small"
+  rag_embedding_dimension: int = 1536
+  rag_embedding_batch_size: int = 100
+  openai_api_key: str = ""  # Required for embeddings
+
+  # RAG Chunking Configuration
+  rag_chunk_size: int = 512  # tokens
+  rag_chunk_overlap: int = 50  # tokens
+  rag_min_chunk_size: int = 100
+
+  # RAG Retrieval Configuration
+  rag_top_k: int = 5
+  rag_similarity_threshold: float = 0.7
+  rag_max_context_tokens: int = 4000
+
+  # RAG Index Configuration
+  rag_index_type: Literal["hnsw", "ivfflat"] = "hnsw"
+  rag_hnsw_m: int = 16
+  rag_hnsw_ef_construction: int = 64
+```
+
+### Task 3: Create Alembic Migration
+
+```yaml
+CREATE: alembic/versions/xxxx_create_rag_tables.py
+PATTERN: Follow app/features/registry migration pattern
+
+Pseudocode:
+def upgrade():
+    # Enable pgvector extension
+    op.execute("CREATE EXTENSION IF NOT EXISTS vector")
+
+    # Create document_source table
+    op.create_table("document_source", ...)
+
+    # Create document_chunk table with Vector column
+    op.create_table("document_chunk",
+        sa.Column("embedding", Vector(1536), nullable=True),
+        ...
+    )
+
+    # Create HNSW index
+    op.create_index(
+        "ix_chunk_embedding_hnsw",
+        "document_chunk",
+        ["embedding"],
+        postgresql_using="hnsw",
+        postgresql_with={"m": 16, "ef_construction": 64},
+        postgresql_ops={"embedding": "vector_cosine_ops"},
+    )
+```
+
+### Task 4: Create ORM Models
+
+```yaml
+CREATE: app/features/rag/models.py
+MIRROR: app/features/registry/models.py pattern
+CRITICAL:
+  - Use pgvector.sqlalchemy.Vector for embedding column
+  - Add HNSW index in __table_args__
+  - Use TimestampMixin
+  - Cascade delete from source to chunks
+```
+
+### Task 5: Create Pydantic Schemas
+
+```yaml
+CREATE: app/features/rag/schemas.py
+MIRROR: app/features/registry/schemas.py pattern
+INCLUDE:
+  - IndexRequest, IndexResponse
+  - RetrieveRequest, RetrieveResponse, ChunkResult
+  - SourceResponse, SourceListResponse
+  - DeleteResponse
+```
+
+### Task 6: Create Chunker Classes
+
+```yaml
+CREATE: app/features/rag/chunkers.py
+
+Classes:
+  BaseChunker (ABC):
+    - chunk(content: str) -> list[ChunkData]
+
+  MarkdownChunker(BaseChunker):
+    - Split on heading boundaries (# ## ###)
+    - Respect chunk_size and chunk_overlap from settings
+    - Extract heading hierarchy for metadata
+    - Use tiktoken cl100k_base for token counting
+
+  OpenAPIChunker(BaseChunker):
+    - Parse OpenAPI JSON/YAML
+    - One chunk per endpoint (path + method)
+    - Include operation summary, description, parameters
+
+CRITICAL:
+  - Use tiktoken for token counting (cl100k_base encoding)
+  - Never exceed 8191 tokens per chunk (OpenAI limit)
+```
+
+### Task 7: Create Embedding Service
+
+```yaml
+CREATE: app/features/rag/embeddings.py
+
+Class EmbeddingService:
+  __init__(self):
+    - Load settings (api_key, model, dimension, batch_size)
+    - Initialize AsyncOpenAI client
+
+  async def embed_texts(self, texts: list[str]) -> list[list[float]]:
+    - Batch texts into groups of batch_size
+    - Call OpenAI embeddings API for each batch
+    - Handle rate limits with exponential backoff
+    - Return embeddings in same order as input
+
+  async def embed_query(self, query: str) -> list[float]:
+    - Single text embedding for retrieval queries
+
+CRITICAL:
+  - Use openai.AsyncOpenAI for async calls
+  - Validate token count before API call
+  - Log token usage for cost tracking
+```
+
+### Task 8: Create RAG Service
+
+```yaml
+CREATE: app/features/rag/service.py
+MIRROR: app/features/registry/service.py pattern
+
+Class RAGService:
+  async def index_document(self, db, request: IndexRequest) -> IndexResponse:
+    - Read content from source_path (or use provided content)
+    - Compute SHA-256 content hash
+    - Check if source exists with same hash (skip if unchanged)
+    - Chunk content using appropriate chunker
+    - Generate embeddings for all chunks
+    - Upsert source record
+    - Delete old chunks, insert new chunks
+    - Return IndexResponse with stats
+
+  async def retrieve(self, db, request: RetrieveRequest) -> RetrieveResponse:
+    - Generate query embedding
+    - Build pgvector similarity query with cosine_distance
+    - Apply metadata filters if provided
+    - Execute query, compute relevance scores
+    - Return top-k results above threshold
+
+  async def list_sources(self, db) -> SourceListResponse:
+    - Query all sources with chunk counts
+    - Return paginated list
+
+  async def delete_source(self, db, source_id: str) -> DeleteResponse:
+    - Find source by source_id
+    - Delete (cascades to chunks)
+    - Return delete count
+
+CRITICAL:
+  - Use cosine_distance for similarity (NOT l2_distance)
+  - Relevance score = 1 - cosine_distance (normalized to 0-1)
+  - Handle source not found with 404
+```
+
+### Task 9: Create FastAPI Routes
+
+```yaml
+CREATE: app/features/rag/routes.py
+MIRROR: app/features/registry/routes.py pattern
+
+Routes:
+  POST /rag/index -> IndexResponse (201 CREATED)
+  POST /rag/retrieve -> RetrieveResponse (200 OK)
+  GET /rag/sources -> SourceListResponse (200 OK)
+  DELETE /rag/sources/{source_id} -> DeleteResponse (200 OK)
+
+CRITICAL:
+  - Use structured logging with rag.* event prefix
+  - Handle OpenAI API errors gracefully
+  - Validate source_id format
+```
+
+### Task 10: Register Router in main.py
+
+```yaml
+MODIFY: app/main.py
+ADD import: from app.features.rag.routes import router as rag_router
+ADD router: app.include_router(rag_router)
+```
+
+### Task 11: Create Test Fixtures
+
+```yaml
+CREATE: app/features/rag/tests/conftest.py
+MIRROR: app/features/registry/tests/conftest.py
+
+Fixtures:
+  - db_session: Async session with cleanup (delete test-* sources)
+  - client: AsyncClient with db override
+  - sample_markdown_content: Test markdown with headings
+  - sample_openapi_content: Test OpenAPI spec
+  - mock_embedding_service: Mocked EmbeddingService for unit tests
+```
+
+### Task 12: Create Unit Tests
+
+```yaml
+CREATE: app/features/rag/tests/test_schemas.py
+  - Test IndexRequest validation
+  - Test RetrieveRequest validation (query length, threshold bounds)
+
+CREATE: app/features/rag/tests/test_chunkers.py
+  - Test MarkdownChunker respects heading boundaries
+  - Test MarkdownChunker respects chunk_size
+  - Test MarkdownChunker extracts heading metadata
+  - Test OpenAPIChunker creates one chunk per endpoint
+  - Test chunk token counts are within limits
+
+CREATE: app/features/rag/tests/test_embeddings.py
+  - Test embed_texts batching logic
+  - Test embed_query returns correct dimension
+  - Mock OpenAI API responses
+
+CREATE: app/features/rag/tests/test_service.py (unit)
+  - Test content hash computation
+  - Test idempotent re-indexing logic
+  - Test relevance score normalization
+```
+
+### Task 13: Create Integration Tests
+
+```yaml
+CREATE: app/features/rag/tests/test_routes.py
+@pytest.mark.integration tests:
+  - test_index_markdown_creates_chunks
+  - test_index_same_content_returns_unchanged
+  - test_index_updated_content_re_indexes
+  - test_retrieve_returns_relevant_chunks
+  - test_retrieve_respects_threshold
+  - test_list_sources_returns_all
+  - test_delete_source_removes_chunks
+  - test_delete_nonexistent_returns_404
+```
+
+### Task 14: Create Examples
+
+```yaml
+CREATE: examples/rag/index_docs.py
+  - Script to index docs/ directory
+
+CREATE: examples/rag/query.http
+  - HTTP client examples for all endpoints
+```
+
+### Task 15: Update .env.example
+
+```yaml
+MODIFY: .env.example
+ADD:
+  # RAG Configuration
+  OPENAI_API_KEY=sk-...
+  RAG_EMBEDDING_MODEL=text-embedding-3-small
+  RAG_CHUNK_SIZE=512
+  RAG_TOP_K=5
+```
+
+---
+
+## Validation Loop
+
+### Level 1: Syntax & Style
+
+```bash
+# Run FIRST - fix any errors before proceeding
+uv run ruff check app/features/rag/ --fix
+uv run ruff format app/features/rag/
+
+# Expected: No errors
+```
+
+### Level 2: Type Checking
+
+```bash
+# MUST be green
+uv run mypy app/features/rag/
+uv run pyright app/features/rag/
+
+# Expected: 0 errors on both
+```
+
+### Level 3: Unit Tests
+
+```bash
+# No database required
+uv run pytest app/features/rag/tests/ -v -m "not integration"
+
+# Expected: All pass
+# If failing: Read error, fix code, re-run
+```
+
+### Level 4: Integration Tests
+
+```bash
+# Requires PostgreSQL running
+docker-compose up -d
+
+# Run migrations
+uv run alembic upgrade head
+
+# Run integration tests
+uv run pytest app/features/rag/tests/ -v -m integration
+
+# Expected: All pass
+```
+
+### Level 5: Manual Smoke Test
+
+```bash
+# Start API
+uv run uvicorn app.main:app --reload --port 8123
+
+# Index a document
+curl -X POST http://localhost:8123/rag/index \
+  -H "Content-Type: application/json" \
+  -d '{"source_type": "markdown", "source_path": "README.md"}'
+
+# Expected: {"source_id": "...", "chunks_created": N, ...}
+
+# Retrieve
+curl -X POST http://localhost:8123/rag/retrieve \
+  -H "Content-Type: application/json" \
+  -d '{"query": "What is ForecastLabAI?", "top_k": 3}'
+
+# Expected: {"results": [...], ...}
+
+# List sources
+curl http://localhost:8123/rag/sources
+
+# Delete source
+curl -X DELETE http://localhost:8123/rag/sources/{source_id}
+```
+
+---
+
+## Final Validation Checklist
+
+- [ ] All tests pass: `uv run pytest app/features/rag/tests/ -v`
+- [ ] No linting errors: `uv run ruff check app/features/rag/`
+- [ ] No type errors: `uv run mypy app/features/rag/ && uv run pyright app/features/rag/`
+- [ ] Migration applies cleanly: `uv run alembic upgrade head`
+- [ ] Manual smoke test successful
+- [ ] Structured logging events follow `rag.*` prefix
+- [ ] Content hash prevents duplicate embeddings
+- [ ] HNSW index used for similarity queries
+
+---
+
+## Anti-Patterns to Avoid
+
+- ❌ Don't use `l2_distance` when you want cosine similarity
+- ❌ Don't forget to enable pgvector extension in migration
+- ❌ Don't exceed 8191 tokens per embedding input
+- ❌ Don't use sync OpenAI client - use AsyncOpenAI
+- ❌ Don't hardcode embedding dimensions - use settings
+- ❌ Don't catch all exceptions - be specific
+- ❌ Don't skip content hash comparison (wastes API calls)
+- ❌ Don't create new patterns when registry patterns work
+
+---
+
+## Confidence Score: 8.5/10
+
+**Strengths:**
+- Docker already has pgvector image
+- Clear patterns from registry module to follow
+- Comprehensive documentation available
+- ADR decision already made
+
+**Risks:**
+- OpenAI API rate limits during bulk indexing
+- HNSW index creation on large datasets may be slow
+- tiktoken token counting edge cases
+
+**Mitigations:**
+- Implement exponential backoff for API calls
+- Create index after initial data load
+- Extensive unit tests for chunking edge cases
diff --git a/README.md b/README.md
index 82e24494..9d1285a3 100644
--- a/README.md
+++ b/README.md
@@ -454,6 +454,59 @@ curl -X POST http://localhost:8123/jobs \
 - JSONB storage for flexible params and results
 - Links to model_run for train/backtest jobs
 
+### RAG Knowledge Base
+
+- `POST /rag/index` - Index a document into the knowledge base
+- `POST /rag/retrieve` - Semantic search across indexed documents
+- `GET /rag/sources` - List indexed sources
+- `DELETE /rag/sources/{source_id}` - Delete a source and its chunks
+
+**Embedding Providers:**
+
+The RAG system supports two embedding providers:
+
+1. **OpenAI** (default):
+```bash
+RAG_EMBEDDING_PROVIDER=openai
+OPENAI_API_KEY=sk-your-key
+RAG_EMBEDDING_MODEL=text-embedding-3-small
+RAG_EMBEDDING_DIMENSION=1536
+```
+
+2. **Ollama** (local/LAN):
+```bash
+RAG_EMBEDDING_PROVIDER=ollama
+OLLAMA_BASE_URL=http://localhost:11434
+OLLAMA_EMBEDDING_MODEL=nomic-embed-text
+RAG_EMBEDDING_DIMENSION=768
+```
+
+**Example Index Request:**
+```bash
+curl -X POST http://localhost:8123/rag/index \
+  -H "Content-Type: application/json" \
+  -d '{
+    "source_type": "markdown",
+    "source_path": "docs/ARCHITECTURE.md"
+  }'
+```
+
+**Example Retrieve Request:**
+```bash
+curl -X POST http://localhost:8123/rag/retrieve \
+  -H "Content-Type: application/json" \
+  -d '{
+    "query": "How does backtesting work?",
+    "top_k": 5
+  }'
+```
+
+**Features:**
+- pgvector for HNSW similarity search
+- Idempotent indexing via content hash
+- Markdown and OpenAPI chunking strategies
+- Configurable embedding dimensions
+
 ### Error Responses (RFC 7807)
 
 All error responses follow RFC 7807 Problem Details format with `Content-Type: application/problem+json`:
diff --git a/alembic/env.py b/alembic/env.py
index b3d317b0..8d9890f3 100644
--- a/alembic/env.py
+++ b/alembic/env.py
@@ -14,6 +14,7 @@
 # Import all models for Alembic autogenerate detection
 from app.features.data_platform import models as data_platform_models  # noqa: F401
 from app.features.jobs import models as jobs_models  # noqa: F401
+from app.features.rag import models as rag_models  # noqa: F401
 from app.features.registry import models as registry_models  # noqa: F401
 
 # Alembic Config object
diff --git a/alembic/versions/b4c8d9e0f123_create_rag_tables.py b/alembic/versions/b4c8d9e0f123_create_rag_tables.py
new file mode 100644
index 00000000..e0d76cbc
--- /dev/null
+++ b/alembic/versions/b4c8d9e0f123_create_rag_tables.py
@@ -0,0 +1,153 @@
+"""create_rag_tables
+
+Revision ID: b4c8d9e0f123
+Revises: 37e16ecef223
+Create Date: 2026-02-01 12:00:00.000000
+
+"""
+
+from typing import Sequence, Union
+
+import sqlalchemy as sa
+from alembic import op
+from pgvector.sqlalchemy import Vector
+from sqlalchemy.dialects import postgresql
+
+# revision identifiers, used by Alembic.
+revision: str = "b4c8d9e0f123"
+down_revision: Union[str, None] = "37e16ecef223"
+branch_labels: Union[str, Sequence[str], None] = None
+depends_on: Union[str, Sequence[str], None] = None
+
+
+def upgrade() -> None:
+    """Apply migration - create document_source and document_chunk tables with pgvector."""
+    # Enable pgvector extension
+    op.execute("CREATE EXTENSION IF NOT EXISTS vector")
+
+    # Create document_source table
+    op.create_table(
+        "document_source",
+        sa.Column("id", sa.Integer(), nullable=False),
+        sa.Column("source_id", sa.String(length=32), nullable=False),
+        sa.Column("source_type", sa.String(length=50), nullable=False),
+        sa.Column("source_path", sa.Text(), nullable=False),
+        sa.Column("content_hash", sa.String(length=64), nullable=False),
+        sa.Column("metadata", postgresql.JSONB(astext_type=sa.Text()), nullable=True),
+        sa.Column("indexed_at", sa.DateTime(timezone=True), nullable=False),
+        # Timestamps (from TimestampMixin)
+        sa.Column(
+            "created_at",
+            sa.DateTime(timezone=True),
+            server_default=sa.text("now()"),
+            nullable=False,
+        ),
+        sa.Column(
+            "updated_at",
+            sa.DateTime(timezone=True),
+            server_default=sa.text("now()"),
+            nullable=False,
+        ),
+        # Constraints
+        sa.PrimaryKeyConstraint("id"),
+        sa.UniqueConstraint("source_type", "source_path", name="uq_source_type_path"),
+    )
+
+    # Create indexes for document_source
+    op.create_index(
+        op.f("ix_document_source_source_id"),
+        "document_source",
+        ["source_id"],
+        unique=True,
+    )
+    op.create_index(
+        op.f("ix_document_source_source_type"),
+        "document_source",
+        ["source_type"],
+        unique=False,
+    )
+
+    # Create document_chunk table with Vector column
+    op.create_table(
+        "document_chunk",
+        sa.Column("id", sa.Integer(), nullable=False),
+        sa.Column("chunk_id", sa.String(length=32), nullable=False),
+        sa.Column("source_id", sa.Integer(), nullable=False),
+        sa.Column("chunk_index", sa.Integer(), nullable=False),
+        sa.Column("content", sa.Text(), nullable=False),
+        sa.Column("embedding", Vector(1536), nullable=True),
+        sa.Column("token_count", sa.Integer(), nullable=False),
+        sa.Column("metadata", postgresql.JSONB(astext_type=sa.Text()), nullable=True),
+        # Timestamps (from TimestampMixin)
+        sa.Column(
+            "created_at",
+            sa.DateTime(timezone=True),
+            server_default=sa.text("now()"),
+            nullable=False,
+        ),
+        sa.Column(
+            "updated_at",
+            sa.DateTime(timezone=True),
+            server_default=sa.text("now()"),
+            nullable=False,
+        ),
+        # Constraints
+        sa.PrimaryKeyConstraint("id"),
+        sa.ForeignKeyConstraint(
+            ["source_id"],
+            ["document_source.id"],
+            ondelete="CASCADE",
+        ),
+        sa.UniqueConstraint("source_id", "chunk_index", name="uq_source_chunk_index"),
+    )
+
+    # Create indexes for document_chunk
+    op.create_index(
+        op.f("ix_document_chunk_chunk_id"),
+        "document_chunk",
+        ["chunk_id"],
+        unique=True,
+    )
+    op.create_index(
+        op.f("ix_document_chunk_source_id"),
+        "document_chunk",
+        ["source_id"],
+        unique=False,
+    )
+
+    # Create HNSW index for vector similarity search (cosine distance)
+    op.create_index(
+        "ix_chunk_embedding_hnsw",
+        "document_chunk",
+        ["embedding"],
+        unique=False,
+        postgresql_using="hnsw",
+        postgresql_with={"m": 16, "ef_construction": 64},
+        postgresql_ops={"embedding": "vector_cosine_ops"},
+    )
+
+    # Create GIN index for metadata filtering
+    op.create_index(
+        "ix_chunk_metadata_gin",
+        "document_chunk",
+        ["metadata"],
+        unique=False,
+        postgresql_using="gin",
+    )
+
+
+def downgrade() -> None:
+    """Revert migration - drop document_source and document_chunk tables."""
+    # Drop document_chunk indexes and table
+    op.drop_index("ix_chunk_metadata_gin", table_name="document_chunk")
+    op.drop_index("ix_chunk_embedding_hnsw", table_name="document_chunk")
+    op.drop_index(op.f("ix_document_chunk_source_id"), table_name="document_chunk")
+    op.drop_index(op.f("ix_document_chunk_chunk_id"), table_name="document_chunk")
+    op.drop_table("document_chunk")
+
+    # Drop document_source indexes and table
+    op.drop_index(op.f("ix_document_source_source_type"), table_name="document_source")
+    op.drop_index(op.f("ix_document_source_source_id"), table_name="document_source")
+    op.drop_table("document_source")
+
+    # Note: We don't drop the vector extension as it might be used by other tables
diff --git a/alembic/versions/c5d9e1f2g345_rag_dynamic_embedding_dimension.py b/alembic/versions/c5d9e1f2g345_rag_dynamic_embedding_dimension.py
new file mode 100644
index 00000000..abc976be
--- /dev/null
+++ b/alembic/versions/c5d9e1f2g345_rag_dynamic_embedding_dimension.py
@@ -0,0 +1,84 @@
+"""rag_dynamic_embedding_dimension
+
+Revision ID: c5d9e1f2g345
+Revises: b4c8d9e0f123
+Create Date: 2026-02-01 12:49:28.000000
+
+CRITICAL: This migration alters the embedding column dimension.
+This migration is deterministic - it changes from 1536 to 1536 (no-op by default).
+To change dimensions, create a NEW migration with the desired target dimension.
+
+If changing to a different dimension, existing embeddings will be incompatible
+and re-indexing is required.
+"""
+
+from __future__ import annotations
+
+from collections.abc import Sequence
+
+from alembic import op
+
+# revision identifiers, used by Alembic.
+revision: str = "c5d9e1f2g345"
+down_revision: str | None = "b4c8d9e0f123"
+branch_labels: str | Sequence[str] | None = None
+depends_on: str | Sequence[str] | None = None
+
+# CRITICAL: Hardcoded dimensions for deterministic, reversible migrations.
+# To change dimensions, create a NEW migration with updated values.
+PREVIOUS_DIMENSION = 1536  # Dimension before this migration
+TARGET_DIMENSION = 1536  # Dimension after this migration (change this for new dimension)
+
+
+def upgrade() -> None:
+    """Apply migration - alter embedding column to target dimension.
+
+    Uses hardcoded TARGET_DIMENSION for deterministic behavior.
+    WARNING: Changing dimension requires re-indexing all documents.
+    """
+    # Drop the HNSW index first (required before altering column type)
+    op.drop_index("ix_chunk_embedding_hnsw", table_name="document_chunk")
+
+    # Alter the embedding column type with target dimension
+    # Note: This will invalidate any existing embeddings if dimension changes
+    op.execute(
+        f"ALTER TABLE document_chunk ALTER COLUMN embedding TYPE vector({TARGET_DIMENSION})"
+    )
+
+    # Recreate the HNSW index with the target dimension
+    op.create_index(
+        "ix_chunk_embedding_hnsw",
+        "document_chunk",
+        ["embedding"],
+        unique=False,
+        postgresql_using="hnsw",
+        postgresql_with={"m": 16, "ef_construction": 64},
+        postgresql_ops={"embedding": "vector_cosine_ops"},
+    )
+
+
+def downgrade() -> None:
+    """Revert migration - restore embedding column to previous dimension.
+
+    Uses hardcoded PREVIOUS_DIMENSION for deterministic rollback.
+    WARNING: This will invalidate any embeddings that were generated
+    with the target dimension.
+    """
+    # Drop the HNSW index
+    op.drop_index("ix_chunk_embedding_hnsw", table_name="document_chunk")
+
+    # Restore to previous dimension
+    op.execute(
+        f"ALTER TABLE document_chunk ALTER COLUMN embedding TYPE vector({PREVIOUS_DIMENSION})"
+    )
+
+    # Recreate the HNSW index
+    op.create_index(
+        "ix_chunk_embedding_hnsw",
+        "document_chunk",
+        ["embedding"],
+        unique=False,
+        postgresql_using="hnsw",
+        postgresql_with={"m": 16, "ef_construction": 64},
+        postgresql_ops={"embedding": "vector_cosine_ops"},
+    )
diff --git a/app/core/config.py b/app/core/config.py
index 46d5c9c9..ba912fa8 100644
--- a/app/core/config.py
+++ b/app/core/config.py
@@ -64,6 +64,32 @@ class Settings(BaseSettings):
     # Jobs
     jobs_retention_days: int = 30
 
+    # RAG Embedding Configuration
+    rag_embedding_provider: Literal["openai", "ollama"] = "openai"
+    openai_api_key: str = ""
+    rag_embedding_model: str = "text-embedding-3-small"
+    rag_embedding_dimension: int = 1536
+    rag_embedding_batch_size: int = 100
+
+    # Ollama Configuration (when rag_embedding_provider = "ollama")
+    ollama_base_url: str = "http://localhost:11434"
+    ollama_embedding_model: str = "nomic-embed-text"
+
+    # RAG Chunking Configuration
+    rag_chunk_size: int = 512  # tokens
+    rag_chunk_overlap: int = 50  # tokens
+    rag_min_chunk_size: int = 100  # minimum tokens per chunk
+
+    # RAG Retrieval Configuration
+    rag_top_k: int = 5
+    rag_similarity_threshold: float = 0.7
+    rag_max_context_tokens: int = 4000
+
+    # RAG Index Configuration
+    rag_index_type: Literal["hnsw", "ivfflat"] = "hnsw"
+    rag_hnsw_m: int = 16
+    rag_hnsw_ef_construction: int = 64
+
     @property
     def is_development(self) -> bool:
         """Check if running in development mode."""
diff --git a/app/features/rag/__init__.py b/app/features/rag/__init__.py
new file mode 100644
index 00000000..918ac064
--- /dev/null
+++ b/app/features/rag/__init__.py
@@ -0,0 +1,5 @@
+"""RAG (Retrieval-Augmented Generation) knowledge base feature."""
+
+from app.features.rag.routes import router
+
+__all__ = ["router"]
diff --git a/app/features/rag/chunkers.py b/app/features/rag/chunkers.py
new file mode 100644
index 00000000..15c0ecfd
--- /dev/null
+++ b/app/features/rag/chunkers.py
@@ -0,0 +1,650 @@
+"""Document chunking strategies for RAG indexing.
+
+Provides heading-aware and content-aware chunking:
+- MarkdownChunker: Splits on heading boundaries
+- OpenAPIChunker: One chunk per endpoint
+
+CRITICAL: Uses tiktoken for accurate token counting.
+"""
+
+from __future__ import annotations
+
+import json
+import re
+from abc import ABC, abstractmethod
+from dataclasses import dataclass, field
+from typing import Any
+
+import tiktoken
+
+from app.core.config import get_settings
+
+
+@dataclass
+class ChunkData:
+    """Represents a single chunk of document content.
+
+    Args:
+        content: The text content of the chunk.
+        index: Position of this chunk in the source document.
+        token_count: Number of tokens in the content.
+        metadata: Additional context (heading, section_path, etc.).
+    """
+
+    content: str
+    index: int
+    token_count: int
+    metadata: dict[str, Any] = field(default_factory=lambda: {})
+
+
+class BaseChunker(ABC):
+    """Abstract base class for document chunkers.
+
+    All chunkers must:
+    - Use tiktoken for token counting (cl100k_base encoding)
+    - Respect chunk_size and chunk_overlap settings
+    - Never exceed 8191 tokens per chunk (OpenAI limit)
+    """
+
+    MAX_TOKENS_PER_CHUNK = 8191  # OpenAI embedding input limit
+
+    def __init__(self) -> None:
+        """Initialize chunker with settings and tokenizer."""
+        self.settings = get_settings()
+        self.chunk_size = self.settings.rag_chunk_size
+        self.chunk_overlap = self.settings.rag_chunk_overlap
+        self.min_chunk_size = self.settings.rag_min_chunk_size
+        self._encoder = tiktoken.get_encoding("cl100k_base")
+
+    def count_tokens(self, text: str) -> int:
+        """Count tokens in text using tiktoken.
+
+        Args:
+            text: Text to count tokens for.
+
+        Returns:
+            Number of tokens.
+        """
+        return len(self._encoder.encode(text))
+
+    def _truncate_to_tokens(self, text: str, max_tokens: int) -> str:
+        """Truncate text to a maximum number of tokens.
+
+        Args:
+            text: Text to truncate.
+            max_tokens: Maximum number of tokens.
+
+        Returns:
+            Truncated text.
+        """
+        tokens = self._encoder.encode(text)
+        if len(tokens) <= max_tokens:
+            return text
+        return self._encoder.decode(tokens[:max_tokens])
+
+    @abstractmethod
+    def chunk(self, content: str) -> list[ChunkData]:
+        """Split content into chunks.
+
+        Args:
+            content: Full document content.
+
+        Returns:
+            List of ChunkData objects.
+        """
+        pass
+
+
+class MarkdownChunker(BaseChunker):
+    """Chunks markdown documents by heading boundaries.
+
+    Splits content at heading boundaries (# ## ### etc.) while:
+    - Respecting chunk_size limits
+    - Including heading hierarchy in metadata
+    - Preserving context through overlap
+    """
+
+    # Regex to match markdown headings
+    HEADING_PATTERN = re.compile(r"^(#{1,6})\s+(.+)$", re.MULTILINE)
+
+    def chunk(self, content: str) -> list[ChunkData]:
+        """Split markdown content into heading-aware chunks.
+
+        Args:
+            content: Markdown document content.
+
+        Returns:
+            List of ChunkData with heading metadata.
+        """
+        chunks: list[ChunkData] = []
+        sections = self._split_by_headings(content)
+
+        current_chunk = ""
+        current_heading_path: list[str] = []
+        chunk_index = 0
+
+        for section in sections:
+            section_content = section["content"]
+            heading = section.get("heading")
+            level = section.get("level", 0)
+
+            # Update heading path based on level
+            if heading:
+                current_heading_path = self._update_heading_path(
+                    current_heading_path, heading, level
+                )
+
+            section_tokens = self.count_tokens(section_content)
+
+            # If section alone exceeds chunk size, split it further
+            if section_tokens > self.chunk_size:
+                # Flush current chunk if any
+                if current_chunk.strip():
+                    chunks.append(
+                        self._create_chunk(
+                            current_chunk.strip(), chunk_index, current_heading_path.copy()
+                        )
+                    )
+                    chunk_index += 1
+                    current_chunk = ""
+
+                # Split large section into smaller chunks
+                sub_chunks = self._split_large_section(section_content, current_heading_path.copy())
+                for sub_chunk in sub_chunks:
+                    sub_chunk.index = chunk_index
+                    chunks.append(sub_chunk)
+                    chunk_index += 1
+                continue
+
+            # Check if adding this section exceeds chunk size
+            combined = current_chunk + section_content
+            combined_tokens = self.count_tokens(combined)
+
+            if combined_tokens > self.chunk_size:
+                # Save current chunk and start new one
+                if current_chunk.strip():
+                    chunks.append(
+                        self._create_chunk(
+                            current_chunk.strip(), chunk_index, current_heading_path.copy()
+                        )
+                    )
+                    chunk_index += 1
+
+                # Add overlap from previous chunk
+                overlap_text = self._get_overlap_text(current_chunk)
+                current_chunk = overlap_text + section_content
+            else:
+                current_chunk = combined
+
+        # Don't forget the last chunk
+        # Include it even if small when it's the only content
+        if current_chunk.strip():
+            token_count = self.count_tokens(current_chunk.strip())
+            # Include small chunks if: we have no other chunks OR it meets min size
+            if len(chunks) == 0 or token_count >= self.min_chunk_size:
+                chunks.append(
+                    self._create_chunk(
+                        current_chunk.strip(), chunk_index, current_heading_path.copy()
+                    )
+                )
+
+        return chunks
+
+    def _split_by_headings(self, content: str) -> list[dict[str, Any]]:
+        """Split content at heading boundaries.
+
+        Args:
+            content: Markdown content.
+
+        Returns:
+            List of sections with heading info.
+        """
+        sections: list[dict[str, Any]] = []
+        lines = content.split("\n")
+        current_section: dict[str, Any] = {"content": "", "heading": None, "level": 0}
+
+        for line in lines:
+            match = self.HEADING_PATTERN.match(line)
+            if match:
+                # Save current section if it has content
+                if current_section["content"].strip():
+                    sections.append(current_section)
+
+                # Start new section with this heading
+                level = len(match.group(1))
+                heading = match.group(2).strip()
+                current_section = {
+                    "content": line + "\n",
+                    "heading": heading,
+                    "level": level,
+                }
+            else:
+                current_section["content"] += line + "\n"
+
+        # Add final section
+        if current_section["content"].strip():
+            sections.append(current_section)
+
+        return sections
+
+    def _update_heading_path(self, current_path: list[str], heading: str, level: int) -> list[str]:
+        """Update the heading path based on the new heading level.
+
+        Args:
+            current_path: Current list of headings.
+            heading: New heading text.
+            level: Heading level (1-6).
+
+        Returns:
+            Updated heading path.
+        """
+        # Truncate path to current level and add new heading
+        new_path = current_path[: level - 1]
+        new_path.append(heading)
+        return new_path
+
+    def _split_large_section(self, content: str, heading_path: list[str]) -> list[ChunkData]:
+        """Split a large section into smaller chunks by sentences/paragraphs.
+
+        Args:
+            content: Section content that exceeds chunk size.
+            heading_path: Current heading hierarchy.
+
+        Returns:
+            List of smaller chunks.
+        """
+        chunks: list[ChunkData] = []
+        paragraphs = content.split("\n\n")
+        current_chunk = ""
+
+        for para in paragraphs:
+            para = para.strip()
+            if not para:
+                continue
+
+            para_tokens = self.count_tokens(para)
+
+            # If single paragraph exceeds limit, split by sentences
+            if para_tokens > self.chunk_size:
+                if current_chunk.strip():
+                    chunks.append(self._create_chunk(current_chunk.strip(), 0, heading_path))
+                    current_chunk = ""
+
+                sentence_chunks = self._split_by_sentences(para, heading_path)
+                chunks.extend(sentence_chunks)
+                continue
+
+            combined = current_chunk + "\n\n" + para if current_chunk else para
+            combined_tokens = self.count_tokens(combined)
+
+            if combined_tokens > self.chunk_size:
+                if current_chunk.strip():
+                    chunks.append(self._create_chunk(current_chunk.strip(), 0, heading_path))
+                current_chunk = para
+            else:
+                current_chunk = combined
+
+        if current_chunk.strip():
+            chunks.append(self._create_chunk(current_chunk.strip(), 0, heading_path))
+
+        return chunks
+
+    def _split_by_sentences(self, text: str, heading_path: list[str]) -> list[ChunkData]:
+        """Split text by sentences when paragraphs are too large.
+
+        Args:
+            text: Text to split.
+            heading_path: Current heading hierarchy.
+
+        Returns:
+            List of sentence-based chunks.
+        """
+        chunks: list[ChunkData] = []
+        # Simple sentence splitting (handles . ? !)
+        sentences = re.split(r"(?<=[.!?])\s+", text)
+        current_chunk = ""
+
+        for sentence in sentences:
+            sentence = sentence.strip()
+            if not sentence:
+                continue
+
+            sentence_tokens = self.count_tokens(sentence)
+
+            # If single sentence exceeds limit, truncate it
+            if sentence_tokens > self.MAX_TOKENS_PER_CHUNK:
+                if current_chunk.strip():
+                    chunks.append(self._create_chunk(current_chunk.strip(), 0, heading_path))
+                    current_chunk = ""
+
+                truncated = self._truncate_to_tokens(sentence, self.MAX_TOKENS_PER_CHUNK)
+                chunks.append(self._create_chunk(truncated, 0, heading_path))
+                continue
+
+            combined = current_chunk + " " + sentence if current_chunk else sentence
+            combined_tokens = self.count_tokens(combined)
+
+            if combined_tokens > self.chunk_size:
+                if current_chunk.strip():
+                    chunks.append(self._create_chunk(current_chunk.strip(), 0, heading_path))
+                current_chunk = sentence
+            else:
+                current_chunk = combined
+
+        if current_chunk.strip():
+            chunks.append(self._create_chunk(current_chunk.strip(), 0, heading_path))
+
+        return chunks
+
+    def _get_overlap_text(self, text: str) -> str:
+        """Get the last N tokens of text for overlap.
+
+        Args:
+            text: Text to get overlap from.
+
+        Returns:
+            Overlap text.
+        """
+        if not text or self.chunk_overlap <= 0:
+            return ""
+
+        tokens = self._encoder.encode(text)
+        if len(tokens) <= self.chunk_overlap:
+            return text
+
+        overlap_tokens = tokens[-self.chunk_overlap :]
+        return self._encoder.decode(overlap_tokens)
+
+    def _create_chunk(self, content: str, index: int, heading_path: list[str]) -> ChunkData:
+        """Create a ChunkData object with metadata.
+
+        Args:
+            content: Chunk content.
+            index: Chunk index.
+            heading_path: Heading hierarchy.
+
+        Returns:
+            ChunkData instance.
+        """
+        token_count = self.count_tokens(content)
+        metadata: dict[str, Any] = {}
+
+        if heading_path:
+            metadata["heading"] = heading_path[-1]
+            metadata["section_path"] = heading_path
+
+        return ChunkData(
+            content=content,
+            index=index,
+            token_count=token_count,
+            metadata=metadata,
+        )
+
+
+class OpenAPIChunker(BaseChunker):
+    """Chunks OpenAPI specifications by endpoint.
+
+    Creates one chunk per endpoint containing:
+    - Path and method
+    - Operation summary and description
+    - Parameters and request body schema
+    - Response schemas
+    """
+
+    def chunk(self, content: str) -> list[ChunkData]:
+        """Split OpenAPI spec into endpoint-based chunks.
+
+        Args:
+            content: OpenAPI JSON/YAML content.
+
+        Returns:
+            List of ChunkData, one per endpoint.
+        """
+        chunks: list[ChunkData] = []
+
+        spec_data: dict[str, Any]
+        try:
+            spec_data = json.loads(content)
+        except json.JSONDecodeError:
+            # Try YAML if JSON fails
+            try:
+                import yaml  # type: ignore[import-untyped]
+
+                parsed = yaml.safe_load(content)
+                # yaml.safe_load can return non-dict for simple strings
+                if not isinstance(parsed, dict):
+                    return MarkdownChunker().chunk(content)
+                spec_data = parsed  # pyright: ignore[reportUnknownVariableType]
+            except Exception:
+                # Fall back to treating as markdown
+                return MarkdownChunker().chunk(content)
+
+        paths: dict[str, Any] = spec_data.get("paths", {})
+        chunk_index = 0
+
+        # Also include info section as first chunk
+        info: dict[str, Any] = spec_data.get("info", {})
+        if info:
+            servers: list[dict[str, Any]] = spec_data.get("servers", [])
+            info_chunk = self._create_info_chunk(info, servers)
+            info_chunk.index = chunk_index
+            chunks.append(info_chunk)
+            chunk_index += 1
+
+        # Create chunk for each endpoint
+        for path_key, methods in paths.items():
+            path: str = str(path_key)
+            if not isinstance(methods, dict):
+                continue
+
+            methods_dict: dict[str, Any] = dict(methods)  # pyright: ignore[reportUnknownArgumentType]
+            for method_name, operation in methods_dict.items():
+                if method_name.startswith("x-") or not isinstance(operation, dict):
+                    continue
+
+                operation_dict: dict[str, Any] = dict(operation)  # pyright: ignore[reportUnknownArgumentType]
+                chunk = self._create_endpoint_chunk(path, method_name, operation_dict, spec_data)
+                chunk.index = chunk_index
+                chunks.append(chunk)
+                chunk_index += 1
+
+        return chunks
+
+    def _create_info_chunk(self, info: dict[str, Any], servers: list[dict[str, Any]]) -> ChunkData:
+        """Create a chunk for API info section.
+
+        Args:
+            info: OpenAPI info object.
+            servers: OpenAPI servers array.
+
+        Returns:
+            ChunkData for API overview.
+        """
+        parts: list[str] = []
+        title = info.get("title", "API")
+        version = info.get("version", "")
+
+        parts.append(f"# {title}")
+        if version:
+            parts.append(f"Version: {version}")
+        if info.get("description"):
+            parts.append(f"\n{info['description']}")
+        if servers:
+            parts.append("\n## Servers")
+            for server in servers:
+                url = server.get("url", "")
+                desc = server.get("description", "")
+                parts.append(f"- {url}" + (f" ({desc})" if desc else ""))
+
+        content = "\n".join(parts)
+        return ChunkData(
+            content=content,
+            index=0,
+            token_count=self.count_tokens(content),
+            metadata={"type": "api_info", "title": title},
+        )
+
+    def _create_endpoint_chunk(
+        self,
+        path: str,
+        method: str,
+        operation: dict[str, Any],
+        spec: dict[str, Any],
+    ) -> ChunkData:
+        """Create a chunk for a single API endpoint.
+
+        Args:
+            path: Endpoint path.
+            method: HTTP method.
+            operation: OpenAPI operation object.
+            spec: Full OpenAPI spec (for dereferencing).
+
+        Returns:
+            ChunkData for the endpoint.
+        """
+        parts: list[str] = []
+
+        # Endpoint header
+        operation_id = operation.get("operationId", f"{method}_{path}")
+        summary = operation.get("summary", "")
+        parts.append(f"## {method.upper()} {path}")
+        if summary:
+            parts.append(f"**{summary}**")
+
+        # Description
+        if operation.get("description"):
+            parts.append(f"\n{operation['description']}")
+
+        # Tags
+        tags = operation.get("tags", [])
+        if tags:
+            parts.append(f"\nTags: {', '.join(tags)}")
+
+        # Parameters
+        params = operation.get("parameters", [])
+        if params:
+            parts.append("\n### Parameters")
+            for param in params:
+                name = param.get("name", "")
+                location = param.get("in", "")
+                required = param.get("required", False)
+                desc = param.get("description", "")
+                req_str = " (required)" if required else ""
+                parts.append(f"- `{name}` ({location}){req_str}: {desc}")
+
+        # Request body
+        request_body = operation.get("requestBody", {})
+        if request_body:
+            parts.append("\n### Request Body")
+            content_types = request_body.get("content", {})
+            for ct, schema_info in content_types.items():
+                parts.append(f"Content-Type: {ct}")
+                if "schema" in schema_info:
+                    schema_str = self._format_schema(schema_info["schema"], spec)
+                    parts.append(f"```json\n{schema_str}\n```")
+
+        # Responses
+        responses = operation.get("responses", {})
+        if responses:
+            parts.append("\n### Responses")
+            for status, response in responses.items():
+                desc = response.get("description", "")
+                parts.append(f"- **{status}**: {desc}")
+
+        content = "\n".join(parts)
+
+        # Ensure we don't exceed token limit
+        token_count = self.count_tokens(content)
+        if token_count > self.MAX_TOKENS_PER_CHUNK:
+            content = self._truncate_to_tokens(content, self.MAX_TOKENS_PER_CHUNK)
+            token_count = self.count_tokens(content)
+
+        return ChunkData(
+            content=content,
+            index=0,
+            token_count=token_count,
+            metadata={
+                "type": "endpoint",
+                "path": path,
+                "method": method.upper(),
+                "operation_id": operation_id,
+                "tags": tags,
+            },
+        )
+
+    def _format_schema(self, schema: dict[str, Any], spec: dict[str, Any], depth: int = 0) -> str:
+        """Format a JSON schema for display.
+
+        Args:
+            schema: JSON schema object.
+            spec: Full OpenAPI spec (for $ref resolution).
+            depth: Current recursion depth.
+
+        Returns:
+            Formatted schema string.
+        """
+        if depth > 3:  # Prevent deep recursion
+            return "{...}"
+
+        # Handle $ref
+        if "$ref" in schema:
+            ref = schema["$ref"]
+            resolved = self._resolve_ref(ref, spec)
+            if resolved:
+                return self._format_schema(resolved, spec, depth + 1)
+            return f'{{"$ref": "{ref}"}}'
+
+        # Simple formatting
+        try:
+            return json.dumps(schema, indent=2)[:500]  # Limit size
+        except (TypeError, ValueError):
+            return str(schema)[:500]
+
+    def _resolve_ref(self, ref: str, spec: dict[str, Any]) -> dict[str, Any] | None:
+        """Resolve a $ref pointer in the OpenAPI spec.
+
+        Args:
+            ref: Reference string (e.g., "#/components/schemas/User").
+            spec: Full OpenAPI spec.
+
+        Returns:
+            Resolved schema or None.
+        """
+        if not ref.startswith("#/"):
+            return None
+
+        parts = ref[2:].split("/")
+        current: Any = spec
+
+        for part in parts:
+            if isinstance(current, dict) and part in current:
+                current = current[part]  # pyright: ignore[reportUnknownVariableType]
+            else:
+                return None
+
+        if isinstance(current, dict):
+            return dict(current)  # pyright: ignore[reportUnknownArgumentType]
+        return None
+
+
+def get_chunker(source_type: str) -> BaseChunker:
+    """Factory function to get the appropriate chunker.
+
+    Args:
+        source_type: Type of source (markdown, openapi).
+
+    Returns:
+        Appropriate chunker instance.
+
+    Raises:
+        ValueError: If source_type is not supported.
+    """
+    chunkers = {
+        "markdown": MarkdownChunker,
+        "openapi": OpenAPIChunker,
+    }
+
+    if source_type not in chunkers:
+        raise ValueError(f"Unsupported source type: {source_type}")
+
+    return chunkers[source_type]()
diff --git a/app/features/rag/embeddings.py b/app/features/rag/embeddings.py
new file mode 100644
index 00000000..cffa1b1d
--- /dev/null
+++ b/app/features/rag/embeddings.py
@@ -0,0 +1,536 @@
+"""Embedding providers for RAG knowledge base.
+
+Provides async embedding generation with multiple backends:
+- OpenAI API (default): Batch processing with rate limit handling
+- Ollama: Local/LAN embedding generation via HTTP API
+
+CRITICAL: Provider selection via RAG_EMBEDDING_PROVIDER config.
+"""
+
+from __future__ import annotations
+
+import asyncio
+from abc import ABC, abstractmethod
+from typing import TYPE_CHECKING
+
+import httpx
+import structlog
+import tiktoken
+from openai import AsyncOpenAI, RateLimitError
+
+from app.core.config import get_settings
+
+if TYPE_CHECKING:
+    pass
+
+logger = structlog.get_logger()
+
+
+class EmbeddingError(Exception):
+    """Error during embedding generation."""
+
+    pass
+
+
+class EmbeddingProvider(ABC):
+    """Abstract base class for embedding providers.
+
+    Defines the interface for generating text embeddings.
+    All providers must implement embed_texts, embed_query, and dimension.
+    """
+
+    @abstractmethod
+    async def embed_texts(self, texts: list[str]) -> list[list[float]]:
+        """Generate embeddings for multiple texts.
+
+        Args:
+            texts: List of texts to embed.
+
+        Returns:
+            List of embedding vectors in same order as input texts.
+
+        Raises:
+            EmbeddingError: If embedding generation fails.
+        """
+        ...
+
+    @abstractmethod
+    async def embed_query(self, query: str) -> list[float]:
+        """Generate embedding for a single query.
+
+        Args:
+            query: Query text to embed.
+
+        Returns:
+            Embedding vector.
+
+        Raises:
+            EmbeddingError: If embedding generation fails.
+        """
+        ...
+
+    @property
+    @abstractmethod
+    def dimension(self) -> int:
+        """Return the embedding dimension for this provider.
+
+        Returns:
+            Embedding dimension (e.g., 1536 for OpenAI, 768 for nomic-embed-text).
+        """
+        ...
+
+
+class OpenAIEmbeddingProvider(EmbeddingProvider):
+    """Embedding provider using OpenAI API.
+
+    Handles:
+    - Async batch embedding generation
+    - Rate limit handling with exponential backoff
+    - Token counting and validation
+    - Cost tracking via logging
+
+    CRITICAL: OpenAI embedding input limit is 8192 tokens per text.
+    """
+
+    MAX_TOKENS_PER_INPUT = 8191  # OpenAI limit
+    MAX_INPUTS_PER_BATCH = 2048  # OpenAI batch limit
+
+    def __init__(self) -> None:
+        """Initialize OpenAI embedding provider."""
+        self.settings = get_settings()
+        self._encoder = tiktoken.get_encoding("cl100k_base")
+        self._client: AsyncOpenAI | None = None
+
+    def _get_client(self) -> AsyncOpenAI:
+        """Get or create the async OpenAI client.
+
+        Returns:
+            AsyncOpenAI client instance.
+
+        Raises:
+            EmbeddingError: If OpenAI API key is not configured.
+        """
+        if self._client is None:
+            if not self.settings.openai_api_key:
+                raise EmbeddingError(
+                    "OpenAI API key not configured. Set OPENAI_API_KEY environment variable."
+                )
+            self._client = AsyncOpenAI(api_key=self.settings.openai_api_key)
+        return self._client
+
+    @property
+    def dimension(self) -> int:
+        """Return configured embedding dimension.
+
+        Returns:
+            Embedding dimension from settings.
+        """
+        return self.settings.rag_embedding_dimension
+
+    def count_tokens(self, text: str) -> int:
+        """Count tokens in text using tiktoken.
+
+        Args:
+            text: Text to count tokens for.
+
+        Returns:
+            Number of tokens.
+        """
+        return len(self._encoder.encode(text))
+
+    def truncate_to_tokens(self, text: str, max_tokens: int) -> str:
+        """Truncate text to a maximum number of tokens.
+
+        Args:
+            text: Text to truncate.
+            max_tokens: Maximum number of tokens.
+
+        Returns:
+            Truncated text.
+        """
+        tokens = self._encoder.encode(text)
+        if len(tokens) <= max_tokens:
+            return text
+        return self._encoder.decode(tokens[:max_tokens])
+
+    async def embed_texts(
+        self,
+        texts: list[str],
+        max_retries: int = 3,
+        retry_delay: float = 1.0,
+    ) -> list[list[float]]:
+        """Generate embeddings for multiple texts.
+
+        Processes texts in batches according to settings and OpenAI limits.
+        Handles rate limits with exponential backoff.
+
+        Args:
+            texts: List of texts to embed.
+            max_retries: Maximum retry attempts per batch.
+            retry_delay: Initial delay between retries (doubles each retry).
+
+        Returns:
+            List of embeddings in same order as input texts.
+
+        Raises:
+            EmbeddingError: If embedding generation fails after retries.
+        """
+        if not texts:
+            return []
+
+        client = self._get_client()
+        batch_size = min(self.settings.rag_embedding_batch_size, self.MAX_INPUTS_PER_BATCH)
+
+        # Validate and truncate texts if needed
+        validated_texts: list[str] = []
+        total_tokens = 0
+
+        for text in texts:
+            original_token_count = self.count_tokens(text)
+            if original_token_count > self.MAX_TOKENS_PER_INPUT:
+                text = self.truncate_to_tokens(text, self.MAX_TOKENS_PER_INPUT)
+                token_count = self.count_tokens(text)
+                logger.warning(
+                    "rag.embedding_text_truncated",
+                    original_tokens=original_token_count,
+                    truncated_to=self.MAX_TOKENS_PER_INPUT,
+                )
+            else:
+                token_count = original_token_count
+            validated_texts.append(text)
+            total_tokens += token_count
+
+        embeddings: list[list[float]] = []
+
+        # Process in batches
+        for i in range(0, len(validated_texts), batch_size):
+            batch = validated_texts[i : i + batch_size]
+            batch_embeddings = await self._embed_batch(client, batch, max_retries, retry_delay)
+            embeddings.extend(batch_embeddings)
+
+        logger.info(
+            "rag.embeddings_generated",
+            text_count=len(texts),
+            total_tokens=total_tokens,
+            model=self.settings.rag_embedding_model,
+            provider="openai",
+        )
+
+        return embeddings
+
+    async def embed_query(self, query: str) -> list[float]:
+        """Generate embedding for a single query.
+
+        Optimized for single query embedding (no batching overhead).
+
+        Args:
+            query: Query text to embed.
+
+        Returns:
+            Embedding vector.
+
+        Raises:
+            EmbeddingError: If embedding generation fails.
+        """
+        embeddings = await self.embed_texts([query])
+        return embeddings[0]
+
+    async def _embed_batch(
+        self,
+        client: AsyncOpenAI,
+        texts: list[str],
+        max_retries: int,
+        retry_delay: float,
+    ) -> list[list[float]]:
+        """Embed a single batch of texts with retry logic.
+
+        Args:
+            client: OpenAI async client.
+            texts: Batch of texts to embed.
+            max_retries: Maximum retry attempts.
+            retry_delay: Initial delay between retries.
+
+        Returns:
+            List of embeddings.
+
+        Raises:
+            EmbeddingError: If all retries fail.
+        """
+        last_error: Exception | None = None
+
+        for attempt in range(max_retries + 1):
+            try:
+                response = await client.embeddings.create(
+                    model=self.settings.rag_embedding_model,
+                    input=texts,
+                    dimensions=self.settings.rag_embedding_dimension,
+                )
+
+                # Extract embeddings in order
+                embeddings = [item.embedding for item in response.data]
+
+                # Log token usage
+                if response.usage:
+                    logger.debug(
+                        "rag.embedding_batch_completed",
+                        batch_size=len(texts),
+                        prompt_tokens=response.usage.prompt_tokens,
+                        total_tokens=response.usage.total_tokens,
+                    )
+
+                return embeddings
+
+            except RateLimitError as e:
+                last_error = e
+                if attempt < max_retries:
+                    wait_time = retry_delay * (2**attempt)
+                    logger.warning(
+                        "rag.embedding_rate_limit",
+                        attempt=attempt + 1,
+                        max_retries=max_retries,
+                        wait_seconds=wait_time,
+                    )
+                    await asyncio.sleep(wait_time)
+                continue
+
+            except Exception as e:
+                last_error = e
+                logger.error(
+                    "rag.embedding_error",
+                    error=str(e),
+                    error_type=type(e).__name__,
+                    batch_size=len(texts),
+                )
+                raise EmbeddingError(f"Failed to generate embeddings: {e}") from e
+
+        raise EmbeddingError(
+            f"Failed to generate embeddings after {max_retries} retries: {last_error}"
+        )
+
+
+class OllamaEmbeddingProvider(EmbeddingProvider):
+    """Embedding provider using Ollama's OpenAI-compatible API.
+
+    Provides local/LAN-based embedding generation without OpenAI dependency.
+    Uses the /v1/embeddings endpoint (OpenAI-compatible) which supports
+    the `dimensions` parameter for output dimension control.
+
+    CRITICAL: Requires Ollama server running with an embedding model pulled.
+    """
+
+    def __init__(self) -> None:
+        """Initialize Ollama embedding provider."""
+        self.settings = get_settings()
+        self._client: httpx.AsyncClient | None = None
+
+    def _get_client(self) -> httpx.AsyncClient:
+        """Get or create the async HTTP client.
+
+        Returns:
+            httpx AsyncClient instance.
+        """
+        if self._client is None:
+            self._client = httpx.AsyncClient(
+                base_url=self.settings.ollama_base_url,
+                timeout=httpx.Timeout(60.0, connect=10.0),
+            )
+        return self._client
+
+    @property
+    def dimension(self) -> int:
+        """Return configured embedding dimension.
+
+        Returns:
+            Embedding dimension from settings.
+        """
+        return self.settings.rag_embedding_dimension
+
+    async def embed_texts(
+        self,
+        texts: list[str],
+        max_retries: int = 3,
+        retry_delay: float = 1.0,
+    ) -> list[list[float]]:
+        """Generate embeddings for multiple texts via Ollama's OpenAI-compatible API.
+
+        Uses /v1/embeddings endpoint which supports the `dimensions` parameter
+        to control output embedding size.
+
+        Args:
+            texts: List of texts to embed.
+            max_retries: Maximum retry attempts.
+            retry_delay: Initial delay between retries (doubles each retry).
+
+        Returns:
+            List of embeddings in same order as input texts.
+
+        Raises:
+            EmbeddingError: If embedding generation fails.
+        """
+        if not texts:
+            return []
+
+        client = self._get_client()
+        last_error: Exception | None = None
+
+        for attempt in range(max_retries + 1):
+            try:
+                # Use OpenAI-compatible endpoint with dimensions parameter
+                response = await client.post(
+                    "/v1/embeddings",
+                    json={
+                        "model": self.settings.ollama_embedding_model,
+                        "input": texts,
+                        "dimensions": self.settings.rag_embedding_dimension,
+                    },
+                )
+                response.raise_for_status()
+
+                data = response.json()
+
+                # OpenAI-compatible response format: {"data": [{"embedding": [...], "index": 0}, ...]}
+                embedding_data = data.get("data", [])
+
+                if len(embedding_data) != len(texts):
+                    raise EmbeddingError(
+                        f"Embedding count mismatch: expected {len(texts)}, got {len(embedding_data)}"
+                    )
+
+                # Sort by index to ensure correct order and extract embeddings
+                sorted_data = sorted(embedding_data, key=lambda x: x.get("index", 0))
+                embeddings: list[list[float]] = [item["embedding"] for item in sorted_data]
+
+                logger.info(
+                    "rag.embeddings_generated",
+                    text_count=len(texts),
+                    model=self.settings.ollama_embedding_model,
+                    dimension=self.settings.rag_embedding_dimension,
+                    provider="ollama",
+                )
+
+                return embeddings
+
+            except httpx.HTTPStatusError as e:
+                last_error = e
+                if e.response.status_code == 404:
+                    # Model not found - don't retry
+                    raise EmbeddingError(
+                        f"Ollama model '{self.settings.ollama_embedding_model}' not found. "
+                        f"Run: ollama pull {self.settings.ollama_embedding_model}"
+                    ) from e
+                if e.response.status_code >= 500 and attempt < max_retries:
+                    # Server error - retry
+                    wait_time = retry_delay * (2**attempt)
+                    logger.warning(
+                        "rag.ollama_server_error",
+                        attempt=attempt + 1,
+                        max_retries=max_retries,
+                        wait_seconds=wait_time,
+                        status_code=e.response.status_code,
+                    )
+                    await asyncio.sleep(wait_time)
+                    continue
+                logger.error(
+                    "rag.embedding_error",
+                    error=str(e),
+                    error_type=type(e).__name__,
+                    status_code=e.response.status_code,
+                )
+                raise EmbeddingError(f"Ollama API error: {e}") from e
+
+            except httpx.ConnectError as e:
+                last_error = e
+                logger.error(
+                    "rag.ollama_connection_error",
+                    error=str(e),
+                    base_url=self.settings.ollama_base_url,
+                )
+                raise EmbeddingError(
+                    f"Failed to connect to Ollama at {self.settings.ollama_base_url}. "
+                    "Ensure Ollama is running."
+                ) from e
+
+            except Exception as e:
+                last_error = e
+                logger.error(
+                    "rag.embedding_error",
+                    error=str(e),
+                    error_type=type(e).__name__,
+                )
+                raise EmbeddingError(f"Failed to generate embeddings: {e}") from e
+
+        raise EmbeddingError(
+            f"Failed to generate embeddings after {max_retries} retries: {last_error}"
+        )
+
+    async def embed_query(self, query: str) -> list[float]:
+        """Generate embedding for a single query.
+
+        Args:
+            query: Query text to embed.
+
+        Returns:
+            Embedding vector.
+
+        Raises:
+            EmbeddingError: If embedding generation fails.
+        """
+        embeddings = await self.embed_texts([query])
+        return embeddings[0]
+
+    async def close(self) -> None:
+        """Close the HTTP client.
+
+        Should be called when done using the provider.
+        """
+        if self._client is not None:
+            await self._client.aclose()
+            self._client = None
+
+
+# Legacy alias for backwards compatibility
+EmbeddingService = OpenAIEmbeddingProvider
+
+
+# Singleton instances for dependency injection
+_embedding_provider: EmbeddingProvider | None = None
+
+
+def get_embedding_service() -> EmbeddingProvider:
+    """Get singleton embedding provider instance.
+
+    Returns provider based on RAG_EMBEDDING_PROVIDER config:
+    - "openai": OpenAI API (default)
+    - "ollama": Local Ollama server
+
+    Returns:
+        EmbeddingProvider instance.
+    """
+    global _embedding_provider
+    if _embedding_provider is None:
+        settings = get_settings()
+        if settings.rag_embedding_provider == "ollama":
+            _embedding_provider = OllamaEmbeddingProvider()
+            logger.info(
+                "rag.embedding_provider_initialized",
+                provider="ollama",
+                base_url=settings.ollama_base_url,
+                model=settings.ollama_embedding_model,
+            )
+        else:
+            _embedding_provider = OpenAIEmbeddingProvider()
+            logger.info(
+                "rag.embedding_provider_initialized",
+                provider="openai",
+                model=settings.rag_embedding_model,
+            )
+    return _embedding_provider
+
+
+def reset_embedding_service() -> None:
+    """Reset the singleton embedding provider.
+
+    Useful for testing or reconfiguration.
+    """
+    global _embedding_provider
+    _embedding_provider = None
diff --git a/app/features/rag/models.py b/app/features/rag/models.py
new file mode 100644
index 00000000..ba185b88
--- /dev/null
+++ b/app/features/rag/models.py
@@ -0,0 +1,115 @@
+"""RAG knowledge base ORM models.
+
+This module defines:
+- DocumentSource: Registry of indexed document sources
+- DocumentChunk: Indexed document chunks with embeddings
+
+CRITICAL: Uses PostgreSQL pgvector for embedding storage and similarity search.
+"""
+
+from __future__ import annotations
+
+from datetime import datetime
+from typing import TYPE_CHECKING, Any
+
+from pgvector.sqlalchemy import Vector  # type: ignore[import-untyped]
+from sqlalchemy import (
+    DateTime,
+    ForeignKey,
+    Index,
+    Integer,
+    String,
+    Text,
+    UniqueConstraint,
+)
+from sqlalchemy.dialects.postgresql import JSONB
+from sqlalchemy.orm import Mapped, mapped_column, relationship
+
+from app.core.database import Base
+from app.shared.models import TimestampMixin
+
+if TYPE_CHECKING:
+    pass
+
+
+class DocumentSource(TimestampMixin, Base):
+    """Registered document source for indexing.
+
+    CRITICAL: Tracks indexed sources with content hash for idempotent re-indexing.
+
+    Attributes:
+        id: Primary key.
+        source_id: Unique external identifier (UUID hex, 32 chars).
+        source_type: Type of source (markdown, openapi, run_report).
+        source_path: Path or identifier for the source.
+        content_hash: SHA-256 hash for change detection.
+        metadata_: Custom metadata as JSONB.
+        indexed_at: When the source was last indexed.
+        chunks: Related document chunks.
+    """
+
+    __tablename__ = "document_source"
+
+    id: Mapped[int] = mapped_column(Integer, primary_key=True)
+    source_id: Mapped[str] = mapped_column(String(32), unique=True, index=True)
+    source_type: Mapped[str] = mapped_column(String(50), index=True)
+    source_path: Mapped[str] = mapped_column(Text, nullable=False)
+    content_hash: Mapped[str] = mapped_column(String(64), nullable=False)
+    metadata_: Mapped[dict[str, Any] | None] = mapped_column("metadata", JSONB, nullable=True)
+    indexed_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), nullable=False)
+
+    # Relationship to chunks
+    chunks: Mapped[list[DocumentChunk]] = relationship(
+        back_populates="source", cascade="all, delete-orphan"
+    )
+
+    __table_args__ = (UniqueConstraint("source_type", "source_path", name="uq_source_type_path"),)
+
+
+class DocumentChunk(TimestampMixin, Base):
+    """Indexed document chunk with embedding.
+
+    CRITICAL: Stores vector embeddings for semantic similarity search.
+
+    Attributes:
+        id: Primary key.
+        chunk_id: Unique external identifier (UUID hex, 32 chars).
+        source_id: Foreign key to parent source.
+        chunk_index: Position within the source document.
+        content: Chunk text content.
+        embedding: Vector embedding (1536 dimensions for text-embedding-3-small).
+        token_count: Number of tokens in the chunk.
+        metadata_: Heading hierarchy, section path, etc.
+        source: Related document source.
+    """
+
+    __tablename__ = "document_chunk"
+
+    id: Mapped[int] = mapped_column(Integer, primary_key=True)
+    chunk_id: Mapped[str] = mapped_column(String(32), unique=True, index=True)
+    source_id: Mapped[int] = mapped_column(
+        Integer, ForeignKey("document_source.id", ondelete="CASCADE"), index=True
+    )
+    chunk_index: Mapped[int] = mapped_column(Integer, nullable=False)
+    content: Mapped[str] = mapped_column(Text, nullable=False)
+    # Vector column for embeddings - dimension configurable via settings
+    embedding: Mapped[list[float] | None] = mapped_column(Vector(1536), nullable=True)
+    token_count: Mapped[int] = mapped_column(Integer, nullable=False)
+    metadata_: Mapped[dict[str, Any] | None] = mapped_column("metadata", JSONB, nullable=True)
+
+    # Relationship to source
+    source: Mapped[DocumentSource] = relationship(back_populates="chunks")
+
+    __table_args__ = (
+        UniqueConstraint("source_id", "chunk_index", name="uq_source_chunk_index"),
+        # HNSW index for cosine similarity search
+        Index(
+            "ix_chunk_embedding_hnsw",
+            "embedding",
+            postgresql_using="hnsw",
+            postgresql_with={"m": 16, "ef_construction": 64},
+            postgresql_ops={"embedding": "vector_cosine_ops"},
+        ),
+        # GIN index for metadata filtering
+        Index("ix_chunk_metadata_gin", "metadata", postgresql_using="gin"),
+    )
diff --git a/app/features/rag/routes.py b/app/features/rag/routes.py
new file mode 100644
index 00000000..403edd37
--- /dev/null
+++ b/app/features/rag/routes.py
@@ -0,0 +1,345 @@
+"""RAG API routes for document indexing and semantic retrieval."""
+
+from fastapi import APIRouter, Depends, HTTPException, status
+from sqlalchemy.exc import SQLAlchemyError
+from sqlalchemy.ext.asyncio import AsyncSession
+
+from app.core.database import get_db
+from app.core.exceptions import DatabaseError
+from app.core.logging import get_logger
+from app.features.rag.embeddings import EmbeddingError
+from app.features.rag.schemas import (
+    DeleteResponse,
+    IndexRequest,
+    IndexResponse,
+    RetrieveRequest,
+    RetrieveResponse,
+    SourceListResponse,
+)
+from app.features.rag.service import RAGService, SourceNotFoundError
+
+logger = get_logger(__name__)
+
+router = APIRouter(prefix="/rag", tags=["rag"])
+
+
+# =============================================================================
+# Index Endpoint
+# =============================================================================
+
+
+@router.post(
+    "/index",
+    response_model=IndexResponse,
+    status_code=status.HTTP_201_CREATED,
+    summary="Index a document",
+    description="""
+Index a document into the RAG knowledge base.
+
+**Source Types:**
+- `markdown`: Markdown documents (split by headings)
+- `openapi`: OpenAPI specifications (split by endpoint)
+
+**Content Source:**
+- Provide `content` directly in the request, OR
+- Provide `source_path` to read from file system
+
+**Idempotent Updates:**
+- Documents are identified by `source_type` + `source_path`
+- Content hash is compared to detect changes
+- If unchanged, returns `status: "unchanged"` without re-indexing
+- If changed, old chunks are deleted and new ones created
+
+**Returns:**
+- `source_id`: Unique identifier for the indexed source
+- `chunks_created`: Number of chunks created
+- `tokens_processed`: Total tokens processed
+- `status`: "indexed", "updated", or "unchanged"
+""",
+)
+async def index_document(
+    request: IndexRequest,
+    db: AsyncSession = Depends(get_db),
+) -> IndexResponse:
+    """Index a document into the knowledge base.
+
+    Args:
+        request: Index request with source type, path, and optional content.
+        db: Async database session from dependency.
+
+    Returns:
+        Indexing result with statistics.
+
+    Raises:
+        HTTPException: If file not found or embedding generation fails.
+        DatabaseError: If database operation fails.
+    """
+    logger.info(
+        "rag.index_request_received",
+        source_type=request.source_type,
+        source_path=request.source_path,
+        has_content=request.content is not None,
+    )
+
+    service = RAGService()
+
+    try:
+        response = await service.index_document(db=db, request=request)
+
+        logger.info(
+            "rag.index_request_completed",
+            source_id=response.source_id,
+            chunks_created=response.chunks_created,
+            status=response.status,
+        )
+
+        return response
+
+    except FileNotFoundError as e:
+        logger.warning(
+            "rag.index_request_failed",
+            error=str(e),
+            error_type=type(e).__name__,
+            source_path=request.source_path,
+        )
+        raise HTTPException(
+            status_code=status.HTTP_404_NOT_FOUND,
+            detail=str(e),
+        ) from e
+
+    except EmbeddingError as e:
+        logger.error(
+            "rag.index_request_failed",
+            error=str(e),
+            error_type=type(e).__name__,
+            exc_info=True,
+        )
+        raise HTTPException(
+            status_code=status.HTTP_502_BAD_GATEWAY,
+            detail=f"Embedding generation failed: {e}",
+        ) from e
+
+    except SQLAlchemyError as e:
+        logger.error(
+            "rag.index_request_failed",
+            error=str(e),
+            error_type=type(e).__name__,
+            exc_info=True,
+        )
+        raise DatabaseError(
+            message="Failed to index document",
+            details={"error": str(e)},
+        ) from e
+
+
+# =============================================================================
+# Retrieve Endpoint
+# =============================================================================
+
+
+@router.post(
+    "/retrieve",
+    response_model=RetrieveResponse,
+    summary="Semantic search",
+    description="""
+Perform semantic search across indexed documents.
+
+**Query:**
+- Natural language query (1-2000 characters)
+- Converted to embedding for similarity search
+
+**Parameters:**
+- `top_k`: Number of results (1-50, default: 5)
+- `similarity_threshold`: Minimum similarity (0.0-1.0, default: 0.7)
+- `filters`: Optional metadata filters
+
+**Filters:**
+- `source_type`: List of source types to search
+- `category`: Category from source metadata
+
+**Returns:**
+- List of matching chunks with relevance scores
+- Performance metrics (embedding time, search time)
+- Total chunks searched
+
+**Evidence-Grounded:**
+Returns raw chunks with citations - no answer generation.
+""",
+)
+async def retrieve(
+    request: RetrieveRequest,
+    db: AsyncSession = Depends(get_db),
+) -> RetrieveResponse:
+    """Perform semantic search across indexed documents.
+
+    Args:
+        request: Retrieval request with query and filters.
+        db: Async database session from dependency.
+
+    Returns:
+        Search results with relevance scores.
+
+    Raises:
+        HTTPException: If embedding generation fails.
+        DatabaseError: If database operation fails.
+    """
+    logger.info(
+        "rag.retrieve_request_received",
+        query_length=len(request.query),
+        top_k=request.top_k,
+        threshold=request.similarity_threshold,
+        has_filters=request.filters is not None,
+    )
+
+    service = RAGService()
+
+    try:
+        response = await service.retrieve(db=db, request=request)
+
+        logger.info(
+            "rag.retrieve_request_completed",
+            results_count=len(response.results),
+            query_embedding_time_ms=response.query_embedding_time_ms,
+            search_time_ms=response.search_time_ms,
+        )
+
+        return response
+
+    except EmbeddingError as e:
+        logger.error(
+            "rag.retrieve_request_failed",
+            error=str(e),
+            error_type=type(e).__name__,
+            exc_info=True,
+        )
+        raise HTTPException(
+            status_code=status.HTTP_502_BAD_GATEWAY,
+            detail=f"Embedding generation failed: {e}",
+        ) from e
+
+    except SQLAlchemyError as e:
+        logger.error(
+            "rag.retrieve_request_failed",
+            error=str(e),
+            error_type=type(e).__name__,
+            exc_info=True,
+        )
+        raise DatabaseError(
+            message="Failed to retrieve documents",
+            details={"error": str(e)},
+        ) from e
+
+
+# =============================================================================
+# Sources Endpoints
+# =============================================================================
+
+
+@router.get(
+    "/sources",
+    response_model=SourceListResponse,
+    summary="List indexed sources",
+    description="""
+List all indexed document sources with statistics.
+
+Returns:
+- List of sources with chunk counts
+- Total source count
+- Total chunk count across all sources
+""",
+)
+async def list_sources(
+    db: AsyncSession = Depends(get_db),
+) -> SourceListResponse:
+    """List all indexed sources.
+
+    Args:
+        db: Async database session from dependency.
+
+    Returns:
+        List of sources with statistics.
+    """
+    service = RAGService()
+    response = await service.list_sources(db=db)
+
+    logger.info(
+        "rag.list_sources_completed",
+        total_sources=response.total_sources,
+        total_chunks=response.total_chunks,
+    )
+
+    return response
+
+
+@router.delete(
+    "/sources/{source_id}",
+    response_model=DeleteResponse,
+    summary="Delete a source",
+    description="""
+Delete an indexed source and all its chunks.
+
+**Cascade Delete:**
+All chunks belonging to the source are automatically deleted.
+
+**Returns:**
+- `source_id`: Deleted source identifier
+- `chunks_deleted`: Number of chunks removed
+- `status`: Always "deleted"
+""",
+)
+async def delete_source(
+    source_id: str,
+    db: AsyncSession = Depends(get_db),
+) -> DeleteResponse:
+    """Delete a source and all its chunks.
+
+    Args:
+        source_id: Source identifier.
+        db: Async database session from dependency.
+
+    Returns:
+        Deletion result.
+
+    Raises:
+        HTTPException: If source not found.
+        DatabaseError: If database operation fails.
+    """
+    logger.info("rag.delete_source_request_received", source_id=source_id)
+
+    service = RAGService()
+
+    try:
+        response = await service.delete_source(db=db, source_id=source_id)
+
+        logger.info(
+            "rag.delete_source_request_completed",
+            source_id=source_id,
+            chunks_deleted=response.chunks_deleted,
+        )
+
+        return response
+
+    except SourceNotFoundError as e:
+        logger.warning(
+            "rag.delete_source_request_failed",
+            source_id=source_id,
+            error=str(e),
+            error_type=type(e).__name__,
+        )
+        raise HTTPException(
+            status_code=status.HTTP_404_NOT_FOUND,
+            detail=str(e),
+        ) from e
+
+    except SQLAlchemyError as e:
+        logger.error(
+            "rag.delete_source_request_failed",
+            source_id=source_id,
+            error=str(e),
+            error_type=type(e).__name__,
+            exc_info=True,
+        )
+        raise DatabaseError(
+            message="Failed to delete source",
+            details={"error": str(e)},
+        ) from e
diff --git a/app/features/rag/schemas.py b/app/features/rag/schemas.py
new file mode 100644
index 00000000..3c350c31
--- /dev/null
+++ b/app/features/rag/schemas.py
@@ -0,0 +1,181 @@
+"""Pydantic schemas for RAG API contracts.
+
+Schemas are designed to be:
+- Validated for data integrity
+- Compatible with SQLAlchemy models via from_attributes
+- Evidence-grounded (citations include source metadata)
+"""
+
+from __future__ import annotations
+
+from datetime import datetime
+from typing import Any, Literal
+
+from pydantic import BaseModel, ConfigDict, Field
+
+
+class IndexRequest(BaseModel):
+    """Request to index a document into the knowledge base.
+
+    Args:
+        source_type: Type of document to index (markdown or openapi).
+        source_path: Path to the document or identifier.
+        content: Optional content override (if not reading from path).
+        metadata: Custom metadata to attach to the source.
+    """
+
+    model_config = ConfigDict(extra="forbid")
+
+    source_type: Literal["markdown", "openapi"] = Field(
+        ..., description="Type of document to index"
+    )
+    source_path: str = Field(
+        ...,
+        min_length=1,
+        max_length=500,
+        description="Path to the document or unique identifier",
+    )
+    content: str | None = Field(
+        None, description="Optional content override (if not reading from path)"
+    )
+    metadata: dict[str, Any] | None = Field(
+        None, description="Custom metadata to attach to the source"
+    )
+
+
+class IndexResponse(BaseModel):
+    """Response from document indexing operation.
+
+    Args:
+        source_id: Unique identifier for the indexed source.
+        source_path: Path of the indexed document.
+        chunks_created: Number of chunks created from the document.
+        tokens_processed: Total tokens processed across all chunks.
+        duration_ms: Time taken to index the document.
+        status: Indexing status (indexed, updated, unchanged).
+    """
+
+    model_config = ConfigDict(from_attributes=True)
+
+    source_id: str
+    source_path: str
+    chunks_created: int
+    tokens_processed: int
+    duration_ms: float
+    status: Literal["indexed", "updated", "unchanged"]
+
+
+class RetrieveRequest(BaseModel):
+    """Request for semantic search across indexed documents.
+
+    Args:
+        query: Search query text.
+        top_k: Number of results to return (1-50).
+        similarity_threshold: Minimum similarity score (0.0-1.0).
+        filters: Metadata filters to apply.
+    """
+
+    model_config = ConfigDict(extra="forbid")
+
+    query: str = Field(..., min_length=1, max_length=2000, description="Search query text")
+    top_k: int = Field(default=5, ge=1, le=50, description="Number of results to return")
+    similarity_threshold: float = Field(
+        default=0.7, ge=0.0, le=1.0, description="Minimum similarity score"
+    )
+    filters: dict[str, Any] | None = Field(
+        None, description="Metadata filters (source_type, category, etc.)"
+    )
+
+
+class ChunkResult(BaseModel):
+    """Single chunk in retrieval results with citation metadata.
+
+    CRITICAL: Provides evidence-grounded context with stable citations.
+
+    Args:
+        chunk_id: Unique identifier for the chunk.
+        source_id: Identifier of the parent source.
+        source_path: Path of the source document.
+        source_type: Type of source document.
+        content: Chunk text content.
+        relevance_score: Similarity score (0.0-1.0).
+        metadata: Heading hierarchy, section path, etc.
+    """
+
+    model_config = ConfigDict(from_attributes=True)
+
+    chunk_id: str
+    source_id: str
+    source_path: str
+    source_type: str
+    content: str
+    relevance_score: float = Field(..., ge=0.0, le=1.0)
+    metadata: dict[str, Any] | None = None
+
+
+class RetrieveResponse(BaseModel):
+    """Response from semantic search operation.
+
+    Args:
+        results: List of matching chunks with relevance scores.
+        query_embedding_time_ms: Time to generate query embedding.
+        search_time_ms: Time to execute similarity search.
+        total_chunks_searched: Total chunks in the search space.
+    """
+
+    results: list[ChunkResult]
+    query_embedding_time_ms: float
+    search_time_ms: float
+    total_chunks_searched: int
+
+
+class SourceResponse(BaseModel):
+    """Details of an indexed document source.
+
+    Args:
+        source_id: Unique identifier for the source.
+        source_type: Type of document (markdown, openapi).
+        source_path: Path to the document.
+        chunk_count: Number of chunks from this source.
+        content_hash: SHA-256 hash for change detection.
+        indexed_at: When the source was last indexed.
+        metadata: Custom metadata attached to the source.
+    """
+
+    model_config = ConfigDict(from_attributes=True)
+
+    source_id: str
+    source_type: str
+    source_path: str
+    chunk_count: int
+    content_hash: str
+    indexed_at: datetime
+    metadata: dict[str, Any] | None = None
+
+
+class SourceListResponse(BaseModel):
+    """List of all indexed sources with summary statistics.
+
+    Args:
+        sources: List of indexed sources.
+        total_sources: Total number of sources.
+        total_chunks: Total number of chunks across all sources.
+    """
+
+    sources: list[SourceResponse]
+    total_sources: int
+    total_chunks: int
+
+
+class DeleteResponse(BaseModel):
+    """Response from source deletion operation.
+
+    Args:
+        source_id: Identifier of the deleted source.
+        chunks_deleted: Number of chunks that were deleted.
+        status: Always "deleted".
+    """
+
+    source_id: str
+    chunks_deleted: int
+    status: Literal["deleted"]
diff --git a/app/features/rag/service.py b/app/features/rag/service.py
new file mode 100644
index 00000000..1f38613c
--- /dev/null
+++ b/app/features/rag/service.py
@@ -0,0 +1,613 @@
+"""RAG service for document indexing and semantic retrieval.
+
+Orchestrates:
+- Document indexing with chunking and embedding
+- Semantic retrieval with similarity search
+- Source management (list, delete)
+- Idempotent re-indexing via content hash comparison
+
+CRITICAL: Uses pgvector cosine_distance for similarity search.
+"""
+
+from __future__ import annotations
+
+import hashlib
+import time
+import uuid
+from datetime import UTC, datetime
+from pathlib import Path
+from typing import Any, Literal
+
+import structlog
+from sqlalchemy import delete, func, select
+from sqlalchemy.ext.asyncio import AsyncSession
+
+from app.core.config import get_settings
+from app.features.rag.chunkers import ChunkData, get_chunker
+from app.features.rag.embeddings import EmbeddingProvider, get_embedding_service
+from app.features.rag.models import DocumentChunk, DocumentSource
+from app.features.rag.schemas import (
+    ChunkResult,
+    DeleteResponse,
+    IndexRequest,
+    IndexResponse,
+    RetrieveRequest,
+    RetrieveResponse,
+    SourceListResponse,
+    SourceResponse,
+)
+
+logger = structlog.get_logger()
+
+
+class SourceNotFoundError(ValueError):
+    """Source not found in the knowledge base."""
+
+    pass
+
+
+class RAGService:
+    """Service for RAG knowledge base operations.
+
+    Provides:
+    - Document indexing with automatic chunking and embedding
+    - Semantic retrieval with configurable similarity threshold
+    - Source management and statistics
+    - Idempotent re-indexing based on content hash
+
+    CRITICAL: Uses cosine_distance for similarity (not l2_distance).
+    """
+
+    def __init__(
+        self,
+        embedding_service: EmbeddingProvider | None = None,
+        base_dir: Path | str | None = None,
+    ) -> None:
+        """Initialize RAG service.
+
+        Args:
+            embedding_service: Optional embedding provider override (for testing).
+            base_dir: Base directory for path validation (for testing).
+                      Defaults to current working directory.
+        """
+        self.settings = get_settings()
+        self._embedding_service = embedding_service or get_embedding_service()
+        # Set base directory for path validation (mirrors registry/storage.py pattern)
+        if base_dir is None:
+            self._base_dir = Path.cwd().resolve()
+        elif isinstance(base_dir, str):
+            self._base_dir = Path(base_dir).resolve()
+        else:
+            self._base_dir = base_dir.resolve()
+
+    def _compute_content_hash(self, content: str) -> str:
+        """Compute SHA-256 hash of content for change detection.
+
+        Args:
+            content: Document content.
+
+        Returns:
+            64-character hex string hash.
+        """
+        return hashlib.sha256(content.encode()).hexdigest()
+
+    def _read_content_from_path(self, source_path: str) -> str:
+        """Read content from a file path with path traversal protection.
+
+        CRITICAL: Validates path is within base directory to prevent
+        directory traversal attacks. Mirrors pattern from registry/storage.py.
+
+        Args:
+            source_path: Path to the file.
+
+        Returns:
+            File content.
+
+        Raises:
+            FileNotFoundError: If file doesn't exist or path traversal attempted.
+        """
+        # Resolve the source path
+        resolved_path = Path(source_path).resolve()
+
+        # Security: ensure path is within base directory
+        try:
+            resolved_path.relative_to(self._base_dir)
+        except ValueError:
+            logger.warning(
+                "rag.path_traversal_attempt",
+                source_path=source_path,
+                base_dir=str(self._base_dir),
+            )
+            raise FileNotFoundError(
+                f"Source file not found or access denied: {source_path}"
+            ) from None
+
+        if not resolved_path.exists():
+            raise FileNotFoundError(f"Source file not found: {source_path}")
+
+        return resolved_path.read_text(encoding="utf-8")
+
+    async def index_document(
+        self,
+        db: AsyncSession,
+        request: IndexRequest,
+    ) -> IndexResponse:
+        """Index a document into the knowledge base.
+
+        Handles:
+        - Content reading (from path or request)
+        - Content hash comparison for idempotent updates
+        - Chunking based on source type
+        - Embedding generation for all chunks
+        - Database upsert (source + chunks)
+
+        Args:
+            db: Database session.
+            request: Index request with source info.
+
+        Returns:
+            Indexing result with statistics.
+        """
+        start_time = time.time()
+
+        logger.info(
+            "rag.index_document_started",
+            source_type=request.source_type,
+            source_path=request.source_path,
+        )
+
+        # Get content (from request or file)
+        if request.content:
+            content = request.content
+        else:
+            content = self._read_content_from_path(request.source_path)
+
+        # Compute content hash
+        content_hash = self._compute_content_hash(content)
+
+        # Check if source already exists
+        existing_source = await self._find_source_by_path(
+            db, request.source_type, request.source_path
+        )
+
+        if existing_source and existing_source.content_hash == content_hash:
+            # Content unchanged - skip re-indexing
+            chunk_count = await self._get_chunk_count(db, existing_source.id)
+            duration_ms = (time.time() - start_time) * 1000
+
+            logger.info(
+                "rag.index_document_unchanged",
+                source_id=existing_source.source_id,
+                source_path=request.source_path,
+            )
+
+            return IndexResponse(
+                source_id=existing_source.source_id,
+                source_path=request.source_path,
+                chunks_created=chunk_count,
+                tokens_processed=0,
+                duration_ms=duration_ms,
+                status="unchanged",
+            )
+
+        # Chunk the content
+        chunker = get_chunker(request.source_type)
+        chunks = chunker.chunk(content)
+
+        if not chunks:
+            logger.warning(
+                "rag.index_document_no_chunks",
+                source_path=request.source_path,
+            )
+            chunks = []
+
+        # Generate embeddings for all chunks
+        chunk_texts = [chunk.content for chunk in chunks]
+        embeddings: list[list[float]] = []
+
+        if chunk_texts:
+            embeddings = await self._embedding_service.embed_texts(chunk_texts)
+
+        # Calculate total tokens
+        total_tokens = sum(chunk.token_count for chunk in chunks)
+
+        # Upsert source and chunks
+        source_id = existing_source.source_id if existing_source else uuid.uuid4().hex
+        status: Literal["indexed", "updated", "unchanged"] = (
+            "updated" if existing_source else "indexed"
+        )
+
+        await self._upsert_source_and_chunks(
+            db=db,
+            source_id=source_id,
+            source_type=request.source_type,
+            source_path=request.source_path,
+            content_hash=content_hash,
+            metadata=request.metadata,
+            chunks=chunks,
+            embeddings=embeddings,
+            existing_source=existing_source,
+        )
+
+        duration_ms = (time.time() - start_time) * 1000
+
+        logger.info(
+            "rag.index_document_completed",
+            source_id=source_id,
+            source_path=request.source_path,
+            chunks_created=len(chunks),
+            tokens_processed=total_tokens,
+            duration_ms=duration_ms,
+            status=status,
+        )
+
+        return IndexResponse(
+            source_id=source_id,
+            source_path=request.source_path,
+            chunks_created=len(chunks),
+            tokens_processed=total_tokens,
+            duration_ms=duration_ms,
+            status=status,
+        )
+
+    async def retrieve(
+        self,
+        db: AsyncSession,
+        request: RetrieveRequest,
+    ) -> RetrieveResponse:
+        """Perform semantic search across indexed documents.
+
+        Uses pgvector cosine_distance for similarity ranking:
+        - relevance_score = 1 - cosine_distance (normalized to 0-1)
+        - Filters by similarity threshold
+        - Supports metadata filtering
+
+        Args:
+            db: Database session.
+            request: Retrieval request with query and filters.
+
+        Returns:
+            Search results with relevance scores.
+        """
+        embed_start = time.time()
+
+        logger.info(
+            "rag.retrieve_started",
+            query_length=len(request.query),
+            top_k=request.top_k,
+            threshold=request.similarity_threshold,
+        )
+
+        # Generate query embedding
+        query_embedding = await self._embedding_service.embed_query(request.query)
+        embed_time_ms = (time.time() - embed_start) * 1000
+
+        search_start = time.time()
+
+        # Get total chunk count for statistics
+        total_chunks = await self._get_total_chunk_count(db)
+
+        # Build similarity search query
+        # CRITICAL: cosine_distance returns values 0-2, so relevance = 1 - distance/2
+        # But for cosine similarity on normalized vectors, distance is 0-1
+        results = await self._search_similar_chunks(
+            db=db,
+            query_embedding=query_embedding,
+            top_k=request.top_k,
+            threshold=request.similarity_threshold,
+            filters=request.filters,
+        )
+
+        search_time_ms = (time.time() - search_start) * 1000
+
+        logger.info(
+            "rag.retrieve_completed",
+            results_count=len(results),
+            query_embedding_time_ms=embed_time_ms,
+            search_time_ms=search_time_ms,
+        )
+
+        return RetrieveResponse(
+            results=results,
+            query_embedding_time_ms=embed_time_ms,
+            search_time_ms=search_time_ms,
+            total_chunks_searched=total_chunks,
+        )
+
+    async def list_sources(
+        self,
+        db: AsyncSession,
+    ) -> SourceListResponse:
+        """List all indexed sources with statistics.
+
+        Args:
+            db: Database session.
+
+        Returns:
+            List of sources with chunk counts.
+        """
+        # Get sources with chunk counts
+        stmt = (
+            select(
+                DocumentSource,
+                func.count(DocumentChunk.id).label("chunk_count"),
+            )
+            .outerjoin(DocumentChunk, DocumentSource.id == DocumentChunk.source_id)
+            .group_by(DocumentSource.id)
+            .order_by(DocumentSource.indexed_at.desc())
+        )
+
+        result = await db.execute(stmt)
+        rows = result.all()
+
+        sources: list[SourceResponse] = []
+        total_chunks = 0
+
+        for source, chunk_count in rows:
+            sources.append(
+                SourceResponse(
+                    source_id=source.source_id,
+                    source_type=source.source_type,
+                    source_path=source.source_path,
+                    chunk_count=chunk_count,
+                    content_hash=source.content_hash,
+                    indexed_at=source.indexed_at,
+                    metadata=source.metadata_,
+                )
+            )
+            total_chunks += chunk_count
+
+        return SourceListResponse(
+            sources=sources,
+            total_sources=len(sources),
+            total_chunks=total_chunks,
+        )
+
+    async def delete_source(
+        self,
+        db: AsyncSession,
+        source_id: str,
+    ) -> DeleteResponse:
+        """Delete a source and all its chunks.
+
+        Args:
+            db: Database session.
+            source_id: Source identifier.
+
+        Returns:
+            Deletion result with chunk count.
+
+        Raises:
+            SourceNotFoundError: If source not found.
+        """
+        logger.info("rag.delete_source_started", source_id=source_id)
+
+        # Find source
+        stmt = select(DocumentSource).where(DocumentSource.source_id == source_id)
+        result = await db.execute(stmt)
+        source = result.scalar_one_or_none()
+
+        if source is None:
+            raise SourceNotFoundError(f"Source not found: {source_id}")
+
+        # Count chunks before deletion
+        chunk_count = await self._get_chunk_count(db, source.id)
+
+        # Delete source (cascades to chunks)
+        await db.delete(source)
+        await db.flush()
+
+        logger.info(
+            "rag.delete_source_completed",
+            source_id=source_id,
+            chunks_deleted=chunk_count,
+        )
+
+        return DeleteResponse(
+            source_id=source_id,
+            chunks_deleted=chunk_count,
+            status="deleted",
+        )
+
+    async def _find_source_by_path(
+        self,
+        db: AsyncSession,
+        source_type: str,
+        source_path: str,
+    ) -> DocumentSource | None:
+        """Find source by type and path.
+
+        Args:
+            db: Database session.
+            source_type: Source type.
+            source_path: Source path.
+
+        Returns:
+            Source or None.
+        """
+        stmt = select(DocumentSource).where(
+            (DocumentSource.source_type == source_type)
+            & (DocumentSource.source_path == source_path)
+        )
+        result = await db.execute(stmt)
+        return result.scalar_one_or_none()
+
+    async def _get_chunk_count(self, db: AsyncSession, source_id: int) -> int:
+        """Get number of chunks for a source.
+
+        Args:
+            db: Database session.
+            source_id: Source internal ID.
+
+        Returns:
+            Chunk count.
+        """
+        stmt = (
+            select(func.count())
+            .select_from(DocumentChunk)
+            .where(DocumentChunk.source_id == source_id)
+        )
+        result = await db.execute(stmt)
+        return result.scalar_one()
+
+    async def _get_total_chunk_count(self, db: AsyncSession) -> int:
+        """Get total number of chunks across all sources.
+
+        Args:
+            db: Database session.
+
+        Returns:
+            Total chunk count.
+        """
+        stmt = select(func.count()).select_from(DocumentChunk)
+        result = await db.execute(stmt)
+        return result.scalar_one()
+
+    async def _upsert_source_and_chunks(
+        self,
+        db: AsyncSession,
+        source_id: str,
+        source_type: str,
+        source_path: str,
+        content_hash: str,
+        metadata: dict[str, Any] | None,
+        chunks: list[ChunkData],
+        embeddings: list[list[float]],
+        existing_source: DocumentSource | None,
+    ) -> None:
+        """Upsert source and chunks in database.
+
+        Args:
+            db: Database session.
+            source_id: External source identifier.
+            source_type: Type of source.
+            source_path: Path to source.
+            content_hash: SHA-256 hash of content.
+            metadata: Custom metadata.
+            chunks: Chunked content.
+            embeddings: Embeddings for each chunk.
+            existing_source: Existing source if updating.
+        """
+        now = datetime.now(UTC)
+
+        if existing_source:
+            # Update existing source
+            existing_source.content_hash = content_hash
+            existing_source.metadata_ = metadata
+            existing_source.indexed_at = now
+
+            # Delete old chunks
+            await db.execute(
+                delete(DocumentChunk).where(DocumentChunk.source_id == existing_source.id)
+            )
+            source_internal_id = existing_source.id
+        else:
+            # Create new source
+            source = DocumentSource(
+                source_id=source_id,
+                source_type=source_type,
+                source_path=source_path,
+                content_hash=content_hash,
+                metadata_=metadata,
+                indexed_at=now,
+            )
+            db.add(source)
+            await db.flush()
+            source_internal_id = source.id
+
+        # Create new chunks
+        for i, (chunk, embedding) in enumerate(zip(chunks, embeddings, strict=True)):
+            chunk_obj = DocumentChunk(
+                chunk_id=uuid.uuid4().hex,
+                source_id=source_internal_id,
+                chunk_index=i,
+                content=chunk.content,
+                embedding=embedding,
+                token_count=chunk.token_count,
+                metadata_=chunk.metadata if chunk.metadata else None,
+            )
+            db.add(chunk_obj)
+
+        await db.flush()
+
+    async def _search_similar_chunks(
+        self,
+        db: AsyncSession,
+        query_embedding: list[float],
+        top_k: int,
+        threshold: float,
+        filters: dict[str, Any] | None,
+    ) -> list[ChunkResult]:
+        """Search for similar chunks using cosine distance.
+
+        Args:
+            db: Database session.
+            query_embedding: Query embedding vector.
+            top_k: Maximum results to return.
+            threshold: Minimum similarity threshold.
+            filters: Optional metadata filters.
+
+        Returns:
+            List of chunk results with relevance scores.
+        """
+        # CRITICAL: Use cosine_distance method from pgvector
+        # cosine_distance returns 1 - cosine_similarity for normalized vectors
+        distance = DocumentChunk.embedding.cosine_distance(query_embedding)
+
+        # Build query with distance calculation
+        stmt = (
+            select(
+                DocumentChunk,
+                DocumentSource,
+                distance.label("distance"),
+            )
+            .join(DocumentSource, DocumentChunk.source_id == DocumentSource.id)
+            .where(DocumentChunk.embedding.isnot(None))
+            .order_by(distance)
+            .limit(top_k * 2)  # Fetch extra to filter by threshold
+        )
+
+        # Apply metadata filters if provided
+        if filters:
+            if "source_type" in filters:
+                source_types = filters["source_type"]
+                if isinstance(source_types, str):
+                    source_types = [source_types]
+                stmt = stmt.where(DocumentSource.source_type.in_(source_types))
+
+            if "category" in filters:
+                # Filter by metadata category
+                stmt = stmt.where(
+                    DocumentSource.metadata_.op("->>")("category") == filters["category"]
+                )
+
+        result = await db.execute(stmt)
+        rows = result.all()
+
+        results: list[ChunkResult] = []
+        for chunk, source, dist in rows:
+            # Convert distance to similarity score
+            # For cosine distance: similarity = 1 - distance
+            relevance_score = 1.0 - float(dist)
+
+            # Apply threshold filter
+            if relevance_score < threshold:
+                continue
+
+            results.append(
+                ChunkResult(
+                    chunk_id=chunk.chunk_id,
+                    source_id=source.source_id,
+                    source_path=source.source_path,
+                    source_type=source.source_type,
+                    content=chunk.content,
+                    relevance_score=round(relevance_score, 4),
+                    metadata=chunk.metadata_,
+                )
+            )
+
+            # Stop if we have enough results
+            if len(results) >= top_k:
+                break
+
+        return results
diff --git a/app/features/rag/tests/__init__.py b/app/features/rag/tests/__init__.py
new file mode 100644
index 00000000..041e4941
--- /dev/null
+++ b/app/features/rag/tests/__init__.py
@@ -0,0 +1 @@
+"""RAG feature tests."""
diff --git a/app/features/rag/tests/conftest.py b/app/features/rag/tests/conftest.py
new file mode 100644
index 00000000..3bf7f318
--- /dev/null
+++ b/app/features/rag/tests/conftest.py
@@ -0,0 +1,265 @@
+"""Test fixtures for RAG module."""
+
+from collections.abc import AsyncGenerator
+from datetime import UTC, datetime
+from unittest.mock import AsyncMock, MagicMock
+
+import pytest
+from httpx import ASGITransport, AsyncClient
+from sqlalchemy import delete
+from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker, create_async_engine
+
+from app.core.config import get_settings
+from app.core.database import get_db
+from app.features.rag.embeddings import EmbeddingService
+from app.features.rag.models import DocumentChunk, DocumentSource
+from app.features.rag.schemas import IndexRequest, RetrieveRequest
+from app.main import app
+
+# =============================================================================
+# Database Fixtures for Integration Tests
+# =============================================================================
+
+
+@pytest.fixture
+async def db_session() -> AsyncGenerator[AsyncSession, None]:
+    """Create async database session for integration tests.
+
+    Creates tables if needed, provides a session, and cleans up test data.
+    Requires PostgreSQL to be running (docker-compose up -d).
+    """
+    settings = get_settings()
+    engine = create_async_engine(settings.database_url, echo=False)
+
+    async_session_maker = async_sessionmaker(
+        engine,
+        class_=AsyncSession,
+        expire_on_commit=False,
+    )
+
+    async with async_session_maker() as session:
+        try:
+            yield session
+        finally:
+            # Clean up test data (delete sources with test- prefix)
+            test_source_ids = delete(DocumentSource).where(
+                DocumentSource.source_path.like("test-%")
+            )
+            await session.execute(test_source_ids)
+            await session.commit()
+
+    await engine.dispose()
+
+
+@pytest.fixture
+async def client(db_session: AsyncSession) -> AsyncGenerator[AsyncClient, None]:
+    """Create test client with database dependency override."""
+
+    async def override_get_db() -> AsyncGenerator[AsyncSession, None]:
+        try:
+            yield db_session
+            await db_session.commit()
+        except Exception:
+            await db_session.rollback()
+            raise
+
+    app.dependency_overrides[get_db] = override_get_db
+
+    async with AsyncClient(
+        transport=ASGITransport(app=app),
+        base_url="http://test",
+    ) as ac:
+        yield ac
+
+    app.dependency_overrides.clear()
+
+
+# =============================================================================
+# Mock Embedding Service
+# =============================================================================
+
+
+@pytest.fixture
+def mock_embedding_service() -> EmbeddingService:
+    """Create a mocked EmbeddingService for unit tests.
+
+    Returns embeddings of correct dimension (1536) without calling OpenAI API.
+    """
+    service = MagicMock(spec=EmbeddingService)
+
+    # Mock embed_texts to return deterministic embeddings
+    async def mock_embed_texts(texts, **kwargs):
+        # Return embedding vector of correct dimension for each text
+        return [[0.1] * 1536 for _ in texts]
+
+    # Mock embed_query to return single embedding
+    async def mock_embed_query(query):
+        return [0.1] * 1536
+
+    service.embed_texts = AsyncMock(side_effect=mock_embed_texts)
+    service.embed_query = AsyncMock(side_effect=mock_embed_query)
+    service.count_tokens = MagicMock(side_effect=lambda text: len(text.split()))
+    service.truncate_to_tokens = MagicMock(side_effect=lambda text, max_tokens: text)
+
+    return service
+
+
+# =============================================================================
+# Sample Content Fixtures
+# =============================================================================
+
+
+@pytest.fixture
+def sample_markdown_content() -> str:
+    """Sample markdown content with headings for testing."""
+    return """# Main Title
+
+This is the introduction paragraph with some content.
+
+## Section One
+
+First section content goes here. It has multiple sentences.
+This is the second sentence. And a third one.
+
+### Subsection 1.1
+
+Subsection content with details about the topic.
+
+### Subsection 1.2
+
+More subsection content here.
+
+## Section Two
+
+Second section with different content.
+
+### Subsection 2.1
+
+Final subsection content.
+"""
+
+
+@pytest.fixture
+def sample_openapi_content() -> str:
+    """Sample OpenAPI JSON content for testing."""
+    return """{
+  "openapi": "3.0.0",
+  "info": {
+    "title": "Test API",
+    "version": "1.0.0",
+    "description": "A test API for unit testing"
+  },
+  "servers": [
+    {"url": "https://api.example.com", "description": "Production"}
+  ],
+  "paths": {
+    "/users": {
+      "get": {
+        "operationId": "listUsers",
+        "summary": "List all users",
+        "description": "Returns a paginated list of users",
+        "tags": ["users"],
+        "parameters": [
+          {
+            "name": "page",
+            "in": "query",
+            "description": "Page number",
+            "required": false
+          }
+        ],
+        "responses": {
+          "200": {"description": "Success"}
+        }
+      },
+      "post": {
+        "operationId": "createUser",
+        "summary": "Create a user",
+        "tags": ["users"],
+        "requestBody": {
+          "content": {
+            "application/json": {
+              "schema": {"type": "object", "properties": {"name": {"type": "string"}}}
+            }
+          }
+        },
+        "responses": {
+          "201": {"description": "Created"}
+        }
+      }
+    }
+  }
+}"""
+
+
+@pytest.fixture
+def sample_large_markdown_content() -> str:
+    """Large markdown content that exceeds chunk size for testing."""
+    # Generate content that will need multiple chunks
+    paragraphs = []
+    for i in range(50):
+        paragraphs.append(
+            f"## Section {i}\n\n"
+            f"This is paragraph {i} with enough content to make it substantial. "
+            f"It contains multiple sentences to ensure proper chunking behavior. "
+            f"The content is designed to test the chunker's ability to handle large documents. "
+            f"Each section has similar structure but different section numbers.\n"
+        )
+    return "\n".join(paragraphs)
+
+
+# =============================================================================
+# Schema Fixtures
+# =============================================================================
+
+
+@pytest.fixture
+def sample_index_request() -> IndexRequest:
+    """Sample index request for testing."""
+    return IndexRequest(
+        source_type="markdown",
+        source_path="test-document.md",
+        content="# Test\n\nThis is test content.",
+        metadata={"category": "testing"},
+    )
+
+
+@pytest.fixture
+def sample_retrieve_request() -> RetrieveRequest:
+    """Sample retrieve request for testing."""
+    return RetrieveRequest(
+        query="What is the test about?",
+        top_k=5,
+        similarity_threshold=0.7,
+    )
+
+
+# =============================================================================
+# Model Fixtures
+# =============================================================================
+
+
+@pytest.fixture
+def sample_document_source() -> DocumentSource:
+    """Sample DocumentSource ORM object for testing."""
+    return DocumentSource(
+        source_id="test123456789012345678901234",
+        source_type="markdown",
+        source_path="test-sample.md",
+        content_hash="a" * 64,
+        metadata_={"category": "testing"},
+        indexed_at=datetime.now(UTC),
+    )
+
+
+@pytest.fixture
+def sample_document_chunk() -> DocumentChunk:
+    """Sample DocumentChunk ORM object for testing."""
+    return DocumentChunk(
+        chunk_id="chunk12345678901234567890123",
+        source_id=1,
+        chunk_index=0,
+        content="Test chunk content",
+        embedding=[0.1] * 1536,
+        token_count=3,
+        metadata_={"heading": "Test"},
+    )
diff --git a/app/features/rag/tests/test_chunkers.py b/app/features/rag/tests/test_chunkers.py
new file mode 100644
index 00000000..77d63141
--- /dev/null
+++ b/app/features/rag/tests/test_chunkers.py
@@ -0,0 +1,295 @@
+"""Unit tests for RAG chunkers."""
+
+import json
+
+import pytest
+
+from app.features.rag.chunkers import (
+    BaseChunker,
+    ChunkData,
+    MarkdownChunker,
+    OpenAPIChunker,
+    get_chunker,
+)
+
+
+class TestMarkdownChunker:
+    """Tests for MarkdownChunker."""
+
+    def test_chunk_simple_document(self, sample_markdown_content):
+        """Test chunking a simple markdown document."""
+        chunker = MarkdownChunker()
+        chunks = chunker.chunk(sample_markdown_content)
+
+        assert len(chunks) > 0
+        for chunk in chunks:
+            assert isinstance(chunk, ChunkData)
+            assert chunk.content
+            assert chunk.token_count > 0
+
+    def test_chunk_respects_heading_boundaries(self):
+        """Test that chunker respects heading boundaries."""
+        content = """# Title
+
+Introduction.
+
+## Section One
+
+Content one.
+
+## Section Two
+
+Content two.
+"""
+        chunker = MarkdownChunker()
+        chunker.chunk_size = 1000  # Large enough to not split within sections
+        chunks = chunker.chunk(content)
+
+        # Each section should be relatively intact
+        contents = [c.content for c in chunks]
+        full_content = "\n".join(contents)
+
+        assert "# Title" in full_content or "Title" in full_content
+        assert "Section One" in full_content
+        assert "Section Two" in full_content
+
+    def test_chunk_extracts_heading_metadata(self):
+        """Test that heading metadata is extracted."""
+        content = """# Main
+
+## Sub
+
+Content here.
+"""
+        chunker = MarkdownChunker()
+        chunks = chunker.chunk(content)
+
+        # Find chunk with heading metadata
+        chunks_with_headings = [c for c in chunks if c.metadata.get("heading")]
+        assert len(chunks_with_headings) > 0
+
+        # Check section_path is populated
+        for chunk in chunks_with_headings:
+            if chunk.metadata.get("section_path"):
+                assert isinstance(chunk.metadata["section_path"], list)
+
+    def test_chunk_respects_chunk_size(self, sample_large_markdown_content):
+        """Test that chunks respect the configured chunk size."""
+        chunker = MarkdownChunker()
+        chunker.chunk_size = 200  # Small chunk size
+        chunks = chunker.chunk(sample_large_markdown_content)
+
+        # Chunks should not vastly exceed chunk size
+        for chunk in chunks:
+            # Allow some tolerance for overlap and heading context
+            assert chunk.token_count <= chunker.chunk_size * 2
+
+    def test_chunk_handles_empty_content(self):
+        """Test handling of empty content."""
+        chunker = MarkdownChunker()
+        chunks = chunker.chunk("")
+
+        assert len(chunks) == 0
+
+    def test_chunk_handles_content_without_headings(self):
+        """Test handling content without headings."""
+        content = "This is just plain text without any headings. It has multiple sentences."
+        chunker = MarkdownChunker()
+        chunks = chunker.chunk(content)
+
+        assert len(chunks) >= 1
+        assert chunks[0].content.strip() == content.strip()
+
+    def test_chunk_updates_heading_path_correctly(self):
+        """Test heading path updates with nested headings."""
+        content = """# Level 1
+
+## Level 2
+
+### Level 3
+
+Back to level 2 content.
+
+## Another Level 2
+
+Content here.
+"""
+        chunker = MarkdownChunker()
+        chunks = chunker.chunk(content)
+
+        # Find chunks with section_path
+        paths = [c.metadata.get("section_path") for c in chunks if c.metadata.get("section_path")]
+
+        # Should have various heading depths
+        assert len(paths) > 0
+
+    def test_chunk_token_counting(self):
+        """Test that token counting is accurate."""
+        chunker = MarkdownChunker()
+
+        # Count tokens for known text
+        text = "Hello, this is a test."
+        token_count = chunker.count_tokens(text)
+
+        assert token_count > 0
+        assert token_count < len(text)  # Tokens should be fewer than characters
+
+    def test_chunk_indices_are_sequential(self):
+        """Test that chunk indices are sequential."""
+        content = """# One
+
+Content one.
+
+# Two
+
+Content two.
+
+# Three
+
+Content three.
+"""
+        chunker = MarkdownChunker()
+        chunks = chunker.chunk(content)
+
+        indices = [c.index for c in chunks]
+        expected = list(range(len(chunks)))
+        assert indices == expected
+
+    def test_overlap_text_extraction(self):
+        """Test overlap text extraction works correctly."""
+        chunker = MarkdownChunker()
+        chunker.chunk_overlap = 10
+
+        text = "This is a longer piece of text that we want to extract overlap from."
+        overlap = chunker._get_overlap_text(text)
+
+        assert len(overlap) > 0
+        assert text.endswith(overlap) or overlap in text
+
+
+class TestOpenAPIChunker:
+    """Tests for OpenAPIChunker."""
+
+    def test_chunk_openapi_json(self, sample_openapi_content):
+        """Test chunking OpenAPI JSON content."""
+        chunker = OpenAPIChunker()
+        chunks = chunker.chunk(sample_openapi_content)
+
+        assert len(chunks) >= 2  # At least info + endpoints
+
+        # Check for endpoint metadata
+        endpoint_chunks = [c for c in chunks if c.metadata.get("type") == "endpoint"]
+        assert len(endpoint_chunks) >= 2  # GET and POST /users
+
+    def test_chunk_creates_info_chunk(self, sample_openapi_content):
+        """Test that an info chunk is created."""
+        chunker = OpenAPIChunker()
+        chunks = chunker.chunk(sample_openapi_content)
+
+        info_chunks = [c for c in chunks if c.metadata.get("type") == "api_info"]
+        assert len(info_chunks) == 1
+        assert "Test API" in info_chunks[0].content
+
+    def test_chunk_extracts_endpoint_metadata(self, sample_openapi_content):
+        """Test endpoint metadata extraction."""
+        chunker = OpenAPIChunker()
+        chunks = chunker.chunk(sample_openapi_content)
+
+        endpoint_chunks = [c for c in chunks if c.metadata.get("type") == "endpoint"]
+
+        # Check GET /users endpoint
+        get_users = [
+            c
+            for c in endpoint_chunks
+            if c.metadata.get("path") == "/users" and c.metadata.get("method") == "GET"
+        ]
+        assert len(get_users) == 1
+        assert get_users[0].metadata.get("operation_id") == "listUsers"
+
+    def test_chunk_includes_parameters(self, sample_openapi_content):
+        """Test that parameters are included in chunk content."""
+        chunker = OpenAPIChunker()
+        chunks = chunker.chunk(sample_openapi_content)
+
+        endpoint_chunks = [c for c in chunks if c.metadata.get("type") == "endpoint"]
+        get_users = next(c for c in endpoint_chunks if c.metadata.get("method") == "GET")
+
+        assert "Parameters" in get_users.content
+        assert "page" in get_users.content
+
+    def test_chunk_handles_invalid_json(self):
+        """Test handling of invalid JSON content."""
+        chunker = OpenAPIChunker()
+        chunks = chunker.chunk("not valid json")
+
+        # Should fall back to markdown chunking
+        assert len(chunks) >= 1
+
+    def test_chunk_handles_minimal_spec(self):
+        """Test handling minimal OpenAPI spec."""
+        minimal_spec = json.dumps(
+            {
+                "openapi": "3.0.0",
+                "info": {"title": "Minimal", "version": "1.0"},
+                "paths": {},
+            }
+        )
+        chunker = OpenAPIChunker()
+        chunks = chunker.chunk(minimal_spec)
+
+        # Should at least have info chunk
+        assert len(chunks) >= 1
+
+    def test_chunk_respects_token_limit(self, sample_openapi_content):
+        """Test that chunks don't exceed token limit."""
+        chunker = OpenAPIChunker()
+        chunks = chunker.chunk(sample_openapi_content)
+
+        for chunk in chunks:
+            assert chunk.token_count <= BaseChunker.MAX_TOKENS_PER_CHUNK
+
+
+class TestGetChunker:
+    """Tests for get_chunker factory function."""
+
+    def test_get_markdown_chunker(self):
+        """Test getting markdown chunker."""
+        chunker = get_chunker("markdown")
+        assert isinstance(chunker, MarkdownChunker)
+
+    def test_get_openapi_chunker(self):
+        """Test getting openapi chunker."""
+        chunker = get_chunker("openapi")
+        assert isinstance(chunker, OpenAPIChunker)
+
+    def test_invalid_source_type_raises(self):
+        """Test that invalid source type raises ValueError."""
+        with pytest.raises(ValueError) as exc_info:
+            get_chunker("invalid_type")
+        assert "Unsupported source type" in str(exc_info.value)
+
+
+class TestChunkData:
+    """Tests for ChunkData dataclass."""
+
+    def test_chunk_data_creation(self):
+        """Test creating ChunkData."""
+        chunk = ChunkData(
+            content="Test content",
+            index=0,
+            token_count=2,
+            metadata={"heading": "Test"},
+        )
+        assert chunk.content == "Test content"
+        assert chunk.index == 0
+        assert chunk.token_count == 2
+        assert chunk.metadata == {"heading": "Test"}
+
+    def test_chunk_data_default_metadata(self):
+        """Test default metadata is empty dict."""
+        chunk = ChunkData(
+            content="Test",
+            index=0,
+            token_count=1,
+        )
+        assert chunk.metadata == {}
diff --git a/app/features/rag/tests/test_embeddings.py b/app/features/rag/tests/test_embeddings.py
new file mode 100644
index 00000000..2eb59b70
--- /dev/null
+++ b/app/features/rag/tests/test_embeddings.py
@@ -0,0 +1,452 @@
+"""Unit tests for RAG embedding providers."""
+
+from unittest.mock import AsyncMock, MagicMock, patch
+
+import httpx
+import pytest
+
+from app.features.rag.embeddings import (
+    EmbeddingError,
+    EmbeddingProvider,
+    EmbeddingService,
+    OllamaEmbeddingProvider,
+    OpenAIEmbeddingProvider,
+    get_embedding_service,
+    reset_embedding_service,
+)
+
+
+class TestEmbeddingProvider:
+    """Tests for EmbeddingProvider abstract base class."""
+
+    def test_cannot_instantiate_directly(self):
+        """Test that EmbeddingProvider cannot be instantiated directly."""
+        with pytest.raises(TypeError):
+            EmbeddingProvider()  # type: ignore[abstract]
+
+
+class TestOpenAIEmbeddingProvider:
+    """Tests for OpenAIEmbeddingProvider."""
+
+    def test_init_without_api_key(self):
+        """Test initialization without API key."""
+        with patch("app.features.rag.embeddings.get_settings") as mock_settings:
+            mock_settings.return_value.openai_api_key = ""
+            mock_settings.return_value.rag_embedding_dimension = 1536
+            provider = OpenAIEmbeddingProvider()
+            # Should not raise during init
+            assert provider._client is None
+
+    def test_get_client_raises_without_api_key(self):
+        """Test _get_client raises when no API key configured."""
+        with patch("app.features.rag.embeddings.get_settings") as mock_settings:
+            mock_settings.return_value.openai_api_key = ""
+            provider = OpenAIEmbeddingProvider()
+
+            with pytest.raises(EmbeddingError) as exc_info:
+                provider._get_client()
+            assert "API key not configured" in str(exc_info.value)
+
+    def test_dimension_property(self):
+        """Test dimension property returns configured value."""
+        with patch("app.features.rag.embeddings.get_settings") as mock_settings:
+            mock_settings.return_value.openai_api_key = "test-key"
+            mock_settings.return_value.rag_embedding_dimension = 768
+            provider = OpenAIEmbeddingProvider()
+
+            assert provider.dimension == 768
+
+    def test_count_tokens(self):
+        """Test token counting."""
+        with patch("app.features.rag.embeddings.get_settings") as mock_settings:
+            mock_settings.return_value.openai_api_key = "test-key"
+            mock_settings.return_value.rag_embedding_model = "text-embedding-3-small"
+            mock_settings.return_value.rag_embedding_dimension = 1536
+            mock_settings.return_value.rag_embedding_batch_size = 100
+
+            provider = OpenAIEmbeddingProvider()
+
+            count = provider.count_tokens("Hello, world!")
+            assert count > 0
+            assert count < 20  # Should be a reasonable count
+
+    def test_count_tokens_empty_string(self):
+        """Test token counting for empty string."""
+        with patch("app.features.rag.embeddings.get_settings") as mock_settings:
+            mock_settings.return_value.openai_api_key = "test-key"
+            provider = OpenAIEmbeddingProvider()
+
+            count = provider.count_tokens("")
+            assert count == 0
+
+    def test_truncate_to_tokens(self):
+        """Test token truncation."""
+        with patch("app.features.rag.embeddings.get_settings") as mock_settings:
+            mock_settings.return_value.openai_api_key = "test-key"
+            provider = OpenAIEmbeddingProvider()
+
+            long_text = "This is a longer piece of text that will be truncated."
+            truncated = provider.truncate_to_tokens(long_text, 5)
+
+            assert len(truncated) < len(long_text)
+            assert provider.count_tokens(truncated) <= 5
+
+    def test_truncate_to_tokens_no_truncation_needed(self):
+        """Test truncation when text is already within limit."""
+        with patch("app.features.rag.embeddings.get_settings") as mock_settings:
+            mock_settings.return_value.openai_api_key = "test-key"
+            provider = OpenAIEmbeddingProvider()
+
+            short_text = "Hi"
+            truncated = provider.truncate_to_tokens(short_text, 100)
+
+            assert truncated == short_text
+
+    @pytest.mark.asyncio
+    async def test_embed_texts_empty_list(self):
+        """Test embedding empty list returns empty list."""
+        with patch("app.features.rag.embeddings.get_settings") as mock_settings:
+            mock_settings.return_value.openai_api_key = "test-key"
+            provider = OpenAIEmbeddingProvider()
+
+            result = await provider.embed_texts([])
+            assert result == []
+
+    @pytest.mark.asyncio
+    async def test_embed_texts_batching(self):
+        """Test that texts are batched correctly."""
+        with patch("app.features.rag.embeddings.get_settings") as mock_settings:
+            mock_settings.return_value.openai_api_key = "test-key"
+            mock_settings.return_value.rag_embedding_model = "text-embedding-3-small"
+            mock_settings.return_value.rag_embedding_dimension = 1536
+            mock_settings.return_value.rag_embedding_batch_size = 2
+
+            provider = OpenAIEmbeddingProvider()
+
+            # Mock the client
+            mock_client = MagicMock()
+
+            # Need to adjust mock to handle multiple calls
+            mock_response_1 = MagicMock()
+            mock_response_1.data = [
+                MagicMock(embedding=[0.1] * 1536),
+                MagicMock(embedding=[0.2] * 1536),
+            ]
+            mock_response_1.usage = MagicMock(prompt_tokens=10, total_tokens=10)
+
+            mock_response_2 = MagicMock()
+            mock_response_2.data = [
+                MagicMock(embedding=[0.3] * 1536),
+                MagicMock(embedding=[0.4] * 1536),
+            ]
+            mock_response_2.usage = MagicMock(prompt_tokens=10, total_tokens=10)
+
+            mock_client.embeddings.create = AsyncMock(
+                side_effect=[mock_response_1, mock_response_2]
+            )
+            provider._client = mock_client
+
+            # Test with 4 texts (should be 2 batches)
+            texts = ["text1", "text2", "text3", "text4"]
+            result = await provider.embed_texts(texts)
+
+            assert len(result) == 4
+            assert mock_client.embeddings.create.call_count == 2
+
+    @pytest.mark.asyncio
+    async def test_embed_query_returns_single_embedding(self):
+        """Test embed_query returns single embedding."""
+        with patch("app.features.rag.embeddings.get_settings") as mock_settings:
+            mock_settings.return_value.openai_api_key = "test-key"
+            mock_settings.return_value.rag_embedding_model = "text-embedding-3-small"
+            mock_settings.return_value.rag_embedding_dimension = 1536
+            mock_settings.return_value.rag_embedding_batch_size = 100
+
+            provider = OpenAIEmbeddingProvider()
+
+            # Mock the client
+            mock_client = MagicMock()
+            mock_response = MagicMock()
+            mock_response.data = [MagicMock(embedding=[0.1] * 1536)]
+            mock_response.usage = MagicMock(prompt_tokens=5, total_tokens=5)
+            mock_client.embeddings.create = AsyncMock(return_value=mock_response)
+            provider._client = mock_client
+
+            result = await provider.embed_query("test query")
+
+            assert len(result) == 1536
+            assert result == [0.1] * 1536
+
+    @pytest.mark.asyncio
+    async def test_embed_texts_truncates_long_input(self):
+        """Test that long inputs are truncated."""
+        with patch("app.features.rag.embeddings.get_settings") as mock_settings:
+            mock_settings.return_value.openai_api_key = "test-key"
+            mock_settings.return_value.rag_embedding_model = "text-embedding-3-small"
+            mock_settings.return_value.rag_embedding_dimension = 1536
+            mock_settings.return_value.rag_embedding_batch_size = 100
+
+            provider = OpenAIEmbeddingProvider()
+
+            # Mock the client
+            mock_client = MagicMock()
+            mock_response = MagicMock()
+            mock_response.data = [MagicMock(embedding=[0.1] * 1536)]
+            mock_response.usage = MagicMock(prompt_tokens=100, total_tokens=100)
+            mock_client.embeddings.create = AsyncMock(return_value=mock_response)
+            provider._client = mock_client
+
+            # (In reality, truncation happens before API call)
+            result = await provider.embed_texts(["short text"])
+
+            assert len(result) == 1
+
+
+class TestOllamaEmbeddingProvider:
+    """Tests for OllamaEmbeddingProvider."""
+
+    def test_init(self):
+        """Test initialization."""
+        with patch("app.features.rag.embeddings.get_settings") as mock_settings:
+            mock_settings.return_value.ollama_base_url = "http://localhost:11434"
+            mock_settings.return_value.ollama_embedding_model = "nomic-embed-text"
+            mock_settings.return_value.rag_embedding_dimension = 768
+
+            provider = OllamaEmbeddingProvider()
+            assert provider._client is None
+
+    def test_dimension_property(self):
+        """Test dimension property returns configured value."""
+        with patch("app.features.rag.embeddings.get_settings") as mock_settings:
+            mock_settings.return_value.ollama_base_url = "http://localhost:11434"
+            mock_settings.return_value.ollama_embedding_model = "nomic-embed-text"
+            mock_settings.return_value.rag_embedding_dimension = 768
+
+            provider = OllamaEmbeddingProvider()
+            assert provider.dimension == 768
+
+    @pytest.mark.asyncio
+    async def test_embed_texts_empty_list(self):
+        """Test embedding empty list returns empty list."""
+        with patch("app.features.rag.embeddings.get_settings") as mock_settings:
+            mock_settings.return_value.ollama_base_url = "http://localhost:11434"
+            mock_settings.return_value.ollama_embedding_model = "nomic-embed-text"
+            mock_settings.return_value.rag_embedding_dimension = 768
+
+            provider = OllamaEmbeddingProvider()
+            result = await provider.embed_texts([])
+            assert result == []
+
+    @pytest.mark.asyncio
+    async def test_embed_texts_success(self):
+        """Test successful embedding generation."""
+        with patch("app.features.rag.embeddings.get_settings") as mock_settings:
+            mock_settings.return_value.ollama_base_url = "http://localhost:11434"
+            mock_settings.return_value.ollama_embedding_model = "nomic-embed-text"
+            mock_settings.return_value.rag_embedding_dimension = 768
+
+            provider = OllamaEmbeddingProvider()
+
+            # Mock the HTTP client with OpenAI-compatible response format
+            mock_response = MagicMock()
+            mock_response.json.return_value = {
+                "data": [
+                    {"embedding": [0.1] * 768, "index": 0},
+                    {"embedding": [0.2] * 768, "index": 1},
+                ]
+            }
+            mock_response.raise_for_status = MagicMock()
+
+            mock_client = MagicMock(spec=httpx.AsyncClient)
+            mock_client.post = AsyncMock(return_value=mock_response)
+            provider._client = mock_client
+
+            result = await provider.embed_texts(["text1", "text2"])
+
+            assert len(result) == 2
+            assert result[0] == [0.1] * 768
+            assert result[1] == [0.2] * 768
+            mock_client.post.assert_called_once_with(
+                "/v1/embeddings",
+                json={
+                    "model": "nomic-embed-text",
+                    "input": ["text1", "text2"],
+                    "dimensions": 768,
+                },
+            )
+
+    @pytest.mark.asyncio
+    async def test_embed_query_returns_single_embedding(self):
+        """Test embed_query returns single embedding."""
+        with patch("app.features.rag.embeddings.get_settings") as mock_settings:
+            mock_settings.return_value.ollama_base_url = "http://localhost:11434"
+            mock_settings.return_value.ollama_embedding_model = "nomic-embed-text"
+            mock_settings.return_value.rag_embedding_dimension = 768
+
+            provider = OllamaEmbeddingProvider()
+
+            # Mock the HTTP client with OpenAI-compatible response format
+            mock_response = MagicMock()
+            mock_response.json.return_value = {"data": [{"embedding": [0.5] * 768, "index": 0}]}
+            mock_response.raise_for_status = MagicMock()
+
+            mock_client = MagicMock(spec=httpx.AsyncClient)
+            mock_client.post = AsyncMock(return_value=mock_response)
+            provider._client = mock_client
+
+            result = await provider.embed_query("test query")
+
+            assert len(result) == 768
+            assert result == [0.5] * 768
+
+    @pytest.mark.asyncio
+    async def test_embed_texts_model_not_found(self):
+        """Test error handling when model not found."""
+        with patch("app.features.rag.embeddings.get_settings") as mock_settings:
+            mock_settings.return_value.ollama_base_url = "http://localhost:11434"
+            mock_settings.return_value.ollama_embedding_model = "nonexistent-model"
+            mock_settings.return_value.rag_embedding_dimension = 768
+
+            provider = OllamaEmbeddingProvider()
+
+            # Mock 404 response
+            mock_response = MagicMock()
+            mock_response.status_code = 404
+            error = httpx.HTTPStatusError(
+                "Not Found",
+                request=MagicMock(),
+                response=mock_response,
+            )
+
+            mock_client = MagicMock(spec=httpx.AsyncClient)
+            mock_client.post = AsyncMock(side_effect=error)
+            provider._client = mock_client
+
+            with pytest.raises(EmbeddingError) as exc_info:
+                await provider.embed_texts(["test"])
+            assert "not found" in str(exc_info.value).lower()
+            assert "ollama pull" in str(exc_info.value)
+
+    @pytest.mark.asyncio
+    async def test_embed_texts_connection_error(self):
+        """Test error handling when Ollama not reachable."""
+        with patch("app.features.rag.embeddings.get_settings") as mock_settings:
+            mock_settings.return_value.ollama_base_url = "http://localhost:11434"
+            mock_settings.return_value.ollama_embedding_model = "nomic-embed-text"
+            mock_settings.return_value.rag_embedding_dimension = 768
+
+            provider = OllamaEmbeddingProvider()
+
+            # Mock connection error
+            mock_client = MagicMock(spec=httpx.AsyncClient)
+            mock_client.post = AsyncMock(side_effect=httpx.ConnectError("Connection refused"))
+            provider._client = mock_client
+
+            with pytest.raises(EmbeddingError) as exc_info:
+                await provider.embed_texts(["test"])
+            assert "Failed to connect to Ollama" in str(exc_info.value)
+
+    @pytest.mark.asyncio
+    async def test_embed_texts_count_mismatch(self):
+        """Test error when embedding count doesn't match input count."""
+        with patch("app.features.rag.embeddings.get_settings") as mock_settings:
+            mock_settings.return_value.ollama_base_url = "http://localhost:11434"
+            mock_settings.return_value.ollama_embedding_model = "nomic-embed-text"
+            mock_settings.return_value.rag_embedding_dimension = 768
+
+            provider = OllamaEmbeddingProvider()
+
+            # Mock response with wrong count (OpenAI-compatible format)
+            mock_response = MagicMock()
+            mock_response.json.return_value = {
+                "data": [{"embedding": [0.1] * 768, "index": 0}]  # Only 1 embedding for 2 texts
+            }
+            mock_response.raise_for_status = MagicMock()
+
+            mock_client = MagicMock(spec=httpx.AsyncClient)
+            mock_client.post = AsyncMock(return_value=mock_response)
+            provider._client = mock_client
+
+            with pytest.raises(EmbeddingError) as exc_info:
+                await provider.embed_texts(["text1", "text2"])
+            assert "mismatch" in str(exc_info.value).lower()
+
+    @pytest.mark.asyncio
+    async def test_close(self):
+        """Test close method properly closes HTTP client."""
+        with patch("app.features.rag.embeddings.get_settings") as mock_settings:
+            mock_settings.return_value.ollama_base_url = "http://localhost:11434"
+            mock_settings.return_value.ollama_embedding_model = "nomic-embed-text"
+            mock_settings.return_value.rag_embedding_dimension = 768
+
+            provider = OllamaEmbeddingProvider()
+
+            # Mock client
+            mock_client = MagicMock(spec=httpx.AsyncClient)
+            mock_client.aclose = AsyncMock()
+            provider._client = mock_client
+
+            await provider.close()
+
+            mock_client.aclose.assert_called_once()
+            assert provider._client is None
+
+
+class TestGetEmbeddingService:
+    """Tests for get_embedding_service factory."""
+
+    def test_returns_openai_by_default(self):
+        """Test that OpenAI provider is returned by default."""
+        reset_embedding_service()
+
+        with patch("app.features.rag.embeddings.get_settings") as mock_settings:
+            mock_settings.return_value.rag_embedding_provider = "openai"
+            mock_settings.return_value.openai_api_key = ""
+            mock_settings.return_value.rag_embedding_model = "text-embedding-3-small"
+            mock_settings.return_value.rag_embedding_dimension = 1536
+            mock_settings.return_value.rag_embedding_batch_size = 100
+
+            provider = get_embedding_service()
+            assert isinstance(provider, OpenAIEmbeddingProvider)
+
+        reset_embedding_service()
+
+    def test_returns_ollama_when_configured(self):
+        """Test that Ollama provider is returned when configured."""
+        reset_embedding_service()
+
+        with patch("app.features.rag.embeddings.get_settings") as mock_settings:
+            mock_settings.return_value.rag_embedding_provider = "ollama"
+            mock_settings.return_value.ollama_base_url = "http://localhost:11434"
+            mock_settings.return_value.ollama_embedding_model = "nomic-embed-text"
+            mock_settings.return_value.rag_embedding_dimension = 768
+
+            provider = get_embedding_service()
+            assert isinstance(provider, OllamaEmbeddingProvider)
+
+        reset_embedding_service()
+
+    def test_returns_same_instance(self):
+        """Test that singleton returns same instance."""
+        reset_embedding_service()
+
+        with patch("app.features.rag.embeddings.get_settings") as mock_settings:
+            mock_settings.return_value.rag_embedding_provider = "openai"
+            mock_settings.return_value.openai_api_key = ""
+            mock_settings.return_value.rag_embedding_model = "text-embedding-3-small"
+            mock_settings.return_value.rag_embedding_dimension = 1536
+            mock_settings.return_value.rag_embedding_batch_size = 100
+
+            provider1 = get_embedding_service()
+            provider2 = get_embedding_service()
+            assert provider1 is provider2
+
+        reset_embedding_service()
+
+
+class TestEmbeddingServiceAlias:
+    """Tests for backwards compatibility alias."""
+
+    def test_embedding_service_is_openai_provider(self):
+        """Test that EmbeddingService alias points to OpenAIEmbeddingProvider."""
+        assert EmbeddingService is OpenAIEmbeddingProvider
diff --git a/app/features/rag/tests/test_routes.py b/app/features/rag/tests/test_routes.py
new file mode 100644
index 00000000..ce09a05a
--- /dev/null
+++ b/app/features/rag/tests/test_routes.py
@@ -0,0 +1,433 @@
+"""Integration tests for RAG API routes.
+
+These tests require:
+- PostgreSQL running with pgvector extension (docker-compose up -d)
+- Migrations applied (uv run alembic upgrade head)
+
+Note: These tests mock the OpenAI embedding service to avoid API calls.
+"""
+
+from unittest.mock import AsyncMock, MagicMock, patch
+
+import pytest
+from httpx import AsyncClient
+
+from app.features.rag.embeddings import EmbeddingService
+
+# =============================================================================
+# Mock Embedding Service for Integration Tests
+# =============================================================================
+
+
+def create_mock_embedding_service() -> EmbeddingService:
+    """Create a mock embedding service for integration tests."""
+    service = MagicMock(spec=EmbeddingService)
+
+    async def mock_embed_texts(texts, **kwargs):
+        return [[0.1 + i * 0.01] * 1536 for i, _ in enumerate(texts)]
+
+    async def mock_embed_query(query):
+        return [0.1] * 1536
+
+    service.embed_texts = AsyncMock(side_effect=mock_embed_texts)
+    service.embed_query = AsyncMock(side_effect=mock_embed_query)
+    service.count_tokens = MagicMock(side_effect=lambda text: len(text.split()))
+    service.truncate_to_tokens = MagicMock(side_effect=lambda text, max_tokens: text)
+
+    return service
+
+
+# =============================================================================
+# Index Endpoint Tests
+# =============================================================================
+
+
+@pytest.mark.integration
+class TestIndexEndpoint:
+    """Integration tests for POST /rag/index endpoint."""
+
+    @pytest.mark.asyncio
+    async def test_index_markdown_creates_chunks(self, client: AsyncClient):
+        """Test that indexing markdown creates chunks in database."""
+        mock_service = create_mock_embedding_service()
+
+        with patch(
+            "app.features.rag.service.get_embedding_service",
+            return_value=mock_service,
+        ):
+            response = await client.post(
+                "/rag/index",
+                json={
+                    "source_type": "markdown",
+                    "source_path": "test-index-md-001",
+                    "content": "# Test Document\n\nThis is test content for indexing.",
+                    "metadata": {"category": "testing"},
+                },
+            )
+
+        assert response.status_code == 201
+        data = response.json()
+        assert data["status"] == "indexed"
+        assert data["chunks_created"] >= 1
+        assert data["source_path"] == "test-index-md-001"
+        assert "source_id" in data
+
+    @pytest.mark.asyncio
+    async def test_index_same_content_returns_unchanged(self, client: AsyncClient):
+        """Test that re-indexing unchanged content returns 'unchanged' status."""
+        mock_service = create_mock_embedding_service()
+
+        content = "# Unchanged\n\nSame content twice."
+
+        with patch(
+            "app.features.rag.service.get_embedding_service",
+            return_value=mock_service,
+        ):
+            # First index
+            response1 = await client.post(
+                "/rag/index",
+                json={
+                    "source_type": "markdown",
+                    "source_path": "test-unchanged-001",
+                    "content": content,
+                },
+            )
+            assert response1.status_code == 201
+            assert response1.json()["status"] == "indexed"
+
+            # Second index with same content
+            response2 = await client.post(
+                "/rag/index",
+                json={
+                    "source_type": "markdown",
+                    "source_path": "test-unchanged-001",
+                    "content": content,
+                },
+            )
+            assert response2.status_code == 201
+            assert response2.json()["status"] == "unchanged"
+
+    @pytest.mark.asyncio
+    async def test_index_updated_content_re_indexes(self, client: AsyncClient):
+        """Test that updated content triggers re-indexing."""
+        mock_service = create_mock_embedding_service()
+
+        with patch(
+            "app.features.rag.service.get_embedding_service",
+            return_value=mock_service,
+        ):
+            # First index
+            response1 = await client.post(
+                "/rag/index",
+                json={
+                    "source_type": "markdown",
+                    "source_path": "test-updated-001",
+                    "content": "# Original\n\nOriginal content.",
+                },
+            )
+            assert response1.status_code == 201
+            source_id = response1.json()["source_id"]
+
+            # Second index with different content
+            response2 = await client.post(
+                "/rag/index",
+                json={
+                    "source_type": "markdown",
+                    "source_path": "test-updated-001",
+                    "content": "# Updated\n\nNew updated content.",
+                },
+            )
+            assert response2.status_code == 201
+            assert response2.json()["status"] == "updated"
+            assert response2.json()["source_id"] == source_id
+
+    @pytest.mark.asyncio
+    async def test_index_invalid_source_type(self, client: AsyncClient):
+        """Test that invalid source type returns 422."""
+        response = await client.post(
+            "/rag/index",
+            json={
+                "source_type": "invalid",
+                "source_path": "test.txt",
+                "content": "test",
+            },
+        )
+        assert response.status_code == 422
+
+    @pytest.mark.asyncio
+    async def test_index_file_not_found(self, client: AsyncClient):
+        """Test that missing file returns 404."""
+        response = await client.post(
+            "/rag/index",
+            json={
+                "source_type": "markdown",
+                "source_path": "/nonexistent/path/file.md",
+            },
+        )
+        assert response.status_code == 404
+
+
+# =============================================================================
+# Retrieve Endpoint Tests
+# =============================================================================
+
+
+@pytest.mark.integration
+class TestRetrieveEndpoint:
+    """Integration tests for POST /rag/retrieve endpoint."""
+
+    @pytest.mark.asyncio
+    async def test_retrieve_returns_relevant_chunks(self, client: AsyncClient):
+        """Test that retrieval returns matching chunks."""
+        mock_service = create_mock_embedding_service()
+
+        with patch(
+            "app.features.rag.service.get_embedding_service",
+            return_value=mock_service,
+        ):
+            # First, index a document
+            await client.post(
+                "/rag/index",
+                json={
+                    "source_type": "markdown",
+                    "source_path": "test-retrieve-001",
+                    "content": "# Backtesting Guide\n\nBacktesting prevents data leakage by using time-based splits.",
+                },
+            )
+
+            # Then retrieve
+            response = await client.post(
+                "/rag/retrieve",
+                json={
+                    "query": "How does backtesting prevent leakage?",
+                    "top_k": 5,
+                    "similarity_threshold": 0.0,  # Low threshold to ensure results
+                },
+            )
+
+        assert response.status_code == 200
+        data = response.json()
+        assert "results" in data
+        assert "query_embedding_time_ms" in data
+        assert "search_time_ms" in data
+        assert "total_chunks_searched" in data
+
+    @pytest.mark.asyncio
+    async def test_retrieve_respects_threshold(self, client: AsyncClient):
+        """Test that retrieval respects similarity threshold."""
+        mock_service = create_mock_embedding_service()
+
+        with patch(
+            "app.features.rag.service.get_embedding_service",
+            return_value=mock_service,
+        ):
+            # Index a document
+            await client.post(
+                "/rag/index",
+                json={
+                    "source_type": "markdown",
+                    "source_path": "test-threshold-001",
+                    "content": "# Test Content\n\nSome test content here.",
+                },
+            )
+
+            # Retrieve with very high threshold
+            response = await client.post(
+                "/rag/retrieve",
+                json={
+                    "query": "unrelated query",
+                    "top_k": 5,
+                    "similarity_threshold": 0.99,  # Very high threshold
+                },
+            )
+
+        assert response.status_code == 200
+        # With high threshold and mock embeddings, results may be empty
+        data = response.json()
+        assert isinstance(data["results"], list)
+
+    @pytest.mark.asyncio
+    async def test_retrieve_empty_database(self, client: AsyncClient):
+        """Test retrieval on empty database returns empty results."""
+        mock_service = create_mock_embedding_service()
+
+        with patch(
+            "app.features.rag.service.get_embedding_service",
+            return_value=mock_service,
+        ):
+            response = await client.post(
+                "/rag/retrieve",
+                json={
+                    "query": "anything",
+                    "top_k": 5,
+                },
+            )
+
+        assert response.status_code == 200
+        data = response.json()
+        assert isinstance(data["results"], list)
+
+    @pytest.mark.asyncio
+    async def test_retrieve_validates_query(self, client: AsyncClient):
+        """Test that empty query is rejected."""
+        response = await client.post(
+            "/rag/retrieve",
+            json={
+                "query": "",
+                "top_k": 5,
+            },
+        )
+        assert response.status_code == 422
+
+
+# =============================================================================
+# Sources Endpoint Tests
+# =============================================================================
+
+
+@pytest.mark.integration
+class TestSourcesEndpoint:
+    """Integration tests for /rag/sources endpoints."""
+
+    @pytest.mark.asyncio
+    async def test_list_sources_returns_all(self, client: AsyncClient):
+        """Test listing all indexed sources."""
+        mock_service = create_mock_embedding_service()
+
+        with patch(
+            "app.features.rag.service.get_embedding_service",
+            return_value=mock_service,
+        ):
+            # Index a couple of documents
+            await client.post(
+                "/rag/index",
+                json={
+                    "source_type": "markdown",
+                    "source_path": "test-list-001",
+                    "content": "# First Doc",
+                },
+            )
+            await client.post(
+                "/rag/index",
+                json={
+                    "source_type": "markdown",
+                    "source_path": "test-list-002",
+                    "content": "# Second Doc",
+                },
+            )
+
+            # List sources
+            response = await client.get("/rag/sources")
+
+        assert response.status_code == 200
+        data = response.json()
+        assert "sources" in data
+        assert "total_sources" in data
+        assert "total_chunks" in data
+        assert data["total_sources"] >= 2
+
+    @pytest.mark.asyncio
+    async def test_delete_source_removes_chunks(self, client: AsyncClient):
+        """Test that deleting a source removes all its chunks."""
+        mock_service = create_mock_embedding_service()
+
+        with patch(
+            "app.features.rag.service.get_embedding_service",
+            return_value=mock_service,
+        ):
+            # Index a document
+            index_response = await client.post(
+                "/rag/index",
+                json={
+                    "source_type": "markdown",
+                    "source_path": "test-delete-001",
+                    "content": "# Delete Me\n\nThis will be deleted.",
+                },
+            )
+            source_id = index_response.json()["source_id"]
+
+            # Delete the source
+            delete_response = await client.delete(f"/rag/sources/{source_id}")
+
+        assert delete_response.status_code == 200
+        data = delete_response.json()
+        assert data["status"] == "deleted"
+        assert data["chunks_deleted"] >= 1
+
+    @pytest.mark.asyncio
+    async def test_delete_nonexistent_returns_404(self, client: AsyncClient):
+        """Test that deleting non-existent source returns 404."""
+        response = await client.delete("/rag/sources/nonexistent123456789012")
+        assert response.status_code == 404
+
+    @pytest.mark.asyncio
+    async def test_source_not_in_list_after_delete(self, client: AsyncClient):
+        """Test that deleted source no longer appears in list."""
+        mock_service = create_mock_embedding_service()
+
+        with patch(
+            "app.features.rag.service.get_embedding_service",
+            return_value=mock_service,
+        ):
+            # Index a document
+            index_response = await client.post(
+                "/rag/index",
+                json={
+                    "source_type": "markdown",
+                    "source_path": "test-delete-verify-001",
+                    "content": "# Verify Delete",
+                },
+            )
+            source_id = index_response.json()["source_id"]
+
+            # Delete the source
+            await client.delete(f"/rag/sources/{source_id}")
+
+            # Verify not in list
+            list_response = await client.get("/rag/sources")
+            source_ids = [s["source_id"] for s in list_response.json()["sources"]]
+            assert source_id not in source_ids
+
+
+# =============================================================================
+# OpenAPI Indexing Tests
+# =============================================================================
+
+
+@pytest.mark.integration
+class TestOpenAPIIndexing:
+    """Integration tests for OpenAPI document indexing."""
+
+    @pytest.mark.asyncio
+    async def test_index_openapi_creates_endpoint_chunks(self, client: AsyncClient):
+        """Test that OpenAPI spec creates endpoint-based chunks."""
+        mock_service = create_mock_embedding_service()
+
+        openapi_spec = """{
+            "openapi": "3.0.0",
+            "info": {"title": "Test API", "version": "1.0"},
+            "paths": {
+                "/users": {
+                    "get": {"summary": "List users", "operationId": "listUsers", "responses": {"200": {"description": "OK"}}},
+                    "post": {"summary": "Create user", "operationId": "createUser", "responses": {"201": {"description": "Created"}}}
+                }
+            }
+        }"""
+
+        with patch(
+            "app.features.rag.service.get_embedding_service",
+            return_value=mock_service,
+        ):
+            response = await client.post(
+                "/rag/index",
+                json={
+                    "source_type": "openapi",
+                    "source_path": "test-openapi-001",
+                    "content": openapi_spec,
+                },
+            )
+
+        assert response.status_code == 201
+        data = response.json()
+        # Should have at least: info chunk + 2 endpoint chunks
+        assert data["chunks_created"] >= 3
diff --git a/app/features/rag/tests/test_schemas.py b/app/features/rag/tests/test_schemas.py
new file mode 100644
index 00000000..a3bb0292
--- /dev/null
+++ b/app/features/rag/tests/test_schemas.py
@@ -0,0 +1,345 @@
+"""Unit tests for RAG schemas."""
+
+import pytest
+from pydantic import ValidationError
+
+from app.features.rag.schemas import (
+    ChunkResult,
+    DeleteResponse,
+    IndexRequest,
+    IndexResponse,
+    RetrieveRequest,
+    RetrieveResponse,
+    SourceListResponse,
+    SourceResponse,
+)
+
+
+class TestIndexRequest:
+    """Tests for IndexRequest schema."""
+
+    def test_valid_markdown_request(self):
+        """Test valid markdown index request."""
+        request = IndexRequest(
+            source_type="markdown",
+            source_path="docs/README.md",
+            content="# Hello\n\nWorld",
+            metadata={"category": "docs"},
+        )
+        assert request.source_type == "markdown"
+        assert request.source_path == "docs/README.md"
+        assert request.content == "# Hello\n\nWorld"
+        assert request.metadata == {"category": "docs"}
+
+    def test_valid_openapi_request(self):
+        """Test valid openapi index request."""
+        request = IndexRequest(
+            source_type="openapi",
+            source_path="api/openapi.json",
+        )
+        assert request.source_type == "openapi"
+        assert request.content is None
+        assert request.metadata is None
+
+    def test_invalid_source_type(self):
+        """Test invalid source type is rejected."""
+        with pytest.raises(ValidationError) as exc_info:
+            IndexRequest(
+                source_type="invalid",  # type: ignore[arg-type]
+                source_path="test.txt",
+            )
+        assert "source_type" in str(exc_info.value)
+
+    def test_empty_source_path_rejected(self):
+        """Test empty source path is rejected."""
+        with pytest.raises(ValidationError) as exc_info:
+            IndexRequest(
+                source_type="markdown",
+                source_path="",
+            )
+        assert "source_path" in str(exc_info.value)
+
+    def test_source_path_max_length(self):
+        """Test source path max length is enforced."""
+        with pytest.raises(ValidationError) as exc_info:
+            IndexRequest(
+                source_type="markdown",
+                source_path="x" * 501,
+            )
+        assert "source_path" in str(exc_info.value)
+
+    def test_extra_fields_rejected(self):
+        """Test extra fields are rejected."""
+        with pytest.raises(ValidationError) as exc_info:
+            IndexRequest(
+                source_type="markdown",
+                source_path="test.md",
+                extra_field="not allowed",  # type: ignore[call-arg]
+            )
+        assert "extra_field" in str(exc_info.value)
+
+
+class TestRetrieveRequest:
+    """Tests for RetrieveRequest schema."""
+
+    def test_valid_request_defaults(self):
+        """Test valid request with defaults."""
+        request = RetrieveRequest(query="What is forecasting?")
+        assert request.query == "What is forecasting?"
+        assert request.top_k == 5
+        assert request.similarity_threshold == 0.7
+        assert request.filters is None
+
+    def test_valid_request_custom_params(self):
+        """Test valid request with custom parameters."""
+        request = RetrieveRequest(
+            query="How does backtesting work?",
+            top_k=10,
+            similarity_threshold=0.8,
+            filters={"source_type": ["markdown"]},
+        )
+        assert request.top_k == 10
+        assert request.similarity_threshold == 0.8
+        assert request.filters == {"source_type": ["markdown"]}
+
+    def test_empty_query_rejected(self):
+        """Test empty query is rejected."""
+        with pytest.raises(ValidationError) as exc_info:
+            RetrieveRequest(query="")
+        assert "query" in str(exc_info.value)
+
+    def test_query_max_length(self):
+        """Test query max length is enforced."""
+        with pytest.raises(ValidationError) as exc_info:
+            RetrieveRequest(query="x" * 2001)
+        assert "query" in str(exc_info.value)
+
+    def test_top_k_bounds(self):
+        """Test top_k bounds are enforced."""
+        # Below minimum
+        with pytest.raises(ValidationError):
+            RetrieveRequest(query="test", top_k=0)
+
+        # Above maximum
+        with pytest.raises(ValidationError):
+            RetrieveRequest(query="test", top_k=51)
+
+        # Valid bounds
+        request_min = RetrieveRequest(query="test", top_k=1)
+        assert request_min.top_k == 1
+
+        request_max = RetrieveRequest(query="test", top_k=50)
+        assert request_max.top_k == 50
+
+    def test_similarity_threshold_bounds(self):
+        """Test similarity threshold bounds are enforced."""
+        # Below minimum
+        with pytest.raises(ValidationError):
+            RetrieveRequest(query="test", similarity_threshold=-0.1)
+
+        # Above maximum
+        with pytest.raises(ValidationError):
+            RetrieveRequest(query="test", similarity_threshold=1.1)
+
+        # Valid bounds
+        request_min = RetrieveRequest(query="test", similarity_threshold=0.0)
+        assert request_min.similarity_threshold == 0.0
+
+        request_max = RetrieveRequest(query="test", similarity_threshold=1.0)
+        assert request_max.similarity_threshold == 1.0
+
+
+class TestIndexResponse:
+    """Tests for IndexResponse schema."""
+
+    def test_indexed_status(self):
+        """Test indexed status response."""
+        response = IndexResponse(
+            source_id="abc123",
+            source_path="test.md",
+            chunks_created=5,
+            tokens_processed=1000,
+            duration_ms=123.45,
+            status="indexed",
+        )
+        assert response.status == "indexed"
+        assert response.chunks_created == 5
+
+    def test_updated_status(self):
+        """Test updated status response."""
+        response = IndexResponse(
+            source_id="abc123",
+            source_path="test.md",
+            chunks_created=3,
+            tokens_processed=500,
+            duration_ms=50.0,
+            status="updated",
+        )
+        assert response.status == "updated"
+
+    def test_unchanged_status(self):
+        """Test unchanged status response."""
+        response = IndexResponse(
+            source_id="abc123",
+            source_path="test.md",
+            chunks_created=5,
+            tokens_processed=0,
+            duration_ms=10.0,
+            status="unchanged",
+        )
+        assert response.status == "unchanged"
+        assert response.tokens_processed == 0
+
+
+class TestChunkResult:
+    """Tests for ChunkResult schema."""
+
+    def test_valid_chunk_result(self):
+        """Test valid chunk result."""
+        result = ChunkResult(
+            chunk_id="chunk123",
+            source_id="src123",
+            source_path="docs/test.md",
+            source_type="markdown",
+            content="This is chunk content",
+            relevance_score=0.95,
+            metadata={"heading": "Introduction"},
+        )
+        assert result.relevance_score == 0.95
+        assert result.metadata == {"heading": "Introduction"}
+
+    def test_relevance_score_bounds(self):
+        """Test relevance score bounds."""
+        # Valid bounds
+        result_zero = ChunkResult(
+            chunk_id="c1",
+            source_id="s1",
+            source_path="test.md",
+            source_type="markdown",
+            content="test",
+            relevance_score=0.0,
+        )
+        assert result_zero.relevance_score == 0.0
+
+        result_one = ChunkResult(
+            chunk_id="c1",
+            source_id="s1",
+            source_path="test.md",
+            source_type="markdown",
+            content="test",
+            relevance_score=1.0,
+        )
+        assert result_one.relevance_score == 1.0
+
+        # Out of bounds
+        with pytest.raises(ValidationError):
+            ChunkResult(
+                chunk_id="c1",
+                source_id="s1",
+                source_path="test.md",
+                source_type="markdown",
+                content="test",
+                relevance_score=1.5,
+            )
+
+
+class TestRetrieveResponse:
+    """Tests for RetrieveResponse schema."""
+
+    def test_valid_response(self):
+        """Test valid retrieve response."""
+        response = RetrieveResponse(
+            results=[
+                ChunkResult(
+                    chunk_id="c1",
+                    source_id="s1",
+                    source_path="test.md",
+                    source_type="markdown",
+                    content="test content",
+                    relevance_score=0.9,
+                )
+            ],
+            query_embedding_time_ms=45.5,
+            search_time_ms=12.3,
+            total_chunks_searched=100,
+        )
+        assert len(response.results) == 1
+        assert response.total_chunks_searched == 100
+
+    def test_empty_results(self):
+        """Test response with no results."""
+        response = RetrieveResponse(
+            results=[],
+            query_embedding_time_ms=50.0,
+            search_time_ms=10.0,
+            total_chunks_searched=0,
+        )
+        assert len(response.results) == 0
+
+
+class TestSourceResponse:
+    """Tests for SourceResponse schema."""
+
+    def test_valid_source_response(self):
+        """Test valid source response."""
+        from datetime import UTC, datetime
+
+        response = SourceResponse(
+            source_id="src123",
+            source_type="markdown",
+            source_path="docs/README.md",
+            chunk_count=10,
+            content_hash="a" * 64,
+            indexed_at=datetime.now(UTC),
+            metadata={"category": "docs"},
+        )
+        assert response.chunk_count == 10
+        assert response.source_type == "markdown"
+
+
+class TestSourceListResponse:
+    """Tests for SourceListResponse schema."""
+
+    def test_valid_list_response(self):
+        """Test valid source list response."""
+        from datetime import UTC, datetime
+
+        response = SourceListResponse(
+            sources=[
+                SourceResponse(
+                    source_id="src1",
+                    source_type="markdown",
+                    source_path="doc1.md",
+                    chunk_count=5,
+                    content_hash="a" * 64,
+                    indexed_at=datetime.now(UTC),
+                )
+            ],
+            total_sources=1,
+            total_chunks=5,
+        )
+        assert response.total_sources == 1
+        assert response.total_chunks == 5
+
+    def test_empty_list_response(self):
+        """Test empty source list response."""
+        response = SourceListResponse(
+            sources=[],
+            total_sources=0,
+            total_chunks=0,
+        )
+        assert len(response.sources) == 0
+
+
+class TestDeleteResponse:
+    """Tests for DeleteResponse schema."""
+
+    def test_valid_delete_response(self):
+        """Test valid delete response."""
+        response = DeleteResponse(
+            source_id="src123",
+            chunks_deleted=10,
+            status="deleted",
+        )
+        assert response.status == "deleted"
+        assert response.chunks_deleted == 10
diff --git a/app/features/rag/tests/test_service.py b/app/features/rag/tests/test_service.py
new file mode 100644
index 00000000..52a7afc2
--- /dev/null
+++ b/app/features/rag/tests/test_service.py
@@ -0,0 +1,273 @@
+"""Unit tests for RAG service."""
+
+import hashlib
+from unittest.mock import AsyncMock, MagicMock, patch
+
+import pytest
+
+from app.features.rag.schemas import IndexRequest, RetrieveRequest
+from app.features.rag.service import RAGService, SourceNotFoundError
+
+
+class TestRAGServiceUnit:
+    """Unit tests for RAGService (no database)."""
+
+    def test_compute_content_hash(self):
+        """Test content hash computation."""
+        service = RAGService()
+
+        content = "Test content"
+        hash1 = service._compute_content_hash(content)
+
+        # Should be SHA-256 hex (64 characters)
+        assert len(hash1) == 64
+        assert all(c in "0123456789abcdef" for c in hash1)
+
+        # Same content should produce same hash
+        hash2 = service._compute_content_hash(content)
+        assert hash1 == hash2
+
+        # Different content should produce different hash
+        hash3 = service._compute_content_hash("Different content")
+        assert hash1 != hash3
+
+    def test_compute_content_hash_deterministic(self):
+        """Test hash is deterministic."""
+        service = RAGService()
+
+        content = "# Test\n\nWith some content."
+        expected = hashlib.sha256(content.encode()).hexdigest()
+
+        result = service._compute_content_hash(content)
+        assert result == expected
+
+    def test_read_content_from_path_not_found(self, tmp_path):
+        """Test reading from non-existent path raises."""
+        service = RAGService()
+
+        with pytest.raises(FileNotFoundError):
+            service._read_content_from_path("/nonexistent/path.md")
+
+    def test_read_content_from_path_success(self, tmp_path):
+        """Test reading from existing path."""
+        # Pass tmp_path as base_dir to allow test files in tmp directory
+        service = RAGService(base_dir=tmp_path)
+
+        # Create test file
+        test_file = tmp_path / "test.md"
+        test_file.write_text("# Test Content")
+
+        content = service._read_content_from_path(str(test_file))
+        assert content == "# Test Content"
+
+    def test_read_content_from_path_traversal_blocked(self, tmp_path):
+        """Test that path traversal attempts are blocked."""
+        # Set base_dir to tmp_path
+        service = RAGService(base_dir=tmp_path)
+
+        # Try to read file outside base_dir (should fail)
+        with pytest.raises(FileNotFoundError, match="not found or access denied"):
+            service._read_content_from_path("/etc/passwd")
+
+
+class TestRAGServiceIndexDocument:
+    """Tests for index_document method."""
+
+    @pytest.mark.asyncio
+    async def test_index_with_content_provided(self, mock_embedding_service):
+        """Test indexing when content is provided directly."""
+        service = RAGService(embedding_service=mock_embedding_service)
+
+        request = IndexRequest(
+            source_type="markdown",
+            source_path="test-direct-content.md",
+            content="# Test\n\nDirect content.",
+        )
+
+        # Mock database session
+        mock_db = AsyncMock()
+        mock_db.execute = AsyncMock(
+            return_value=MagicMock(scalar_one_or_none=MagicMock(return_value=None))
+        )
+        mock_db.flush = AsyncMock()
+        mock_db.add = MagicMock()
+
+        with patch.object(service, "_find_source_by_path", return_value=None):
+            with patch.object(service, "_upsert_source_and_chunks", new_callable=AsyncMock):
+                response = await service.index_document(db=mock_db, request=request)
+
+        assert response.status == "indexed"
+        assert response.source_path == "test-direct-content.md"
+        assert response.chunks_created > 0
+
+    @pytest.mark.asyncio
+    async def test_index_unchanged_content(self, mock_embedding_service):
+        """Test that unchanged content returns 'unchanged' status."""
+        service = RAGService(embedding_service=mock_embedding_service)
+
+        content = "# Test\n\nContent."
+        content_hash = service._compute_content_hash(content)
+
+        request = IndexRequest(
+            source_type="markdown",
+            source_path="test-unchanged.md",
+            content=content,
+        )
+
+        # Mock existing source with same hash
+        mock_source = MagicMock()
+        mock_source.source_id = "existing123"
+        mock_source.content_hash = content_hash
+
+        mock_db = AsyncMock()
+
+        with patch.object(service, "_find_source_by_path", return_value=mock_source):
+            with patch.object(service, "_get_chunk_count", return_value=5):
+                response = await service.index_document(db=mock_db, request=request)
+
+        assert response.status == "unchanged"
+        assert response.tokens_processed == 0
+        assert response.chunks_created == 5
+
+    @pytest.mark.asyncio
+    async def test_index_updated_content(self, mock_embedding_service):
+        """Test that changed content returns 'updated' status."""
+        service = RAGService(embedding_service=mock_embedding_service)
+
+        request = IndexRequest(
+            source_type="markdown",
+            source_path="test-updated.md",
+            content="# Updated\n\nNew content.",
+        )
+
+        # Mock existing source with different hash
+        mock_source = MagicMock()
+        mock_source.source_id = "existing123"
+        mock_source.content_hash = "different_hash"
+
+        mock_db = AsyncMock()
+
+        with patch.object(service, "_find_source_by_path", return_value=mock_source):
+            with patch.object(service, "_upsert_source_and_chunks", new_callable=AsyncMock):
+                response = await service.index_document(db=mock_db, request=request)
+
+        assert response.status == "updated"
+        assert response.source_id == "existing123"
+
+
+class TestRAGServiceRetrieve:
+    """Tests for retrieve method."""
+
+    @pytest.mark.asyncio
+    async def test_retrieve_calls_embedding_service(self, mock_embedding_service):
+        """Test that retrieve calls embedding service for query."""
+        service = RAGService(embedding_service=mock_embedding_service)
+
+        request = RetrieveRequest(
+            query="Test query",
+            top_k=5,
+            similarity_threshold=0.7,
+        )
+
+        mock_db = AsyncMock()
+
+        with patch.object(service, "_get_total_chunk_count", return_value=100):
+            with patch.object(service, "_search_similar_chunks", return_value=[]):
+                response = await service.retrieve(db=mock_db, request=request)
+
+        # Verify embedding service was called
+        mock_embedding_service.embed_query.assert_called_once_with("Test query")
+
+        assert response.total_chunks_searched == 100
+        assert len(response.results) == 0
+
+    @pytest.mark.asyncio
+    async def test_retrieve_returns_results(self, mock_embedding_service):
+        """Test that retrieve returns search results."""
+        from app.features.rag.schemas import ChunkResult
+
+        service = RAGService(embedding_service=mock_embedding_service)
+
+        request = RetrieveRequest(
+            query="Test query",
+            top_k=5,
+        )
+
+        mock_db = AsyncMock()
+
+        mock_results = [
+            ChunkResult(
+                chunk_id="chunk1",
+                source_id="src1",
+                source_path="test.md",
+                source_type="markdown",
+                content="Result content",
+                relevance_score=0.95,
+            )
+        ]
+
+        with patch.object(service, "_get_total_chunk_count", return_value=50):
+            with patch.object(service, "_search_similar_chunks", return_value=mock_results):
+                response = await service.retrieve(db=mock_db, request=request)
+
+        assert len(response.results) == 1
+        assert response.results[0].relevance_score == 0.95
+
+
+class TestRAGServiceListSources:
+    """Tests for list_sources method."""
+
+    @pytest.mark.asyncio
+    async def test_list_sources_empty(self):
+        """Test listing sources when none exist."""
+        service = RAGService()
+
+        mock_db = AsyncMock()
+        mock_result = MagicMock()
+        mock_result.all.return_value = []
+        mock_db.execute = AsyncMock(return_value=mock_result)
+
+        response = await service.list_sources(db=mock_db)
+
+        assert response.total_sources == 0
+        assert response.total_chunks == 0
+        assert len(response.sources) == 0
+
+
+class TestRAGServiceDeleteSource:
+    """Tests for delete_source method."""
+
+    @pytest.mark.asyncio
+    async def test_delete_source_not_found(self):
+        """Test deleting non-existent source raises."""
+        service = RAGService()
+
+        mock_db = AsyncMock()
+        mock_result = MagicMock()
+        mock_result.scalar_one_or_none.return_value = None
+        mock_db.execute = AsyncMock(return_value=mock_result)
+
+        with pytest.raises(SourceNotFoundError):
+            await service.delete_source(db=mock_db, source_id="nonexistent")
+
+    @pytest.mark.asyncio
+    async def test_delete_source_success(self):
+        """Test successful source deletion."""
+        service = RAGService()
+
+        mock_source = MagicMock()
+        mock_source.id = 1
+
+        mock_db = AsyncMock()
+        mock_result = MagicMock()
+        mock_result.scalar_one_or_none.return_value = mock_source
+        mock_db.execute = AsyncMock(return_value=mock_result)
+        mock_db.delete = AsyncMock()
+        mock_db.flush = AsyncMock()
+
+        with patch.object(service, "_get_chunk_count", return_value=10):
+            response = await service.delete_source(db=mock_db, source_id="test123")
+
+        assert response.status == "deleted"
+        assert response.chunks_deleted == 10
+        mock_db.delete.assert_called_once_with(mock_source)
diff --git a/app/main.py b/app/main.py
index 4b425db3..323c7987 100644
--- a/app/main.py
+++ b/app/main.py
@@ -17,6 +17,7 @@
 from app.features.forecasting.routes import router as forecasting_router
 from app.features.ingest.routes import router as ingest_router
 from app.features.jobs.routes import router as jobs_router
+from app.features.rag.routes import router as rag_router
 from app.features.registry.routes import router as registry_router
 
 logger = get_logger(__name__)
@@ -82,6 +83,7 @@ def create_app() -> FastAPI:
     app.include_router(forecasting_router)
     app.include_router(backtesting_router)
     app.include_router(registry_router)
+    app.include_router(rag_router)
 
     return app
 
diff --git a/docs/DAILY-FLOW.md b/docs/DAILY-FLOW.md
index 66521dbc..72622625 100644
--- a/docs/DAILY-FLOW.md
+++ b/docs/DAILY-FLOW.md
@@ -162,21 +162,29 @@ gh run watch <RUN_ID>
 
 ---
 
-## Következő Phase: Forecasting (PRP-5)
+## Következő Phases (INITIAL-9 → INITIAL-11)
 
-```bash
-# Kezdés
-git checkout dev
-git pull origin dev
-git checkout -b feat/prp-5-forecasting
+A projekt a moduláris három-fázisú roadmap szerint halad:
 
-# Fejlesztés...
-# PR → dev → main → release → phase-4 snapshot
+```text
+Phase 8: RAG Knowledge Base ("The Memory")
+         ↓
+Phase 9: Agentic Layer ("The Brain")
+         ↓
+Phase 10: ForecastLab Dashboard ("The Face")
 ```
 
-### PRP-5 Scope (INITIAL-5)
-- Model zoo: naive, seasonal naive, moving average
-- Unified model interface: fit/predict, serialize/load
-- Scikit-learn Pipeline: Scaling → Encoding → Regressor
-- Joblib-based ModelBundle persistence
-- Multi-horizon recursive forecasting
+### Phase 8: RAG Knowledge Base (INITIAL-9)
+- pgvector embeddings + semantic retrieval
+- Markdown/OpenAPI chunking
+- POST /rag/index, POST /rag/retrieve endpoints
+
+### Phase 9: Agentic Layer (INITIAL-10)
+- PydanticAI agents (Experiment Orchestrator, RAG Assistant)
+- Tool orchestration + structured outputs
+- WebSocket streaming
+
+### Phase 10: Dashboard (INITIAL-11)
+- React 19 + Vite + shadcn/ui
+- Data tables + time series charts
+- Agent chat interface
diff --git a/docs/PHASE-index.md b/docs/PHASE-index.md
index 280fa43b..836c63ef 100644
--- a/docs/PHASE-index.md
+++ b/docs/PHASE-index.md
@@ -16,9 +16,9 @@ This document indexes all implementation phases of the ForecastLabAI project.
 | 5 | Backtesting | Completed | PRP-6 | [5-BACKTESTING.md](./PHASE/5-BACKTESTING.md) |
 | 6 | Model Registry | Completed | PRP-7 | [6-MODEL_REGISTRY.md](./PHASE/6-MODEL_REGISTRY.md) |
 | 7 | Serving Layer | Completed | PRP-8 | [7-SERVING_LAYER.md](./PHASE/7-SERVING_LAYER.md) |
-| 8 | RAG Knowledge Base | Pending | PRP-9 | - |
-| 9 | Dashboard | Pending | PRP-10 | - |
-| 10 | Agentic Layer | Pending | - | - |
+| 8 | RAG Knowledge Base | Completed | PRP-9 | [8-RAG_KNOWLEDGE_BASE.md](./PHASE/8-RAG_KNOWLEDGE_BASE.md) |
+| 9 | Agentic Layer | Pending | PRP-10 | - |
+| 10 | ForecastLab Dashboard | Pending | PRP-11 | - |
 
 ---
 
@@ -273,18 +273,66 @@ jobs_retention_days: int = 30
 - Pyright: 0 errors
 - Pytest: 426 unit tests passed
 
----
+### [Phase 8: RAG Knowledge Base](./PHASE/8-RAG_KNOWLEDGE_BASE.md)
 
-## Pending Phases
+**Date Completed**: 2026-02-01
+
+**Summary**: RAG Knowledge Base with pgvector and multiple embedding providers:
+- PostgreSQL pgvector for HNSW similarity search
+- Embedding Provider Pattern: OpenAI (default) and Ollama (local/LAN)
+- Ollama uses `/v1/embeddings` OpenAI-compatible endpoint with `dimensions` parameter
+- Markdown-aware and OpenAPI endpoint-aware chunking
+- Idempotent indexing via SHA-256 content hash
+- Configurable embedding dimensions (1536 default, 768 for nomic-embed-text, etc.)
+
+**Key Deliverables**:
+- `app/features/rag/embeddings.py` - EmbeddingProvider, OpenAIEmbeddingProvider, OllamaEmbeddingProvider
+- `app/features/rag/chunkers.py` - MarkdownChunker, OpenAPIChunker
+- `app/features/rag/models.py` - DocumentSource, DocumentChunk ORM models
+- `app/features/rag/service.py` - RAGService (index, retrieve, list, delete)
+- `app/features/rag/routes.py` - API endpoints
+- `alembic/versions/b4c8d9e0f123_create_rag_tables.py` - Base RAG tables
+- `alembic/versions/c5d9e1f2g345_rag_dynamic_embedding_dimension.py` - Dynamic dimension
+
+**API Endpoints**:
+- `POST /rag/index` - Index document into knowledge base
+- `POST /rag/retrieve` - Semantic search with similarity threshold
+- `GET /rag/sources` - List indexed sources
+- `DELETE /rag/sources/{source_id}` - Delete source and chunks
+
+**Configuration (Settings)**:
+```python
+rag_embedding_provider: Literal["openai", "ollama"] = "openai"
+rag_embedding_dimension: int = 1536
+ollama_base_url: str = "http://localhost:11434"
+ollama_embedding_model: str = "nomic-embed-text"
+```
 
-### Phase 8: RAG Knowledge Base
-pgvector embeddings with evidence-grounded answers and citations.
+**Validation Results**:
+- Ruff: All checks passed
+- MyPy: 0 errors (117 source files)
+- Pyright: 0 errors
+- Pytest: 82 unit tests + 14 integration tests
+
+---
 
-### Phase 9: Dashboard
-React + Vite + shadcn/ui frontend with data tables and visualizations.
+## Pending Phases
 
-### Phase 10: Agentic Layer (Optional)
-PydanticAI integration for experiment orchestration.
+### Phase 9: Agentic Layer ("The Brain")
+Autonomous decision-making, tool orchestration, and structured outputs using PydanticAI.
+- Experiment Orchestrator Agent (backtest → compare → deploy workflow)
+- RAG Assistant Agent (query → retrieve → answer with citations)
+- Human-in-the-loop approval for sensitive operations
+- WebSocket streaming for real-time responses
+- Endpoints: POST /agents/experiment/run, POST /agents/rag/query, WS /agents/stream
+
+### Phase 10: ForecastLab Dashboard ("The Face")
+User interface, data visualization, and agent interaction.
+- React 19 + Vite + shadcn/ui + Tailwind CSS 4
+- TanStack Table for server-side data grids
+- TanStack Query for data fetching and caching
+- Recharts for time series visualization
+- Agent chat interface with streaming and citations
 
 ---
 
@@ -331,3 +379,4 @@ Each phase document (`docs/PHASE/X-PHASE_NAME.md`) contains:
 | 2026-01-31 | 5 | Backtesting module with time-series CV completed |
 | 2026-02-01 | 6 | Model Registry with run tracking and deployment aliases completed |
 | 2026-02-01 | 7 | Serving Layer with RFC 7807, dimensions, analytics, and jobs completed |
+| 2026-02-01 | 8 | RAG Knowledge Base with pgvector and Ollama embedding provider completed |
diff --git a/docs/PHASE/8-RAG_KNOWLEDGE_BASE.md b/docs/PHASE/8-RAG_KNOWLEDGE_BASE.md
new file mode 100644
index 00000000..aec1f984
--- /dev/null
+++ b/docs/PHASE/8-RAG_KNOWLEDGE_BASE.md
@@ -0,0 +1,398 @@
+# Phase 8: RAG Knowledge Base
+
+**Date Completed**: 2026-02-01
+**PRP**: PRP-9
+**Status**: ✅ Completed
+
+---
+
+## Executive Summary
+
+Phase 8 implements the RAG (Retrieval-Augmented Generation) Knowledge Base for ForecastLabAI with PostgreSQL pgvector for semantic similarity search, multiple embedding providers (OpenAI and Ollama), and evidence-grounded retrieval with citations.
+
+### Objectives Achieved
+
+1. **pgvector Integration** - HNSW index for fast cosine similarity search
+2. **Embedding Provider Pattern** - Abstract base class with OpenAI and Ollama implementations
+3. **Document Indexing** - Markdown and OpenAPI-aware chunking with content hash for idempotency
+4. **Semantic Retrieval** - Configurable top-k retrieval with similarity threshold
+5. **Source Management** - List, index, and delete document sources
+
+---
+
+## Deliverables
+
+### 1. Embedding Provider Pattern
+
+**File**: `app/features/rag/embeddings.py`
+
+Implements abstract `EmbeddingProvider` base class with two concrete implementations:
+
+```python
+class EmbeddingProvider(ABC):
+    """Abstract base class for embedding providers."""
+
+    @abstractmethod
+    async def embed_texts(self, texts: list[str]) -> list[list[float]]: ...
+
+    @abstractmethod
+    async def embed_query(self, query: str) -> list[float]: ...
+
+    @property
+    @abstractmethod
+    def dimension(self) -> int: ...
+```
+
+**Providers**:
+
+| Provider | Endpoint | Features |
+|----------|----------|----------|
+| `OpenAIEmbeddingProvider` | OpenAI API | Batch processing, rate limit handling, token validation |
+| `OllamaEmbeddingProvider` | `/v1/embeddings` | OpenAI-compatible, configurable dimensions, local/LAN |
+
+**Factory Function**:
+
+```python
+def get_embedding_service() -> EmbeddingProvider:
+    """Returns provider based on RAG_EMBEDDING_PROVIDER config."""
+    settings = get_settings()
+    if settings.rag_embedding_provider == "ollama":
+        return OllamaEmbeddingProvider()
+    return OpenAIEmbeddingProvider()
+```
+
+### 2. Document Chunking
+
+**File**: `app/features/rag/chunkers.py`
+
+| Chunker | Source Type | Strategy |
+|---------|-------------|----------|
+| `MarkdownChunker` | `markdown` | Respects heading boundaries, extracts heading hierarchy metadata |
+| `OpenAPIChunker` | `openapi` | Chunks by endpoint, extracts method/path/parameters metadata |
+
+**ChunkData Structure**:
+
+```python
+@dataclass
+class ChunkData:
+    content: str              # Chunk text
+    token_count: int          # Token count for the chunk
+    chunk_index: int          # Position in source document
+    metadata: dict | None     # Heading path, endpoint info, etc.
+```
+
+### 3. RAG Service
+
+**File**: `app/features/rag/service.py`
+
+| Method | Description |
+|--------|-------------|
+| `index_document()` | Index document with chunking and embedding |
+| `retrieve()` | Semantic search with similarity scoring |
+| `list_sources()` | List indexed sources with statistics |
+| `delete_source()` | Delete source and its chunks |
+
+**Idempotent Indexing**:
+- SHA-256 content hash for change detection
+- Returns `"unchanged"` status if content matches existing source
+- Re-indexes only when content changes
+
+### 4. ORM Models
+
+**File**: `app/features/rag/models.py`
+
+```python
+class DocumentSource(TimestampMixin, Base):
+    """Registry of indexed document sources."""
+    __tablename__ = "document_source"
+
+    id: Mapped[int]
+    source_id: Mapped[str]      # UUID hex (32 chars)
+    source_type: Mapped[str]    # markdown, openapi
+    source_path: Mapped[str]    # File path or identifier
+    content_hash: Mapped[str]   # SHA-256 for change detection
+    metadata_: Mapped[dict]     # JSONB custom metadata
+    indexed_at: Mapped[datetime]
+
+
+class DocumentChunk(TimestampMixin, Base):
+    """Indexed document chunk with embedding."""
+    __tablename__ = "document_chunk"
+
+    id: Mapped[int]
+    chunk_id: Mapped[str]       # UUID hex (32 chars)
+    source_id: Mapped[int]      # FK to document_source
+    chunk_index: Mapped[int]    # Position in document
+    content: Mapped[str]        # Chunk text
+    embedding: Mapped[list[float]]  # Vector(dimension)
+    token_count: Mapped[int]
+    metadata_: Mapped[dict]     # Heading hierarchy, etc.
+```
+
+### 5. API Endpoints
+
+**File**: `app/features/rag/routes.py`
+
+| Method | Path | Description |
+|--------|------|-------------|
+| POST | `/rag/index` | Index a document into the knowledge base |
+| POST | `/rag/retrieve` | Semantic search across indexed documents |
+| GET | `/rag/sources` | List all indexed sources |
+| DELETE | `/rag/sources/{source_id}` | Delete source and its chunks |
+
+---
+
+## Configuration
+
+### New Settings in `app/core/config.py`
+
+```python
+# Embedding Provider
+rag_embedding_provider: Literal["openai", "ollama"] = "openai"
+
+# OpenAI Configuration
+openai_api_key: str = ""
+rag_embedding_model: str = "text-embedding-3-small"
+
+# Ollama Configuration
+ollama_base_url: str = "http://localhost:11434"
+ollama_embedding_model: str = "nomic-embed-text"
+
+# Common Embedding Settings
+rag_embedding_dimension: int = 1536
+rag_embedding_batch_size: int = 100
+
+# Chunking Configuration
+rag_chunk_size: int = 512         # tokens
+rag_chunk_overlap: int = 50       # tokens
+rag_min_chunk_size: int = 100     # minimum tokens per chunk
+
+# Retrieval Configuration
+rag_top_k: int = 5
+rag_similarity_threshold: float = 0.7
+rag_max_context_tokens: int = 4000
+
+# Index Configuration
+rag_index_type: Literal["hnsw", "ivfflat"] = "hnsw"
+rag_hnsw_m: int = 16
+rag_hnsw_ef_construction: int = 64
+```
+
+### Environment Variables
+
+**OpenAI Provider (default)**:
+```bash
+RAG_EMBEDDING_PROVIDER=openai
+OPENAI_API_KEY=sk-your-key
+RAG_EMBEDDING_MODEL=text-embedding-3-small
+RAG_EMBEDDING_DIMENSION=1536
+```
+
+**Ollama Provider (local/LAN)**:
+```bash
+RAG_EMBEDDING_PROVIDER=ollama
+OLLAMA_BASE_URL=http://localhost:11434
+OLLAMA_EMBEDDING_MODEL=nomic-embed-text
+RAG_EMBEDDING_DIMENSION=768
+```
+
+---
+
+## Database Changes
+
+### Migration: `b4c8d9e0f123_create_rag_tables.py`
+
+Creates base RAG tables with pgvector:
+
+**Tables**:
+- `document_source` - Source registry with content hash
+- `document_chunk` - Chunks with vector embeddings
+
+**Indexes**:
+- `ix_document_source_source_id` (unique)
+- `ix_document_source_source_type`
+- `ix_document_chunk_chunk_id` (unique)
+- `ix_document_chunk_source_id`
+- `ix_chunk_embedding_hnsw` - HNSW index for cosine similarity
+- `ix_chunk_metadata_gin` - GIN index for metadata filtering
+
+### Migration: `c5d9e1f2g345_rag_dynamic_embedding_dimension.py`
+
+Enables configurable embedding dimension:
+
+```python
+def upgrade() -> None:
+    dimension = int(os.environ.get("RAG_EMBEDDING_DIMENSION", "1536"))
+    op.drop_index("ix_chunk_embedding_hnsw")
+    op.execute(f"ALTER TABLE document_chunk ALTER COLUMN embedding TYPE vector({dimension})")
+    op.create_index("ix_chunk_embedding_hnsw", ...)
+```
+
+**Note**: Changing dimension requires re-indexing all documents.
+
+---
+
+## Integration
+
+### Router Registration in `app/main.py`
+
+```python
+from app.features.rag.routes import router as rag_router
+
+# In create_app():
+app.include_router(rag_router)
+```
+
+### Alembic Model Import in `alembic/env.py`
+
+```python
+from app.features.rag import models as rag_models  # noqa: F401
+```
+
+---
+
+## Test Coverage
+
+### Test Files
+
+| File | Tests | Description |
+|------|-------|-------------|
+| `test_embeddings.py` | 25 | Provider pattern, OpenAI, Ollama, factory |
+| `test_chunkers.py` | 22 | Markdown and OpenAPI chunking |
+| `test_schemas.py` | 22 | Request/response validation |
+| `test_service.py` | 12 | Service unit tests |
+| `test_routes.py` | 14 | Integration tests (require DB) |
+
+### Validation Results
+
+```
+Ruff:    All checks passed
+MyPy:    0 errors (117 source files)
+Pyright: 0 errors
+Pytest:  82 unit tests passed + 14 integration tests
+```
+
+---
+
+## Directory Structure
+
+```
+app/
+├── core/
+│   └── config.py              # MODIFIED: Added RAG and Ollama settings
+├── features/
+│   └── rag/                   # NEW: RAG Knowledge Base
+│       ├── __init__.py
+│       ├── models.py          # DocumentSource, DocumentChunk ORM
+│       ├── schemas.py         # Request/response Pydantic schemas
+│       ├── embeddings.py      # EmbeddingProvider, OpenAI, Ollama
+│       ├── chunkers.py        # MarkdownChunker, OpenAPIChunker
+│       ├── service.py         # RAGService
+│       ├── routes.py          # API endpoints
+│       └── tests/
+│           ├── __init__.py
+│           ├── conftest.py
+│           ├── test_embeddings.py
+│           ├── test_chunkers.py
+│           ├── test_schemas.py
+│           ├── test_service.py
+│           └── test_routes.py
+└── main.py                    # MODIFIED: Router registration
+
+alembic/
+├── env.py                     # MODIFIED: RAG model import
+└── versions/
+    ├── b4c8d9e0f123_create_rag_tables.py         # NEW
+    └── c5d9e1f2g345_rag_dynamic_embedding_dimension.py  # NEW
+```
+
+---
+
+## API Usage Examples
+
+### Index Documents
+
+```bash
+# Index a markdown file
+curl -X POST http://localhost:8123/rag/index \
+  -H "Content-Type: application/json" \
+  -d '{
+    "source_type": "markdown",
+    "source_path": "docs/ARCHITECTURE.md"
+  }'
+
+# Index with inline content
+curl -X POST http://localhost:8123/rag/index \
+  -H "Content-Type: application/json" \
+  -d '{
+    "source_type": "markdown",
+    "source_path": "inline/readme",
+    "content": "# Project Overview\n\nThis is the project readme...",
+    "metadata": {"category": "documentation"}
+  }'
+
+# Index OpenAPI spec
+curl -X POST http://localhost:8123/rag/index \
+  -H "Content-Type: application/json" \
+  -d '{
+    "source_type": "openapi",
+    "source_path": "openapi.json"
+  }'
+```
+
+### Semantic Retrieval
+
+```bash
+# Basic query
+curl -X POST http://localhost:8123/rag/retrieve \
+  -H "Content-Type: application/json" \
+  -d '{
+    "query": "How does backtesting work?"
+  }'
+
+# Query with filters
+curl -X POST http://localhost:8123/rag/retrieve \
+  -H "Content-Type: application/json" \
+  -d '{
+    "query": "API endpoints for forecasting",
+    "top_k": 10,
+    "similarity_threshold": 0.8,
+    "filters": {
+      "source_type": "openapi"
+    }
+  }'
+```
+
+### Source Management
+
+```bash
+# List all sources
+curl http://localhost:8123/rag/sources
+
+# Delete a source
+curl -X DELETE http://localhost:8123/rag/sources/abc123def456...
+```
+
+---
+
+## Embedding Provider Comparison
+
+| Feature | OpenAI | Ollama |
+|---------|--------|--------|
+| Endpoint | OpenAI API | `/v1/embeddings` |
+| Authentication | API key required | None |
+| Rate Limiting | Yes, with backoff | No |
+| Token Validation | Yes (8191 max) | No |
+| Batch Size | Configurable (2048 max) | Native batch support |
+| Dimensions | 1536 (text-embedding-3-small) | Model-dependent |
+| Network | Internet required | Local/LAN |
+
+---
+
+## Next Phase Preparation
+
+Phase 9 (Agentic Layer) will build on this RAG infrastructure to:
+- Create RAG Assistant Agent for evidence-grounded Q&A
+- Implement citation formatting with source references
+- Add WebSocket streaming for real-time responses
+- Integrate with Experiment Orchestrator Agent
diff --git a/examples/rag/index_docs.py b/examples/rag/index_docs.py
new file mode 100644
index 00000000..7ce2902d
--- /dev/null
+++ b/examples/rag/index_docs.py
@@ -0,0 +1,172 @@
+#!/usr/bin/env python
+"""Example: Index documentation into RAG knowledge base.
+
+This script demonstrates how to index markdown documentation
+from the docs/ directory into the RAG knowledge base.
+
+Usage:
+    # Make sure the API is running
+    uv run uvicorn app.main:app --reload --port 8123
+
+    # Run this script
+    uv run python examples/rag/index_docs.py
+
+Requirements:
+    - OPENAI_API_KEY environment variable must be set
+    - PostgreSQL with pgvector must be running (docker-compose up -d)
+    - Migrations applied (uv run alembic upgrade head)
+"""
+
+import asyncio
+from pathlib import Path
+
+import httpx
+
+
+async def index_markdown_docs(base_url: str = "http://localhost:8123") -> None:
+    """Index all markdown docs from docs/ directory.
+
+    Args:
+        base_url: Base URL of the API server.
+    """
+    docs_dir = Path("docs")
+
+    if not docs_dir.exists():
+        print(f"Error: {docs_dir} directory not found")
+        return
+
+    async with httpx.AsyncClient(base_url=base_url, timeout=60.0) as client:
+        # Find all markdown files
+        md_files = list(docs_dir.rglob("*.md"))
+        print(f"Found {len(md_files)} markdown files to index")
+
+        total_chunks = 0
+        total_tokens = 0
+        indexed = 0
+        unchanged = 0
+        failed = 0
+
+        for md_file in md_files:
+            try:
+                # Read file content
+                content = md_file.read_text(encoding="utf-8")
+
+                # Index the document
+                response = await client.post(
+                    "/rag/index",
+                    json={
+                        "source_type": "markdown",
+                        "source_path": str(md_file),
+                        "content": content,
+                        "metadata": {
+                            "category": "documentation",
+                            "file_type": "markdown",
+                        },
+                    },
+                )
+
+                if response.status_code in (200, 201):
+                    result = response.json()
+                    status = result["status"]
+
+                    if status == "unchanged":
+                        unchanged += 1
+                        print(f"  [unchanged] {md_file}")
+                    else:
+                        indexed += 1
+                        total_chunks += result["chunks_created"]
+                        total_tokens += result["tokens_processed"]
+                        print(
+                            f"  [{status}] {md_file}: "
+                            f"{result['chunks_created']} chunks, "
+                            f"{result['tokens_processed']} tokens"
+                        )
+                else:
+                    failed += 1
+                    print(f"  [FAILED] {md_file}: {response.status_code} - {response.text}")
+
+            except Exception as e:
+                failed += 1
+                print(f"  [ERROR] {md_file}: {e}")
+
+        print("\n" + "=" * 50)
+        print("Indexing Summary:")
+        print(f"  Indexed: {indexed}")
+        print(f"  Unchanged: {unchanged}")
+        print(f"  Failed: {failed}")
+        print(f"  Total chunks created: {total_chunks}")
+        print(f"  Total tokens processed: {total_tokens}")
+
+
+async def index_readme(base_url: str = "http://localhost:8123") -> None:
+    """Index the main README.md file.
+
+    Args:
+        base_url: Base URL of the API server.
+    """
+    readme_path = Path("README.md")
+
+    if not readme_path.exists():
+        print("README.md not found")
+        return
+
+    async with httpx.AsyncClient(base_url=base_url, timeout=60.0) as client:
+        content = readme_path.read_text(encoding="utf-8")
+
+        response = await client.post(
+            "/rag/index",
+            json={
+                "source_type": "markdown",
+                "source_path": str(readme_path),
+                "content": content,
+                "metadata": {"category": "overview"},
+            },
+        )
+
+        if response.status_code == 201:
+            result = response.json()
+            print(f"README.md indexed: {result['chunks_created']} chunks ({result['status']})")
+        else:
+            print(f"Failed to index README.md: {response.status_code}")
+
+
+async def list_sources(base_url: str = "http://localhost:8123") -> None:
+    """List all indexed sources.
+
+    Args:
+        base_url: Base URL of the API server.
+    """
+    async with httpx.AsyncClient(base_url=base_url) as client:
+        response = await client.get("/rag/sources")
+
+        if response.status_code == 200:
+            data = response.json()
+            print(f"\nIndexed Sources: {data['total_sources']}")
+            print(f"Total Chunks: {data['total_chunks']}")
+            print("\nSources:")
+            for source in data["sources"]:
+                print(f"  - {source['source_path']} ({source['chunk_count']} chunks)")
+        else:
+            print(f"Failed to list sources: {response.status_code}")
+
+
+async def main() -> None:
+    """Main entry point."""
+    print("RAG Knowledge Base - Document Indexer")
+    print("=" * 50)
+
+    # Index README first
+    print("\n1. Indexing README.md...")
+    await index_readme()
+
+    # Index documentation
+    print("\n2. Indexing docs/ directory...")
+    await index_markdown_docs()
+
+    # List all sources
+    print("\n3. Listing indexed sources...")
+    await list_sources()
+
+
+if __name__ == "__main__":
+    asyncio.run(main())
diff --git a/examples/rag/query.http b/examples/rag/query.http
new file mode 100644
index 00000000..04937945
--- /dev/null
+++ b/examples/rag/query.http
@@ -0,0 +1,123 @@
+### RAG Knowledge Base - HTTP Client Examples
+### Use with VS Code REST Client or similar tools
+
+### =============================================================================
+### Index Endpoints
+### =============================================================================
+
+### Index a markdown document (with content)
+POST http://localhost:8123/rag/index
+Content-Type: application/json
+
+{
+  "source_type": "markdown",
+  "source_path": "docs/example.md",
+  "content": "# Example Document\n\nThis is an example markdown document for testing the RAG indexing pipeline.\n\n## Section One\n\nFirst section with some content about forecasting.\n\n## Section Two\n\nSecond section about backtesting strategies.",
+  "metadata": {
+    "category": "documentation",
+    "author": "test"
+  }
+}
+
+### Index a markdown document (read from file path)
+POST http://localhost:8123/rag/index
+Content-Type: application/json
+
+{
+  "source_type": "markdown",
+  "source_path": "README.md"
+}
+
+### Index an OpenAPI specification
+POST http://localhost:8123/rag/index
+Content-Type: application/json
+
+{
+  "source_type": "openapi",
+  "source_path": "api/openapi.json",
+  "content": "{\"openapi\":\"3.0.0\",\"info\":{\"title\":\"Test API\",\"version\":\"1.0\"},\"paths\":{\"/users\":{\"get\":{\"summary\":\"List users\",\"operationId\":\"listUsers\",\"responses\":{\"200\":{\"description\":\"OK\"}}}}}}"
+}
+
+### =============================================================================
+### Retrieve Endpoints
+### =============================================================================
+
+### Semantic search - basic query
+POST http://localhost:8123/rag/retrieve
+Content-Type: application/json
+
+{
+  "query": "How does backtesting prevent data leakage?",
+  "top_k": 5,
+  "similarity_threshold": 0.7
+}
+
+### Semantic search - with filters
+POST http://localhost:8123/rag/retrieve
+Content-Type: application/json
+
+{
+  "query": "What forecasting models are available?",
+  "top_k": 10,
+  "similarity_threshold": 0.6,
+  "filters": {
+    "source_type": ["markdown"],
+    "category": "documentation"
+  }
+}
+
+### Semantic search - lower threshold for more results
+POST http://localhost:8123/rag/retrieve
+Content-Type: application/json
+
+{
+  "query": "time series cross validation",
+  "top_k": 20,
+  "similarity_threshold": 0.5
+}
+
+### =============================================================================
+### Sources Endpoints
+### =============================================================================
+
+### List all indexed sources
+GET http://localhost:8123/rag/sources
+
+### Delete a specific source (replace source_id with actual value)
+DELETE http://localhost:8123/rag/sources/abc123def456789012345678901234
+
+### =============================================================================
+### Example Workflows
+### =============================================================================
+
+### Workflow 1: Index and then query
+### Step 1: Index a document
+POST http://localhost:8123/rag/index
+Content-Type: application/json
+
+{
+  "source_type": "markdown",
+  "source_path": "test-workflow.md",
+  "content": "# Backtesting Guide\n\nBacktesting is a method to evaluate forecasting models using historical data.\n\n## Time-Based Splits\n\nWe use expanding or sliding window strategies to prevent data leakage.\n\n## Metrics\n\nKey metrics include MAE, sMAPE, WAPE, and Bias."
+}
+
+### Step 2: Query the indexed content
+POST http://localhost:8123/rag/retrieve
+Content-Type: application/json
+
+{
+  "query": "What metrics are used in backtesting?",
+  "top_k": 3,
+  "similarity_threshold": 0.6
+}
+
+### Workflow 2: Re-index with updated content
+### (Using same source_path will update existing chunks)
+POST http://localhost:8123/rag/index
+Content-Type: application/json
+
+{
+  "source_type": "markdown",
+  "source_path": "test-workflow.md",
+  "content": "# Backtesting Guide (Updated)\n\nBacktesting evaluates forecasting models.\n\n## Time-Based Splits\n\nWe use expanding or sliding window strategies.\n\n## Metrics\n\nKey metrics: MAE, sMAPE, WAPE, Bias, and Stability Index."
+}
diff --git a/pyproject.toml b/pyproject.toml
index 187facf4..a5c70231 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -20,6 +20,12 @@ dependencies = [
     "numpy>=2.4.1",
     "scikit-learn>=1.6.0",
     "joblib>=1.4.0",
+    # RAG dependencies
+    "pgvector>=0.3.0",
+    "openai>=1.40.0",
+    "tiktoken>=0.7.0",
+    "httpx>=0.28.0",
+    "pyyaml>=6.0.0",
 ]
 
 [project.optional-dependencies]
diff --git a/uv.lock b/uv.lock
index 85d3d0c8..7451e80c 100644
--- a/uv.lock
+++ b/uv.lock
@@ -104,6 +104,63 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/e6/ad/3cc14f097111b4de0040c83a525973216457bbeeb63739ef1ed275c1c021/certifi-2026.1.4-py3-none-any.whl", hash = "sha256:9943707519e4add1115f44c2bc244f782c0249876bf51b6599fee1ffbedd685c", size = 152900, upload-time = "2026-01-04T02:42:40.15Z" },
 ]
 
+[[package]]
+name = "charset-normalizer"
+version = "3.4.4"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/13/69/33ddede1939fdd074bce5434295f38fae7136463422fe4fd3e0e89b98062/charset_normalizer-3.4.4.tar.gz", hash = "sha256:94537985111c35f28720e43603b8e7b43a6ecfb2ce1d3058bbe955b73404e21a", size = 129418, upload-time = "2025-10-14T04:42:32.879Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/f3/85/1637cd4af66fa687396e757dec650f28025f2a2f5a5531a3208dc0ec43f2/charset_normalizer-3.4.4-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:0a98e6759f854bd25a58a73fa88833fba3b7c491169f86ce1180c948ab3fd394", size = 208425, upload-time = "2025-10-14T04:40:53.353Z" },
+    { url = "https://files.pythonhosted.org/packages/9d/6a/04130023fef2a0d9c62d0bae2649b69f7b7d8d24ea5536feef50551029df/charset_normalizer-3.4.4-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b5b290ccc2a263e8d185130284f8501e3e36c5e02750fc6b6bdeb2e9e96f1e25", size = 148162, upload-time = "2025-10-14T04:40:54.558Z" },
+    { url = "https://files.pythonhosted.org/packages/78/29/62328d79aa60da22c9e0b9a66539feae06ca0f5a4171ac4f7dc285b83688/charset_normalizer-3.4.4-cp312-cp312-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:74bb723680f9f7a6234dcf67aea57e708ec1fbdf5699fb91dfd6f511b0a320ef", size = 144558, upload-time = "2025-10-14T04:40:55.677Z" },
+    { url = "https://files.pythonhosted.org/packages/86/bb/b32194a4bf15b88403537c2e120b817c61cd4ecffa9b6876e941c3ee38fe/charset_normalizer-3.4.4-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:f1e34719c6ed0b92f418c7c780480b26b5d9c50349e9a9af7d76bf757530350d", size = 161497, upload-time = "2025-10-14T04:40:57.217Z" },
+    { url = "https://files.pythonhosted.org/packages/19/89/a54c82b253d5b9b111dc74aca196ba5ccfcca8242d0fb64146d4d3183ff1/charset_normalizer-3.4.4-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:2437418e20515acec67d86e12bf70056a33abdacb5cb1655042f6538d6b085a8", size = 159240, upload-time = "2025-10-14T04:40:58.358Z" },
+    { url = "https://files.pythonhosted.org/packages/c0/10/d20b513afe03acc89ec33948320a5544d31f21b05368436d580dec4e234d/charset_normalizer-3.4.4-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:11d694519d7f29d6cd09f6ac70028dba10f92f6cdd059096db198c283794ac86", size = 153471, upload-time = "2025-10-14T04:40:59.468Z" },
+    { url = "https://files.pythonhosted.org/packages/61/fa/fbf177b55bdd727010f9c0a3c49eefa1d10f960e5f09d1d887bf93c2e698/charset_normalizer-3.4.4-cp312-cp312-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:ac1c4a689edcc530fc9d9aa11f5774b9e2f33f9a0c6a57864e90908f5208d30a", size = 150864, upload-time = "2025-10-14T04:41:00.623Z" },
+    { url = "https://files.pythonhosted.org/packages/05/12/9fbc6a4d39c0198adeebbde20b619790e9236557ca59fc40e0e3cebe6f40/charset_normalizer-3.4.4-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:21d142cc6c0ec30d2efee5068ca36c128a30b0f2c53c1c07bd78cb6bc1d3be5f", size = 150647, upload-time = "2025-10-14T04:41:01.754Z" },
+    { url = "https://files.pythonhosted.org/packages/ad/1f/6a9a593d52e3e8c5d2b167daf8c6b968808efb57ef4c210acb907c365bc4/charset_normalizer-3.4.4-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:5dbe56a36425d26d6cfb40ce79c314a2e4dd6211d51d6d2191c00bed34f354cc", size = 145110, upload-time = "2025-10-14T04:41:03.231Z" },
+    { url = "https://files.pythonhosted.org/packages/30/42/9a52c609e72471b0fc54386dc63c3781a387bb4fe61c20231a4ebcd58bdd/charset_normalizer-3.4.4-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:5bfbb1b9acf3334612667b61bd3002196fe2a1eb4dd74d247e0f2a4d50ec9bbf", size = 162839, upload-time = "2025-10-14T04:41:04.715Z" },
+    { url = "https://files.pythonhosted.org/packages/c4/5b/c0682bbf9f11597073052628ddd38344a3d673fda35a36773f7d19344b23/charset_normalizer-3.4.4-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:d055ec1e26e441f6187acf818b73564e6e6282709e9bcb5b63f5b23068356a15", size = 150667, upload-time = "2025-10-14T04:41:05.827Z" },
+    { url = "https://files.pythonhosted.org/packages/e4/24/a41afeab6f990cf2daf6cb8c67419b63b48cf518e4f56022230840c9bfb2/charset_normalizer-3.4.4-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:af2d8c67d8e573d6de5bc30cdb27e9b95e49115cd9baad5ddbd1a6207aaa82a9", size = 160535, upload-time = "2025-10-14T04:41:06.938Z" },
+    { url = "https://files.pythonhosted.org/packages/2a/e5/6a4ce77ed243c4a50a1fecca6aaaab419628c818a49434be428fe24c9957/charset_normalizer-3.4.4-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:780236ac706e66881f3b7f2f32dfe90507a09e67d1d454c762cf642e6e1586e0", size = 154816, upload-time = "2025-10-14T04:41:08.101Z" },
+    { url = "https://files.pythonhosted.org/packages/a8/ef/89297262b8092b312d29cdb2517cb1237e51db8ecef2e9af5edbe7b683b1/charset_normalizer-3.4.4-cp312-cp312-win32.whl", hash = "sha256:5833d2c39d8896e4e19b689ffc198f08ea58116bee26dea51e362ecc7cd3ed26", size = 99694, upload-time = "2025-10-14T04:41:09.23Z" },
+    { url = "https://files.pythonhosted.org/packages/3d/2d/1e5ed9dd3b3803994c155cd9aacb60c82c331bad84daf75bcb9c91b3295e/charset_normalizer-3.4.4-cp312-cp312-win_amd64.whl", hash = "sha256:a79cfe37875f822425b89a82333404539ae63dbdddf97f84dcbc3d339aae9525", size = 107131, upload-time = "2025-10-14T04:41:10.467Z" },
+    { url = "https://files.pythonhosted.org/packages/d0/d9/0ed4c7098a861482a7b6a95603edce4c0d9db2311af23da1fb2b75ec26fc/charset_normalizer-3.4.4-cp312-cp312-win_arm64.whl", hash = "sha256:376bec83a63b8021bb5c8ea75e21c4ccb86e7e45ca4eb81146091b56599b80c3", size = 100390, upload-time = "2025-10-14T04:41:11.915Z" },
+    { url = "https://files.pythonhosted.org/packages/97/45/4b3a1239bbacd321068ea6e7ac28875b03ab8bc0aa0966452db17cd36714/charset_normalizer-3.4.4-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:e1f185f86a6f3403aa2420e815904c67b2f9ebc443f045edd0de921108345794", size = 208091, upload-time = "2025-10-14T04:41:13.346Z" },
+    { url = "https://files.pythonhosted.org/packages/7d/62/73a6d7450829655a35bb88a88fca7d736f9882a27eacdca2c6d505b57e2e/charset_normalizer-3.4.4-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6b39f987ae8ccdf0d2642338faf2abb1862340facc796048b604ef14919e55ed", size = 147936, upload-time = "2025-10-14T04:41:14.461Z" },
+    { url = "https://files.pythonhosted.org/packages/89/c5/adb8c8b3d6625bef6d88b251bbb0d95f8205831b987631ab0c8bb5d937c2/charset_normalizer-3.4.4-cp313-cp313-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:3162d5d8ce1bb98dd51af660f2121c55d0fa541b46dff7bb9b9f86ea1d87de72", size = 144180, upload-time = "2025-10-14T04:41:15.588Z" },
+    { url = "https://files.pythonhosted.org/packages/91/ed/9706e4070682d1cc219050b6048bfd293ccf67b3d4f5a4f39207453d4b99/charset_normalizer-3.4.4-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:81d5eb2a312700f4ecaa977a8235b634ce853200e828fbadf3a9c50bab278328", size = 161346, upload-time = "2025-10-14T04:41:16.738Z" },
+    { url = "https://files.pythonhosted.org/packages/d5/0d/031f0d95e4972901a2f6f09ef055751805ff541511dc1252ba3ca1f80cf5/charset_normalizer-3.4.4-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:5bd2293095d766545ec1a8f612559f6b40abc0eb18bb2f5d1171872d34036ede", size = 158874, upload-time = "2025-10-14T04:41:17.923Z" },
+    { url = "https://files.pythonhosted.org/packages/f5/83/6ab5883f57c9c801ce5e5677242328aa45592be8a00644310a008d04f922/charset_normalizer-3.4.4-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a8a8b89589086a25749f471e6a900d3f662d1d3b6e2e59dcecf787b1cc3a1894", size = 153076, upload-time = "2025-10-14T04:41:19.106Z" },
+    { url = "https://files.pythonhosted.org/packages/75/1e/5ff781ddf5260e387d6419959ee89ef13878229732732ee73cdae01800f2/charset_normalizer-3.4.4-cp313-cp313-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:bc7637e2f80d8530ee4a78e878bce464f70087ce73cf7c1caf142416923b98f1", size = 150601, upload-time = "2025-10-14T04:41:20.245Z" },
+    { url = "https://files.pythonhosted.org/packages/d7/57/71be810965493d3510a6ca79b90c19e48696fb1ff964da319334b12677f0/charset_normalizer-3.4.4-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:f8bf04158c6b607d747e93949aa60618b61312fe647a6369f88ce2ff16043490", size = 150376, upload-time = "2025-10-14T04:41:21.398Z" },
+    { url = "https://files.pythonhosted.org/packages/e5/d5/c3d057a78c181d007014feb7e9f2e65905a6c4ef182c0ddf0de2924edd65/charset_normalizer-3.4.4-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:554af85e960429cf30784dd47447d5125aaa3b99a6f0683589dbd27e2f45da44", size = 144825, upload-time = "2025-10-14T04:41:22.583Z" },
+    { url = "https://files.pythonhosted.org/packages/e6/8c/d0406294828d4976f275ffbe66f00266c4b3136b7506941d87c00cab5272/charset_normalizer-3.4.4-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:74018750915ee7ad843a774364e13a3db91682f26142baddf775342c3f5b1133", size = 162583, upload-time = "2025-10-14T04:41:23.754Z" },
+    { url = "https://files.pythonhosted.org/packages/d7/24/e2aa1f18c8f15c4c0e932d9287b8609dd30ad56dbe41d926bd846e22fb8d/charset_normalizer-3.4.4-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:c0463276121fdee9c49b98908b3a89c39be45d86d1dbaa22957e38f6321d4ce3", size = 150366, upload-time = "2025-10-14T04:41:25.27Z" },
+    { url = "https://files.pythonhosted.org/packages/e4/5b/1e6160c7739aad1e2df054300cc618b06bf784a7a164b0f238360721ab86/charset_normalizer-3.4.4-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:362d61fd13843997c1c446760ef36f240cf81d3ebf74ac62652aebaf7838561e", size = 160300, upload-time = "2025-10-14T04:41:26.725Z" },
+    { url = "https://files.pythonhosted.org/packages/7a/10/f882167cd207fbdd743e55534d5d9620e095089d176d55cb22d5322f2afd/charset_normalizer-3.4.4-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:9a26f18905b8dd5d685d6d07b0cdf98a79f3c7a918906af7cc143ea2e164c8bc", size = 154465, upload-time = "2025-10-14T04:41:28.322Z" },
+    { url = "https://files.pythonhosted.org/packages/89/66/c7a9e1b7429be72123441bfdbaf2bc13faab3f90b933f664db506dea5915/charset_normalizer-3.4.4-cp313-cp313-win32.whl", hash = "sha256:9b35f4c90079ff2e2edc5b26c0c77925e5d2d255c42c74fdb70fb49b172726ac", size = 99404, upload-time = "2025-10-14T04:41:29.95Z" },
+    { url = "https://files.pythonhosted.org/packages/c4/26/b9924fa27db384bdcd97ab83b4f0a8058d96ad9626ead570674d5e737d90/charset_normalizer-3.4.4-cp313-cp313-win_amd64.whl", hash = "sha256:b435cba5f4f750aa6c0a0d92c541fb79f69a387c91e61f1795227e4ed9cece14", size = 107092, upload-time = "2025-10-14T04:41:31.188Z" },
+    { url = "https://files.pythonhosted.org/packages/af/8f/3ed4bfa0c0c72a7ca17f0380cd9e4dd842b09f664e780c13cff1dcf2ef1b/charset_normalizer-3.4.4-cp313-cp313-win_arm64.whl", hash = "sha256:542d2cee80be6f80247095cc36c418f7bddd14f4a6de45af91dfad36d817bba2", size = 100408, upload-time = "2025-10-14T04:41:32.624Z" },
+    { url = "https://files.pythonhosted.org/packages/2a/35/7051599bd493e62411d6ede36fd5af83a38f37c4767b92884df7301db25d/charset_normalizer-3.4.4-cp314-cp314-macosx_10_13_universal2.whl", hash = "sha256:da3326d9e65ef63a817ecbcc0df6e94463713b754fe293eaa03da99befb9a5bd", size = 207746, upload-time = "2025-10-14T04:41:33.773Z" },
+    { url = "https://files.pythonhosted.org/packages/10/9a/97c8d48ef10d6cd4fcead2415523221624bf58bcf68a802721a6bc807c8f/charset_normalizer-3.4.4-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:8af65f14dc14a79b924524b1e7fffe304517b2bff5a58bf64f30b98bbc5079eb", size = 147889, upload-time = "2025-10-14T04:41:34.897Z" },
+    { url = "https://files.pythonhosted.org/packages/10/bf/979224a919a1b606c82bd2c5fa49b5c6d5727aa47b4312bb27b1734f53cd/charset_normalizer-3.4.4-cp314-cp314-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:74664978bb272435107de04e36db5a9735e78232b85b77d45cfb38f758efd33e", size = 143641, upload-time = "2025-10-14T04:41:36.116Z" },
+    { url = "https://files.pythonhosted.org/packages/ba/33/0ad65587441fc730dc7bd90e9716b30b4702dc7b617e6ba4997dc8651495/charset_normalizer-3.4.4-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:752944c7ffbfdd10c074dc58ec2d5a8a4cd9493b314d367c14d24c17684ddd14", size = 160779, upload-time = "2025-10-14T04:41:37.229Z" },
+    { url = "https://files.pythonhosted.org/packages/67/ed/331d6b249259ee71ddea93f6f2f0a56cfebd46938bde6fcc6f7b9a3d0e09/charset_normalizer-3.4.4-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:d1f13550535ad8cff21b8d757a3257963e951d96e20ec82ab44bc64aeb62a191", size = 159035, upload-time = "2025-10-14T04:41:38.368Z" },
+    { url = "https://files.pythonhosted.org/packages/67/ff/f6b948ca32e4f2a4576aa129d8bed61f2e0543bf9f5f2b7fc3758ed005c9/charset_normalizer-3.4.4-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ecaae4149d99b1c9e7b88bb03e3221956f68fd6d50be2ef061b2381b61d20838", size = 152542, upload-time = "2025-10-14T04:41:39.862Z" },
+    { url = "https://files.pythonhosted.org/packages/16/85/276033dcbcc369eb176594de22728541a925b2632f9716428c851b149e83/charset_normalizer-3.4.4-cp314-cp314-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:cb6254dc36b47a990e59e1068afacdcd02958bdcce30bb50cc1700a8b9d624a6", size = 149524, upload-time = "2025-10-14T04:41:41.319Z" },
+    { url = "https://files.pythonhosted.org/packages/9e/f2/6a2a1f722b6aba37050e626530a46a68f74e63683947a8acff92569f979a/charset_normalizer-3.4.4-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:c8ae8a0f02f57a6e61203a31428fa1d677cbe50c93622b4149d5c0f319c1d19e", size = 150395, upload-time = "2025-10-14T04:41:42.539Z" },
+    { url = "https://files.pythonhosted.org/packages/60/bb/2186cb2f2bbaea6338cad15ce23a67f9b0672929744381e28b0592676824/charset_normalizer-3.4.4-cp314-cp314-musllinux_1_2_armv7l.whl", hash = "sha256:47cc91b2f4dd2833fddaedd2893006b0106129d4b94fdb6af1f4ce5a9965577c", size = 143680, upload-time = "2025-10-14T04:41:43.661Z" },
+    { url = "https://files.pythonhosted.org/packages/7d/a5/bf6f13b772fbb2a90360eb620d52ed8f796f3c5caee8398c3b2eb7b1c60d/charset_normalizer-3.4.4-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:82004af6c302b5d3ab2cfc4cc5f29db16123b1a8417f2e25f9066f91d4411090", size = 162045, upload-time = "2025-10-14T04:41:44.821Z" },
+    { url = "https://files.pythonhosted.org/packages/df/c5/d1be898bf0dc3ef9030c3825e5d3b83f2c528d207d246cbabe245966808d/charset_normalizer-3.4.4-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:2b7d8f6c26245217bd2ad053761201e9f9680f8ce52f0fcd8d0755aeae5b2152", size = 149687, upload-time = "2025-10-14T04:41:46.442Z" },
+    { url = "https://files.pythonhosted.org/packages/a5/42/90c1f7b9341eef50c8a1cb3f098ac43b0508413f33affd762855f67a410e/charset_normalizer-3.4.4-cp314-cp314-musllinux_1_2_s390x.whl", hash = "sha256:799a7a5e4fb2d5898c60b640fd4981d6a25f1c11790935a44ce38c54e985f828", size = 160014, upload-time = "2025-10-14T04:41:47.631Z" },
+    { url = "https://files.pythonhosted.org/packages/76/be/4d3ee471e8145d12795ab655ece37baed0929462a86e72372fd25859047c/charset_normalizer-3.4.4-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:99ae2cffebb06e6c22bdc25801d7b30f503cc87dbd283479e7b606f70aff57ec", size = 154044, upload-time = "2025-10-14T04:41:48.81Z" },
+    { url = "https://files.pythonhosted.org/packages/b0/6f/8f7af07237c34a1defe7defc565a9bc1807762f672c0fde711a4b22bf9c0/charset_normalizer-3.4.4-cp314-cp314-win32.whl", hash = "sha256:f9d332f8c2a2fcbffe1378594431458ddbef721c1769d78e2cbc06280d8155f9", size = 99940, upload-time = "2025-10-14T04:41:49.946Z" },
+    { url = "https://files.pythonhosted.org/packages/4b/51/8ade005e5ca5b0d80fb4aff72a3775b325bdc3d27408c8113811a7cbe640/charset_normalizer-3.4.4-cp314-cp314-win_amd64.whl", hash = "sha256:8a6562c3700cce886c5be75ade4a5db4214fda19fede41d9792d100288d8f94c", size = 107104, upload-time = "2025-10-14T04:41:51.051Z" },
+    { url = "https://files.pythonhosted.org/packages/da/5f/6b8f83a55bb8278772c5ae54a577f3099025f9ade59d0136ac24a0df4bde/charset_normalizer-3.4.4-cp314-cp314-win_arm64.whl", hash = "sha256:de00632ca48df9daf77a2c65a484531649261ec9f25489917f09e455cb09ddb2", size = 100743, upload-time = "2025-10-14T04:41:52.122Z" },
+    { url = "https://files.pythonhosted.org/packages/0a/4c/925909008ed5a988ccbb72dcc897407e5d6d3bd72410d69e051fc0c14647/charset_normalizer-3.4.4-py3-none-any.whl", hash = "sha256:7a32c560861a02ff789ad905a2fe94e3f840803362c84fecf1851cb4cf3dc37f", size = 53402, upload-time = "2025-10-14T04:42:31.76Z" },
+]
+
 [[package]]
 name = "click"
 version = "8.3.1"
@@ -199,6 +256,15 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/d2/db/d291e30fdf7ea617a335531e72294e0c723356d7fdde8fba00610a76bda9/coverage-7.13.2-py3-none-any.whl", hash = "sha256:40ce1ea1e25125556d8e76bd0b61500839a07944cc287ac21d5626f3e620cad5", size = 210943, upload-time = "2026-01-25T13:00:02.388Z" },
 ]
 
+[[package]]
+name = "distro"
+version = "1.9.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/fc/f8/98eea607f65de6527f8a2e8885fc8015d3e6f5775df186e443e0964a11c3/distro-1.9.0.tar.gz", hash = "sha256:2fa77c6fd8940f116ee1d6b94a2f90b13b5ea8d019b98bc8bafdcabcdd9bdbed", size = 60722, upload-time = "2023-12-24T09:54:32.31Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/12/b3/231ffd4ab1fc9d679809f356cebee130ac7daa00d6d6f3206dd4fd137e9e/distro-1.9.0-py3-none-any.whl", hash = "sha256:7bffd925d65168f85027d8da9af6bddab658135b840670a223589bc0c8ef02b2", size = 20277, upload-time = "2023-12-24T09:54:30.421Z" },
+]
+
 [[package]]
 name = "fastapi"
 version = "0.128.0"
@@ -216,21 +282,26 @@ wheels = [
 
 [[package]]
 name = "forecastlabai"
-version = "0.1.8"
+version = "0.2.1"
 source = { editable = "." }
 dependencies = [
     { name = "alembic" },
     { name = "asyncpg" },
     { name = "fastapi" },
+    { name = "httpx" },
     { name = "joblib" },
     { name = "numpy" },
+    { name = "openai" },
     { name = "pandas" },
+    { name = "pgvector" },
     { name = "pydantic" },
     { name = "pydantic-settings" },
     { name = "python-dotenv" },
+    { name = "pyyaml" },
     { name = "scikit-learn" },
     { name = "sqlalchemy", extra = ["asyncio"] },
     { name = "structlog" },
+    { name = "tiktoken" },
     { name = "uvicorn", extra = ["standard"] },
 ]
 
@@ -255,11 +326,14 @@ requires-dist = [
     { name = "alembic", specifier = ">=1.14.0" },
     { name = "asyncpg", specifier = ">=0.30.0" },
     { name = "fastapi", specifier = ">=0.115.0" },
+    { name = "httpx", specifier = ">=0.28.0" },
     { name = "httpx", marker = "extra == 'dev'", specifier = ">=0.28.0" },
     { name = "joblib", specifier = ">=1.4.0" },
     { name = "mypy", marker = "extra == 'dev'", specifier = ">=1.13.0" },
     { name = "numpy", specifier = ">=2.4.1" },
+    { name = "openai", specifier = ">=1.40.0" },
     { name = "pandas", specifier = ">=3.0.0" },
+    { name = "pgvector", specifier = ">=0.3.0" },
     { name = "pydantic", specifier = ">=2.10.0" },
     { name = "pydantic-settings", specifier = ">=2.6.0" },
     { name = "pyright", marker = "extra == 'dev'", specifier = ">=1.1.390" },
@@ -267,10 +341,12 @@ requires-dist = [
     { name = "pytest-asyncio", marker = "extra == 'dev'", specifier = ">=0.24.0" },
     { name = "pytest-cov", marker = "extra == 'dev'", specifier = ">=6.0.0" },
     { name = "python-dotenv", specifier = ">=1.0.1" },
+    { name = "pyyaml", specifier = ">=6.0.0" },
     { name = "ruff", marker = "extra == 'dev'", specifier = ">=0.8.0" },
     { name = "scikit-learn", specifier = ">=1.6.0" },
     { name = "sqlalchemy", extras = ["asyncio"], specifier = ">=2.0.36" },
     { name = "structlog", specifier = ">=24.4.0" },
+    { name = "tiktoken", specifier = ">=0.7.0" },
     { name = "uvicorn", extras = ["standard"], specifier = ">=0.32.0" },
 ]
 provides-extras = ["dev"]
@@ -405,6 +481,74 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/cb/b1/3846dd7f199d53cb17f49cba7e651e9ce294d8497c8c150530ed11865bb8/iniconfig-2.3.0-py3-none-any.whl", hash = "sha256:f631c04d2c48c52b84d0d0549c99ff3859c98df65b3101406327ecc7d53fbf12", size = 7484, upload-time = "2025-10-18T21:55:41.639Z" },
 ]
 
+[[package]]
+name = "jiter"
+version = "0.12.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/45/9d/e0660989c1370e25848bb4c52d061c71837239738ad937e83edca174c273/jiter-0.12.0.tar.gz", hash = "sha256:64dfcd7d5c168b38d3f9f8bba7fc639edb3418abcc74f22fdbe6b8938293f30b", size = 168294, upload-time = "2025-11-09T20:49:23.302Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/92/c9/5b9f7b4983f1b542c64e84165075335e8a236fa9e2ea03a0c79780062be8/jiter-0.12.0-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:305e061fa82f4680607a775b2e8e0bcb071cd2205ac38e6ef48c8dd5ebe1cf37", size = 314449, upload-time = "2025-11-09T20:47:22.999Z" },
+    { url = "https://files.pythonhosted.org/packages/98/6e/e8efa0e78de00db0aee82c0cf9e8b3f2027efd7f8a71f859d8f4be8e98ef/jiter-0.12.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:5c1860627048e302a528333c9307c818c547f214d8659b0705d2195e1a94b274", size = 319855, upload-time = "2025-11-09T20:47:24.779Z" },
+    { url = "https://files.pythonhosted.org/packages/20/26/894cd88e60b5d58af53bec5c6759d1292bd0b37a8b5f60f07abf7a63ae5f/jiter-0.12.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:df37577a4f8408f7e0ec3205d2a8f87672af8f17008358063a4d6425b6081ce3", size = 350171, upload-time = "2025-11-09T20:47:26.469Z" },
+    { url = "https://files.pythonhosted.org/packages/f5/27/a7b818b9979ac31b3763d25f3653ec3a954044d5e9f5d87f2f247d679fd1/jiter-0.12.0-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:75fdd787356c1c13a4f40b43c2156276ef7a71eb487d98472476476d803fb2cf", size = 365590, upload-time = "2025-11-09T20:47:27.918Z" },
+    { url = "https://files.pythonhosted.org/packages/ba/7e/e46195801a97673a83746170b17984aa8ac4a455746354516d02ca5541b4/jiter-0.12.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1eb5db8d9c65b112aacf14fcd0faae9913d07a8afea5ed06ccdd12b724e966a1", size = 479462, upload-time = "2025-11-09T20:47:29.654Z" },
+    { url = "https://files.pythonhosted.org/packages/ca/75/f833bfb009ab4bd11b1c9406d333e3b4357709ed0570bb48c7c06d78c7dd/jiter-0.12.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:73c568cc27c473f82480abc15d1301adf333a7ea4f2e813d6a2c7d8b6ba8d0df", size = 378983, upload-time = "2025-11-09T20:47:31.026Z" },
+    { url = "https://files.pythonhosted.org/packages/71/b3/7a69d77943cc837d30165643db753471aff5df39692d598da880a6e51c24/jiter-0.12.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4321e8a3d868919bcb1abb1db550d41f2b5b326f72df29e53b2df8b006eb9403", size = 361328, upload-time = "2025-11-09T20:47:33.286Z" },
+    { url = "https://files.pythonhosted.org/packages/b0/ac/a78f90caf48d65ba70d8c6efc6f23150bc39dc3389d65bbec2a95c7bc628/jiter-0.12.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:0a51bad79f8cc9cac2b4b705039f814049142e0050f30d91695a2d9a6611f126", size = 386740, upload-time = "2025-11-09T20:47:34.703Z" },
+    { url = "https://files.pythonhosted.org/packages/39/b6/5d31c2cc8e1b6a6bcf3c5721e4ca0a3633d1ab4754b09bc7084f6c4f5327/jiter-0.12.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:2a67b678f6a5f1dd6c36d642d7db83e456bc8b104788262aaefc11a22339f5a9", size = 520875, upload-time = "2025-11-09T20:47:36.058Z" },
+    { url = "https://files.pythonhosted.org/packages/30/b5/4df540fae4e9f68c54b8dab004bd8c943a752f0b00efd6e7d64aa3850339/jiter-0.12.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:efe1a211fe1fd14762adea941e3cfd6c611a136e28da6c39272dbb7a1bbe6a86", size = 511457, upload-time = "2025-11-09T20:47:37.932Z" },
+    { url = "https://files.pythonhosted.org/packages/07/65/86b74010e450a1a77b2c1aabb91d4a91dd3cd5afce99f34d75fd1ac64b19/jiter-0.12.0-cp312-cp312-win32.whl", hash = "sha256:d779d97c834b4278276ec703dc3fc1735fca50af63eb7262f05bdb4e62203d44", size = 204546, upload-time = "2025-11-09T20:47:40.47Z" },
+    { url = "https://files.pythonhosted.org/packages/1c/c7/6659f537f9562d963488e3e55573498a442503ced01f7e169e96a6110383/jiter-0.12.0-cp312-cp312-win_amd64.whl", hash = "sha256:e8269062060212b373316fe69236096aaf4c49022d267c6736eebd66bbbc60bb", size = 205196, upload-time = "2025-11-09T20:47:41.794Z" },
+    { url = "https://files.pythonhosted.org/packages/21/f4/935304f5169edadfec7f9c01eacbce4c90bb9a82035ac1de1f3bd2d40be6/jiter-0.12.0-cp312-cp312-win_arm64.whl", hash = "sha256:06cb970936c65de926d648af0ed3d21857f026b1cf5525cb2947aa5e01e05789", size = 186100, upload-time = "2025-11-09T20:47:43.007Z" },
+    { url = "https://files.pythonhosted.org/packages/3d/a6/97209693b177716e22576ee1161674d1d58029eb178e01866a0422b69224/jiter-0.12.0-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:6cc49d5130a14b732e0612bc76ae8db3b49898732223ef8b7599aa8d9810683e", size = 313658, upload-time = "2025-11-09T20:47:44.424Z" },
+    { url = "https://files.pythonhosted.org/packages/06/4d/125c5c1537c7d8ee73ad3d530a442d6c619714b95027143f1b61c0b4dfe0/jiter-0.12.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:37f27a32ce36364d2fa4f7fdc507279db604d27d239ea2e044c8f148410defe1", size = 318605, upload-time = "2025-11-09T20:47:45.973Z" },
+    { url = "https://files.pythonhosted.org/packages/99/bf/a840b89847885064c41a5f52de6e312e91fa84a520848ee56c97e4fa0205/jiter-0.12.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bbc0944aa3d4b4773e348cda635252824a78f4ba44328e042ef1ff3f6080d1cf", size = 349803, upload-time = "2025-11-09T20:47:47.535Z" },
+    { url = "https://files.pythonhosted.org/packages/8a/88/e63441c28e0db50e305ae23e19c1d8fae012d78ed55365da392c1f34b09c/jiter-0.12.0-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:da25c62d4ee1ffbacb97fac6dfe4dcd6759ebdc9015991e92a6eae5816287f44", size = 365120, upload-time = "2025-11-09T20:47:49.284Z" },
+    { url = "https://files.pythonhosted.org/packages/0a/7c/49b02714af4343970eb8aca63396bc1c82fa01197dbb1e9b0d274b550d4e/jiter-0.12.0-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:048485c654b838140b007390b8182ba9774621103bd4d77c9c3f6f117474ba45", size = 479918, upload-time = "2025-11-09T20:47:50.807Z" },
+    { url = "https://files.pythonhosted.org/packages/69/ba/0a809817fdd5a1db80490b9150645f3aae16afad166960bcd562be194f3b/jiter-0.12.0-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:635e737fbb7315bef0037c19b88b799143d2d7d3507e61a76751025226b3ac87", size = 379008, upload-time = "2025-11-09T20:47:52.211Z" },
+    { url = "https://files.pythonhosted.org/packages/5f/c3/c9fc0232e736c8877d9e6d83d6eeb0ba4e90c6c073835cc2e8f73fdeef51/jiter-0.12.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4e017c417b1ebda911bd13b1e40612704b1f5420e30695112efdbed8a4b389ed", size = 361785, upload-time = "2025-11-09T20:47:53.512Z" },
+    { url = "https://files.pythonhosted.org/packages/96/61/61f69b7e442e97ca6cd53086ddc1cf59fb830549bc72c0a293713a60c525/jiter-0.12.0-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:89b0bfb8b2bf2351fba36bb211ef8bfceba73ef58e7f0c68fb67b5a2795ca2f9", size = 386108, upload-time = "2025-11-09T20:47:54.893Z" },
+    { url = "https://files.pythonhosted.org/packages/e9/2e/76bb3332f28550c8f1eba3bf6e5efe211efda0ddbbaf24976bc7078d42a5/jiter-0.12.0-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:f5aa5427a629a824a543672778c9ce0c5e556550d1569bb6ea28a85015287626", size = 519937, upload-time = "2025-11-09T20:47:56.253Z" },
+    { url = "https://files.pythonhosted.org/packages/84/d6/fa96efa87dc8bff2094fb947f51f66368fa56d8d4fc9e77b25d7fbb23375/jiter-0.12.0-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:ed53b3d6acbcb0fd0b90f20c7cb3b24c357fe82a3518934d4edfa8c6898e498c", size = 510853, upload-time = "2025-11-09T20:47:58.32Z" },
+    { url = "https://files.pythonhosted.org/packages/8a/28/93f67fdb4d5904a708119a6ab58a8f1ec226ff10a94a282e0215402a8462/jiter-0.12.0-cp313-cp313-win32.whl", hash = "sha256:4747de73d6b8c78f2e253a2787930f4fffc68da7fa319739f57437f95963c4de", size = 204699, upload-time = "2025-11-09T20:47:59.686Z" },
+    { url = "https://files.pythonhosted.org/packages/c4/1f/30b0eb087045a0abe2a5c9c0c0c8da110875a1d3be83afd4a9a4e548be3c/jiter-0.12.0-cp313-cp313-win_amd64.whl", hash = "sha256:e25012eb0c456fcc13354255d0338cd5397cce26c77b2832b3c4e2e255ea5d9a", size = 204258, upload-time = "2025-11-09T20:48:01.01Z" },
+    { url = "https://files.pythonhosted.org/packages/2c/f4/2b4daf99b96bce6fc47971890b14b2a36aef88d7beb9f057fafa032c6141/jiter-0.12.0-cp313-cp313-win_arm64.whl", hash = "sha256:c97b92c54fe6110138c872add030a1f99aea2401ddcdaa21edf74705a646dd60", size = 185503, upload-time = "2025-11-09T20:48:02.35Z" },
+    { url = "https://files.pythonhosted.org/packages/39/ca/67bb15a7061d6fe20b9b2a2fd783e296a1e0f93468252c093481a2f00efa/jiter-0.12.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:53839b35a38f56b8be26a7851a48b89bc47e5d88e900929df10ed93b95fea3d6", size = 317965, upload-time = "2025-11-09T20:48:03.783Z" },
+    { url = "https://files.pythonhosted.org/packages/18/af/1788031cd22e29c3b14bc6ca80b16a39a0b10e611367ffd480c06a259831/jiter-0.12.0-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:94f669548e55c91ab47fef8bddd9c954dab1938644e715ea49d7e117015110a4", size = 345831, upload-time = "2025-11-09T20:48:05.55Z" },
+    { url = "https://files.pythonhosted.org/packages/05/17/710bf8472d1dff0d3caf4ced6031060091c1320f84ee7d5dcbed1f352417/jiter-0.12.0-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:351d54f2b09a41600ffea43d081522d792e81dcfb915f6d2d242744c1cc48beb", size = 361272, upload-time = "2025-11-09T20:48:06.951Z" },
+    { url = "https://files.pythonhosted.org/packages/fb/f1/1dcc4618b59761fef92d10bcbb0b038b5160be653b003651566a185f1a5c/jiter-0.12.0-cp313-cp313t-win_amd64.whl", hash = "sha256:2a5e90604620f94bf62264e7c2c038704d38217b7465b863896c6d7c902b06c7", size = 204604, upload-time = "2025-11-09T20:48:08.328Z" },
+    { url = "https://files.pythonhosted.org/packages/d9/32/63cb1d9f1c5c6632a783c0052cde9ef7ba82688f7065e2f0d5f10a7e3edb/jiter-0.12.0-cp313-cp313t-win_arm64.whl", hash = "sha256:88ef757017e78d2860f96250f9393b7b577b06a956ad102c29c8237554380db3", size = 185628, upload-time = "2025-11-09T20:48:09.572Z" },
+    { url = "https://files.pythonhosted.org/packages/a8/99/45c9f0dbe4a1416b2b9a8a6d1236459540f43d7fb8883cff769a8db0612d/jiter-0.12.0-cp314-cp314-macosx_10_12_x86_64.whl", hash = "sha256:c46d927acd09c67a9fb1416df45c5a04c27e83aae969267e98fba35b74e99525", size = 312478, upload-time = "2025-11-09T20:48:10.898Z" },
+    { url = "https://files.pythonhosted.org/packages/4c/a7/54ae75613ba9e0f55fcb0bc5d1f807823b5167cc944e9333ff322e9f07dd/jiter-0.12.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:774ff60b27a84a85b27b88cd5583899c59940bcc126caca97eb2a9df6aa00c49", size = 318706, upload-time = "2025-11-09T20:48:12.266Z" },
+    { url = "https://files.pythonhosted.org/packages/59/31/2aa241ad2c10774baf6c37f8b8e1f39c07db358f1329f4eb40eba179c2a2/jiter-0.12.0-cp314-cp314-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c5433fab222fb072237df3f637d01b81f040a07dcac1cb4a5c75c7aa9ed0bef1", size = 351894, upload-time = "2025-11-09T20:48:13.673Z" },
+    { url = "https://files.pythonhosted.org/packages/54/4f/0f2759522719133a9042781b18cc94e335b6d290f5e2d3e6899d6af933e3/jiter-0.12.0-cp314-cp314-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:f8c593c6e71c07866ec6bfb790e202a833eeec885022296aff6b9e0b92d6a70e", size = 365714, upload-time = "2025-11-09T20:48:15.083Z" },
+    { url = "https://files.pythonhosted.org/packages/dc/6f/806b895f476582c62a2f52c453151edd8a0fde5411b0497baaa41018e878/jiter-0.12.0-cp314-cp314-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:90d32894d4c6877a87ae00c6b915b609406819dce8bc0d4e962e4de2784e567e", size = 478989, upload-time = "2025-11-09T20:48:16.706Z" },
+    { url = "https://files.pythonhosted.org/packages/86/6c/012d894dc6e1033acd8db2b8346add33e413ec1c7c002598915278a37f79/jiter-0.12.0-cp314-cp314-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:798e46eed9eb10c3adbbacbd3bdb5ecd4cf7064e453d00dbef08802dae6937ff", size = 378615, upload-time = "2025-11-09T20:48:18.614Z" },
+    { url = "https://files.pythonhosted.org/packages/87/30/d718d599f6700163e28e2c71c0bbaf6dace692e7df2592fd793ac9276717/jiter-0.12.0-cp314-cp314-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b3f1368f0a6719ea80013a4eb90ba72e75d7ea67cfc7846db2ca504f3df0169a", size = 364745, upload-time = "2025-11-09T20:48:20.117Z" },
+    { url = "https://files.pythonhosted.org/packages/8f/85/315b45ce4b6ddc7d7fceca24068543b02bdc8782942f4ee49d652e2cc89f/jiter-0.12.0-cp314-cp314-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:65f04a9d0b4406f7e51279710b27484af411896246200e461d80d3ba0caa901a", size = 386502, upload-time = "2025-11-09T20:48:21.543Z" },
+    { url = "https://files.pythonhosted.org/packages/74/0b/ce0434fb40c5b24b368fe81b17074d2840748b4952256bab451b72290a49/jiter-0.12.0-cp314-cp314-musllinux_1_1_aarch64.whl", hash = "sha256:fd990541982a24281d12b67a335e44f117e4c6cbad3c3b75c7dea68bf4ce3a67", size = 519845, upload-time = "2025-11-09T20:48:22.964Z" },
+    { url = "https://files.pythonhosted.org/packages/e8/a3/7a7a4488ba052767846b9c916d208b3ed114e3eb670ee984e4c565b9cf0d/jiter-0.12.0-cp314-cp314-musllinux_1_1_x86_64.whl", hash = "sha256:b111b0e9152fa7df870ecaebb0bd30240d9f7fff1f2003bcb4ed0f519941820b", size = 510701, upload-time = "2025-11-09T20:48:24.483Z" },
+    { url = "https://files.pythonhosted.org/packages/c3/16/052ffbf9d0467b70af24e30f91e0579e13ded0c17bb4a8eb2aed3cb60131/jiter-0.12.0-cp314-cp314-win32.whl", hash = "sha256:a78befb9cc0a45b5a5a0d537b06f8544c2ebb60d19d02c41ff15da28a9e22d42", size = 205029, upload-time = "2025-11-09T20:48:25.749Z" },
+    { url = "https://files.pythonhosted.org/packages/e4/18/3cf1f3f0ccc789f76b9a754bdb7a6977e5d1d671ee97a9e14f7eb728d80e/jiter-0.12.0-cp314-cp314-win_amd64.whl", hash = "sha256:e1fe01c082f6aafbe5c8faf0ff074f38dfb911d53f07ec333ca03f8f6226debf", size = 204960, upload-time = "2025-11-09T20:48:27.415Z" },
+    { url = "https://files.pythonhosted.org/packages/02/68/736821e52ecfdeeb0f024b8ab01b5a229f6b9293bbdb444c27efade50b0f/jiter-0.12.0-cp314-cp314-win_arm64.whl", hash = "sha256:d72f3b5a432a4c546ea4bedc84cce0c3404874f1d1676260b9c7f048a9855451", size = 185529, upload-time = "2025-11-09T20:48:29.125Z" },
+    { url = "https://files.pythonhosted.org/packages/30/61/12ed8ee7a643cce29ac97c2281f9ce3956eb76b037e88d290f4ed0d41480/jiter-0.12.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:e6ded41aeba3603f9728ed2b6196e4df875348ab97b28fc8afff115ed42ba7a7", size = 318974, upload-time = "2025-11-09T20:48:30.87Z" },
+    { url = "https://files.pythonhosted.org/packages/2d/c6/f3041ede6d0ed5e0e79ff0de4c8f14f401bbf196f2ef3971cdbe5fd08d1d/jiter-0.12.0-cp314-cp314t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a947920902420a6ada6ad51892082521978e9dd44a802663b001436e4b771684", size = 345932, upload-time = "2025-11-09T20:48:32.658Z" },
+    { url = "https://files.pythonhosted.org/packages/d5/5d/4d94835889edd01ad0e2dbfc05f7bdfaed46292e7b504a6ac7839aa00edb/jiter-0.12.0-cp314-cp314t-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:add5e227e0554d3a52cf390a7635edaffdf4f8fce4fdbcef3cc2055bb396a30c", size = 367243, upload-time = "2025-11-09T20:48:34.093Z" },
+    { url = "https://files.pythonhosted.org/packages/fd/76/0051b0ac2816253a99d27baf3dda198663aff882fa6ea7deeb94046da24e/jiter-0.12.0-cp314-cp314t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3f9b1cda8fcb736250d7e8711d4580ebf004a46771432be0ae4796944b5dfa5d", size = 479315, upload-time = "2025-11-09T20:48:35.507Z" },
+    { url = "https://files.pythonhosted.org/packages/70/ae/83f793acd68e5cb24e483f44f482a1a15601848b9b6f199dacb970098f77/jiter-0.12.0-cp314-cp314t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:deeb12a2223fe0135c7ff1356a143d57f95bbf1f4a66584f1fc74df21d86b993", size = 380714, upload-time = "2025-11-09T20:48:40.014Z" },
+    { url = "https://files.pythonhosted.org/packages/b1/5e/4808a88338ad2c228b1126b93fcd8ba145e919e886fe910d578230dabe3b/jiter-0.12.0-cp314-cp314t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c596cc0f4cb574877550ce4ecd51f8037469146addd676d7c1a30ebe6391923f", size = 365168, upload-time = "2025-11-09T20:48:41.462Z" },
+    { url = "https://files.pythonhosted.org/packages/0c/d4/04619a9e8095b42aef436b5aeb4c0282b4ff1b27d1db1508df9f5dc82750/jiter-0.12.0-cp314-cp314t-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:5ab4c823b216a4aeab3fdbf579c5843165756bd9ad87cc6b1c65919c4715f783", size = 387893, upload-time = "2025-11-09T20:48:42.921Z" },
+    { url = "https://files.pythonhosted.org/packages/17/ea/d3c7e62e4546fdc39197fa4a4315a563a89b95b6d54c0d25373842a59cbe/jiter-0.12.0-cp314-cp314t-musllinux_1_1_aarch64.whl", hash = "sha256:e427eee51149edf962203ff8db75a7514ab89be5cb623fb9cea1f20b54f1107b", size = 520828, upload-time = "2025-11-09T20:48:44.278Z" },
+    { url = "https://files.pythonhosted.org/packages/cc/0b/c6d3562a03fd767e31cb119d9041ea7958c3c80cb3d753eafb19b3b18349/jiter-0.12.0-cp314-cp314t-musllinux_1_1_x86_64.whl", hash = "sha256:edb868841f84c111255ba5e80339d386d937ec1fdce419518ce1bd9370fac5b6", size = 511009, upload-time = "2025-11-09T20:48:45.726Z" },
+    { url = "https://files.pythonhosted.org/packages/aa/51/2cb4468b3448a8385ebcd15059d325c9ce67df4e2758d133ab9442b19834/jiter-0.12.0-cp314-cp314t-win32.whl", hash = "sha256:8bbcfe2791dfdb7c5e48baf646d37a6a3dcb5a97a032017741dea9f817dca183", size = 205110, upload-time = "2025-11-09T20:48:47.033Z" },
+    { url = "https://files.pythonhosted.org/packages/b2/c5/ae5ec83dec9c2d1af805fd5fe8f74ebded9c8670c5210ec7820ce0dbeb1e/jiter-0.12.0-cp314-cp314t-win_amd64.whl", hash = "sha256:2fa940963bf02e1d8226027ef461e36af472dea85d36054ff835aeed944dd873", size = 205223, upload-time = "2025-11-09T20:48:49.076Z" },
+    { url = "https://files.pythonhosted.org/packages/97/9a/3c5391907277f0e55195550cf3fa8e293ae9ee0c00fb402fec1e38c0c82f/jiter-0.12.0-cp314-cp314t-win_arm64.whl", hash = "sha256:506c9708dd29b27288f9f8f1140c3cb0e3d8ddb045956d7757b1fa0e0f39a473", size = 185564, upload-time = "2025-11-09T20:48:50.376Z" },
+    { url = "https://files.pythonhosted.org/packages/cb/f5/12efb8ada5f5c9edc1d4555fe383c1fb2eac05ac5859258a72d61981d999/jiter-0.12.0-graalpy312-graalpy250_312_native-macosx_10_12_x86_64.whl", hash = "sha256:e8547883d7b96ef2e5fe22b88f8a4c8725a56e7f4abafff20fd5272d634c7ecb", size = 309974, upload-time = "2025-11-09T20:49:17.187Z" },
+    { url = "https://files.pythonhosted.org/packages/85/15/d6eb3b770f6a0d332675141ab3962fd4a7c270ede3515d9f3583e1d28276/jiter-0.12.0-graalpy312-graalpy250_312_native-macosx_11_0_arm64.whl", hash = "sha256:89163163c0934854a668ed783a2546a0617f71706a2551a4a0666d91ab365d6b", size = 304233, upload-time = "2025-11-09T20:49:18.734Z" },
+    { url = "https://files.pythonhosted.org/packages/8c/3e/e7e06743294eea2cf02ced6aa0ff2ad237367394e37a0e2b4a1108c67a36/jiter-0.12.0-graalpy312-graalpy250_312_native-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d96b264ab7d34bbb2312dedc47ce07cd53f06835eacbc16dde3761f47c3a9e7f", size = 338537, upload-time = "2025-11-09T20:49:20.317Z" },
+    { url = "https://files.pythonhosted.org/packages/2f/9c/6753e6522b8d0ef07d3a3d239426669e984fb0eba15a315cdbc1253904e4/jiter-0.12.0-graalpy312-graalpy250_312_native-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c24e864cb30ab82311c6425655b0cdab0a98c5d973b065c66a3f020740c2324c", size = 346110, upload-time = "2025-11-09T20:49:21.817Z" },
+]
+
 [[package]]
 name = "joblib"
 version = "1.5.3"
@@ -653,6 +797,25 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/ad/0d/eca3d962f9eef265f01a8e0d20085c6dd1f443cbffc11b6dede81fd82356/numpy-2.4.1-cp314-cp314t-win_arm64.whl", hash = "sha256:6436cffb4f2bf26c974344439439c95e152c9a527013f26b3577be6c2ca64295", size = 10667121, upload-time = "2026-01-10T06:44:41.644Z" },
 ]
 
+[[package]]
+name = "openai"
+version = "2.16.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "anyio" },
+    { name = "distro" },
+    { name = "httpx" },
+    { name = "jiter" },
+    { name = "pydantic" },
+    { name = "sniffio" },
+    { name = "tqdm" },
+    { name = "typing-extensions" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/b1/6c/e4c964fcf1d527fdf4739e7cc940c60075a4114d50d03871d5d5b1e13a88/openai-2.16.0.tar.gz", hash = "sha256:42eaa22ca0d8ded4367a77374104d7a2feafee5bd60a107c3c11b5243a11cd12", size = 629649, upload-time = "2026-01-27T23:28:02.579Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/16/83/0315bf2cfd75a2ce8a7e54188e9456c60cec6c0cf66728ed07bd9859ff26/openai-2.16.0-py3-none-any.whl", hash = "sha256:5f46643a8f42899a84e80c38838135d7038e7718333ce61396994f887b09a59b", size = 1068612, upload-time = "2026-01-27T23:28:00.356Z" },
+]
+
 [[package]]
 name = "packaging"
 version = "26.0"
@@ -736,6 +899,18 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/32/2b/121e912bd60eebd623f873fd090de0e84f322972ab25a7f9044c056804ed/pathspec-1.0.3-py3-none-any.whl", hash = "sha256:e80767021c1cc524aa3fb14bedda9c34406591343cc42797b386ce7b9354fb6c", size = 55021, upload-time = "2026-01-09T15:46:44.652Z" },
 ]
 
+[[package]]
+name = "pgvector"
+version = "0.4.2"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "numpy" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/25/6c/6d8b4b03b958c02fa8687ec6063c49d952a189f8c91ebbe51e877dfab8f7/pgvector-0.4.2.tar.gz", hash = "sha256:322cac0c1dc5d41c9ecf782bd9991b7966685dee3a00bc873631391ed949513a", size = 31354, upload-time = "2025-12-05T01:07:17.87Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/5a/26/6cee8a1ce8c43625ec561aff19df07f9776b7525d9002c86bceb3e0ac970/pgvector-0.4.2-py3-none-any.whl", hash = "sha256:549d45f7a18593783d5eec609ea1684a724ba8405c4cb182a0b2b08aeff04e08", size = 27441, upload-time = "2025-12-05T01:07:16.536Z" },
+]
+
 [[package]]
 name = "pluggy"
 version = "1.6.0"
@@ -977,6 +1152,109 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/f1/12/de94a39c2ef588c7e6455cfbe7343d3b2dc9d6b6b2f40c4c6565744c873d/pyyaml-6.0.3-cp314-cp314t-win_arm64.whl", hash = "sha256:ebc55a14a21cb14062aa4162f906cd962b28e2e9ea38f9b4391244cd8de4ae0b", size = 149341, upload-time = "2025-09-25T21:32:56.828Z" },
 ]
 
+[[package]]
+name = "regex"
+version = "2026.1.15"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/0b/86/07d5056945f9ec4590b518171c4254a5925832eb727b56d3c38a7476f316/regex-2026.1.15.tar.gz", hash = "sha256:164759aa25575cbc0651bef59a0b18353e54300d79ace8084c818ad8ac72b7d5", size = 414811, upload-time = "2026-01-14T23:18:02.775Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/92/81/10d8cf43c807d0326efe874c1b79f22bfb0fb226027b0b19ebc26d301408/regex-2026.1.15-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:4c8fcc5793dde01641a35905d6731ee1548f02b956815f8f1cab89e515a5bdf1", size = 489398, upload-time = "2026-01-14T23:14:43.741Z" },
+    { url = "https://files.pythonhosted.org/packages/90/b0/7c2a74e74ef2a7c32de724658a69a862880e3e4155cba992ba04d1c70400/regex-2026.1.15-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:bfd876041a956e6a90ad7cdb3f6a630c07d491280bfeed4544053cd434901681", size = 291339, upload-time = "2026-01-14T23:14:45.183Z" },
+    { url = "https://files.pythonhosted.org/packages/19/4d/16d0773d0c818417f4cc20aa0da90064b966d22cd62a8c46765b5bd2d643/regex-2026.1.15-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:9250d087bc92b7d4899ccd5539a1b2334e44eee85d848c4c1aef8e221d3f8c8f", size = 289003, upload-time = "2026-01-14T23:14:47.25Z" },
+    { url = "https://files.pythonhosted.org/packages/c6/e4/1fc4599450c9f0863d9406e944592d968b8d6dfd0d552a7d569e43bceada/regex-2026.1.15-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c8a154cf6537ebbc110e24dabe53095e714245c272da9c1be05734bdad4a61aa", size = 798656, upload-time = "2026-01-14T23:14:48.77Z" },
+    { url = "https://files.pythonhosted.org/packages/b2/e6/59650d73a73fa8a60b3a590545bfcf1172b4384a7df2e7fe7b9aab4e2da9/regex-2026.1.15-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:8050ba2e3ea1d8731a549e83c18d2f0999fbc99a5f6bd06b4c91449f55291804", size = 864252, upload-time = "2026-01-14T23:14:50.528Z" },
+    { url = "https://files.pythonhosted.org/packages/6e/ab/1d0f4d50a1638849a97d731364c9a80fa304fec46325e48330c170ee8e80/regex-2026.1.15-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:0bf065240704cb8951cc04972cf107063917022511273e0969bdb34fc173456c", size = 912268, upload-time = "2026-01-14T23:14:52.952Z" },
+    { url = "https://files.pythonhosted.org/packages/dd/df/0d722c030c82faa1d331d1921ee268a4e8fb55ca8b9042c9341c352f17fa/regex-2026.1.15-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c32bef3e7aeee75746748643667668ef941d28b003bfc89994ecf09a10f7a1b5", size = 803589, upload-time = "2026-01-14T23:14:55.182Z" },
+    { url = "https://files.pythonhosted.org/packages/66/23/33289beba7ccb8b805c6610a8913d0131f834928afc555b241caabd422a9/regex-2026.1.15-cp312-cp312-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:d5eaa4a4c5b1906bd0d2508d68927f15b81821f85092e06f1a34a4254b0e1af3", size = 775700, upload-time = "2026-01-14T23:14:56.707Z" },
+    { url = "https://files.pythonhosted.org/packages/e7/65/bf3a42fa6897a0d3afa81acb25c42f4b71c274f698ceabd75523259f6688/regex-2026.1.15-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:86c1077a3cc60d453d4084d5b9649065f3bf1184e22992bd322e1f081d3117fb", size = 787928, upload-time = "2026-01-14T23:14:58.312Z" },
+    { url = "https://files.pythonhosted.org/packages/f4/f5/13bf65864fc314f68cdd6d8ca94adcab064d4d39dbd0b10fef29a9da48fc/regex-2026.1.15-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:2b091aefc05c78d286657cd4db95f2e6313375ff65dcf085e42e4c04d9c8d410", size = 858607, upload-time = "2026-01-14T23:15:00.657Z" },
+    { url = "https://files.pythonhosted.org/packages/a3/31/040e589834d7a439ee43fb0e1e902bc81bd58a5ba81acffe586bb3321d35/regex-2026.1.15-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:57e7d17f59f9ebfa9667e6e5a1c0127b96b87cb9cede8335482451ed00788ba4", size = 763729, upload-time = "2026-01-14T23:15:02.248Z" },
+    { url = "https://files.pythonhosted.org/packages/9b/84/6921e8129687a427edf25a34a5594b588b6d88f491320b9de5b6339a4fcb/regex-2026.1.15-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:c6c4dcdfff2c08509faa15d36ba7e5ef5fcfab25f1e8f85a0c8f45bc3a30725d", size = 850697, upload-time = "2026-01-14T23:15:03.878Z" },
+    { url = "https://files.pythonhosted.org/packages/8a/87/3d06143d4b128f4229158f2de5de6c8f2485170c7221e61bf381313314b2/regex-2026.1.15-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:cf8ff04c642716a7f2048713ddc6278c5fd41faa3b9cab12607c7abecd012c22", size = 789849, upload-time = "2026-01-14T23:15:06.102Z" },
+    { url = "https://files.pythonhosted.org/packages/77/69/c50a63842b6bd48850ebc7ab22d46e7a2a32d824ad6c605b218441814639/regex-2026.1.15-cp312-cp312-win32.whl", hash = "sha256:82345326b1d8d56afbe41d881fdf62f1926d7264b2fc1537f99ae5da9aad7913", size = 266279, upload-time = "2026-01-14T23:15:07.678Z" },
+    { url = "https://files.pythonhosted.org/packages/f2/36/39d0b29d087e2b11fd8191e15e81cce1b635fcc845297c67f11d0d19274d/regex-2026.1.15-cp312-cp312-win_amd64.whl", hash = "sha256:4def140aa6156bc64ee9912383d4038f3fdd18fee03a6f222abd4de6357ce42a", size = 277166, upload-time = "2026-01-14T23:15:09.257Z" },
+    { url = "https://files.pythonhosted.org/packages/28/32/5b8e476a12262748851fa8ab1b0be540360692325975b094e594dfebbb52/regex-2026.1.15-cp312-cp312-win_arm64.whl", hash = "sha256:c6c565d9a6e1a8d783c1948937ffc377dd5771e83bd56de8317c450a954d2056", size = 270415, upload-time = "2026-01-14T23:15:10.743Z" },
+    { url = "https://files.pythonhosted.org/packages/f8/2e/6870bb16e982669b674cce3ee9ff2d1d46ab80528ee6bcc20fb2292efb60/regex-2026.1.15-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:e69d0deeb977ffe7ed3d2e4439360089f9c3f217ada608f0f88ebd67afb6385e", size = 489164, upload-time = "2026-01-14T23:15:13.962Z" },
+    { url = "https://files.pythonhosted.org/packages/dc/67/9774542e203849b0286badf67199970a44ebdb0cc5fb739f06e47ada72f8/regex-2026.1.15-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:3601ffb5375de85a16f407854d11cca8fe3f5febbe3ac78fb2866bb220c74d10", size = 291218, upload-time = "2026-01-14T23:15:15.647Z" },
+    { url = "https://files.pythonhosted.org/packages/b2/87/b0cda79f22b8dee05f774922a214da109f9a4c0eca5da2c9d72d77ea062c/regex-2026.1.15-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:4c5ef43b5c2d4114eb8ea424bb8c9cec01d5d17f242af88b2448f5ee81caadbc", size = 288895, upload-time = "2026-01-14T23:15:17.788Z" },
+    { url = "https://files.pythonhosted.org/packages/3b/6a/0041f0a2170d32be01ab981d6346c83a8934277d82c780d60b127331f264/regex-2026.1.15-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:968c14d4f03e10b2fd960f1d5168c1f0ac969381d3c1fcc973bc45fb06346599", size = 798680, upload-time = "2026-01-14T23:15:19.342Z" },
+    { url = "https://files.pythonhosted.org/packages/58/de/30e1cfcdbe3e891324aa7568b7c968771f82190df5524fabc1138cb2d45a/regex-2026.1.15-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:56a5595d0f892f214609c9f76b41b7428bed439d98dc961efafdd1354d42baae", size = 864210, upload-time = "2026-01-14T23:15:22.005Z" },
+    { url = "https://files.pythonhosted.org/packages/64/44/4db2f5c5ca0ccd40ff052ae7b1e9731352fcdad946c2b812285a7505ca75/regex-2026.1.15-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:0bf650f26087363434c4e560011f8e4e738f6f3e029b85d4904c50135b86cfa5", size = 912358, upload-time = "2026-01-14T23:15:24.569Z" },
+    { url = "https://files.pythonhosted.org/packages/79/b6/e6a5665d43a7c42467138c8a2549be432bad22cbd206f5ec87162de74bd7/regex-2026.1.15-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:18388a62989c72ac24de75f1449d0fb0b04dfccd0a1a7c1c43af5eb503d890f6", size = 803583, upload-time = "2026-01-14T23:15:26.526Z" },
+    { url = "https://files.pythonhosted.org/packages/e7/53/7cd478222169d85d74d7437e74750005e993f52f335f7c04ff7adfda3310/regex-2026.1.15-cp313-cp313-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:6d220a2517f5893f55daac983bfa9fe998a7dbcaee4f5d27a88500f8b7873788", size = 775782, upload-time = "2026-01-14T23:15:29.352Z" },
+    { url = "https://files.pythonhosted.org/packages/ca/b5/75f9a9ee4b03a7c009fe60500fe550b45df94f0955ca29af16333ef557c5/regex-2026.1.15-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:c9c08c2fbc6120e70abff5d7f28ffb4d969e14294fb2143b4b5c7d20e46d1714", size = 787978, upload-time = "2026-01-14T23:15:31.295Z" },
+    { url = "https://files.pythonhosted.org/packages/72/b3/79821c826245bbe9ccbb54f6eadb7879c722fd3e0248c17bfc90bf54e123/regex-2026.1.15-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:7ef7d5d4bd49ec7364315167a4134a015f61e8266c6d446fc116a9ac4456e10d", size = 858550, upload-time = "2026-01-14T23:15:33.558Z" },
+    { url = "https://files.pythonhosted.org/packages/4a/85/2ab5f77a1c465745bfbfcb3ad63178a58337ae8d5274315e2cc623a822fa/regex-2026.1.15-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:6e42844ad64194fa08d5ccb75fe6a459b9b08e6d7296bd704460168d58a388f3", size = 763747, upload-time = "2026-01-14T23:15:35.206Z" },
+    { url = "https://files.pythonhosted.org/packages/6d/84/c27df502d4bfe2873a3e3a7cf1bdb2b9cc10284d1a44797cf38bed790470/regex-2026.1.15-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:cfecdaa4b19f9ca534746eb3b55a5195d5c95b88cac32a205e981ec0a22b7d31", size = 850615, upload-time = "2026-01-14T23:15:37.523Z" },
+    { url = "https://files.pythonhosted.org/packages/7d/b7/658a9782fb253680aa8ecb5ccbb51f69e088ed48142c46d9f0c99b46c575/regex-2026.1.15-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:08df9722d9b87834a3d701f3fca570b2be115654dbfd30179f30ab2f39d606d3", size = 789951, upload-time = "2026-01-14T23:15:39.582Z" },
+    { url = "https://files.pythonhosted.org/packages/fc/2a/5928af114441e059f15b2f63e188bd00c6529b3051c974ade7444b85fcda/regex-2026.1.15-cp313-cp313-win32.whl", hash = "sha256:d426616dae0967ca225ab12c22274eb816558f2f99ccb4a1d52ca92e8baf180f", size = 266275, upload-time = "2026-01-14T23:15:42.108Z" },
+    { url = "https://files.pythonhosted.org/packages/4f/16/5bfbb89e435897bff28cf0352a992ca719d9e55ebf8b629203c96b6ce4f7/regex-2026.1.15-cp313-cp313-win_amd64.whl", hash = "sha256:febd38857b09867d3ed3f4f1af7d241c5c50362e25ef43034995b77a50df494e", size = 277145, upload-time = "2026-01-14T23:15:44.244Z" },
+    { url = "https://files.pythonhosted.org/packages/56/c1/a09ff7392ef4233296e821aec5f78c51be5e91ffde0d163059e50fd75835/regex-2026.1.15-cp313-cp313-win_arm64.whl", hash = "sha256:8e32f7896f83774f91499d239e24cebfadbc07639c1494bb7213983842348337", size = 270411, upload-time = "2026-01-14T23:15:45.858Z" },
+    { url = "https://files.pythonhosted.org/packages/3c/38/0cfd5a78e5c6db00e6782fdae70458f89850ce95baa5e8694ab91d89744f/regex-2026.1.15-cp313-cp313t-macosx_10_13_universal2.whl", hash = "sha256:ec94c04149b6a7b8120f9f44565722c7ae31b7a6d2275569d2eefa76b83da3be", size = 492068, upload-time = "2026-01-14T23:15:47.616Z" },
+    { url = "https://files.pythonhosted.org/packages/50/72/6c86acff16cb7c959c4355826bbf06aad670682d07c8f3998d9ef4fee7cd/regex-2026.1.15-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:40c86d8046915bb9aeb15d3f3f15b6fd500b8ea4485b30e1bbc799dab3fe29f8", size = 292756, upload-time = "2026-01-14T23:15:49.307Z" },
+    { url = "https://files.pythonhosted.org/packages/4e/58/df7fb69eadfe76526ddfce28abdc0af09ffe65f20c2c90932e89d705153f/regex-2026.1.15-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:726ea4e727aba21643205edad8f2187ec682d3305d790f73b7a51c7587b64bdd", size = 291114, upload-time = "2026-01-14T23:15:51.484Z" },
+    { url = "https://files.pythonhosted.org/packages/ed/6c/a4011cd1cf96b90d2cdc7e156f91efbd26531e822a7fbb82a43c1016678e/regex-2026.1.15-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1cb740d044aff31898804e7bf1181cc72c03d11dfd19932b9911ffc19a79070a", size = 807524, upload-time = "2026-01-14T23:15:53.102Z" },
+    { url = "https://files.pythonhosted.org/packages/1d/25/a53ffb73183f69c3e9f4355c4922b76d2840aee160af6af5fac229b6201d/regex-2026.1.15-cp313-cp313t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:05d75a668e9ea16f832390d22131fe1e8acc8389a694c8febc3e340b0f810b93", size = 873455, upload-time = "2026-01-14T23:15:54.956Z" },
+    { url = "https://files.pythonhosted.org/packages/66/0b/8b47fc2e8f97d9b4a851736f3890a5f786443aa8901061c55f24c955f45b/regex-2026.1.15-cp313-cp313t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:d991483606f3dbec93287b9f35596f41aa2e92b7c2ebbb935b63f409e243c9af", size = 915007, upload-time = "2026-01-14T23:15:57.041Z" },
+    { url = "https://files.pythonhosted.org/packages/c2/fa/97de0d681e6d26fabe71968dbee06dd52819e9a22fdce5dac7256c31ed84/regex-2026.1.15-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:194312a14819d3e44628a44ed6fea6898fdbecb0550089d84c403475138d0a09", size = 812794, upload-time = "2026-01-14T23:15:58.916Z" },
+    { url = "https://files.pythonhosted.org/packages/22/38/e752f94e860d429654aa2b1c51880bff8dfe8f084268258adf9151cf1f53/regex-2026.1.15-cp313-cp313t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:fe2fda4110a3d0bc163c2e0664be44657431440722c5c5315c65155cab92f9e5", size = 781159, upload-time = "2026-01-14T23:16:00.817Z" },
+    { url = "https://files.pythonhosted.org/packages/e9/a7/d739ffaef33c378fc888302a018d7f81080393d96c476b058b8c64fd2b0d/regex-2026.1.15-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:124dc36c85d34ef2d9164da41a53c1c8c122cfb1f6e1ec377a1f27ee81deb794", size = 795558, upload-time = "2026-01-14T23:16:03.267Z" },
+    { url = "https://files.pythonhosted.org/packages/3e/c4/542876f9a0ac576100fc73e9c75b779f5c31e3527576cfc9cb3009dcc58a/regex-2026.1.15-cp313-cp313t-musllinux_1_2_ppc64le.whl", hash = "sha256:a1774cd1981cd212506a23a14dba7fdeaee259f5deba2df6229966d9911e767a", size = 868427, upload-time = "2026-01-14T23:16:05.646Z" },
+    { url = "https://files.pythonhosted.org/packages/fc/0f/d5655bea5b22069e32ae85a947aa564912f23758e112cdb74212848a1a1b/regex-2026.1.15-cp313-cp313t-musllinux_1_2_riscv64.whl", hash = "sha256:b5f7d8d2867152cdb625e72a530d2ccb48a3d199159144cbdd63870882fb6f80", size = 769939, upload-time = "2026-01-14T23:16:07.542Z" },
+    { url = "https://files.pythonhosted.org/packages/20/06/7e18a4fa9d326daeda46d471a44ef94201c46eaa26dbbb780b5d92cbfdda/regex-2026.1.15-cp313-cp313t-musllinux_1_2_s390x.whl", hash = "sha256:492534a0ab925d1db998defc3c302dae3616a2fc3fe2e08db1472348f096ddf2", size = 854753, upload-time = "2026-01-14T23:16:10.395Z" },
+    { url = "https://files.pythonhosted.org/packages/3b/67/dc8946ef3965e166f558ef3b47f492bc364e96a265eb4a2bb3ca765c8e46/regex-2026.1.15-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:c661fc820cfb33e166bf2450d3dadbda47c8d8981898adb9b6fe24e5e582ba60", size = 799559, upload-time = "2026-01-14T23:16:12.347Z" },
+    { url = "https://files.pythonhosted.org/packages/a5/61/1bba81ff6d50c86c65d9fd84ce9699dd106438ee4cdb105bf60374ee8412/regex-2026.1.15-cp313-cp313t-win32.whl", hash = "sha256:99ad739c3686085e614bf77a508e26954ff1b8f14da0e3765ff7abbf7799f952", size = 268879, upload-time = "2026-01-14T23:16:14.049Z" },
+    { url = "https://files.pythonhosted.org/packages/e9/5e/cef7d4c5fb0ea3ac5c775fd37db5747f7378b29526cc83f572198924ff47/regex-2026.1.15-cp313-cp313t-win_amd64.whl", hash = "sha256:32655d17905e7ff8ba5c764c43cb124e34a9245e45b83c22e81041e1071aee10", size = 280317, upload-time = "2026-01-14T23:16:15.718Z" },
+    { url = "https://files.pythonhosted.org/packages/b4/52/4317f7a5988544e34ab57b4bde0f04944c4786128c933fb09825924d3e82/regex-2026.1.15-cp313-cp313t-win_arm64.whl", hash = "sha256:b2a13dd6a95e95a489ca242319d18fc02e07ceb28fa9ad146385194d95b3c829", size = 271551, upload-time = "2026-01-14T23:16:17.533Z" },
+    { url = "https://files.pythonhosted.org/packages/52/0a/47fa888ec7cbbc7d62c5f2a6a888878e76169170ead271a35239edd8f0e8/regex-2026.1.15-cp314-cp314-macosx_10_13_universal2.whl", hash = "sha256:d920392a6b1f353f4aa54328c867fec3320fa50657e25f64abf17af054fc97ac", size = 489170, upload-time = "2026-01-14T23:16:19.835Z" },
+    { url = "https://files.pythonhosted.org/packages/ac/c4/d000e9b7296c15737c9301708e9e7fbdea009f8e93541b6b43bdb8219646/regex-2026.1.15-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:b5a28980a926fa810dbbed059547b02783952e2efd9c636412345232ddb87ff6", size = 291146, upload-time = "2026-01-14T23:16:21.541Z" },
+    { url = "https://files.pythonhosted.org/packages/f9/b6/921cc61982e538682bdf3bdf5b2c6ab6b34368da1f8e98a6c1ddc503c9cf/regex-2026.1.15-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:621f73a07595d83f28952d7bd1e91e9d1ed7625fb7af0064d3516674ec93a2a2", size = 288986, upload-time = "2026-01-14T23:16:23.381Z" },
+    { url = "https://files.pythonhosted.org/packages/ca/33/eb7383dde0bbc93f4fb9d03453aab97e18ad4024ac7e26cef8d1f0a2cff0/regex-2026.1.15-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:3d7d92495f47567a9b1669c51fc8d6d809821849063d168121ef801bbc213846", size = 799098, upload-time = "2026-01-14T23:16:25.088Z" },
+    { url = "https://files.pythonhosted.org/packages/27/56/b664dccae898fc8d8b4c23accd853f723bde0f026c747b6f6262b688029c/regex-2026.1.15-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:8dd16fba2758db7a3780a051f245539c4451ca20910f5a5e6ea1c08d06d4a76b", size = 864980, upload-time = "2026-01-14T23:16:27.297Z" },
+    { url = "https://files.pythonhosted.org/packages/16/40/0999e064a170eddd237bae9ccfcd8f28b3aa98a38bf727a086425542a4fc/regex-2026.1.15-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:1e1808471fbe44c1a63e5f577a1d5f02fe5d66031dcbdf12f093ffc1305a858e", size = 911607, upload-time = "2026-01-14T23:16:29.235Z" },
+    { url = "https://files.pythonhosted.org/packages/07/78/c77f644b68ab054e5a674fb4da40ff7bffb2c88df58afa82dbf86573092d/regex-2026.1.15-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0751a26ad39d4f2ade8fe16c59b2bf5cb19eb3d2cd543e709e583d559bd9efde", size = 803358, upload-time = "2026-01-14T23:16:31.369Z" },
+    { url = "https://files.pythonhosted.org/packages/27/31/d4292ea8566eaa551fafc07797961c5963cf5235c797cc2ae19b85dfd04d/regex-2026.1.15-cp314-cp314-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:0f0c7684c7f9ca241344ff95a1de964f257a5251968484270e91c25a755532c5", size = 775833, upload-time = "2026-01-14T23:16:33.141Z" },
+    { url = "https://files.pythonhosted.org/packages/ce/b2/cff3bf2fea4133aa6fb0d1e370b37544d18c8350a2fa118c7e11d1db0e14/regex-2026.1.15-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:74f45d170a21df41508cb67165456538425185baaf686281fa210d7e729abc34", size = 788045, upload-time = "2026-01-14T23:16:35.005Z" },
+    { url = "https://files.pythonhosted.org/packages/8d/99/2cb9b69045372ec877b6f5124bda4eb4253bc58b8fe5848c973f752bc52c/regex-2026.1.15-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:f1862739a1ffb50615c0fde6bae6569b5efbe08d98e59ce009f68a336f64da75", size = 859374, upload-time = "2026-01-14T23:16:36.919Z" },
+    { url = "https://files.pythonhosted.org/packages/09/16/710b0a5abe8e077b1729a562d2f297224ad079f3a66dce46844c193416c8/regex-2026.1.15-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:453078802f1b9e2b7303fb79222c054cb18e76f7bdc220f7530fdc85d319f99e", size = 763940, upload-time = "2026-01-14T23:16:38.685Z" },
+    { url = "https://files.pythonhosted.org/packages/dd/d1/7585c8e744e40eb3d32f119191969b91de04c073fca98ec14299041f6e7e/regex-2026.1.15-cp314-cp314-musllinux_1_2_s390x.whl", hash = "sha256:a30a68e89e5a218b8b23a52292924c1f4b245cb0c68d1cce9aec9bbda6e2c160", size = 850112, upload-time = "2026-01-14T23:16:40.646Z" },
+    { url = "https://files.pythonhosted.org/packages/af/d6/43e1dd85df86c49a347aa57c1f69d12c652c7b60e37ec162e3096194a278/regex-2026.1.15-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:9479cae874c81bf610d72b85bb681a94c95722c127b55445285fb0e2c82db8e1", size = 789586, upload-time = "2026-01-14T23:16:42.799Z" },
+    { url = "https://files.pythonhosted.org/packages/93/38/77142422f631e013f316aaae83234c629555729a9fbc952b8a63ac91462a/regex-2026.1.15-cp314-cp314-win32.whl", hash = "sha256:d639a750223132afbfb8f429c60d9d318aeba03281a5f1ab49f877456448dcf1", size = 271691, upload-time = "2026-01-14T23:16:44.671Z" },
+    { url = "https://files.pythonhosted.org/packages/4a/a9/ab16b4649524ca9e05213c1cdbb7faa85cc2aa90a0230d2f796cbaf22736/regex-2026.1.15-cp314-cp314-win_amd64.whl", hash = "sha256:4161d87f85fa831e31469bfd82c186923070fc970b9de75339b68f0c75b51903", size = 280422, upload-time = "2026-01-14T23:16:46.607Z" },
+    { url = "https://files.pythonhosted.org/packages/be/2a/20fd057bf3521cb4791f69f869635f73e0aaf2b9ad2d260f728144f9047c/regex-2026.1.15-cp314-cp314-win_arm64.whl", hash = "sha256:91c5036ebb62663a6b3999bdd2e559fd8456d17e2b485bf509784cd31a8b1705", size = 273467, upload-time = "2026-01-14T23:16:48.967Z" },
+    { url = "https://files.pythonhosted.org/packages/ad/77/0b1e81857060b92b9cad239104c46507dd481b3ff1fa79f8e7f865aae38a/regex-2026.1.15-cp314-cp314t-macosx_10_13_universal2.whl", hash = "sha256:ee6854c9000a10938c79238de2379bea30c82e4925a371711af45387df35cab8", size = 492073, upload-time = "2026-01-14T23:16:51.154Z" },
+    { url = "https://files.pythonhosted.org/packages/70/f3/f8302b0c208b22c1e4f423147e1913fd475ddd6230565b299925353de644/regex-2026.1.15-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:2c2b80399a422348ce5de4fe40c418d6299a0fa2803dd61dc0b1a2f28e280fcf", size = 292757, upload-time = "2026-01-14T23:16:53.08Z" },
+    { url = "https://files.pythonhosted.org/packages/bf/f0/ef55de2460f3b4a6da9d9e7daacd0cb79d4ef75c64a2af316e68447f0df0/regex-2026.1.15-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:dca3582bca82596609959ac39e12b7dad98385b4fefccb1151b937383cec547d", size = 291122, upload-time = "2026-01-14T23:16:55.383Z" },
+    { url = "https://files.pythonhosted.org/packages/cf/55/bb8ccbacabbc3a11d863ee62a9f18b160a83084ea95cdfc5d207bfc3dd75/regex-2026.1.15-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ef71d476caa6692eea743ae5ea23cde3260677f70122c4d258ca952e5c2d4e84", size = 807761, upload-time = "2026-01-14T23:16:57.251Z" },
+    { url = "https://files.pythonhosted.org/packages/8f/84/f75d937f17f81e55679a0509e86176e29caa7298c38bd1db7ce9c0bf6075/regex-2026.1.15-cp314-cp314t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:c243da3436354f4af6c3058a3f81a97d47ea52c9bd874b52fd30274853a1d5df", size = 873538, upload-time = "2026-01-14T23:16:59.349Z" },
+    { url = "https://files.pythonhosted.org/packages/b8/d9/0da86327df70349aa8d86390da91171bd3ca4f0e7c1d1d453a9c10344da3/regex-2026.1.15-cp314-cp314t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:8355ad842a7c7e9e5e55653eade3b7d1885ba86f124dd8ab1f722f9be6627434", size = 915066, upload-time = "2026-01-14T23:17:01.607Z" },
+    { url = "https://files.pythonhosted.org/packages/2a/5e/f660fb23fc77baa2a61aa1f1fe3a4eea2bbb8a286ddec148030672e18834/regex-2026.1.15-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:f192a831d9575271a22d804ff1a5355355723f94f31d9eef25f0d45a152fdc1a", size = 812938, upload-time = "2026-01-14T23:17:04.366Z" },
+    { url = "https://files.pythonhosted.org/packages/69/33/a47a29bfecebbbfd1e5cd3f26b28020a97e4820f1c5148e66e3b7d4b4992/regex-2026.1.15-cp314-cp314t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:166551807ec20d47ceaeec380081f843e88c8949780cd42c40f18d16168bed10", size = 781314, upload-time = "2026-01-14T23:17:06.378Z" },
+    { url = "https://files.pythonhosted.org/packages/65/ec/7ec2bbfd4c3f4e494a24dec4c6943a668e2030426b1b8b949a6462d2c17b/regex-2026.1.15-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:f9ca1cbdc0fbfe5e6e6f8221ef2309988db5bcede52443aeaee9a4ad555e0dac", size = 795652, upload-time = "2026-01-14T23:17:08.521Z" },
+    { url = "https://files.pythonhosted.org/packages/46/79/a5d8651ae131fe27d7c521ad300aa7f1c7be1dbeee4d446498af5411b8a9/regex-2026.1.15-cp314-cp314t-musllinux_1_2_ppc64le.whl", hash = "sha256:b30bcbd1e1221783c721483953d9e4f3ab9c5d165aa709693d3f3946747b1aea", size = 868550, upload-time = "2026-01-14T23:17:10.573Z" },
+    { url = "https://files.pythonhosted.org/packages/06/b7/25635d2809664b79f183070786a5552dd4e627e5aedb0065f4e3cf8ee37d/regex-2026.1.15-cp314-cp314t-musllinux_1_2_riscv64.whl", hash = "sha256:2a8d7b50c34578d0d3bf7ad58cde9652b7d683691876f83aedc002862a35dc5e", size = 769981, upload-time = "2026-01-14T23:17:12.871Z" },
+    { url = "https://files.pythonhosted.org/packages/16/8b/fc3fcbb2393dcfa4a6c5ffad92dc498e842df4581ea9d14309fcd3c55fb9/regex-2026.1.15-cp314-cp314t-musllinux_1_2_s390x.whl", hash = "sha256:9d787e3310c6a6425eb346be4ff2ccf6eece63017916fd77fe8328c57be83521", size = 854780, upload-time = "2026-01-14T23:17:14.837Z" },
+    { url = "https://files.pythonhosted.org/packages/d0/38/dde117c76c624713c8a2842530be9c93ca8b606c0f6102d86e8cd1ce8bea/regex-2026.1.15-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:619843841e220adca114118533a574a9cd183ed8a28b85627d2844c500a2b0db", size = 799778, upload-time = "2026-01-14T23:17:17.369Z" },
+    { url = "https://files.pythonhosted.org/packages/e3/0d/3a6cfa9ae99606afb612d8fb7a66b245a9d5ff0f29bb347c8a30b6ad561b/regex-2026.1.15-cp314-cp314t-win32.whl", hash = "sha256:e90b8db97f6f2c97eb045b51a6b2c5ed69cedd8392459e0642d4199b94fabd7e", size = 274667, upload-time = "2026-01-14T23:17:19.301Z" },
+    { url = "https://files.pythonhosted.org/packages/5b/b2/297293bb0742fd06b8d8e2572db41a855cdf1cae0bf009b1cb74fe07e196/regex-2026.1.15-cp314-cp314t-win_amd64.whl", hash = "sha256:5ef19071f4ac9f0834793af85bd04a920b4407715624e40cb7a0631a11137cdf", size = 284386, upload-time = "2026-01-14T23:17:21.231Z" },
+    { url = "https://files.pythonhosted.org/packages/95/e4/a3b9480c78cf8ee86626cb06f8d931d74d775897d44201ccb813097ae697/regex-2026.1.15-cp314-cp314t-win_arm64.whl", hash = "sha256:ca89c5e596fc05b015f27561b3793dc2fa0917ea0d7507eebb448efd35274a70", size = 274837, upload-time = "2026-01-14T23:17:23.146Z" },
+]
+
+[[package]]
+name = "requests"
+version = "2.32.5"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "certifi" },
+    { name = "charset-normalizer" },
+    { name = "idna" },
+    { name = "urllib3" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/c9/74/b3ff8e6c8446842c3f5c837e9c3dfcfe2018ea6ecef224c710c85ef728f4/requests-2.32.5.tar.gz", hash = "sha256:dbba0bac56e100853db0ea71b82b4dfd5fe2bf6d3754a8893c3af500cec7d7cf", size = 134517, upload-time = "2025-08-18T20:46:02.573Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/1e/db/4254e3eabe8020b458f1a747140d32277ec7a271daf1d235b70dc0b4e6e3/requests-2.32.5-py3-none-any.whl", hash = "sha256:2462f94637a34fd532264295e186976db0f5d453d1cdd31473c85a6a161affb6", size = 64738, upload-time = "2025-08-18T20:46:00.542Z" },
+]
+
 [[package]]
 name = "ruff"
 version = "0.14.14"
@@ -1117,6 +1395,15 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/b7/ce/149a00dd41f10bc29e5921b496af8b574d8413afcd5e30dfa0ed46c2cc5e/six-1.17.0-py2.py3-none-any.whl", hash = "sha256:4721f391ed90541fddacab5acf947aa0d3dc7d27b2e1e8eda2be8970586c3274", size = 11050, upload-time = "2024-12-04T17:35:26.475Z" },
 ]
 
+[[package]]
+name = "sniffio"
+version = "1.3.1"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/a2/87/a6771e1546d97e7e041b6ae58d80074f81b7d5121207425c964ddf5cfdbd/sniffio-1.3.1.tar.gz", hash = "sha256:f4324edc670a0f49750a81b895f35c3adb843cca46f0530f79fc1babb23789dc", size = 20372, upload-time = "2024-02-25T23:20:04.057Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/e9/44/75a9c9421471a6c4805dbf2356f7c181a29c1879239abab1ea2cc8f38b40/sniffio-1.3.1-py3-none-any.whl", hash = "sha256:2f6da418d1f1e0fddd844478f41680e794e6051915791a034ff65e5f100525a2", size = 10235, upload-time = "2024-02-25T23:20:01.196Z" },
+]
+
 [[package]]
 name = "sqlalchemy"
 version = "2.0.46"
@@ -1195,6 +1482,65 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/32/d5/f9a850d79b0851d1d4ef6456097579a9005b31fea68726a4ae5f2d82ddd9/threadpoolctl-3.6.0-py3-none-any.whl", hash = "sha256:43a0b8fd5a2928500110039e43a5eed8480b918967083ea48dc3ab9f13c4a7fb", size = 18638, upload-time = "2025-03-13T13:49:21.846Z" },
 ]
 
+[[package]]
+name = "tiktoken"
+version = "0.12.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "regex" },
+    { name = "requests" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/7d/ab/4d017d0f76ec3171d469d80fc03dfbb4e48a4bcaddaa831b31d526f05edc/tiktoken-0.12.0.tar.gz", hash = "sha256:b18ba7ee2b093863978fcb14f74b3707cdc8d4d4d3836853ce7ec60772139931", size = 37806, upload-time = "2025-10-06T20:22:45.419Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/a4/85/be65d39d6b647c79800fd9d29241d081d4eeb06271f383bb87200d74cf76/tiktoken-0.12.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:b97f74aca0d78a1ff21b8cd9e9925714c15a9236d6ceacf5c7327c117e6e21e8", size = 1050728, upload-time = "2025-10-06T20:21:52.756Z" },
+    { url = "https://files.pythonhosted.org/packages/4a/42/6573e9129bc55c9bf7300b3a35bef2c6b9117018acca0dc760ac2d93dffe/tiktoken-0.12.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:2b90f5ad190a4bb7c3eb30c5fa32e1e182ca1ca79f05e49b448438c3e225a49b", size = 994049, upload-time = "2025-10-06T20:21:53.782Z" },
+    { url = "https://files.pythonhosted.org/packages/66/c5/ed88504d2f4a5fd6856990b230b56d85a777feab84e6129af0822f5d0f70/tiktoken-0.12.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:65b26c7a780e2139e73acc193e5c63ac754021f160df919add909c1492c0fb37", size = 1129008, upload-time = "2025-10-06T20:21:54.832Z" },
+    { url = "https://files.pythonhosted.org/packages/f4/90/3dae6cc5436137ebd38944d396b5849e167896fc2073da643a49f372dc4f/tiktoken-0.12.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:edde1ec917dfd21c1f2f8046b86348b0f54a2c0547f68149d8600859598769ad", size = 1152665, upload-time = "2025-10-06T20:21:56.129Z" },
+    { url = "https://files.pythonhosted.org/packages/a3/fe/26df24ce53ffde419a42f5f53d755b995c9318908288c17ec3f3448313a3/tiktoken-0.12.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:35a2f8ddd3824608b3d650a000c1ef71f730d0c56486845705a8248da00f9fe5", size = 1194230, upload-time = "2025-10-06T20:21:57.546Z" },
+    { url = "https://files.pythonhosted.org/packages/20/cc/b064cae1a0e9fac84b0d2c46b89f4e57051a5f41324e385d10225a984c24/tiktoken-0.12.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:83d16643edb7fa2c99eff2ab7733508aae1eebb03d5dfc46f5565862810f24e3", size = 1254688, upload-time = "2025-10-06T20:21:58.619Z" },
+    { url = "https://files.pythonhosted.org/packages/81/10/b8523105c590c5b8349f2587e2fdfe51a69544bd5a76295fc20f2374f470/tiktoken-0.12.0-cp312-cp312-win_amd64.whl", hash = "sha256:ffc5288f34a8bc02e1ea7047b8d041104791d2ddbf42d1e5fa07822cbffe16bd", size = 878694, upload-time = "2025-10-06T20:21:59.876Z" },
+    { url = "https://files.pythonhosted.org/packages/00/61/441588ee21e6b5cdf59d6870f86beb9789e532ee9718c251b391b70c68d6/tiktoken-0.12.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:775c2c55de2310cc1bc9a3ad8826761cbdc87770e586fd7b6da7d4589e13dab3", size = 1050802, upload-time = "2025-10-06T20:22:00.96Z" },
+    { url = "https://files.pythonhosted.org/packages/1f/05/dcf94486d5c5c8d34496abe271ac76c5b785507c8eae71b3708f1ad9b45a/tiktoken-0.12.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:a01b12f69052fbe4b080a2cfb867c4de12c704b56178edf1d1d7b273561db160", size = 993995, upload-time = "2025-10-06T20:22:02.788Z" },
+    { url = "https://files.pythonhosted.org/packages/a0/70/5163fe5359b943f8db9946b62f19be2305de8c3d78a16f629d4165e2f40e/tiktoken-0.12.0-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:01d99484dc93b129cd0964f9d34eee953f2737301f18b3c7257bf368d7615baa", size = 1128948, upload-time = "2025-10-06T20:22:03.814Z" },
+    { url = "https://files.pythonhosted.org/packages/0c/da/c028aa0babf77315e1cef357d4d768800c5f8a6de04d0eac0f377cb619fa/tiktoken-0.12.0-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:4a1a4fcd021f022bfc81904a911d3df0f6543b9e7627b51411da75ff2fe7a1be", size = 1151986, upload-time = "2025-10-06T20:22:05.173Z" },
+    { url = "https://files.pythonhosted.org/packages/a0/5a/886b108b766aa53e295f7216b509be95eb7d60b166049ce2c58416b25f2a/tiktoken-0.12.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:981a81e39812d57031efdc9ec59fa32b2a5a5524d20d4776574c4b4bd2e9014a", size = 1194222, upload-time = "2025-10-06T20:22:06.265Z" },
+    { url = "https://files.pythonhosted.org/packages/f4/f8/4db272048397636ac7a078d22773dd2795b1becee7bc4922fe6207288d57/tiktoken-0.12.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:9baf52f84a3f42eef3ff4e754a0db79a13a27921b457ca9832cf944c6be4f8f3", size = 1255097, upload-time = "2025-10-06T20:22:07.403Z" },
+    { url = "https://files.pythonhosted.org/packages/8e/32/45d02e2e0ea2be3a9ed22afc47d93741247e75018aac967b713b2941f8ea/tiktoken-0.12.0-cp313-cp313-win_amd64.whl", hash = "sha256:b8a0cd0c789a61f31bf44851defbd609e8dd1e2c8589c614cc1060940ef1f697", size = 879117, upload-time = "2025-10-06T20:22:08.418Z" },
+    { url = "https://files.pythonhosted.org/packages/ce/76/994fc868f88e016e6d05b0da5ac24582a14c47893f4474c3e9744283f1d5/tiktoken-0.12.0-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:d5f89ea5680066b68bcb797ae85219c72916c922ef0fcdd3480c7d2315ffff16", size = 1050309, upload-time = "2025-10-06T20:22:10.939Z" },
+    { url = "https://files.pythonhosted.org/packages/f6/b8/57ef1456504c43a849821920d582a738a461b76a047f352f18c0b26c6516/tiktoken-0.12.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:b4e7ed1c6a7a8a60a3230965bdedba8cc58f68926b835e519341413370e0399a", size = 993712, upload-time = "2025-10-06T20:22:12.115Z" },
+    { url = "https://files.pythonhosted.org/packages/72/90/13da56f664286ffbae9dbcfadcc625439142675845baa62715e49b87b68b/tiktoken-0.12.0-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:fc530a28591a2d74bce821d10b418b26a094bf33839e69042a6e86ddb7a7fb27", size = 1128725, upload-time = "2025-10-06T20:22:13.541Z" },
+    { url = "https://files.pythonhosted.org/packages/05/df/4f80030d44682235bdaecd7346c90f67ae87ec8f3df4a3442cb53834f7e4/tiktoken-0.12.0-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:06a9f4f49884139013b138920a4c393aa6556b2f8f536345f11819389c703ebb", size = 1151875, upload-time = "2025-10-06T20:22:14.559Z" },
+    { url = "https://files.pythonhosted.org/packages/22/1f/ae535223a8c4ef4c0c1192e3f9b82da660be9eb66b9279e95c99288e9dab/tiktoken-0.12.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:04f0e6a985d95913cabc96a741c5ffec525a2c72e9df086ff17ebe35985c800e", size = 1194451, upload-time = "2025-10-06T20:22:15.545Z" },
+    { url = "https://files.pythonhosted.org/packages/78/a7/f8ead382fce0243cb625c4f266e66c27f65ae65ee9e77f59ea1653b6d730/tiktoken-0.12.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:0ee8f9ae00c41770b5f9b0bb1235474768884ae157de3beb5439ca0fd70f3e25", size = 1253794, upload-time = "2025-10-06T20:22:16.624Z" },
+    { url = "https://files.pythonhosted.org/packages/93/e0/6cc82a562bc6365785a3ff0af27a2a092d57c47d7a81d9e2295d8c36f011/tiktoken-0.12.0-cp313-cp313t-win_amd64.whl", hash = "sha256:dc2dd125a62cb2b3d858484d6c614d136b5b848976794edfb63688d539b8b93f", size = 878777, upload-time = "2025-10-06T20:22:18.036Z" },
+    { url = "https://files.pythonhosted.org/packages/72/05/3abc1db5d2c9aadc4d2c76fa5640134e475e58d9fbb82b5c535dc0de9b01/tiktoken-0.12.0-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:a90388128df3b3abeb2bfd1895b0681412a8d7dc644142519e6f0a97c2111646", size = 1050188, upload-time = "2025-10-06T20:22:19.563Z" },
+    { url = "https://files.pythonhosted.org/packages/e3/7b/50c2f060412202d6c95f32b20755c7a6273543b125c0985d6fa9465105af/tiktoken-0.12.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:da900aa0ad52247d8794e307d6446bd3cdea8e192769b56276695d34d2c9aa88", size = 993978, upload-time = "2025-10-06T20:22:20.702Z" },
+    { url = "https://files.pythonhosted.org/packages/14/27/bf795595a2b897e271771cd31cb847d479073497344c637966bdf2853da1/tiktoken-0.12.0-cp314-cp314-manylinux_2_28_aarch64.whl", hash = "sha256:285ba9d73ea0d6171e7f9407039a290ca77efcdb026be7769dccc01d2c8d7fff", size = 1129271, upload-time = "2025-10-06T20:22:22.06Z" },
+    { url = "https://files.pythonhosted.org/packages/f5/de/9341a6d7a8f1b448573bbf3425fa57669ac58258a667eb48a25dfe916d70/tiktoken-0.12.0-cp314-cp314-manylinux_2_28_x86_64.whl", hash = "sha256:d186a5c60c6a0213f04a7a802264083dea1bbde92a2d4c7069e1a56630aef830", size = 1151216, upload-time = "2025-10-06T20:22:23.085Z" },
+    { url = "https://files.pythonhosted.org/packages/75/0d/881866647b8d1be4d67cb24e50d0c26f9f807f994aa1510cb9ba2fe5f612/tiktoken-0.12.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:604831189bd05480f2b885ecd2d1986dc7686f609de48208ebbbddeea071fc0b", size = 1194860, upload-time = "2025-10-06T20:22:24.602Z" },
+    { url = "https://files.pythonhosted.org/packages/b3/1e/b651ec3059474dab649b8d5b69f5c65cd8fcd8918568c1935bd4136c9392/tiktoken-0.12.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:8f317e8530bb3a222547b85a58583238c8f74fd7a7408305f9f63246d1a0958b", size = 1254567, upload-time = "2025-10-06T20:22:25.671Z" },
+    { url = "https://files.pythonhosted.org/packages/80/57/ce64fd16ac390fafde001268c364d559447ba09b509181b2808622420eec/tiktoken-0.12.0-cp314-cp314-win_amd64.whl", hash = "sha256:399c3dd672a6406719d84442299a490420b458c44d3ae65516302a99675888f3", size = 921067, upload-time = "2025-10-06T20:22:26.753Z" },
+    { url = "https://files.pythonhosted.org/packages/ac/a4/72eed53e8976a099539cdd5eb36f241987212c29629d0a52c305173e0a68/tiktoken-0.12.0-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:c2c714c72bc00a38ca969dae79e8266ddec999c7ceccd603cc4f0d04ccd76365", size = 1050473, upload-time = "2025-10-06T20:22:27.775Z" },
+    { url = "https://files.pythonhosted.org/packages/e6/d7/0110b8f54c008466b19672c615f2168896b83706a6611ba6e47313dbc6e9/tiktoken-0.12.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:cbb9a3ba275165a2cb0f9a83f5d7025afe6b9d0ab01a22b50f0e74fee2ad253e", size = 993855, upload-time = "2025-10-06T20:22:28.799Z" },
+    { url = "https://files.pythonhosted.org/packages/5f/77/4f268c41a3957c418b084dd576ea2fad2e95da0d8e1ab705372892c2ca22/tiktoken-0.12.0-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:dfdfaa5ffff8993a3af94d1125870b1d27aed7cb97aa7eb8c1cefdbc87dbee63", size = 1129022, upload-time = "2025-10-06T20:22:29.981Z" },
+    { url = "https://files.pythonhosted.org/packages/4e/2b/fc46c90fe5028bd094cd6ee25a7db321cb91d45dc87531e2bdbb26b4867a/tiktoken-0.12.0-cp314-cp314t-manylinux_2_28_x86_64.whl", hash = "sha256:584c3ad3d0c74f5269906eb8a659c8bfc6144a52895d9261cdaf90a0ae5f4de0", size = 1150736, upload-time = "2025-10-06T20:22:30.996Z" },
+    { url = "https://files.pythonhosted.org/packages/28/c0/3c7a39ff68022ddfd7d93f3337ad90389a342f761c4d71de99a3ccc57857/tiktoken-0.12.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:54c891b416a0e36b8e2045b12b33dd66fb34a4fe7965565f1b482da50da3e86a", size = 1194908, upload-time = "2025-10-06T20:22:32.073Z" },
+    { url = "https://files.pythonhosted.org/packages/ab/0d/c1ad6f4016a3968c048545f5d9b8ffebf577774b2ede3e2e352553b685fe/tiktoken-0.12.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:5edb8743b88d5be814b1a8a8854494719080c28faaa1ccbef02e87354fe71ef0", size = 1253706, upload-time = "2025-10-06T20:22:33.385Z" },
+    { url = "https://files.pythonhosted.org/packages/af/df/c7891ef9d2712ad774777271d39fdef63941ffba0a9d59b7ad1fd2765e57/tiktoken-0.12.0-cp314-cp314t-win_amd64.whl", hash = "sha256:f61c0aea5565ac82e2ec50a05e02a6c44734e91b51c10510b084ea1b8e633a71", size = 920667, upload-time = "2025-10-06T20:22:34.444Z" },
+]
+
+[[package]]
+name = "tqdm"
+version = "4.67.2"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "colorama", marker = "sys_platform == 'win32'" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/27/89/4b0001b2dab8df0a5ee2787dcbe771de75ded01f18f1f8d53dedeea2882b/tqdm-4.67.2.tar.gz", hash = "sha256:649aac53964b2cb8dec76a14b405a4c0d13612cb8933aae547dd144eacc99653", size = 169514, upload-time = "2026-01-30T23:12:06.555Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/f5/e2/31eac96de2915cf20ccaed0225035db149dfb9165a9ed28d4b252ef3f7f7/tqdm-4.67.2-py3-none-any.whl", hash = "sha256:9a12abcbbff58b6036b2167d9d3853042b9d436fe7330f06ae047867f2f8e0a7", size = 78354, upload-time = "2026-01-30T23:12:04.368Z" },
+]
+
 [[package]]
 name = "types-pytz"
 version = "2025.2.0.20251108"
@@ -1234,6 +1580,15 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/c7/b0/003792df09decd6849a5e39c28b513c06e84436a54440380862b5aeff25d/tzdata-2025.3-py2.py3-none-any.whl", hash = "sha256:06a47e5700f3081aab02b2e513160914ff0694bce9947d6b76ebd6bf57cfc5d1", size = 348521, upload-time = "2025-12-13T17:45:33.889Z" },
 ]
 
+[[package]]
+name = "urllib3"
+version = "2.6.3"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/c7/24/5f1b3bdffd70275f6661c76461e25f024d5a38a46f04aaca912426a2b1d3/urllib3-2.6.3.tar.gz", hash = "sha256:1b62b6884944a57dbe321509ab94fd4d3b307075e0c2eae991ac71ee15ad38ed", size = 435556, upload-time = "2026-01-07T16:24:43.925Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/39/08/aaaad47bc4e9dc8c725e68f9d04865dbcb2052843ff09c97b08904852d84/urllib3-2.6.3-py3-none-any.whl", hash = "sha256:bf272323e553dfb2e87d9bfd225ca7b0f467b919d7bbd355436d3fd37cb0acd4", size = 131584, upload-time = "2026-01-07T16:24:42.685Z" },
+]
+
 [[package]]
 name = "uvicorn"
 version = "0.40.0"