diff --git a/.env.example b/.env.example index 442da0c0..7c4e121b 100644 --- a/.env.example +++ b/.env.example @@ -22,5 +22,36 @@ FORECAST_MAX_HORIZON=90 FORECAST_MODEL_ARTIFACTS_DIR=./artifacts/models FORECAST_ENABLE_LIGHTGBM=false +# RAG Configuration +# Embedding Provider: "openai" or "ollama" +RAG_EMBEDDING_PROVIDER=openai + +# OpenAI Configuration (when RAG_EMBEDDING_PROVIDER=openai) +OPENAI_API_KEY=sk-your-openai-api-key-here +RAG_EMBEDDING_MODEL=text-embedding-3-small + +# Ollama Configuration (when RAG_EMBEDDING_PROVIDER=ollama) +# OLLAMA_BASE_URL=http://localhost:11434 +# OLLAMA_EMBEDDING_MODEL=nomic-embed-text + +# Embedding dimension (must match your model: OpenAI=1536, nomic-embed-text=768, etc.) +RAG_EMBEDDING_DIMENSION=1536 +RAG_EMBEDDING_BATCH_SIZE=100 + +# Chunking settings +RAG_CHUNK_SIZE=512 +RAG_CHUNK_OVERLAP=50 +RAG_MIN_CHUNK_SIZE=100 + +# Retrieval settings +RAG_TOP_K=5 +RAG_SIMILARITY_THRESHOLD=0.7 +RAG_MAX_CONTEXT_TOKENS=4000 + +# pgvector index settings +RAG_INDEX_TYPE=hnsw +RAG_HNSW_M=16 +RAG_HNSW_EF_CONSTRUCTION=64 + # Frontend (Vite) VITE_API_BASE_URL=http://localhost:8123 diff --git a/INITIAL-10.md b/INITIAL-10.md new file mode 100644 index 00000000..4e6eb4d3 --- /dev/null +++ b/INITIAL-10.md @@ -0,0 +1,430 @@ +# INITIAL-10.md — Agentic Layer (The Brain) + +## Architectural Role + +**"The Brain"** - Autonomous decision-making, tool orchestration, and structured outputs using PydanticAI. 
+ +This phase provides intelligent orchestration capabilities: +- Experiment automation (config generation → backtest → deploy) +- RAG-powered Q&A with evidence-grounded answers and citations +- Human-in-the-loop approval for sensitive operations +- Structured, schema-enforced outputs + +--- + +## Tech Stack + +| Component | Technology | Purpose | +| --------- | ---------- | ------- | +| Agent Framework | [PydanticAI](https://ai.pydantic.dev/) | Type-safe agent orchestration | +| Tool System | [Function Tools](https://ai.pydantic.dev/tools/) | API binding | +| Tool Groups | [Toolsets](https://ai.pydantic.dev/toolsets/) | Grouped tool management | +| LLM Provider | Anthropic Claude / OpenAI GPT-4 | Configurable provider | +| Streaming | [PydanticAI Streaming](https://ai.pydantic.dev/results/#streamed-results) | Real-time responses | + +--- + +## FEATURE + +### Experiment Orchestrator Agent +Autonomous experiment workflow management: +- **Tools**: `list_models`, `run_backtest`, `compare_runs`, `create_alias`, `archive_run` +- **Workflow**: Generate configs → Run backtests → Analyze metrics → Select best → Deploy alias +- **Output**: Structured `ExperimentReport` with methodology, results, and recommendations + +### RAG Assistant Agent +Evidence-grounded question answering: +- **Tools**: `retrieve_context` (from INITIAL-9), `format_citation` +- **Workflow**: Parse query → Retrieve chunks → Synthesize answer → Format citations +- **Output**: Structured `RAGResponse` with answer, citations, and confidence score + +### Agent Session Management +- Session state persistence for multi-turn conversations +- Tool call logging with correlation IDs +- Human-in-the-loop approval for sensitive actions +- Graceful LLM API failure handling with retries + +--- + +## ENDPOINTS + +### POST /agents/experiment/run +Execute an experiment workflow with the Orchestrator Agent. 
+ +**Request**: + +```json +{ + "objective": "Find the best model configuration for store S001, product P001", + "constraints": { + "model_types": ["moving_average", "seasonal_naive"], + "min_train_size": 60, + "max_splits": 5 + }, + "auto_deploy": false, + "session_id": "optional-session-id" +} +``` + +**Response**: + +```json +{ + "session_id": "sess_abc123", + "status": "completed", + "report": { + "objective": "Find the best model configuration for store S001, product P001", + "methodology": "Evaluated 6 configurations using 5-fold expanding window CV", + "experiments_run": 6, + "best_run": { + "run_id": "run_xyz789", + "model_type": "moving_average", + "config": {"window": 14}, + "metrics": { + "mae": 12.5, + "smape": 15.2, + "wape": 0.08 + } + }, + "baseline_comparison": { + "vs_naive": { + "mae_improvement_pct": 23.5, + "smape_improvement_pct": 18.2 + } + }, + "recommendation": "Deploy moving_average with window=14", + "approval_required": true, + "pending_action": "create_alias" + }, + "tool_calls": [ + { + "tool": "list_models", + "args": {}, + "result_summary": "Found 4 model types" + }, + { + "tool": "run_backtest", + "args": {"model_type": "moving_average", "window": 7}, + "result_summary": "MAE: 15.2" + } + ], + "tokens_used": 2450, + "duration_ms": 45000 +} +``` + +### POST /agents/experiment/approve +Approve a pending action from an experiment session. + +**Request**: + +```json +{ + "session_id": "sess_abc123", + "action": "create_alias", + "approved": true, + "comment": "Approved for staging deployment" +} +``` + +**Response**: + +```json +{ + "session_id": "sess_abc123", + "action": "create_alias", + "status": "executed", + "result": { + "alias_name": "production", + "run_id": "run_xyz789" + } +} +``` + +### POST /agents/rag/query +Query with answer generation using the RAG Assistant Agent. 
+ +**Request**: + +```json +{ + "query": "How does the backtesting module prevent data leakage?", + "session_id": "optional-session-id", + "include_sources": true +} +``` + +**Response**: + +```json +{ + "session_id": "sess_def456", + "answer": "The backtesting module prevents data leakage through several mechanisms:\n\n1. **Time-based splits only**: The TimeSeriesSplitter uses expanding or sliding window strategies, never random splits.\n\n2. **Gap parameter**: Configurable gap between train and test sets simulates operational latency.\n\n3. **Lag feature validation**: Features are computed with explicit cutoff dates to prevent future data access.", + "confidence": 0.92, + "citations": [ + { + "source_type": "markdown", + "source_path": "docs/PHASE/5-BACKTESTING.md", + "chunk_id": "chunk_abc123", + "snippet": "TimeSeriesSplitter uses time-based splits (expanding/sliding window)...", + "relevance_score": 0.94 + }, + { + "source_type": "markdown", + "source_path": "CLAUDE.md", + "chunk_id": "chunk_def456", + "snippet": "Backtesting uses time-based splits (rolling/expanding), never random split...", + "relevance_score": 0.89 + } + ], + "tokens_used": 1250, + "duration_ms": 3200 +} +``` + +### GET /agents/status/{session_id} +Check agent session status. + +**Response**: + +```json +{ + "session_id": "sess_abc123", + "agent_type": "experiment_orchestrator", + "status": "awaiting_approval", + "created_at": "2026-02-01T10:30:00Z", + "last_activity": "2026-02-01T10:35:00Z", + "pending_action": { + "action": "create_alias", + "details": { + "alias_name": "production", + "run_id": "run_xyz789" + } + }, + "tool_calls_count": 8, + "tokens_used": 2450 +} +``` + +### WS /agents/stream +WebSocket endpoint for streaming responses. 
+ +**Client → Server**: + +```json +{ + "type": "query", + "agent": "rag_assistant", + "payload": { + "query": "Explain the model registry workflow" + } +} +``` + +**Server → Client (streaming)**: + +```json +{"type": "token", "content": "The"} +{"type": "token", "content": " model"} +{"type": "token", "content": " registry"} +{"type": "tool_call", "tool": "retrieve_context", "status": "started"} +{"type": "tool_call", "tool": "retrieve_context", "status": "completed", "summary": "Found 5 relevant chunks"} +{"type": "token", "content": " tracks..."} +{"type": "complete", "session_id": "sess_xyz", "tokens_used": 850} +``` + +--- + +## AGENT DEFINITIONS + +### Experiment Orchestrator Agent + +```python +from pydantic_ai import Agent +from pydantic import BaseModel + +class ExperimentReport(BaseModel): + """Structured output for experiment results.""" + objective: str + methodology: str + experiments_run: int + best_run: RunSummary + baseline_comparison: BaselineComparison + recommendation: str + approval_required: bool + pending_action: str | None + +experiment_agent = Agent( + model="anthropic:claude-sonnet-4-20250514", + output_type=ExperimentReport, + system_prompt="""You are an ML experiment orchestrator for retail demand forecasting. + +Your goal is to find the best model configuration through systematic experimentation. +Always: +1. Start with baseline models (naive, seasonal_naive) +2. Compare against baselines with improvement percentages +3. Use time-based backtesting with appropriate train/test splits +4. Recommend the best configuration with justification +5. 
Request approval before deployment actions""", + tools=[list_models, run_backtest, compare_runs, create_alias, archive_run] +) +``` + +### RAG Assistant Agent + +```python +class RAGResponse(BaseModel): + """Structured output for RAG queries.""" + answer: str + confidence: float # 0.0 - 1.0 + citations: list[Citation] + insufficient_context: bool = False + +rag_agent = Agent( + model="anthropic:claude-sonnet-4-20250514", + output_type=RAGResponse, + system_prompt="""You are a documentation assistant for ForecastLabAI. + +Your responses must be evidence-grounded: +- Only answer based on retrieved context +- Include citations for all claims +- If context is insufficient, set insufficient_context=True and explain what's missing +- Never hallucinate information not in the retrieved chunks""", + tools=[retrieve_context, format_citation] +) +``` + +--- + +## TOOL DEFINITIONS + +### list_models +```python +@tool +async def list_models(ctx: RunContext[AgentDeps]) -> list[ModelInfo]: + """List available forecasting models with their configurations. + + Use this to discover what model types can be experimented with. + Returns model_type, default_config, and description. + """ + ... +``` + +### run_backtest +```python +@tool +async def run_backtest( + ctx: RunContext[AgentDeps], + model_type: str, + config: dict[str, Any], + store_id: str, + product_id: str, + n_splits: int = 5 +) -> BacktestResult: + """Run a backtest for a model configuration. + + Use this to evaluate model performance with time-series CV. + Returns per-fold and aggregated metrics (MAE, sMAPE, WAPE). + """ + ... +``` + +### retrieve_context +```python +@tool +async def retrieve_context( + ctx: RunContext[AgentDeps], + query: str, + top_k: int = 5 +) -> list[RetrievedChunk]: + """Retrieve relevant documentation chunks for a query. + + Use this before answering any question about the system. + Returns chunks with content, source_path, and relevance_score. + """ + ... 
+``` + +--- + +## CONFIGURATION (Settings) + +```python +# app/core/config.py additions + +# Agent LLM Configuration +agent_default_model: str = "anthropic:claude-sonnet-4-20250514" +agent_fallback_model: str = "openai:gpt-4o" +agent_temperature: float = 0.1 +agent_max_tokens: int = 4096 + +# Agent Execution Configuration +agent_max_tool_calls: int = 10 +agent_timeout_seconds: int = 120 +agent_retry_attempts: int = 3 +agent_retry_delay_seconds: float = 1.0 + +# Human-in-the-Loop Configuration +agent_require_approval: list[str] = ["create_alias", "archive_run"] +agent_approval_timeout_minutes: int = 60 + +# Streaming Configuration +agent_enable_streaming: bool = True +agent_stream_chunk_size: int = 10 # tokens per chunk + +# Session Configuration +agent_session_ttl_minutes: int = 120 +agent_max_sessions_per_user: int = 5 +``` + +--- + +## SUCCESS CRITERIA + +- [ ] Agents produce schema-enforced structured outputs +- [ ] Tool calls are logged with correlation IDs and timing +- [ ] Human-in-the-loop approval blocks sensitive actions +- [ ] Graceful handling of LLM API failures with retries +- [ ] WebSocket streaming delivers tokens in real-time +- [ ] Session state persists across multiple requests +- [ ] Unit tests with mocked LLM responses +- [ ] Integration tests with real LLM calls (rate-limited) +- [ ] Structured logging for all agent operations +- [ ] Token usage tracked per session for cost monitoring + +--- + +## CROSS-MODULE INTEGRATION + +| Direction | Module | Integration Point | +| --------- | ------ | ----------------- | +| **← RAG Layer** | INITIAL-9 | Uses `retrieve_context` tool | +| **← Registry** | Phase 6 | Uses `compare_runs`, `create_alias`, `archive_run` tools | +| **← Backtesting** | Phase 5 | Uses `run_backtest` tool | +| **← Forecasting** | Phase 4 | Uses `list_models` tool | +| **→ Dashboard** | INITIAL-11 | Provides chat interface backend | +| **→ Jobs** | Phase 7 | Creates job records for audit trail | + +--- + +## 
DOCUMENTATION LINKS + +- [PydanticAI Documentation](https://ai.pydantic.dev/) +- [PydanticAI Agents](https://ai.pydantic.dev/agents/) +- [PydanticAI Tools](https://ai.pydantic.dev/tools/) +- [PydanticAI Toolsets](https://ai.pydantic.dev/toolsets/) +- [PydanticAI Built-in Tools](https://ai.pydantic.dev/builtin-tools/) +- [PydanticAI Streaming Results](https://ai.pydantic.dev/results/#streamed-results) +- [PydanticAI GitHub](https://github.com/pydantic/pydantic-ai) +- [Anthropic Claude API](https://docs.anthropic.com/en/api) + +--- + +## OTHER CONSIDERATIONS + +- **Structured Outputs**: All agent responses are Pydantic models, never raw text +- **Tool Docstrings**: Follow guidance in CLAUDE.md for agent-optimized tool documentation +- **Cost Control**: Track and limit token usage per session +- **Audit Trail**: All tool calls logged with request correlation for debugging +- **Fallback Provider**: Automatic failover to fallback model on primary failure +- **Approval Workflow**: Pending actions expire after `agent_approval_timeout_minutes` diff --git a/INITIAL-11.md b/INITIAL-11.md new file mode 100644 index 00000000..3138f3c6 --- /dev/null +++ b/INITIAL-11.md @@ -0,0 +1,417 @@ +# INITIAL-11.md — ForecastLab Dashboard (The Face) + +## Architectural Role + +**"The Face"** - User interface, data visualization, and agent interaction using React 19 + shadcn/ui. 
+ +This phase provides the visual layer for: +- Data exploration with server-side pagination and filtering +- Time series visualization with interactive charts +- Agent chat interface with streaming responses +- Admin panel for system management + +--- + +## Tech Stack + +| Component | Technology | Purpose | +|-----------|------------|---------| +| Framework | React 19 + [Vite](https://vite.dev/) | Fast build, HMR | +| Components | [shadcn/ui](https://ui.shadcn.com/) | Accessible, customizable UI | +| Data Tables | [TanStack Table](https://tanstack.com/table/latest) | Server-side data grids | +| Data Fetching | [TanStack Query](https://tanstack.com/query/latest) | Caching, invalidation | +| Charts | [Recharts](https://recharts.org/) | Time series visualization | +| Styling | Tailwind CSS 4 | Utility-first CSS | +| State | React 19 `use()` + TanStack Query | Server state management | + +--- + +## FEATURE + +### Data Explorer +Interactive data tables with full server-side capabilities: +- **Tables**: Sales, Stores, Products, Model Runs, Jobs +- **Features**: Pagination, sorting, filtering, column visibility +- **Export**: CSV download for selected/all rows +- **Pattern**: [shadcn/ui Data Table](https://ui.shadcn.com/docs/components/data-table) + +### Time Series Visualizers +Charts for forecasting analysis: +- **Actual vs Predicted**: Line chart with confidence intervals +- **Backtest Folds**: Train/test split visualization +- **Metric Comparison**: Bar charts for model comparison +- **Interactive**: Tooltips, zoom, pan, brush selection + +### Agent Chat Interface +Real-time interaction with AI agents: +- **Streaming**: WebSocket-based token streaming +- **Citations**: Rendered with source links +- **Tool Calls**: Collapsible visualization of agent actions +- **History**: Session sidebar with conversation threads + +### Admin Panel +System management and monitoring: +- **RAG Sources**: Index/delete documentation sources +- **Model Aliases**: Manage deployment aliases 
+- **Health Dashboard**: Service status, recent errors +- **Job Monitor**: Active and historical job status + +--- + +## PAGE STRUCTURE + +### /dashboard +Main dashboard with KPI summary cards and quick actions. + +### /explorer/sales +Sales data explorer with date range filtering. + +``` +┌─────────────────────────────────────────────────────────────┐ +│ Sales Explorer [Export] │ +├─────────────────────────────────────────────────────────────┤ +│ Filters: [Date Range] [Store ▼] [Product ▼] [Search...] │ +├─────────────────────────────────────────────────────────────┤ +│ Date │ Store │ Product │ Quantity │ Revenue │ +│ 2026-01-15 │ S001 │ P001 │ 150 │ $2,250.00 │ +│ 2026-01-15 │ S001 │ P002 │ 75 │ $1,125.00 │ +│ ... │ ... │ ... │ ... │ ... │ +├─────────────────────────────────────────────────────────────┤ +│ Page 1 of 50 │ [< Prev] [1] [2] [3] ... [50] [Next >] │ +└─────────────────────────────────────────────────────────────┘ +``` + +### /explorer/runs +Model run explorer with comparison capabilities. + +``` +┌─────────────────────────────────────────────────────────────┐ +│ Model Runs [Compare Selected] │ +├─────────────────────────────────────────────────────────────┤ +│ [☐] │ Run ID │ Model │ Status │ MAE │ Created │ +│ [☐] │ run_abc │ MA(14) │ SUCCESS │ 12.5 │ 2h ago │ +│ [☐] │ run_def │ SN(7) │ SUCCESS │ 15.2 │ 3h ago │ +│ [☐] │ run_ghi │ Naive │ SUCCESS │ 18.9 │ 5h ago │ +├─────────────────────────────────────────────────────────────┤ +│ Showing 3 of 127 runs │ +└─────────────────────────────────────────────────────────────┘ +``` + +### /visualize/forecast +Forecast visualization with actual vs predicted overlay. 
+ +``` +┌─────────────────────────────────────────────────────────────┐ +│ Forecast: Store S001, Product P001 │ +├─────────────────────────────────────────────────────────────┤ +│ [Store ▼] [Product ▼] [Model Run ▼] [Date Range] │ +├─────────────────────────────────────────────────────────────┤ +│ │ +│ 200 ─┤ ╭────── │ +│ │ ╭────╯ Predicted │ +│ 150 ─┤ ╭────╯ │ +│ │ ╭────╯ ───── Actual │ +│ 100 ─┤ ╭────╯ - - - Confidence │ +│ │ ╭────╯ │ +│ 50 ─┤ ╭────╯ │ +│ │─╯ │ +│ 0 ─┼──────────────────────────────────────────────── │ +│ Jan 1 Jan 15 Feb 1 Feb 15 Mar 1 │ +│ │ +├─────────────────────────────────────────────────────────────┤ +│ MAE: 12.5 │ sMAPE: 15.2% │ WAPE: 8.1% │ Bias: -2.3 │ +└─────────────────────────────────────────────────────────────┘ +``` + +### /visualize/backtest +Backtest fold visualization. + +``` +┌─────────────────────────────────────────────────────────────┐ +│ Backtest: run_abc123 (5-fold Expanding Window) │ +├─────────────────────────────────────────────────────────────┤ +│ │ +│ Fold 1: ████████████░░░░ MAE: 14.2 sMAPE: 16.8% │ +│ Fold 2: █████████████████░░░░ MAE: 13.1 sMAPE: 15.4% │ +│ Fold 3: ███████████████████████░░░░ MAE: 12.8 sMAPE: 14.9│ +│ Fold 4: █████████████████████████████░░░░ MAE: 11.9 │ +│ Fold 5: ███████████████████████████████████░░░░ MAE: 11.2│ +│ │ +│ █ Train ░ Test │ +├─────────────────────────────────────────────────────────────┤ +│ Aggregated: MAE: 12.6 ± 1.1 │ Stability: 0.91 │ +└─────────────────────────────────────────────────────────────┘ +``` + +### /chat +Agent chat interface with streaming. + +``` +┌─────────────────────────────────────────────────────────────┐ +│ ForecastLab Assistant │ +├────────────┬────────────────────────────────────────────────┤ +│ Sessions │ │ +│ ─────────│ How does backtesting prevent data leakage? │ +│ Today │ │ +│ ◉ Current │ The backtesting module prevents data leakage │ +│ ○ 10:30am │ through several mechanisms: │ +│ ○ 9:15am │ │ +│ Yesterday │ 1. **Time-based splits**: Uses expanding... 
│ +│ ○ 4:45pm │ │ +│ │ 📚 Citations: │ +│ │ [1] docs/PHASE/5-BACKTESTING.md │ +│ │ [2] CLAUDE.md │ +│ │ │ +│ │ ────────────────────────────────────────── │ +│ │ 🔧 Tool: retrieve_context (5 chunks found) │ +│ │ ────────────────────────────────────────── │ +├────────────┴────────────────────────────────────────────────┤ +│ [Type your question...] [Send ➤] │ +└─────────────────────────────────────────────────────────────┘ +``` + +### /admin +Admin panel for system management. + +--- + +## COMPONENTS + +### DataTable (shadcn/ui pattern) + +```tsx +// components/data-table/data-table.tsx +import { + ColumnDef, + flexRender, + getCoreRowModel, + useReactTable, +} from "@tanstack/react-table" + +interface DataTableProps<TData, TValue> { + columns: ColumnDef<TData, TValue>[] + data: TData[] + pageCount: number + pageIndex: number + pageSize: number + onPaginationChange: (pagination: PaginationState) => void + onSortingChange: (sorting: SortingState) => void + onFilterChange: (filters: ColumnFiltersState) => void +} + +export function DataTable<TData, TValue>({ + columns, + data, + pageCount, + ...props +}: DataTableProps<TData, TValue>) { + const table = useReactTable({ + data, + columns, + pageCount, + manualPagination: true, + manualSorting: true, + manualFiltering: true, + getCoreRowModel: getCoreRowModel(), + // ... + }) + + return ( + + ... + ... +
+ ) +} +``` + +### TimeSeriesChart + +```tsx +// components/charts/time-series-chart.tsx +import { LineChart, Line, XAxis, YAxis, Tooltip, Legend } from 'recharts' + +interface TimeSeriesChartProps { + data: { date: string; actual: number; predicted?: number }[] + showConfidence?: boolean + height?: number +} + +export function TimeSeriesChart({ data, showConfidence, height = 400 }: TimeSeriesChartProps) { + return ( + + + + + + + {data[0]?.predicted !== undefined && ( + + )} + + ) +} +``` + +### ChatMessage + +```tsx +// components/chat/chat-message.tsx +interface ChatMessageProps { + role: 'user' | 'assistant' + content: string + citations?: Citation[] + toolCalls?: ToolCall[] + isStreaming?: boolean +} + +export function ChatMessage({ role, content, citations, toolCalls, isStreaming }: ChatMessageProps) { + return ( +
+
+ {content} + {isStreaming && } + {citations && } + {toolCalls && } +
+
+ ) +} +``` + +--- + +## API HOOKS (TanStack Query) + +```tsx +// hooks/use-sales.ts +export function useSales(params: SalesQueryParams) { + return useQuery({ + queryKey: ['sales', params], + queryFn: () => api.get('/analytics/drilldowns', { params }), + placeholderData: keepPreviousData, + }) +} + +// hooks/use-runs.ts +export function useRuns(params: RunsQueryParams) { + return useQuery({ + queryKey: ['runs', params], + queryFn: () => api.get('/registry/runs', { params }), + }) +} + +// hooks/use-chat.ts +export function useChat(sessionId?: string) { + const [messages, setMessages] = useState([]) + const ws = useWebSocket(WS_URL) + + const sendMessage = useCallback((content: string) => { + ws.send(JSON.stringify({ type: 'query', agent: 'rag_assistant', payload: { query: content } })) + }, [ws]) + + return { messages, sendMessage, isConnected: ws.readyState === WebSocket.OPEN } +} +``` + +--- + +## CONFIGURATION (Environment) + +```env +# .env.example for frontend + +# API Configuration +VITE_API_BASE_URL=http://localhost:8123 +VITE_WS_URL=ws://localhost:8123/agents/stream + +# Feature Flags +VITE_ENABLE_AGENT_CHAT=true +VITE_ENABLE_ADMIN_PANEL=true + +# Visualization +VITE_DEFAULT_PAGE_SIZE=25 +VITE_MAX_CHART_POINTS=365 +``` + +--- + +## EXAMPLES + +### examples/ui/README.md +```markdown +# Dashboard Page Map + +| Page | API Endpoints | Description | +|------|---------------|-------------| +| /dashboard | GET /analytics/kpis | KPI summary cards | +| /explorer/sales | GET /analytics/drilldowns | Sales data table | +| /explorer/runs | GET /registry/runs | Model run table | +| /visualize/forecast | GET /forecasting/predict | Forecast chart | +| /visualize/backtest | GET /backtesting/results/{run_id} | Fold visualization | +| /chat | WS /agents/stream | Agent chat | +| /admin | GET /rag/sources, GET /registry/aliases | Admin panel | + +## Running the Dashboard + +\`\`\`bash +cd frontend +pnpm install +pnpm dev +\`\`\` + +Open http://localhost:5173 
+``` + +--- + +## SUCCESS CRITERIA + +- [ ] Data tables handle 10k+ rows with virtual scrolling +- [ ] Server-side pagination, sorting, filtering all functional +- [ ] Charts render smoothly with 365+ data points +- [ ] WebSocket chat shows streaming tokens in real-time +- [ ] Citations render as clickable source links +- [ ] Tool calls displayed in collapsible sections +- [ ] Responsive design works on tablet and mobile +- [ ] Lighthouse performance score > 90 +- [ ] Accessibility: keyboard navigation, screen reader support +- [ ] Dark/light theme toggle + +--- + +## CROSS-MODULE INTEGRATION + +| Direction | Module | Integration Point | +|-----------|--------|-------------------| +| **← RAG Layer** | INITIAL-9 | Displays indexed sources, allows re-indexing | +| **← Agentic Layer** | INITIAL-10 | Chat interface, experiment status display | +| **← Registry** | Phase 6 | Run leaderboard, comparison views | +| **← Analytics** | Phase 7 | KPI dashboard, drilldown charts | +| **← Jobs** | Phase 7 | Job status monitoring | +| **← Dimensions** | Phase 7 | Store/product selectors | + +--- + +## DOCUMENTATION LINKS + +- [shadcn/ui Documentation](https://ui.shadcn.com/) +- [shadcn/ui Data Table](https://ui.shadcn.com/docs/components/data-table) +- [shadcn/ui Table](https://ui.shadcn.com/docs/components/table) +- [TanStack Table](https://tanstack.com/table/latest) +- [TanStack Query](https://tanstack.com/query/latest) +- [Recharts](https://recharts.org/) +- [Vite Documentation](https://vite.dev/) +- [React 19 Documentation](https://react.dev/) +- [Tailwind CSS 4](https://tailwindcss.com/) + +--- + +## OTHER CONSIDERATIONS + +- **No Hardcoded URLs**: API base URL from environment variable only +- **Error Boundaries**: Graceful error handling with retry options +- **Loading States**: Skeleton components for all async data +- **Optimistic Updates**: Instant UI feedback for mutations +- **Caching**: TanStack Query manages cache invalidation +- **Bundle Size**: Code splitting per 
route for fast initial load diff --git a/INITIAL-9.md b/INITIAL-9.md index e82c4453..da491760 100644 --- a/INITIAL-9.md +++ b/INITIAL-9.md @@ -1,33 +1,319 @@ -# INITIAL-9.md — Dashboard + RAG + Agentic Layer (PydanticAI) - -## FEATURE: -- Dashboard (React + Vite + shadcn/ui Data Table): - - Data Explorer (tables, filters, export) - - Model Runs (leaderboard, compare) - - Train & Predict (forms, status) - - Predictions (tabular view) -- RAG assistant (pgvector): - - indexed sources: README.md, /docs/*, OpenAPI export, run reports - - retrieve top-k → answer with citations -- Optional PydanticAI: - - agent with tools: - - experiment orchestrator (generate configs → backtest → select best → report) - - rag assistant (query → retrieve → structured answer) - - enforced structured outputs - -## EXAMPLES: -- `examples/ui/README.md` — page map + API mapping (no hardcoded base URL). -- `examples/rag/index_docs.py` — chunk+embed+store (Settings-driven). -- `examples/rag/query.http` — Q&A returning a citations schema. -- `examples/agent/` — best-practice agent setup (providers, tools, dependencies). - -## DOCUMENTATION: -- shadcn/ui Data Table pattern + TanStack Table -- pgvector similarity search + indexing strategies -- PydanticAI docs (include link in README as a code block) - -## OTHER CONSIDERATIONS: -- Required: `.env.example` for frontend (`VITE_API_BASE_URL`). -- RAG must be evidence-grounded: if no support, return “not found” (no hallucinations). -- Stable citation schema: source_type, source_id/path, chunk_id, snippet/span. -- Embedding model + dimension must come from Settings (never hardcoded). +# INITIAL-9.md — RAG Knowledge Base (The Memory) + +## Architectural Role + +**"The Memory"** - Vector storage, document ingestion, and semantic retrieval infrastructure. 
+ +This phase provides the foundational knowledge layer that enables: +- Indexed documentation and run reports for AI-assisted search +- Semantic retrieval with relevance scoring +- Evidence-grounded context for the Agentic Layer (INITIAL-10) + +--- + +## Tech Stack + +| Component | Technology | Purpose | +|-----------|------------|---------| +| Vector Store | PostgreSQL 16 + [pgvector](https://github.com/pgvector/pgvector) | Similarity search | +| Embeddings | [OpenAI text-embedding-3-small](https://platform.openai.com/docs/models/text-embedding-3-small) | 1536-dim vectors (configurable) | +| Chunking | Markdown-aware, OpenAPI endpoint-aware | Semantic boundaries | +| Index Type | HNSW (default) or IVFFlat | Approximate nearest neighbor | + +--- + +## FEATURE + +### Database Layer +- `document_chunk` table with vector column (`embedding VECTOR(1536)`) +- HNSW index for cosine similarity search +- Unique constraint `(source_id, chunk_index)` for idempotent re-indexing +- Metadata JSONB for source type, heading hierarchy, timestamps + +### Ingestion Pipeline +- **Markdown Chunker**: Heading-aware splitting (configurable size/overlap) +- **OpenAPI Chunker**: Endpoint-based granularity (one chunk per operation) +- **Embedding Service**: Async batch processing with rate limiting +- **Source Registry**: Track indexed sources with version/hash for change detection + +### Retrieval Engine +- Top-k semantic search with configurable similarity threshold +- Metadata filtering (source_type, date_range, tags) +- Relevance score normalization (0.0 - 1.0) +- Context window assembly for downstream consumption + +--- + +## ENDPOINTS + +### POST /rag/index +Index documents from various sources. 
+ +**Request**: +```json +{ + "source_type": "markdown", + "source_path": "docs/ARCHITECTURE.md", + "metadata": { + "category": "documentation", + "version": "1.0.0" + } +} +``` + +**Response**: +```json +{ + "source_id": "src_abc123", + "chunks_created": 15, + "tokens_processed": 4250, + "duration_ms": 1234.56, + "status": "indexed" +} +``` + +### POST /rag/retrieve +Semantic search across indexed documents. + +**Request**: +```json +{ + "query": "How does backtesting prevent data leakage?", + "top_k": 5, + "similarity_threshold": 0.7, + "filters": { + "source_type": ["markdown", "openapi"], + "category": "documentation" + } +} +``` + +**Response**: +```json +{ + "results": [ + { + "chunk_id": "chunk_xyz789", + "source_id": "src_abc123", + "source_path": "docs/PHASE/5-BACKTESTING.md", + "content": "TimeSeriesSplitter uses time-based splits (expanding/sliding window) to prevent leakage...", + "relevance_score": 0.92, + "metadata": { + "heading": "Leakage Prevention", + "section_path": ["Phase 5: Backtesting", "Implementation", "Leakage Prevention"] + } + } + ], + "query_embedding_time_ms": 45.2, + "search_time_ms": 12.8, + "total_chunks_searched": 1250 +} +``` + +### GET /rag/sources +List all indexed sources with metadata. + +**Response**: +```json +{ + "sources": [ + { + "source_id": "src_abc123", + "source_type": "markdown", + "source_path": "docs/ARCHITECTURE.md", + "chunk_count": 15, + "indexed_at": "2026-02-01T10:30:00Z", + "content_hash": "sha256:abc123..." + } + ], + "total_sources": 12, + "total_chunks": 450 +} +``` + +### DELETE /rag/sources/{source_id} +Remove an indexed source and all its chunks. 
+ +**Response**: +```json +{ + "source_id": "src_abc123", + "chunks_deleted": 15, + "status": "deleted" +} +``` + +--- + +## DATABASE SCHEMA + +```sql +-- Enable pgvector extension +CREATE EXTENSION IF NOT EXISTS vector; + +-- Document source registry +CREATE TABLE document_source ( + source_id UUID PRIMARY KEY DEFAULT gen_random_uuid(), + source_type VARCHAR(50) NOT NULL, -- 'markdown', 'openapi', 'run_report' + source_path TEXT NOT NULL, + content_hash VARCHAR(64) NOT NULL, -- SHA-256 for change detection + metadata JSONB DEFAULT '{}', + indexed_at TIMESTAMPTZ DEFAULT NOW(), + updated_at TIMESTAMPTZ DEFAULT NOW(), + UNIQUE (source_type, source_path) +); + +-- Document chunks with embeddings +CREATE TABLE document_chunk ( + chunk_id UUID PRIMARY KEY DEFAULT gen_random_uuid(), + source_id UUID NOT NULL REFERENCES document_source(source_id) ON DELETE CASCADE, + chunk_index INTEGER NOT NULL, + content TEXT NOT NULL, + embedding VECTOR(1536), -- Configurable dimension + token_count INTEGER NOT NULL, + metadata JSONB DEFAULT '{}', -- heading, section_path, etc. 
+ created_at TIMESTAMPTZ DEFAULT NOW(), + UNIQUE (source_id, chunk_index) +); + +-- HNSW index for cosine similarity +CREATE INDEX idx_chunk_embedding_hnsw +ON document_chunk +USING hnsw (embedding vector_cosine_ops) +WITH (m = 16, ef_construction = 64); + +-- Metadata filtering index +CREATE INDEX idx_chunk_metadata ON document_chunk USING gin (metadata); +``` + +--- + +## EXAMPLES + +### examples/rag/index_docs.py +```python +"""Index documentation into RAG knowledge base.""" +import asyncio +from pathlib import Path +import httpx + +async def index_markdown_docs(): + """Index all markdown docs from docs/ directory.""" + async with httpx.AsyncClient(base_url="http://localhost:8123") as client: + docs_dir = Path("docs") + for md_file in docs_dir.rglob("*.md"): + response = await client.post( + "/rag/index", + json={ + "source_type": "markdown", + "source_path": str(md_file), + "metadata": {"category": "documentation"} + } + ) + result = response.json() + print(f"Indexed {md_file}: {result['chunks_created']} chunks") + +if __name__ == "__main__": + asyncio.run(index_markdown_docs()) +``` + +### examples/rag/query.http +```http +### Semantic search query +POST http://localhost:8123/rag/retrieve +Content-Type: application/json + +{ + "query": "How do I configure backtesting splits?", + "top_k": 5, + "similarity_threshold": 0.7 +} + +### List all indexed sources +GET http://localhost:8123/rag/sources + +### Re-index after documentation update +POST http://localhost:8123/rag/index +Content-Type: application/json + +{ + "source_type": "markdown", + "source_path": "README.md", + "metadata": {"category": "overview"} +} +``` + +--- + +## CONFIGURATION (Settings) + +```python +# app/core/config.py additions + +# RAG Embedding Configuration +rag_embedding_model: str = "text-embedding-3-small" +rag_embedding_dimension: int = 1536 +rag_embedding_batch_size: int = 100 + +# RAG Chunking Configuration +rag_chunk_size: int = 512 # tokens +rag_chunk_overlap: int = 50 # tokens 
+rag_min_chunk_size: int = 100 # minimum tokens per chunk + +# RAG Retrieval Configuration +rag_top_k: int = 5 +rag_similarity_threshold: float = 0.7 +rag_max_context_tokens: int = 4000 + +# RAG Index Configuration +rag_index_type: Literal["hnsw", "ivfflat"] = "hnsw" +rag_hnsw_m: int = 16 +rag_hnsw_ef_construction: int = 64 +``` + +--- + +## SUCCESS CRITERIA + +- [ ] pgvector extension enabled and tested in docker-compose +- [ ] Markdown chunker respects heading boundaries +- [ ] OpenAPI chunker produces one chunk per endpoint +- [ ] Embeddings generated via async batch processing +- [ ] Retrieval returns top-k with normalized relevance scores +- [ ] Re-indexing is idempotent (content_hash change detection) +- [ ] Unique constraint prevents duplicate chunks +- [ ] HNSW index provides sub-100ms search latency +- [ ] Integration tests with real embeddings (mocked in unit tests) +- [ ] Structured logging for all index/retrieve operations + +--- + +## CROSS-MODULE INTEGRATION + +| Direction | Module | Integration Point | +|-----------|--------|-------------------| +| **→ Agentic Layer** | INITIAL-10 | Provides `retrieve_context` tool for RAG Assistant agent | +| **→ Dashboard** | INITIAL-11 | Sources list displayed in Admin panel | +| **← Registry** | Phase 6 | Run reports indexed as knowledge sources | +| **← Jobs** | Phase 7 | Indexing operations tracked as jobs | + +--- + +## DOCUMENTATION LINKS + +- [pgvector GitHub](https://github.com/pgvector/pgvector) +- [pgvector Tutorial (DataCamp)](https://www.datacamp.com/tutorial/pgvector-tutorial) +- [OpenAI Embeddings Guide](https://platform.openai.com/docs/guides/embeddings) +- [OpenAI API Reference](https://platform.openai.com/docs/api-reference/embeddings) +- [Neon pgvector Docs](https://neon.com/docs/extensions/pgvector) +- [HNSW Algorithm Paper](https://arxiv.org/abs/1603.09320) + +--- + +## OTHER CONSIDERATIONS + +- **Evidence-Grounded**: Retrieval returns raw chunks only; no answer generation in this layer +- 
**Idempotency**: Content hash comparison prevents unnecessary re-embedding +- **Rate Limiting**: Respect OpenAI API rate limits during batch embedding +- **Cost Tracking**: Log token counts for embedding cost monitoring +- **Dimension Flexibility**: Support for other embedding models (e.g., 3072-dim text-embedding-3-large) diff --git a/PRPs/PRP-10-agentic-layer.md b/PRPs/PRP-10-agentic-layer.md new file mode 100644 index 00000000..3c1a97ea --- /dev/null +++ b/PRPs/PRP-10-agentic-layer.md @@ -0,0 +1,920 @@ +# PRP-10: Agentic Layer ("The Brain") + +**Feature**: INITIAL-10.md — Agentic Layer +**Status**: Ready for Implementation +**Confidence Score**: 7.5/10 + +--- + +## Goal + +Build the Agentic Layer using PydanticAI providing: +1. **Experiment Orchestrator Agent** - Autonomous model experimentation workflow +2. **RAG Assistant Agent** - Evidence-grounded Q&A with citations +3. **Human-in-the-Loop Approval** - Blocking sensitive actions until approved +4. **WebSocket Streaming** - Real-time token delivery to clients +5. **Session Management** - Persistent state across multi-turn conversations + +This is the "Brain" layer that orchestrates tools from INITIAL-9 (RAG), Phase 5 (Backtesting), and Phase 6 (Registry). 
+ +--- + +## Why + +- **Autonomous Experimentation**: Agent runs backtests, compares results, deploys winners +- **Evidence-Grounded Answers**: RAG-powered Q&A prevents hallucination +- **Safety Controls**: Human approval for deployment actions +- **Real-Time UX**: Streaming responses for responsive chat interface +- **Portfolio Value**: Demonstrates modern AI agent architecture + +--- + +## What + +### Endpoints + +| Method | Path | Description | +| ------ | ---- | ----------- | +| `POST` | `/agents/experiment/run` | Execute experiment workflow | +| `POST` | `/agents/experiment/approve` | Approve pending action | +| `POST` | `/agents/rag/query` | Query with answer generation | +| `GET` | `/agents/status/{session_id}` | Check session status | +| `WS` | `/agents/stream` | WebSocket for streaming | + +### Success Criteria + +- [ ] Agents produce schema-enforced structured outputs +- [ ] Tool calls logged with correlation IDs and timing +- [ ] Human-in-the-loop blocks sensitive actions +- [ ] WebSocket streaming delivers tokens in real-time +- [ ] Session state persists across requests +- [ ] Graceful LLM API failure handling with retries +- [ ] 60+ unit tests with mocked LLM responses +- [ ] 15+ integration tests (rate-limited real LLM calls) +- [ ] All validation gates green + +--- + +## All Needed Context + +### Documentation & References + +```yaml +# CRITICAL - PydanticAI Documentation +- url: https://ai.pydantic.dev/ + why: "Official PydanticAI docs - main reference" + +- url: https://ai.pydantic.dev/agents/ + why: "Agent constructor, output_type, system_prompt, run/run_stream methods" + +- url: https://ai.pydantic.dev/tools/ + why: "@agent.tool decorator, RunContext, deps_type, tool parameters" + +- url: https://ai.pydantic.dev/output/ + why: "AgentRunResult, StreamedRunResult, token usage tracking" + +- url: https://ai.pydantic.dev/examples/chat-app/ + why: "FastAPI + streaming integration example" + +- url: https://github.com/pydantic/pydantic-ai + why: 
"Source code for edge cases" + +# Anthropic API (fallback reference) +- url: https://docs.anthropic.com/en/api + why: "Claude model IDs, rate limits, error codes" + +# Codebase Patterns (CRITICAL) +- file: app/features/registry/service.py + why: "Service pattern - __init__, get_settings(), structured logging" + +- file: app/features/jobs/service.py + why: "Job execution pattern - state machine, error handling, audit trail" + +- file: app/features/backtesting/service.py + why: "BacktestingService - the agent will call this via tools" + +- file: app/features/registry/routes.py + why: "Route patterns - APIRouter, response_model, HTTPException" + +- file: app/features/registry/tests/conftest.py + why: "Test fixtures - db_session, client, async patterns" + +# RAG Integration (INITIAL-9 dependency) +- file: PRPs/PRP-9-rag-knowledge-base.md + why: "RAG layer the agent will consume via retrieve_context tool" +``` + +### Current Codebase Tree (Relevant Parts) + +```text +app/ +├── core/ +│ ├── config.py # Settings - ADD agent settings +│ ├── database.py # get_db dependency +│ ├── logging.py # get_logger +│ └── exceptions.py # ForecastLabError base +├── features/ +│ ├── backtesting/ # Agent tool: run_backtest +│ ├── registry/ # Agent tools: list_runs, compare_runs, create_alias +│ ├── forecasting/ # Agent tool: list_models +│ ├── rag/ # INITIAL-9 - Agent tool: retrieve_context +│ └── agents/ # NEW: Create this vertical slice +├── main.py # Include agents router + WebSocket +``` + +### Desired Codebase Tree (Files to Create) + +```text +app/features/agents/ +├── __init__.py # Export router +├── models.py # AgentSession ORM model +├── schemas.py # Request/Response Pydantic schemas +├── routes.py # REST endpoints +├── websocket.py # WebSocket endpoint handler +├── service.py # AgentService orchestration +├── agents/ +│ ├── __init__.py +│ ├── base.py # Base agent configuration +│ ├── experiment.py # Experiment Orchestrator Agent +│ └── rag_assistant.py # RAG Assistant Agent +├── 
tools/ +│ ├── __init__.py +│ ├── registry_tools.py # list_runs, compare_runs, create_alias +│ ├── backtesting_tools.py # run_backtest +│ ├── forecasting_tools.py # list_models +│ └── rag_tools.py # retrieve_context, format_citation +├── deps.py # AgentDeps dataclass for dependency injection +├── tests/ +│ ├── __init__.py +│ ├── conftest.py # Fixtures with mocked LLM +│ ├── test_schemas.py +│ ├── test_tools.py +│ ├── test_agents.py +│ ├── test_service.py +│ └── test_routes.py + +alembic/versions/ +└── xxxx_create_agent_sessions_table.py + +examples/agents/ +├── experiment_demo.py +├── rag_query.http +└── websocket_client.py +``` + +### Known Gotchas & Library Quirks + +```python +# CRITICAL: PydanticAI model identifier format +# Use "anthropic:claude-sonnet-4-20250514" NOT "claude-sonnet-4-20250514" +agent = Agent(model="anthropic:claude-sonnet-4-20250514") + +# CRITICAL: deps_type must match RunContext generic parameter +agent = Agent( + model="anthropic:claude-sonnet-4-20250514", + deps_type=AgentDeps, # Your dependency dataclass +) + +@agent.tool +def my_tool(ctx: RunContext[AgentDeps], param: str) -> str: + # ctx.deps is typed as AgentDeps + db = ctx.deps.db + ... 
+ +# CRITICAL: Use @agent.tool for context access, @agent.tool_plain without +@agent.tool_plain +def roll_dice() -> str: + """No RunContext needed here.""" + return str(random.randint(1, 6)) + +# CRITICAL: output_type (not result_type) for structured outputs +agent = Agent( + model="...", + output_type=ExperimentReport, # NOT result_type +) + +# CRITICAL: run() is async, run_sync() is sync wrapper +result = await agent.run(prompt, deps=deps) # Async +result = agent.run_sync(prompt, deps=deps) # Sync + +# CRITICAL: Streaming requires async context manager +async with agent.run_stream(prompt, deps=deps) as result: + async for text in result.stream_text(): + yield text + +# CRITICAL: Access token usage after run completes +print(result.usage()) # RunUsage(input_tokens=X, output_tokens=Y) + +# CRITICAL: Message history for multi-turn +result2 = await agent.run( + "follow-up question", + deps=deps, + message_history=result.all_messages(), # Previous messages +) + +# CRITICAL: Tool docstrings become schema descriptions +@agent.tool +async def run_backtest( + ctx: RunContext[AgentDeps], + model_type: str, + config: dict[str, Any], +) -> BacktestResult: + """Run a backtest for a model configuration. + + Use this to evaluate model performance with time-series CV. + Returns per-fold and aggregated metrics (MAE, sMAPE, WAPE). + + Args: + model_type: Type of model (naive, seasonal_naive, moving_average) + config: Model-specific configuration + """ + ... 
+ +# CRITICAL: FastAPI WebSocket pattern +from fastapi import WebSocket, WebSocketDisconnect + +@router.websocket("/agents/stream") +async def websocket_stream(websocket: WebSocket): + await websocket.accept() + try: + while True: + data = await websocket.receive_json() + # Process and stream response + async for chunk in stream_agent_response(data): + await websocket.send_json(chunk) + except WebSocketDisconnect: + pass + +# CRITICAL: PydanticAI retry mechanism +from pydantic_ai import ModelRetry + +@agent.tool +async def risky_tool(ctx: RunContext[AgentDeps]) -> str: + try: + return await external_api() + except APIError as e: + raise ModelRetry(f"API failed: {e}. Please try again.") from e +``` + +--- + +## Implementation Blueprint + +### Data Models + +#### ORM Model (models.py) + +```python +"""Agent session persistence.""" +from __future__ import annotations +from datetime import datetime +from enum import Enum +from typing import Any +from sqlalchemy import DateTime, Integer, String, Text +from sqlalchemy.dialects.postgresql import JSONB +from sqlalchemy.orm import Mapped, mapped_column +from app.core.database import Base +from app.shared.models import TimestampMixin + + +class SessionStatus(str, Enum): + """Agent session states.""" + ACTIVE = "active" + AWAITING_APPROVAL = "awaiting_approval" + COMPLETED = "completed" + EXPIRED = "expired" + FAILED = "failed" + + +class AgentType(str, Enum): + """Available agent types.""" + EXPERIMENT_ORCHESTRATOR = "experiment_orchestrator" + RAG_ASSISTANT = "rag_assistant" + + +class AgentSession(TimestampMixin, Base): + """Persistent agent session for multi-turn conversations.""" + __tablename__ = "agent_session" + + id: Mapped[int] = mapped_column(Integer, primary_key=True) + session_id: Mapped[str] = mapped_column(String(32), unique=True, index=True) + agent_type: Mapped[str] = mapped_column(String(50), index=True) + status: Mapped[str] = mapped_column(String(30), default=SessionStatus.ACTIVE.value) + + # Message 
history for multi-turn + message_history: Mapped[list[dict[str, Any]]] = mapped_column(JSONB, default=list) + + # Pending approval + pending_action: Mapped[dict[str, Any] | None] = mapped_column(JSONB, nullable=True) + + # Usage tracking + total_tokens_used: Mapped[int] = mapped_column(Integer, default=0) + tool_calls_count: Mapped[int] = mapped_column(Integer, default=0) + + # Timing + last_activity: Mapped[datetime] = mapped_column(DateTime(timezone=True)) + expires_at: Mapped[datetime] = mapped_column(DateTime(timezone=True)) +``` + +#### Dependencies (deps.py) + +```python +"""Agent dependencies for tool access.""" +from dataclasses import dataclass +from sqlalchemy.ext.asyncio import AsyncSession + + +@dataclass +class AgentDeps: + """Dependencies passed to agent tools via RunContext.""" + db: AsyncSession + session_id: str + request_id: str | None = None +``` + +#### Pydantic Schemas (schemas.py) + +```python +"""Agent API schemas.""" +from datetime import datetime +from typing import Any, Literal +from pydantic import BaseModel, ConfigDict, Field + + +# === Experiment Agent === + +class ExperimentConstraints(BaseModel): + """Constraints for experiment search.""" + model_config = ConfigDict(extra="forbid") + + model_types: list[str] = Field(default_factory=lambda: ["naive", "seasonal_naive"]) + min_train_size: int = Field(default=60, ge=30) + max_splits: int = Field(default=5, ge=1, le=20) + + +class ExperimentRequest(BaseModel): + """Request to run experiment workflow.""" + model_config = ConfigDict(extra="forbid") + + objective: str = Field(..., min_length=10, max_length=500) + store_id: int = Field(..., ge=1) + product_id: int = Field(..., ge=1) + constraints: ExperimentConstraints = Field(default_factory=ExperimentConstraints) + auto_deploy: bool = False + session_id: str | None = None + + +class RunSummary(BaseModel): + """Summary of a model run.""" + run_id: str + model_type: str + config: dict[str, Any] + metrics: dict[str, float] + + +class 
BaselineComparison(BaseModel): + """Comparison against baseline models.""" + vs_naive: dict[str, float] | None = None + vs_seasonal_naive: dict[str, float] | None = None + + +class ExperimentReport(BaseModel): + """Structured output from Experiment Agent.""" + objective: str + methodology: str + experiments_run: int + best_run: RunSummary | None + baseline_comparison: BaselineComparison | None + recommendation: str + approval_required: bool + pending_action: str | None = None + + +class ToolCallSummary(BaseModel): + """Summary of a tool call.""" + tool: str + args: dict[str, Any] + result_summary: str + duration_ms: float + + +class ExperimentResponse(BaseModel): + """Response from experiment workflow.""" + session_id: str + status: Literal["completed", "awaiting_approval", "failed"] + report: ExperimentReport | None = None + tool_calls: list[ToolCallSummary] = Field(default_factory=list) + tokens_used: int = 0 + duration_ms: float = 0 + + +# === Approval === + +class ApprovalRequest(BaseModel): + """Request to approve/reject pending action.""" + model_config = ConfigDict(extra="forbid") + + session_id: str + action: str + approved: bool + comment: str | None = Field(None, max_length=500) + + +class ApprovalResponse(BaseModel): + """Response from approval action.""" + session_id: str + action: str + status: Literal["executed", "rejected"] + result: dict[str, Any] | None = None + + +# === RAG Agent === + +class RAGQueryRequest(BaseModel): + """Request for RAG-powered Q&A.""" + model_config = ConfigDict(extra="forbid") + + query: str = Field(..., min_length=5, max_length=2000) + session_id: str | None = None + include_sources: bool = True + + +class Citation(BaseModel): + """Citation from RAG retrieval.""" + source_type: str + source_path: str + chunk_id: str + snippet: str + relevance_score: float + + +class RAGQueryResponse(BaseModel): + """Response from RAG query.""" + session_id: str + answer: str + confidence: float = Field(..., ge=0.0, le=1.0) + citations: 
list[Citation] = Field(default_factory=list) + insufficient_context: bool = False + tokens_used: int = 0 + duration_ms: float = 0 + + +# === Session Status === + +class SessionStatusResponse(BaseModel): + """Session status details.""" + session_id: str + agent_type: str + status: str + created_at: datetime + last_activity: datetime + pending_action: dict[str, Any] | None = None + tool_calls_count: int + tokens_used: int + + +# === WebSocket Messages === + +class WSMessage(BaseModel): + """WebSocket message from client.""" + type: Literal["query", "approve", "cancel"] + agent: Literal["rag_assistant", "experiment_orchestrator"] + payload: dict[str, Any] + + +class WSEvent(BaseModel): + """WebSocket event to client.""" + type: Literal["token", "tool_call", "complete", "error"] + content: str | None = None + tool: str | None = None + status: str | None = None + summary: str | None = None + session_id: str | None = None + tokens_used: int | None = None +``` + +--- + +## Task List + +### Task 1: Add Dependencies to pyproject.toml + +```yaml +MODIFY: pyproject.toml +ADD to dependencies: + - "pydantic-ai>=0.1.0" # PydanticAI agent framework + - "anthropic>=0.40.0" # Anthropic SDK for Claude + - "websockets>=13.0" # WebSocket support (already in uvicorn[standard]) +``` + +### Task 2: Add Agent Settings to config.py + +```yaml +MODIFY: app/core/config.py +ADD after RAG settings: + + # Agent LLM Configuration + agent_default_model: str = "anthropic:claude-sonnet-4-20250514" + agent_fallback_model: str = "openai:gpt-4o" + agent_temperature: float = 0.1 + agent_max_tokens: int = 4096 + anthropic_api_key: str = "" # Required + + # Agent Execution Configuration + agent_max_tool_calls: int = 10 + agent_timeout_seconds: int = 120 + agent_retry_attempts: int = 3 + agent_retry_delay_seconds: float = 1.0 + + # Human-in-the-Loop Configuration + agent_require_approval: list[str] = ["create_alias", "archive_run"] + agent_approval_timeout_minutes: int = 60 + + # Session Configuration + 
agent_session_ttl_minutes: int = 120 + agent_max_sessions_per_user: int = 5 + + # Streaming Configuration + agent_enable_streaming: bool = True +``` + +### Task 3: Create Alembic Migration + +```yaml +CREATE: alembic/versions/xxxx_create_agent_sessions_table.py +PATTERN: Follow existing migration patterns + +Key columns: + - session_id (String 32, unique, indexed) + - agent_type (String 50, indexed) + - status (String 30) + - message_history (JSONB) + - pending_action (JSONB, nullable) + - total_tokens_used (Integer) + - tool_calls_count (Integer) + - last_activity (DateTime TZ) + - expires_at (DateTime TZ) + - created_at, updated_at (TimestampMixin) +``` + +### Task 4: Create ORM Models + +```yaml +CREATE: app/features/agents/models.py +MIRROR: app/features/registry/models.py pattern +INCLUDE: + - SessionStatus enum + - AgentType enum + - AgentSession model with JSONB columns +``` + +### Task 5: Create Dependencies Dataclass + +```yaml +CREATE: app/features/agents/deps.py +CONTENT: + - AgentDeps dataclass + - Fields: db (AsyncSession), session_id, request_id +``` + +### Task 6: Create Pydantic Schemas + +```yaml +CREATE: app/features/agents/schemas.py +MIRROR: app/features/registry/schemas.py pattern +INCLUDE: + - ExperimentRequest, ExperimentResponse, ExperimentReport + - ApprovalRequest, ApprovalResponse + - RAGQueryRequest, RAGQueryResponse, Citation + - SessionStatusResponse + - WSMessage, WSEvent +``` + +### Task 7: Create Tool Modules + +```yaml +CREATE: app/features/agents/tools/registry_tools.py +TOOLS: + - list_runs(ctx, filters) -> list[RunSummary] + - compare_runs(ctx, run_id_a, run_id_b) -> CompareResult + - create_alias(ctx, alias_name, run_id) -> AliasResult + - archive_run(ctx, run_id) -> ArchiveResult + +CREATE: app/features/agents/tools/backtesting_tools.py +TOOLS: + - run_backtest(ctx, model_type, config, store_id, product_id, n_splits) -> BacktestResult + +CREATE: app/features/agents/tools/forecasting_tools.py +TOOLS: + - list_models(ctx) -> 
list[ModelInfo] + +CREATE: app/features/agents/tools/rag_tools.py +TOOLS: + - retrieve_context(ctx, query, top_k) -> list[RetrievedChunk] + - format_citation(ctx, chunk) -> Citation + +CRITICAL for all tools: + - Use @agent.tool decorator (not @agent.tool_plain) for db access + - First param is RunContext[AgentDeps] + - Detailed docstrings for LLM schema + - Structured logging with timing +``` + +### Task 8: Create Agent Definitions + +```yaml +CREATE: app/features/agents/agents/base.py +CONTENT: + - get_agent_settings() helper + - Common model configuration + +CREATE: app/features/agents/agents/experiment.py +CONTENT: + - ExperimentReport output schema + - experiment_agent = Agent(...) + - System prompt for experiment orchestration + - Tools: list_models, run_backtest, compare_runs, create_alias + +CREATE: app/features/agents/agents/rag_assistant.py +CONTENT: + - RAGResponse output schema + - rag_agent = Agent(...) + - System prompt for evidence-grounded answers + - Tools: retrieve_context, format_citation +``` + +### Task 9: Create Agent Service + +```yaml +CREATE: app/features/agents/service.py +MIRROR: app/features/jobs/service.py pattern + +Class AgentService: + async def run_experiment(self, db, request) -> ExperimentResponse: + - Create/resume session + - Build AgentDeps + - Run experiment_agent with tools + - Capture tool calls and timing + - Handle approval_required check + - Update session state + - Return structured response + + async def run_rag_query(self, db, request) -> RAGQueryResponse: + - Create/resume session + - Run rag_agent with tools + - Extract citations from tool results + - Return structured response + + async def approve_action(self, db, request) -> ApprovalResponse: + - Load session + - Validate pending_action matches + - Execute action if approved + - Update session status + - Return result + + async def get_session_status(self, db, session_id) -> SessionStatusResponse: + - Load session + - Return status details + + async def 
stream_response(self, db, message) -> AsyncGenerator[WSEvent]: + - Route to appropriate agent + - Use run_stream for token-by-token delivery + - Yield WSEvent for each chunk +``` + +### Task 10: Create REST Routes + +```yaml +CREATE: app/features/agents/routes.py +MIRROR: app/features/registry/routes.py pattern + +Routes: + POST /agents/experiment/run -> ExperimentResponse + POST /agents/experiment/approve -> ApprovalResponse + POST /agents/rag/query -> RAGQueryResponse + GET /agents/status/{session_id} -> SessionStatusResponse + +CRITICAL: + - Structured logging with agents.* prefix + - Handle LLM API errors gracefully + - Timeout handling +``` + +### Task 11: Create WebSocket Handler + +```yaml +CREATE: app/features/agents/websocket.py +PATTERN: FastAPI WebSocket with async iteration + +Key functions: + websocket_stream(websocket: WebSocket): + - Accept connection + - Receive JSON messages + - Parse WSMessage + - Call service.stream_response() + - Send WSEvent for each chunk + - Handle disconnect gracefully + +CRITICAL: + - Use asyncio.wait_for for timeout + - Catch WebSocketDisconnect + - Log all events with correlation ID +``` + +### Task 12: Register Router in main.py + +```yaml +MODIFY: app/main.py +ADD import: from app.features.agents.routes import router as agents_router +ADD import: from app.features.agents.websocket import websocket_stream +ADD router: app.include_router(agents_router) +ADD websocket: app.add_api_websocket_route("/agents/stream", websocket_stream) +``` + +### Task 13: Create Test Fixtures + +```yaml +CREATE: app/features/agents/tests/conftest.py +FIXTURES: + - db_session: Async session with cleanup + - client: AsyncClient with db override + - mock_anthropic: Mock Anthropic API responses + - sample_experiment_request: Test request + - sample_rag_request: Test request +``` + +### Task 14: Create Unit Tests + +```yaml +CREATE: app/features/agents/tests/test_schemas.py + - Test all request/response validation + +CREATE: 
app/features/agents/tests/test_tools.py + - Test each tool function with mocked deps + - Test tool return types + - Test error handling + +CREATE: app/features/agents/tests/test_agents.py + - Test agent with mocked LLM + - Test structured output parsing + - Test tool call ordering +``` + +### Task 15: Create Integration Tests + +```yaml +CREATE: app/features/agents/tests/test_routes.py +@pytest.mark.integration: + - test_experiment_run_creates_session + - test_experiment_approval_workflow + - test_rag_query_returns_citations + - test_session_status_returns_details + - test_websocket_streaming (with TestClient) +``` + +### Task 16: Create Examples + +```yaml +CREATE: examples/agents/experiment_demo.py + - Full experiment workflow demo + +CREATE: examples/agents/rag_query.http + - HTTP client examples + +CREATE: examples/agents/websocket_client.py + - Python WebSocket client example +``` + +### Task 17: Update .env.example + +```yaml +MODIFY: .env.example +ADD: + # Agent Configuration + ANTHROPIC_API_KEY=sk-ant-... 
+ AGENT_DEFAULT_MODEL=anthropic:claude-sonnet-4-20250514 + AGENT_MAX_TOOL_CALLS=10 + AGENT_TIMEOUT_SECONDS=120 +``` + +--- + +## Validation Loop + +### Level 1: Syntax & Style + +```bash +# Run FIRST +uv run ruff check app/features/agents/ --fix +uv run ruff format app/features/agents/ + +# Expected: No errors +``` + +### Level 2: Type Checking + +```bash +# MUST be green +uv run mypy app/features/agents/ +uv run pyright app/features/agents/ + +# Expected: 0 errors +``` + +### Level 3: Unit Tests + +```bash +# No LLM calls required (mocked) +uv run pytest app/features/agents/tests/ -v -m "not integration" + +# Expected: All pass +``` + +### Level 4: Integration Tests + +```bash +# Requires PostgreSQL + API keys +docker-compose up -d +uv run alembic upgrade head +uv run pytest app/features/agents/tests/ -v -m integration + +# Expected: All pass (rate-limited) +``` + +### Level 5: Manual Smoke Test + +```bash +# Start API +uv run uvicorn app.main:app --reload --port 8123 + +# RAG Query +curl -X POST http://localhost:8123/agents/rag/query \ + -H "Content-Type: application/json" \ + -d '{"query": "How does backtesting prevent data leakage?"}' + +# Expected: {"session_id": "...", "answer": "...", "citations": [...]} + +# Experiment (requires indexed RAG data) +curl -X POST http://localhost:8123/agents/experiment/run \ + -H "Content-Type: application/json" \ + -d '{ + "objective": "Find best model for store 1, product 1", + "store_id": 1, + "product_id": 1 + }' + +# Expected: {"session_id": "...", "status": "completed", "report": {...}} + +# WebSocket test +python examples/agents/websocket_client.py +``` + +--- + +## Final Validation Checklist + +- [ ] All tests pass: `uv run pytest app/features/agents/tests/ -v` +- [ ] No linting errors: `uv run ruff check app/features/agents/` +- [ ] No type errors: `uv run mypy app/features/agents/ && uv run pyright app/features/agents/` +- [ ] Migration applies: `uv run alembic upgrade head` +- [ ] Manual smoke tests pass +- [ ] Structured logging with `agents.*` prefix +- [ ] Tool 
calls logged with timing +- [ ] Session state persists across requests +- [ ] Approval workflow blocks sensitive actions +- [ ] WebSocket streaming works + +--- + +## Anti-Patterns to Avoid + +- ❌ Don't use `result_type` - use `output_type` in PydanticAI +- ❌ Don't forget `deps_type` when using `RunContext[AgentDeps]` +- ❌ Don't use `@agent.tool_plain` when db access needed +- ❌ Don't forget to handle `WebSocketDisconnect` +- ❌ Don't block on LLM calls without timeout +- ❌ Don't store raw message_history as strings - use JSONB +- ❌ Don't skip structured logging for tool calls +- ❌ Don't hardcode model names - use settings + +--- + +## Confidence Score: 7.5/10 + +**Strengths:** +- PydanticAI has excellent documentation +- Clear FastAPI integration patterns +- Existing service patterns to follow +- Tool integrations with existing modules + +**Risks:** +- PydanticAI is relatively new (versioning may change) +- WebSocket streaming with tools is complex +- LLM rate limits may affect tests +- Message history serialization edge cases + +**Mitigations:** +- Pin PydanticAI version in pyproject.toml +- Comprehensive mocking for unit tests +- Rate-limited integration tests +- JSONB for flexible message storage diff --git a/PRPs/PRP-11-forecastlab-dashboard.md b/PRPs/PRP-11-forecastlab-dashboard.md new file mode 100644 index 00000000..fd98f21f --- /dev/null +++ b/PRPs/PRP-11-forecastlab-dashboard.md @@ -0,0 +1,2147 @@ +# PRP-11: ForecastLab Dashboard ("The Face") + +**Feature**: INITIAL-11.md — ForecastLab Dashboard +**Status**: Ready for Implementation +**Confidence Score**: 7.5/10 + +--- + +## Goal + +Build the ForecastLab Dashboard providing: +1. **Data Explorer** with server-side pagination, sorting, and filtering using TanStack Table +2. **Time Series Visualization** for forecasts and backtest results using Recharts +3. **Agent Chat Interface** with WebSocket streaming (depends on INITIAL-10 completion) +4. 
**Admin Panel** for RAG sources and deployment alias management + +This is the "Face" layer that consumes the backend API (Phases 1-10) and provides a user-friendly interface. + +--- + +## Why + +- **User Experience**: No CLI required for data exploration and visualization +- **Agent Interaction**: Chat interface for RAG queries and experiment orchestration +- **Portfolio Value**: Demonstrates full-stack React 19 + FastAPI integration +- **Operational**: Admin panel for system management without API calls + +--- + +## What + +### Page Structure + +| Route | Purpose | Backend Endpoints | +|-------|---------|-------------------| +| `/dashboard` | KPI summary cards | `GET /analytics/kpis` | +| `/explorer/sales` | Sales data table | `GET /analytics/drilldowns` | +| `/explorer/stores` | Store dimension table | `GET /dimensions/stores` | +| `/explorer/products` | Product dimension table | `GET /dimensions/products` | +| `/explorer/runs` | Model run leaderboard | `GET /registry/runs` | +| `/explorer/jobs` | Job monitor | `GET /jobs` | +| `/visualize/forecast` | Forecast chart | (via job results) | +| `/visualize/backtest` | Backtest fold visualization | (via job results) | +| `/chat` | Agent chat interface | `WS /agents/stream` | +| `/admin` | RAG sources + aliases | `GET /rag/sources`, `GET /registry/aliases` | + +### Success Criteria + +- [ ] Vite + React 19 project scaffolded with TypeScript strict mode +- [ ] shadcn/ui components installed and configured (Table, Card, Button, Dialog, etc.) 
+- [ ] TanStack Table with server-side pagination/sorting/filtering +- [ ] TanStack Query for all API calls with proper caching +- [ ] Recharts time series chart with actual/predicted lines +- [ ] WebSocket hook for agent chat streaming +- [ ] Dark/light theme toggle via shadcn/ui +- [ ] Responsive design (mobile-friendly) +- [ ] Error boundaries with retry functionality +- [ ] Lighthouse performance score > 90 +- [ ] All TypeScript strict checks pass + +--- + +## All Needed Context + +### Documentation & References + +```yaml +# React 19 + Vite Setup +- url: https://vite.dev/guide/ + why: "Vite project scaffolding, environment variables (import.meta.env)" + section: "Getting Started, Env Variables" + +- url: https://react.dev/ + why: "React 19 hooks (use(), useState, useEffect), Suspense, ErrorBoundary" + +# shadcn/ui (CRITICAL - Primary Component Library) +- url: https://ui.shadcn.com/docs/installation/vite + why: "Vite + React installation steps, tailwind.config.js setup" + critical: "Must use 'npx shadcn@latest init' NOT 'shadcn-ui'" + +- url: https://ui.shadcn.com/docs/components/data-table + why: "TanStack Table integration pattern - the core pattern for all data tables" + critical: "Uses @tanstack/react-table, manualPagination=true for server-side" + +- url: https://ui.shadcn.com/docs/components/table + why: "Base Table component used by Data Table" + +- url: https://ui.shadcn.com/docs/dark-mode/vite + why: "Dark mode setup with ThemeProvider" + +# TanStack Table (Server-Side Pattern) +- url: https://tanstack.com/table/latest/docs/guide/pagination + why: "Server-side pagination with manualPagination=true" + critical: "pageCount must be passed, onPaginationChange callback" + +- url: https://tanstack.com/table/latest/docs/guide/sorting + why: "Server-side sorting with manualSorting=true" + +- url: https://tanstack.com/table/latest/docs/guide/column-filtering + why: "Server-side filtering with manualFiltering=true" + +# TanStack Query (Data Fetching) +- url: 
https://tanstack.com/query/latest/docs/framework/react/guides/queries + why: "useQuery pattern with queryKey and queryFn" + +- url: https://tanstack.com/query/latest/docs/framework/react/guides/paginated-queries + why: "keepPreviousData for smooth pagination transitions" + +- url: https://tanstack.com/query/latest/docs/framework/react/guides/mutations + why: "useMutation for POST/DELETE/PATCH operations" + +# Recharts +- url: https://recharts.org/en-US/api/LineChart + why: "Time series visualization with LineChart, XAxis, YAxis" + +- url: https://recharts.org/en-US/api/Tooltip + why: "Interactive tooltips" + +- url: https://recharts.org/en-US/examples/SimpleLineChart + why: "Basic example to follow" + +# Tailwind CSS 4 +- url: https://tailwindcss.com/docs/installation/using-vite + why: "Tailwind 4 setup with Vite" + +# WebSocket (for Agent Chat) +- url: https://developer.mozilla.org/en-US/docs/Web/API/WebSocket + why: "Native WebSocket API - useWebSocket custom hook pattern" +``` + +### Backend API Contract Summary + +```typescript +// ALL LIST ENDPOINTS USE THIS PAGINATION PATTERN: +// Query params: page (1-indexed), page_size (max 100) +// Response: { items[], total, page, page_size } + +// Dimensions +GET /dimensions/stores?page=1&page_size=20&region=&store_type=&search= +// Response: StoreListResponse { stores[], total, page, page_size } + +GET /dimensions/products?page=1&page_size=20&category=&brand=&search= +// Response: ProductListResponse { products[], total, page, page_size } + +// Analytics +GET /analytics/kpis?start_date=&end_date=&store_id=&product_id=&category= +// Response: KPIResponse { metrics: KPIMetrics, start_date, end_date, ... } + +GET /analytics/drilldowns?dimension=store&start_date=&end_date=&max_items=20 +// Response: DrilldownResponse { dimension, items[], total_items, ...
} + +// Registry +GET /registry/runs?page=1&page_size=20&model_type=&status=&store_id=&product_id= +// Response: RunListResponse { runs[], total, page, page_size } + +GET /registry/compare/{run_id_a}/{run_id_b} +// Response: RunCompareResponse { run_a, run_b, config_diff, metrics_diff } + +POST /registry/aliases +// Body: { alias_name, run_id, description } +// Response: AliasResponse + +GET /registry/aliases +// Response: AliasResponse[] + +// Jobs +GET /jobs?page=1&page_size=20&job_type=&status= +// Response: JobListResponse { jobs[], total, page, page_size } + +POST /jobs +// Body: { job_type: 'train'|'predict'|'backtest', params: {...} } +// Response: JobResponse (202 ACCEPTED) + +DELETE /jobs/{job_id} +// Response: 204 NO CONTENT (only for pending jobs) + +// Error Responses (RFC 7807) +// All errors return: { type, title, status, detail, instance, errors?, code, request_id } +``` + +### Current Codebase Tree + +``` +. +├── alembic/ +├── app/ +│ ├── core/ +│ ├── features/ +│ │ ├── analytics/ # GET /analytics/kpis, /drilldowns +│ │ ├── backtesting/ # POST /backtesting/run +│ │ ├── dimensions/ # GET /dimensions/stores, /products +│ │ ├── forecasting/ # POST /forecasting/train, /predict +│ │ ├── jobs/ # POST/GET/DELETE /jobs +│ │ └── registry/ # CRUD /registry/runs, /aliases +│ └── main.py +├── docs/ +├── examples/ +├── PRPs/ +├── docker-compose.yml +├── pyproject.toml +└── README.md +``` + +### Desired Codebase Tree (Files to Create) + +``` +frontend/ # NEW: React 19 + Vite project +├── public/ +│ └── favicon.ico +├── src/ +│ ├── components/ +│ │ ├── ui/ # shadcn/ui components (auto-generated) +│ │ │ ├── button.tsx +│ │ │ ├── card.tsx +│ │ │ ├── dialog.tsx +│ │ │ ├── dropdown-menu.tsx +│ │ │ ├── input.tsx +│ │ │ ├── label.tsx +│ │ │ ├── select.tsx +│ │ │ ├── skeleton.tsx +│ │ │ ├── table.tsx +│ │ │ └── toast.tsx +│ │ ├── data-table/ # Reusable data table components +│ │ │ ├── data-table.tsx # Main DataTable component +│ │ │ ├── data-table-pagination.tsx +│ │ │ 
├── data-table-column-header.tsx +│ │ │ └── data-table-toolbar.tsx +│ │ ├── charts/ +│ │ │ ├── time-series-chart.tsx +│ │ │ ├── kpi-card.tsx +│ │ │ └── metric-bar-chart.tsx +│ │ ├── chat/ # Agent chat (Phase 2 - after INITIAL-10) +│ │ │ ├── chat-message.tsx +│ │ │ ├── chat-input.tsx +│ │ │ └── citation-list.tsx +│ │ ├── layout/ +│ │ │ ├── app-layout.tsx # Main layout with sidebar +│ │ │ ├── sidebar.tsx +│ │ │ ├── header.tsx +│ │ │ └── theme-toggle.tsx +│ │ └── error-boundary.tsx +│ ├── hooks/ +│ │ ├── use-stores.ts # TanStack Query hooks for /dimensions/stores +│ │ ├── use-products.ts # TanStack Query hooks for /dimensions/products +│ │ ├── use-kpis.ts # TanStack Query hook for /analytics/kpis +│ │ ├── use-drilldowns.ts # TanStack Query hook for /analytics/drilldowns +│ │ ├── use-runs.ts # TanStack Query hooks for /registry/runs +│ │ ├── use-aliases.ts # TanStack Query hooks for /registry/aliases +│ │ ├── use-jobs.ts # TanStack Query hooks for /jobs +│ │ └── use-websocket.ts # WebSocket hook for agent streaming +│ ├── lib/ +│ │ ├── api.ts # Axios/fetch client with base URL +│ │ ├── query-client.ts # TanStack Query client config +│ │ └── utils.ts # cn() for class merging (shadcn pattern) +│ ├── pages/ +│ │ ├── dashboard.tsx +│ │ ├── explorer/ +│ │ │ ├── sales.tsx +│ │ │ ├── stores.tsx +│ │ │ ├── products.tsx +│ │ │ ├── runs.tsx +│ │ │ └── jobs.tsx +│ │ ├── visualize/ +│ │ │ ├── forecast.tsx +│ │ │ └── backtest.tsx +│ │ ├── chat.tsx # Phase 2 - after INITIAL-10 +│ │ └── admin.tsx +│ ├── types/ +│ │ ├── api.ts # TypeScript types matching backend schemas +│ │ └── index.ts +│ ├── App.tsx # Main app with router +│ ├── main.tsx # Entry point +│ └── index.css # Tailwind imports +├── .env.example # VITE_API_BASE_URL, VITE_WS_URL +├── .gitignore +├── components.json # shadcn/ui config +├── eslint.config.js +├── index.html +├── package.json +├── postcss.config.js +├── tailwind.config.ts +├── tsconfig.json +├── tsconfig.node.json +└── vite.config.ts + +examples/ui/ +└── 
README.md # Dashboard page map and setup instructions +``` + +### Known Gotchas & Library Quirks + +```typescript +// CRITICAL: shadcn/ui installation command +// Use: npx shadcn@latest init +// NOT: npx shadcn-ui init (deprecated) + +// CRITICAL: TanStack Table v8 breaking changes +// - useReactTable (NOT useTable) +// - getCoreRowModel() required +// - manualPagination, manualSorting, manualFiltering for server-side + +// CRITICAL: Vite environment variables +// - Must prefix with VITE_ (e.g., VITE_API_BASE_URL) +// - Access via import.meta.env.VITE_API_BASE_URL +// - NOT process.env (that's Node.js) + +// CRITICAL: TanStack Query v5 +// - queryKey is now an array: ['runs', params] +// - useQuery returns object with { data, isLoading, error } +// - placeholderData replaces keepPreviousData option + +// CRITICAL: Recharts responsive container +// - ResponsiveContainer requires explicit parent height +// - Use CSS: min-height: 400px on parent + +// CRITICAL: WebSocket reconnection +// - Browser WebSocket API has no auto-reconnect +// - Must implement exponential backoff manually + +// CRITICAL: shadcn/ui dark mode +// - Requires ThemeProvider wrapper +// - Uses localStorage for persistence +// - HTML class="dark" toggling + +// CRITICAL: Decimal handling from backend +// - Backend sends Decimal as string (e.g., "1234.56") +// - Parse with parseFloat() or use library like decimal.js +// - Format with Intl.NumberFormat for currency display +``` + +--- + +## Implementation Blueprint + +### Phase 1: Project Scaffolding (Tasks 1-5) + +#### Task 1: Initialize Vite + React 19 + TypeScript Project + +```bash +# Commands to run (in project root) +cd /path/to/ForecastLabAI +pnpm create vite@latest frontend -- --template react-ts +cd frontend +pnpm install +``` + +Configure TypeScript strict mode in `tsconfig.json`: +```json +{ + "compilerOptions": { + "strict": true, + "noUncheckedIndexedAccess": true, + "noImplicitReturns": true, + "strictNullChecks": true + } +} +``` + 
+#### Task 2: Install Tailwind CSS 4 + +```bash +pnpm add -D tailwindcss @tailwindcss/vite +``` + +Update `vite.config.ts`: +```typescript +import tailwindcss from '@tailwindcss/vite' + +export default defineConfig({ + plugins: [react(), tailwindcss()], +}) +``` + +Create `src/index.css`: +```css +@import "tailwindcss"; +``` + +#### Task 3: Initialize shadcn/ui + +```bash +npx shadcn@latest init +# Choose: +# - Style: Default +# - Base color: Neutral +# - CSS variables: Yes +``` + +Install required components: +```bash +npx shadcn@latest add button card dialog dropdown-menu input label select skeleton table toast +``` + +#### Task 4: Install TanStack Libraries + +```bash +pnpm add @tanstack/react-table @tanstack/react-query +``` + +Create `src/lib/query-client.ts`: +```typescript +import { QueryClient } from '@tanstack/react-query' + +export const queryClient = new QueryClient({ + defaultOptions: { + queries: { + staleTime: 5 * 60 * 1000, // 5 minutes + retry: 1, + refetchOnWindowFocus: false, + }, + }, +}) +``` + +#### Task 5: Install Recharts and React Router + +```bash +pnpm add recharts react-router-dom +``` + +--- + +### Phase 2: Core Infrastructure (Tasks 6-10) + +#### Task 6: Create API Client + +File: `src/lib/api.ts` + +```typescript +const API_BASE_URL = import.meta.env.VITE_API_BASE_URL || 'http://localhost:8123' + +interface RequestConfig { + method?: 'GET' | 'POST' | 'PATCH' | 'DELETE' + body?: unknown + params?: Record +} + +interface ProblemDetail { + type: string + title: string + status: number + detail: string + instance?: string + errors?: Array<{ field: string; message: string; type: string }> + code?: string + request_id?: string +} + +export class ApiError extends Error { + constructor( + message: string, + public status: number, + public detail?: ProblemDetail + ) { + super(message) + this.name = 'ApiError' + } +} + +export async function api(endpoint: string, config: RequestConfig = {}): Promise { + const { method = 'GET', body, params } = 
config + + const url = new URL(`${API_BASE_URL}${endpoint}`) + if (params) { + Object.entries(params).forEach(([key, value]) => { + if (value !== undefined) { + url.searchParams.set(key, String(value)) + } + }) + } + + const response = await fetch(url.toString(), { + method, + headers: { + 'Content-Type': 'application/json', + }, + body: body ? JSON.stringify(body) : undefined, + }) + + if (!response.ok) { + const detail = await response.json() as ProblemDetail + throw new ApiError(detail.detail || response.statusText, response.status, detail) + } + + return response.json() as Promise +} +``` + +#### Task 7: Create TypeScript Types (Match Backend Schemas) + +File: `src/types/api.ts` + +```typescript +// Pagination +export interface PaginationParams { + page: number + page_size: number +} + +export interface PaginatedResponse { + total: number + page: number + page_size: number +} + +// Dimensions +export interface Store { + id: number + code: string + name: string + region: string | null + city: string | null + store_type: string | null + created_at: string + updated_at: string +} + +export interface StoreListResponse extends PaginatedResponse { + stores: Store[] +} + +export interface Product { + id: number + sku: string + name: string + category: string | null + brand: string | null + base_price: string | null // Decimal as string + base_cost: string | null // Decimal as string + created_at: string + updated_at: string +} + +export interface ProductListResponse extends PaginatedResponse { + products: Product[] +} + +// Analytics +export interface KPIMetrics { + total_revenue: string // Decimal as string + total_units: number + total_transactions: number + avg_unit_price: string | null + avg_basket_value: string | null +} + +export interface KPIResponse { + metrics: KPIMetrics + start_date: string + end_date: string + store_id: number | null + product_id: number | null + category: string | null +} + +export interface DrilldownItem { + dimension_value: string + 
dimension_id: number | null + metrics: KPIMetrics + rank: number + revenue_share_pct: string // Decimal as string +} + +export type DrilldownDimension = 'store' | 'product' | 'category' | 'region' | 'date' + +export interface DrilldownResponse { + dimension: DrilldownDimension + items: DrilldownItem[] + total_items: number + start_date: string + end_date: string + store_id: number | null + product_id: number | null +} + +// Registry +export type RunStatus = 'pending' | 'running' | 'success' | 'failed' | 'archived' + +export interface ModelRun { + run_id: string + status: RunStatus + model_type: string + model_config: Record + feature_config: Record | null + config_hash: string + data_window_start: string + data_window_end: string + store_id: number + product_id: number + metrics: Record | null + artifact_uri: string | null + artifact_hash: string | null + artifact_size_bytes: number | null + runtime_info: Record | null + agent_context: Record | null + git_sha: string | null + error_message: string | null + started_at: string | null + completed_at: string | null + created_at: string + updated_at: string +} + +export interface RunListResponse extends PaginatedResponse { + runs: ModelRun[] +} + +export interface Alias { + alias_name: string + run_id: string + run_status: RunStatus + model_type: string + description: string | null + created_at: string + updated_at: string +} + +export interface RunCompareResponse { + run_a: ModelRun + run_b: ModelRun + config_diff: Record + metrics_diff: Record +} + +// Jobs +export type JobType = 'train' | 'predict' | 'backtest' +export type JobStatus = 'pending' | 'running' | 'completed' | 'failed' | 'cancelled' + +export interface Job { + job_id: string + job_type: JobType + status: JobStatus + params: Record + result: Record | null + error_message: string | null + error_type: string | null + run_id: string | null + started_at: string | null + completed_at: string | null + created_at: string + updated_at: string +} + +export 
interface JobListResponse extends PaginatedResponse { + jobs: Job[] +} + +export interface JobCreate { + job_type: JobType + params: Record +} +``` + +#### Task 8: Create TanStack Query Hooks + +File: `src/hooks/use-stores.ts` + +```typescript +import { useQuery } from '@tanstack/react-query' +import { api } from '@/lib/api' +import type { StoreListResponse } from '@/types/api' + +interface UseStoresParams { + page: number + pageSize: number + region?: string + storeType?: string + search?: string +} + +export function useStores({ page, pageSize, region, storeType, search }: UseStoresParams) { + return useQuery({ + queryKey: ['stores', { page, pageSize, region, storeType, search }], + queryFn: () => api('/dimensions/stores', { + params: { + page, + page_size: pageSize, + region, + store_type: storeType, + search, + }, + }), + placeholderData: (previousData) => previousData, + }) +} +``` + +File: `src/hooks/use-runs.ts` + +```typescript +import { useQuery, useMutation, useQueryClient } from '@tanstack/react-query' +import { api } from '@/lib/api' +import type { RunListResponse, RunCompareResponse, Alias } from '@/types/api' + +interface UseRunsParams { + page: number + pageSize: number + modelType?: string + status?: string + storeId?: number + productId?: number +} + +export function useRuns({ page, pageSize, modelType, status, storeId, productId }: UseRunsParams) { + return useQuery({ + queryKey: ['runs', { page, pageSize, modelType, status, storeId, productId }], + queryFn: () => api('/registry/runs', { + params: { + page, + page_size: pageSize, + model_type: modelType, + status, + store_id: storeId, + product_id: productId, + }, + }), + placeholderData: (previousData) => previousData, + }) +} + +export function useCompareRuns(runIdA: string, runIdB: string, enabled = false) { + return useQuery({ + queryKey: ['runs', 'compare', runIdA, runIdB], + queryFn: () => api(`/registry/compare/${runIdA}/${runIdB}`), + enabled, + }) +} + +export function useAliases() { + 
return useQuery({ + queryKey: ['aliases'], + queryFn: () => api('/registry/aliases'), + }) +} + +export function useCreateAlias() { + const queryClient = useQueryClient() + return useMutation({ + mutationFn: (data: { alias_name: string; run_id: string; description?: string }) => + api('/registry/aliases', { method: 'POST', body: data }), + onSuccess: () => { + queryClient.invalidateQueries({ queryKey: ['aliases'] }) + }, + }) +} +``` + +File: `src/hooks/use-jobs.ts` + +```typescript +import { useQuery, useMutation, useQueryClient } from '@tanstack/react-query' +import { api } from '@/lib/api' +import type { JobListResponse, Job, JobCreate } from '@/types/api' + +interface UseJobsParams { + page: number + pageSize: number + jobType?: string + status?: string +} + +export function useJobs({ page, pageSize, jobType, status }: UseJobsParams) { + return useQuery({ + queryKey: ['jobs', { page, pageSize, jobType, status }], + queryFn: () => api('/jobs', { + params: { + page, + page_size: pageSize, + job_type: jobType, + status, + }, + }), + placeholderData: (previousData) => previousData, + refetchInterval: 5000, // Poll every 5 seconds for status updates + }) +} + +export function useJob(jobId: string, enabled = true) { + return useQuery({ + queryKey: ['jobs', jobId], + queryFn: () => api(`/jobs/${jobId}`), + enabled, + refetchInterval: (query) => { + // Stop polling when job is complete + const status = query.state.data?.status + return status === 'pending' || status === 'running' ? 
2000 : false + }, + }) +} + +export function useCreateJob() { + const queryClient = useQueryClient() + return useMutation({ + mutationFn: (data: JobCreate) => + api('/jobs', { method: 'POST', body: data }), + onSuccess: () => { + queryClient.invalidateQueries({ queryKey: ['jobs'] }) + }, + }) +} + +export function useCancelJob() { + const queryClient = useQueryClient() + return useMutation({ + mutationFn: (jobId: string) => + api(`/jobs/${jobId}`, { method: 'DELETE' }), + onSuccess: () => { + queryClient.invalidateQueries({ queryKey: ['jobs'] }) + }, + }) +} +``` + +#### Task 9: Create Layout Components + +File: `src/components/layout/app-layout.tsx` + +```typescript +import { Outlet } from 'react-router-dom' +import { Sidebar } from './sidebar' +import { Header } from './header' + +export function AppLayout() { + return ( +
+ +
+
+
+ +
+
+
+ ) +} +``` + +File: `src/components/layout/sidebar.tsx` + +```typescript +import { NavLink } from 'react-router-dom' +import { cn } from '@/lib/utils' +import { + LayoutDashboard, + Table2, + LineChart, + MessageSquare, + Settings, + Store, + Package, + FlaskConical, + ListTodo, +} from 'lucide-react' + +const navigation = [ + { name: 'Dashboard', href: '/', icon: LayoutDashboard }, + { name: 'Sales', href: '/explorer/sales', icon: Table2 }, + { name: 'Stores', href: '/explorer/stores', icon: Store }, + { name: 'Products', href: '/explorer/products', icon: Package }, + { name: 'Model Runs', href: '/explorer/runs', icon: FlaskConical }, + { name: 'Jobs', href: '/explorer/jobs', icon: ListTodo }, + { name: 'Forecast', href: '/visualize/forecast', icon: LineChart }, + { name: 'Chat', href: '/chat', icon: MessageSquare }, + { name: 'Admin', href: '/admin', icon: Settings }, +] + +export function Sidebar() { + return ( + + ) +} +``` + +#### Task 10: Create Error Boundary + +File: `src/components/error-boundary.tsx` + +```typescript +import { Component, type ReactNode } from 'react' +import { Button } from '@/components/ui/button' +import { Card, CardHeader, CardTitle, CardContent, CardFooter } from '@/components/ui/card' + +interface Props { + children: ReactNode +} + +interface State { + hasError: boolean + error: Error | null +} + +export class ErrorBoundary extends Component { + constructor(props: Props) { + super(props) + this.state = { hasError: false, error: null } + } + + static getDerivedStateFromError(error: Error): State { + return { hasError: true, error } + } + + render() { + if (this.state.hasError) { + return ( + + + Something went wrong + + +

+ {this.state.error?.message || 'An unexpected error occurred'} +

+
+ + + +
+ ) + } + + return this.props.children + } +} +``` + +--- + +### Phase 3: Data Table Components (Tasks 11-15) + +#### Task 11: Create Reusable DataTable Component + +File: `src/components/data-table/data-table.tsx` + +```typescript +import { + type ColumnDef, + type PaginationState, + type SortingState, + type ColumnFiltersState, + flexRender, + getCoreRowModel, + useReactTable, +} from '@tanstack/react-table' +import { + Table, + TableBody, + TableCell, + TableHead, + TableHeader, + TableRow, +} from '@/components/ui/table' +import { DataTablePagination } from './data-table-pagination' +import { Skeleton } from '@/components/ui/skeleton' + +interface DataTableProps { + columns: ColumnDef[] + data: TData[] + pageCount: number + pagination: PaginationState + onPaginationChange: (updater: PaginationState | ((old: PaginationState) => PaginationState)) => void + sorting?: SortingState + onSortingChange?: (updater: SortingState | ((old: SortingState) => SortingState)) => void + isLoading?: boolean +} + +export function DataTable({ + columns, + data, + pageCount, + pagination, + onPaginationChange, + sorting, + onSortingChange, + isLoading = false, +}: DataTableProps) { + const table = useReactTable({ + data, + columns, + pageCount, + state: { + pagination, + sorting, + }, + onPaginationChange, + onSortingChange, + getCoreRowModel: getCoreRowModel(), + manualPagination: true, + manualSorting: true, + }) + + return ( +
+
+ + + {table.getHeaderGroups().map((headerGroup) => ( + + {headerGroup.headers.map((header) => ( + + {header.isPlaceholder + ? null + : flexRender(header.column.columnDef.header, header.getContext())} + + ))} + + ))} + + + {isLoading ? ( + Array.from({ length: pagination.pageSize }).map((_, i) => ( + + {columns.map((_, j) => ( + + + + ))} + + )) + ) : table.getRowModel().rows?.length ? ( + table.getRowModel().rows.map((row) => ( + + {row.getVisibleCells().map((cell) => ( + + {flexRender(cell.column.columnDef.cell, cell.getContext())} + + ))} + + )) + ) : ( + + + No results. + + + )} + +
+
+ +
+ ) +} +``` + +#### Task 12: Create Stores Explorer Page + +File: `src/pages/explorer/stores.tsx` + +```typescript +import { useState } from 'react' +import { type ColumnDef, type PaginationState } from '@tanstack/react-table' +import { DataTable } from '@/components/data-table/data-table' +import { useStores } from '@/hooks/use-stores' +import { Input } from '@/components/ui/input' +import type { Store } from '@/types/api' + +const columns: ColumnDef[] = [ + { accessorKey: 'id', header: 'ID' }, + { accessorKey: 'code', header: 'Code' }, + { accessorKey: 'name', header: 'Name' }, + { accessorKey: 'region', header: 'Region' }, + { accessorKey: 'city', header: 'City' }, + { accessorKey: 'store_type', header: 'Type' }, +] + +export default function StoresPage() { + const [pagination, setPagination] = useState({ + pageIndex: 0, + pageSize: 20, + }) + const [search, setSearch] = useState('') + + const { data, isLoading } = useStores({ + page: pagination.pageIndex + 1, + pageSize: pagination.pageSize, + search: search || undefined, + }) + + return ( +
+
+

Stores

+ setSearch(e.target.value)} + className="max-w-sm" + /> +
+ +
+ ) +} +``` + +#### Task 13: Create Runs Explorer Page + +File: `src/pages/explorer/runs.tsx` + +```typescript +import { useState } from 'react' +import { type ColumnDef, type PaginationState } from '@tanstack/react-table' +import { DataTable } from '@/components/data-table/data-table' +import { useRuns, useCompareRuns } from '@/hooks/use-runs' +import { Button } from '@/components/ui/button' +import { Badge } from '@/components/ui/badge' +import { Checkbox } from '@/components/ui/checkbox' +import type { ModelRun, RunStatus } from '@/types/api' + +const statusColors: Record = { + pending: 'bg-yellow-100 text-yellow-800', + running: 'bg-blue-100 text-blue-800', + success: 'bg-green-100 text-green-800', + failed: 'bg-red-100 text-red-800', + archived: 'bg-gray-100 text-gray-800', +} + +const columns: ColumnDef[] = [ + { + id: 'select', + header: ({ table }) => ( + table.toggleAllPageRowsSelected(!!value)} + /> + ), + cell: ({ row }) => ( + row.toggleSelected(!!value)} + /> + ), + }, + { accessorKey: 'run_id', header: 'Run ID', cell: ({ row }) => row.original.run_id.slice(0, 8) }, + { accessorKey: 'model_type', header: 'Model' }, + { + accessorKey: 'status', + header: 'Status', + cell: ({ row }) => ( + {row.original.status} + ), + }, + { + accessorKey: 'metrics.mae', + header: 'MAE', + cell: ({ row }) => row.original.metrics?.mae?.toFixed(2) ?? '-', + }, + { + accessorKey: 'metrics.smape', + header: 'sMAPE', + cell: ({ row }) => row.original.metrics?.smape ? 
`${row.original.metrics.smape.toFixed(1)}%` : '-', + }, + { + accessorKey: 'created_at', + header: 'Created', + cell: ({ row }) => new Date(row.original.created_at).toLocaleDateString(), + }, +] + +export default function RunsPage() { + const [pagination, setPagination] = useState({ + pageIndex: 0, + pageSize: 20, + }) + const [selectedRuns, setSelectedRuns] = useState([]) + + const { data, isLoading } = useRuns({ + page: pagination.pageIndex + 1, + pageSize: pagination.pageSize, + }) + + const canCompare = selectedRuns.length === 2 + const { data: comparison, refetch: compare } = useCompareRuns( + selectedRuns[0] || '', + selectedRuns[1] || '', + canCompare + ) + + return ( +
+
+

Model Runs

+ +
+ +
+ ) +} +``` + +#### Task 14: Create Jobs Monitor Page + +File: `src/pages/explorer/jobs.tsx` + +```typescript +import { useState } from 'react' +import { type ColumnDef, type PaginationState } from '@tanstack/react-table' +import { DataTable } from '@/components/data-table/data-table' +import { useJobs, useCancelJob } from '@/hooks/use-jobs' +import { Button } from '@/components/ui/button' +import { Badge } from '@/components/ui/badge' +import type { Job, JobStatus } from '@/types/api' + +const statusColors: Record = { + pending: 'bg-yellow-100 text-yellow-800', + running: 'bg-blue-100 text-blue-800', + completed: 'bg-green-100 text-green-800', + failed: 'bg-red-100 text-red-800', + cancelled: 'bg-gray-100 text-gray-800', +} + +export default function JobsPage() { + const [pagination, setPagination] = useState({ + pageIndex: 0, + pageSize: 20, + }) + + const { data, isLoading } = useJobs({ + page: pagination.pageIndex + 1, + pageSize: pagination.pageSize, + }) + + const cancelJob = useCancelJob() + + const columns: ColumnDef[] = [ + { accessorKey: 'job_id', header: 'Job ID', cell: ({ row }) => row.original.job_id.slice(0, 8) }, + { accessorKey: 'job_type', header: 'Type' }, + { + accessorKey: 'status', + header: 'Status', + cell: ({ row }) => ( + {row.original.status} + ), + }, + { + accessorKey: 'created_at', + header: 'Created', + cell: ({ row }) => new Date(row.original.created_at).toLocaleString(), + }, + { + id: 'actions', + cell: ({ row }) => { + if (row.original.status !== 'pending') return null + return ( + + ) + }, + }, + ] + + return ( +
+

Jobs

+ +
+ ) +} +``` + +#### Task 15: Create Dashboard Page with KPI Cards + +File: `src/pages/dashboard.tsx` + +```typescript +import { useState } from 'react' +import { Card, CardHeader, CardTitle, CardContent } from '@/components/ui/card' +import { useKPIs } from '@/hooks/use-kpis' +import { Skeleton } from '@/components/ui/skeleton' + +function formatCurrency(value: string | null): string { + if (!value) return '-' + return new Intl.NumberFormat('en-US', { + style: 'currency', + currency: 'USD', + }).format(parseFloat(value)) +} + +function formatNumber(value: number | null): string { + if (value === null) return '-' + return new Intl.NumberFormat('en-US').format(value) +} + +export default function DashboardPage() { + const [dateRange] = useState({ + startDate: new Date(Date.now() - 30 * 24 * 60 * 60 * 1000).toISOString().split('T')[0], + endDate: new Date().toISOString().split('T')[0], + }) + + const { data, isLoading } = useKPIs({ + startDate: dateRange.startDate, + endDate: dateRange.endDate, + }) + + const kpiCards = [ + { title: 'Total Revenue', value: formatCurrency(data?.metrics.total_revenue ?? null) }, + { title: 'Total Units', value: formatNumber(data?.metrics.total_units ?? null) }, + { title: 'Transactions', value: formatNumber(data?.metrics.total_transactions ?? null) }, + { title: 'Avg Unit Price', value: formatCurrency(data?.metrics.avg_unit_price ?? null) }, + ] + + return ( +
+

Dashboard

+
+ {kpiCards.map((card) => ( + + + + {card.title} + + + + {isLoading ? ( + + ) : ( +
{card.value}
+ )} +
+
+ ))} +
+
+ ) +} +``` + +--- + +### Phase 4: Visualization Components (Tasks 16-18) + +#### Task 16: Create Time Series Chart Component + +File: `src/components/charts/time-series-chart.tsx` + +```typescript +import { + LineChart, + Line, + XAxis, + YAxis, + CartesianGrid, + Tooltip, + Legend, + ResponsiveContainer, + Area, + ComposedChart, +} from 'recharts' + +interface DataPoint { + date: string + actual?: number + predicted?: number + lower_bound?: number + upper_bound?: number +} + +interface TimeSeriesChartProps { + data: DataPoint[] + showConfidence?: boolean + height?: number +} + +export function TimeSeriesChart({ + data, + showConfidence = false, + height = 400, +}: TimeSeriesChartProps) { + return ( + + + + new Date(value).toLocaleDateString('en-US', { month: 'short', day: 'numeric' })} + /> + + new Date(value).toLocaleDateString()} + /> + + + {showConfidence && ( + + )} + + + + + + + ) +} +``` + +#### Task 17: Create Forecast Visualization Page + +File: `src/pages/visualize/forecast.tsx` + +```typescript +import { useState } from 'react' +import { Card, CardHeader, CardTitle, CardContent } from '@/components/ui/card' +import { TimeSeriesChart } from '@/components/charts/time-series-chart' +import { Select, SelectContent, SelectItem, SelectTrigger, SelectValue } from '@/components/ui/select' +import { useStores } from '@/hooks/use-stores' +import { useProducts } from '@/hooks/use-products' + +export default function ForecastPage() { + const [storeId, setStoreId] = useState('') + const [productId, setProductId] = useState('') + + const { data: stores } = useStores({ page: 1, pageSize: 100 }) + const { data: products } = useProducts({ page: 1, pageSize: 100 }) + + // Placeholder data - in production, fetch from job results + const chartData = [ + { date: '2026-01-01', actual: 100, predicted: 98 }, + { date: '2026-01-02', actual: 120, predicted: 115 }, + { date: '2026-01-03', actual: 110, predicted: 112 }, + { date: '2026-01-04', actual: 140, predicted: 135 }, + { 
date: '2026-01-05', actual: 130, predicted: 128 }, + ] + + return ( +
+

Forecast Visualization

+ +
+ + + +
+ + + + Actual vs Predicted + + + + + +
+ ) +} +``` + +#### Task 18: Create Main App Router + +File: `src/App.tsx` + +```typescript +import { BrowserRouter, Routes, Route } from 'react-router-dom' +import { QueryClientProvider } from '@tanstack/react-query' +import { queryClient } from '@/lib/query-client' +import { ThemeProvider } from '@/components/theme-provider' +import { AppLayout } from '@/components/layout/app-layout' +import { ErrorBoundary } from '@/components/error-boundary' + +// Pages +import DashboardPage from '@/pages/dashboard' +import StoresPage from '@/pages/explorer/stores' +import ProductsPage from '@/pages/explorer/products' +import RunsPage from '@/pages/explorer/runs' +import JobsPage from '@/pages/explorer/jobs' +import ForecastPage from '@/pages/visualize/forecast' + +export default function App() { + return ( + + + + + + }> + } /> + } /> + } /> + } /> + } /> + } /> + + + + + + + ) +} +``` + +--- + +### Phase 5: Agent Chat (Tasks 19-21) - DEPENDS ON INITIAL-10 + +#### Task 19: Create WebSocket Hook + +File: `src/hooks/use-websocket.ts` + +```typescript +import { useEffect, useRef, useState, useCallback } from 'react' + +type ConnectionStatus = 'connecting' | 'connected' | 'disconnected' | 'error' + +interface UseWebSocketOptions { + onMessage?: (data: unknown) => void + onError?: (error: Event) => void + reconnectAttempts?: number + reconnectInterval?: number +} + +export function useWebSocket(url: string | null, options: UseWebSocketOptions = {}) { + const { + onMessage, + onError, + reconnectAttempts = 5, + reconnectInterval = 3000, + } = options + + const [status, setStatus] = useState('disconnected') + const wsRef = useRef(null) + const reconnectCountRef = useRef(0) + + const connect = useCallback(() => { + if (!url) return + + setStatus('connecting') + const ws = new WebSocket(url) + + ws.onopen = () => { + setStatus('connected') + reconnectCountRef.current = 0 + } + + ws.onmessage = (event) => { + try { + const data = JSON.parse(event.data) + onMessage?.(data) + } catch { + 
onMessage?.(event.data) + } + } + + ws.onerror = (error) => { + setStatus('error') + onError?.(error) + } + + ws.onclose = () => { + setStatus('disconnected') + if (reconnectCountRef.current < reconnectAttempts) { + reconnectCountRef.current++ + setTimeout(connect, reconnectInterval) + } + } + + wsRef.current = ws + }, [url, onMessage, onError, reconnectAttempts, reconnectInterval]) + + const disconnect = useCallback(() => { + reconnectCountRef.current = reconnectAttempts // Prevent auto-reconnect + wsRef.current?.close() + wsRef.current = null + }, [reconnectAttempts]) + + const send = useCallback((data: unknown) => { + if (wsRef.current?.readyState === WebSocket.OPEN) { + wsRef.current.send(typeof data === 'string' ? data : JSON.stringify(data)) + } + }, []) + + useEffect(() => { + connect() + return () => disconnect() + }, [connect, disconnect]) + + return { status, send, disconnect, reconnect: connect } +} +``` + +#### Task 20: Create Chat Message Component + +File: `src/components/chat/chat-message.tsx` + +```typescript +import { cn } from '@/lib/utils' +import { Card } from '@/components/ui/card' + +interface Citation { + source_type: string + source_id: string + chunk_id: string + snippet: string +} + +interface ToolCall { + name: string + arguments: Record + result?: unknown +} + +interface ChatMessageProps { + role: 'user' | 'assistant' + content: string + citations?: Citation[] + toolCalls?: ToolCall[] + isStreaming?: boolean +} + +export function ChatMessage({ + role, + content, + citations, + toolCalls, + isStreaming, +}: ChatMessageProps) { + return ( +
+ +
+ {content} + {isStreaming && |} +
+ + {citations && citations.length > 0 && ( +
+

Sources:

+
    + {citations.map((citation, i) => ( +
  • + [{i + 1}] {citation.source_id} +
  • + ))} +
+
+ )} + + {toolCalls && toolCalls.length > 0 && ( +
+ Tool Calls ({toolCalls.length}) +
+ {toolCalls.map((call, i) => ( +
+ {call.name} +
+ ))} +
+
+ )} +
+
+ ) +} +``` + +#### Task 21: Create Chat Page + +File: `src/pages/chat.tsx` + +```typescript +import { useState, useCallback } from 'react' +import { useWebSocket } from '@/hooks/use-websocket' +import { ChatMessage } from '@/components/chat/chat-message' +import { Button } from '@/components/ui/button' +import { Input } from '@/components/ui/input' +import { Card } from '@/components/ui/card' +import { Send } from 'lucide-react' + +interface Message { + id: string + role: 'user' | 'assistant' + content: string + citations?: Array<{ source_type: string; source_id: string; chunk_id: string; snippet: string }> + toolCalls?: Array<{ name: string; arguments: Record; result?: unknown }> + isStreaming?: boolean +} + +const WS_URL = import.meta.env.VITE_WS_URL || 'ws://localhost:8123/agents/stream' + +export default function ChatPage() { + const [messages, setMessages] = useState([]) + const [input, setInput] = useState('') + const [streamingContent, setStreamingContent] = useState('') + + const handleMessage = useCallback((data: unknown) => { + const msg = data as { type: string; content?: string; done?: boolean; citations?: Message['citations']; tool_calls?: Message['toolCalls'] } + + if (msg.type === 'token') { + setStreamingContent((prev) => prev + (msg.content || '')) + } else if (msg.type === 'done') { + setMessages((prev) => [ + ...prev, + { + id: crypto.randomUUID(), + role: 'assistant', + content: streamingContent, + citations: msg.citations, + toolCalls: msg.tool_calls, + }, + ]) + setStreamingContent('') + } + }, [streamingContent]) + + const { status, send } = useWebSocket(WS_URL, { onMessage: handleMessage }) + + const handleSubmit = (e: React.FormEvent) => { + e.preventDefault() + if (!input.trim()) return + + setMessages((prev) => [ + ...prev, + { id: crypto.randomUUID(), role: 'user', content: input }, + ]) + + send({ + type: 'query', + agent: 'rag_assistant', + payload: { query: input }, + }) + + setInput('') + } + + return ( +
+

ForecastLab Assistant

+ + + {messages.map((msg) => ( + + ))} + {streamingContent && ( + + )} + + +
+ setInput(e.target.value)} + placeholder="Ask about forecasting, backtesting, or data..." + disabled={status !== 'connected'} + /> + +
+ +

+ Status: {status} +

+
+ ) +} +``` + +--- + +### Phase 6: Admin Panel & Polish (Tasks 22-24) + +#### Task 22: Create Admin Page + +File: `src/pages/admin.tsx` + +```typescript +import { useAliases, useCreateAlias } from '@/hooks/use-runs' +import { Card, CardHeader, CardTitle, CardContent } from '@/components/ui/card' +import { Button } from '@/components/ui/button' +import { Badge } from '@/components/ui/badge' +import { + Table, + TableBody, + TableCell, + TableHead, + TableHeader, + TableRow, +} from '@/components/ui/table' + +export default function AdminPage() { + const { data: aliases, isLoading } = useAliases() + + return ( +
+

Admin Panel

+ + + + Deployment Aliases + + + + + + Alias Name + Run ID + Model Type + Status + Created + + + + {isLoading ? ( + + Loading... + + ) : aliases?.length === 0 ? ( + + No aliases configured + + ) : ( + aliases?.map((alias) => ( + + {alias.alias_name} + {alias.run_id.slice(0, 8)} + {alias.model_type} + + + {alias.run_status} + + + {new Date(alias.created_at).toLocaleDateString()} + + )) + )} + +
+
+
+
+ ) +} +``` + +#### Task 23: Create Environment Configuration + +File: `frontend/.env.example` + +```env +# API Configuration +VITE_API_BASE_URL=http://localhost:8123 +VITE_WS_URL=ws://localhost:8123/agents/stream + +# Feature Flags +VITE_ENABLE_AGENT_CHAT=true +VITE_ENABLE_ADMIN_PANEL=true + +# Visualization +VITE_DEFAULT_PAGE_SIZE=25 +VITE_MAX_CHART_POINTS=365 +``` + +#### Task 24: Create Examples Documentation + +File: `examples/ui/README.md` + +```markdown +# ForecastLab Dashboard + +## Page Map + +| Page | Route | API Endpoints | Description | +|------|-------|---------------|-------------| +| Dashboard | `/` | `GET /analytics/kpis` | KPI summary cards | +| Stores | `/explorer/stores` | `GET /dimensions/stores` | Store dimension table | +| Products | `/explorer/products` | `GET /dimensions/products` | Product dimension table | +| Model Runs | `/explorer/runs` | `GET /registry/runs` | Model run leaderboard | +| Jobs | `/explorer/jobs` | `GET /jobs` | Job status monitor | +| Forecast | `/visualize/forecast` | Job results | Forecast visualization | +| Chat | `/chat` | `WS /agents/stream` | Agent chat interface | +| Admin | `/admin` | `GET /registry/aliases` | Admin panel | + +## Running the Dashboard + +### Prerequisites +- Node.js 20+ +- pnpm (recommended) or npm +- Backend running on port 8123 + +### Development + +```bash +cd frontend +pnpm install +pnpm dev +``` + +Open http://localhost:5173 + +### Production Build + +```bash +cd frontend +pnpm build +pnpm preview +``` + +## Environment Variables + +Copy `.env.example` to `.env` and configure: + +| Variable | Default | Description | +|----------|---------|-------------| +| `VITE_API_BASE_URL` | `http://localhost:8123` | Backend API base URL | +| `VITE_WS_URL` | `ws://localhost:8123/agents/stream` | WebSocket URL for chat | + +## Tech Stack + +- React 19 + TypeScript +- Vite for bundling +- shadcn/ui components +- TanStack Table for data grids +- TanStack Query for data fetching +- Recharts for visualization 
+- Tailwind CSS 4 for styling +``` + +--- + +## Validation Loop + +### Level 1: Syntax & Style + +```bash +cd frontend + +# TypeScript compilation +pnpm tsc --noEmit + +# ESLint +pnpm eslint src/ + +# Expected: No errors +``` + +### Level 2: Build Validation + +```bash +cd frontend + +# Development build +pnpm dev # Should start without errors + +# Production build +pnpm build + +# Expected: Build completes, outputs to dist/ +``` + +### Level 3: Integration Test + +```bash +# 1. Start backend +docker-compose up -d +uv run uvicorn app.main:app --port 8123 + +# 2. Start frontend +cd frontend && pnpm dev + +# 3. Manual verification: +# - Open http://localhost:5173 +# - Navigate to /explorer/stores +# - Verify data loads from API +# - Check pagination works +# - Verify dark mode toggle + +# 4. Lighthouse audit (Chrome DevTools) +# - Performance > 90 +# - Accessibility > 90 +``` + +--- + +## Final Validation Checklist + +- [ ] Vite project scaffolded with React 19 + TypeScript strict +- [ ] shadcn/ui components installed and working +- [ ] TanStack Table with server-side pagination +- [ ] TanStack Query hooks for all API endpoints +- [ ] Recharts time series visualization +- [ ] WebSocket hook for agent chat (placeholder if INITIAL-10 not ready) +- [ ] Dark/light theme toggle +- [ ] Responsive sidebar navigation +- [ ] Error boundary with retry +- [ ] All TypeScript strict checks pass +- [ ] ESLint passes +- [ ] Production build succeeds +- [ ] Lighthouse performance > 90 + +--- + +## Integration Points + +```yaml +BACKEND_DEPENDENCY: + - Requires backend running on VITE_API_BASE_URL + - Uses /dimensions/*, /analytics/*, /registry/*, /jobs/* endpoints + - WebSocket requires INITIAL-10 completion for full chat functionality + +PHASE_DEPENDENCIES: + - INITIAL-9 (RAG): Admin panel shows /rag/sources (placeholder if not ready) + - INITIAL-10 (Agentic): Chat interface uses WS /agents/stream + - Phase 7 (Serving): All data tables consume serving layer endpoints + 
+FEATURE_FLAGS: + - VITE_ENABLE_AGENT_CHAT: Gate chat interface until INITIAL-10 ready + - VITE_ENABLE_ADMIN_PANEL: Gate admin features +``` + +--- + +## Anti-Patterns to Avoid + +- Do NOT hardcode API URLs - always use `import.meta.env.VITE_API_BASE_URL` +- Do NOT use `process.env` - that's Node.js, use `import.meta.env` for Vite +- Do NOT install `shadcn-ui` package - use `npx shadcn@latest` CLI +- Do NOT use `useTable` - TanStack Table v8 uses `useReactTable` +- Do NOT forget `manualPagination: true` for server-side tables +- Do NOT skip error boundaries - API errors should be caught gracefully +- Do NOT create custom fetch wrappers with Promise.race timeout - use AbortController + +--- + +## Confidence Score Breakdown + +| Area | Score | Rationale | +|------|-------|-----------| +| Project Scaffolding | 9/10 | Vite + React well documented | +| shadcn/ui Integration | 8/10 | CLI-based, clear patterns | +| TanStack Table | 8/10 | Server-side examples available | +| TanStack Query | 9/10 | Mature library, clear docs | +| Recharts | 8/10 | Straightforward API | +| WebSocket Chat | 6/10 | Custom implementation needed, depends on INITIAL-10 | +| TypeScript Types | 8/10 | Backend schemas well-defined | +| Overall | **7.5/10** | Chat dependency on INITIAL-10 lowers confidence | + +**Note**: Full chat functionality requires INITIAL-10 (Agentic Layer) WebSocket endpoint. Implement chat page with placeholder/disabled state if INITIAL-10 not ready. diff --git a/PRPs/PRP-9-rag-knowledge-base.md b/PRPs/PRP-9-rag-knowledge-base.md new file mode 100644 index 00000000..011ef88b --- /dev/null +++ b/PRPs/PRP-9-rag-knowledge-base.md @@ -0,0 +1,776 @@ +# PRP-9: RAG Knowledge Base ("The Memory") + +**Feature**: INITIAL-9.md — RAG Knowledge Base +**Status**: Ready for Implementation +**Confidence Score**: 8.5/10 + +--- + +## Goal + +Build the RAG Knowledge Base layer providing: +1. **Document ingestion** with markdown-aware and OpenAPI-aware chunking +2. 
**Vector storage** using PostgreSQL + pgvector for embeddings +3. **Semantic retrieval** with configurable top-k and similarity thresholds +4. **Idempotent re-indexing** via content hash comparison + +This is the foundational "Memory" layer that INITIAL-10 (Agentic Layer) will consume via the `retrieve_context` tool. + +--- + +## Why + +- **Agent-Ready**: Provides `retrieve_context` tool for INITIAL-10 RAG Assistant +- **Evidence-Grounded**: Returns raw chunks with citations (no hallucination) +- **Cost-Effective**: Uses existing PostgreSQL (no new infrastructure) +- **Portfolio Value**: Demonstrates full-stack RAG implementation + +--- + +## What + +### Endpoints + +| Method | Path | Description | +|--------|------|-------------| +| `POST` | `/rag/index` | Index document (markdown/openapi) | +| `POST` | `/rag/retrieve` | Semantic search with filters | +| `GET` | `/rag/sources` | List indexed sources | +| `DELETE` | `/rag/sources/{source_id}` | Remove source and chunks | + +### Success Criteria + +- [ ] pgvector extension enabled via migration +- [ ] Markdown chunker respects heading boundaries +- [ ] OpenAPI chunker produces one chunk per endpoint +- [ ] Async batch embedding with OpenAI API +- [ ] HNSW index for sub-100ms retrieval +- [ ] Idempotent re-indexing (content_hash change detection) +- [ ] 80+ unit tests, 15+ integration tests +- [ ] All validation gates green (ruff, mypy, pyright, pytest) + +--- + +## All Needed Context + +### Documentation & References + +```yaml +# CRITICAL - pgvector SQLAlchemy Integration +- url: https://github.com/pgvector/pgvector-python + why: "Official pgvector Python library - Vector column, HNSW index, cosine_distance" + +- url: https://github.com/pgvector/pgvector-python/blob/master/README.md + why: "SQLAlchemy 2.0 patterns, Index creation with postgresql_ops" + +# pgvector Indexing +- url: https://neon.com/blog/understanding-vector-search-and-hnsw-index-with-pgvector + why: "HNSW vs IVFFlat tradeoffs, index tuning 
parameters" + +# OpenAI Embeddings +- url: https://platform.openai.com/docs/api-reference/embeddings + why: "Embeddings API reference - batch processing, input limits (8192 tokens)" + +- url: https://platform.openai.com/docs/guides/embeddings + why: "Best practices, token counting with tiktoken cl100k_base" + +# Markdown Chunking +- url: https://python.langchain.com/docs/how_to/markdown_header_metadata_splitter/ + why: "MarkdownHeaderTextSplitter pattern for heading-aware splitting" + +# Codebase Patterns (CRITICAL) +- file: app/features/registry/models.py + why: "ORM pattern with JSONB, TimestampMixin, Index creation" + +- file: app/features/registry/schemas.py + why: "Pydantic v2 patterns - ConfigDict, field_validator, from_attributes" + +- file: app/features/registry/routes.py + why: "FastAPI patterns - APIRouter, response_model, HTTPException" + +- file: app/features/registry/service.py + why: "Async service pattern - get_settings(), structured logging" + +- file: app/features/registry/tests/conftest.py + why: "Test fixtures - db_session, client, cleanup patterns" + +# ADR +- file: docs/ADR/ADR-0003-vector-storage-pgvector-in-postgres.md + why: "Architectural decision for pgvector over dedicated vector DB" +``` + +### Current Codebase Tree (Relevant Parts) + +``` +app/ +├── core/ +│ ├── config.py # Settings singleton - ADD RAG settings here +│ ├── database.py # Base, get_db, get_engine +│ ├── logging.py # get_logger, structured logging +│ └── exceptions.py # ForecastLabError base class +├── shared/ +│ └── models.py # TimestampMixin +├── features/ +│ ├── registry/ # REFERENCE: Follow this pattern exactly +│ │ ├── models.py +│ │ ├── schemas.py +│ │ ├── routes.py +│ │ ├── service.py +│ │ ├── storage.py +│ │ └── tests/ +│ └── rag/ # NEW: Create this vertical slice +├── main.py # Include rag router here +docker-compose.yml # Already uses pgvector/pgvector:pg16 +alembic/versions/ # Add migration for pgvector extension + tables +``` + +### Desired Codebase Tree (Files 
to Create) + +``` +app/features/rag/ +├── __init__.py # Export router +├── models.py # DocumentSource, DocumentChunk ORM models +├── schemas.py # IndexRequest/Response, RetrieveRequest/Response, etc. +├── routes.py # FastAPI router with /rag/* endpoints +├── service.py # RAGService - indexing and retrieval logic +├── chunkers.py # MarkdownChunker, OpenAPIChunker classes +├── embeddings.py # EmbeddingService - async OpenAI API calls +├── tests/ +│ ├── __init__.py +│ ├── conftest.py # db_session, client fixtures +│ ├── test_schemas.py # Schema validation tests +│ ├── test_chunkers.py # Chunking logic tests (unit, no DB) +│ ├── test_embeddings.py # Embedding tests with mocked API +│ ├── test_service.py # Service tests (unit + integration) +│ └── test_routes.py # Route integration tests + +alembic/versions/ +└── xxxx_create_rag_tables.py # Migration with CREATE EXTENSION vector + +examples/rag/ +├── index_docs.py # Example: index docs/ directory +└── query.http # HTTP client examples +``` + +### Known Gotchas & Library Quirks + +```python +# CRITICAL: pgvector SQLAlchemy requires explicit import +from pgvector.sqlalchemy import Vector # NOT from sqlalchemy + +# CRITICAL: HNSW index requires vector_cosine_ops for cosine distance +Index( + "ix_embedding_hnsw", + DocumentChunk.embedding, + postgresql_using="hnsw", + postgresql_with={"m": 16, "ef_construction": 64}, + postgresql_ops={"embedding": "vector_cosine_ops"}, # MUST match query distance +) + +# CRITICAL: Cosine distance query uses cosine_distance method +from pgvector.sqlalchemy import Vector +stmt = select(DocumentChunk).order_by( + DocumentChunk.embedding.cosine_distance(query_embedding) # NOT <=> operator +).limit(top_k) + +# CRITICAL: OpenAI embeddings input limit is 8192 tokens per text +# Use tiktoken to count tokens before sending to API +import tiktoken +enc = tiktoken.get_encoding("cl100k_base") +tokens = enc.encode(text) +if len(tokens) > 8191: + # Truncate or split + +# CRITICAL: OpenAI API returns 
embeddings in same order as input +# But batch requests should be <= 2048 inputs per call + +# CRITICAL: Pydantic v2 uses ConfigDict, not class Config +from pydantic import BaseModel, ConfigDict +class MySchema(BaseModel): + model_config = ConfigDict(from_attributes=True, extra="forbid") + +# CRITICAL: SQLAlchemy 2.0 uses Mapped[] and mapped_column() +from sqlalchemy.orm import Mapped, mapped_column +embedding = mapped_column(Vector(1536)) # Vector column + +# CRITICAL: Alembic migration needs op.execute for CREATE EXTENSION +op.execute("CREATE EXTENSION IF NOT EXISTS vector") +``` + +--- + +## Implementation Blueprint + +### Data Models + +#### ORM Models (models.py) + +```python +"""RAG knowledge base ORM models.""" +from __future__ import annotations +import uuid +from datetime import datetime +from typing import Any +from sqlalchemy import ( + DateTime, Index, Integer, String, Text, UniqueConstraint, ForeignKey, +) +from sqlalchemy.dialects.postgresql import JSONB +from sqlalchemy.orm import Mapped, mapped_column, relationship +from pgvector.sqlalchemy import Vector +from app.core.database import Base +from app.shared.models import TimestampMixin + + +class DocumentSource(TimestampMixin, Base): + """Registered document source for indexing.""" + __tablename__ = "document_source" + + id: Mapped[int] = mapped_column(Integer, primary_key=True) + source_id: Mapped[str] = mapped_column(String(32), unique=True, index=True) + source_type: Mapped[str] = mapped_column(String(50), index=True) # markdown, openapi + source_path: Mapped[str] = mapped_column(Text, nullable=False) + content_hash: Mapped[str] = mapped_column(String(64), nullable=False) # SHA-256 + metadata_: Mapped[dict[str, Any] | None] = mapped_column("metadata", JSONB, nullable=True) + indexed_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), nullable=False) + + # Relationship + chunks: Mapped[list[DocumentChunk]] = relationship( + back_populates="source", cascade="all, delete-orphan" + ) + + 
__table_args__ = ( + UniqueConstraint("source_type", "source_path", name="uq_source_type_path"), + ) + + +class DocumentChunk(TimestampMixin, Base): + """Indexed document chunk with embedding.""" + __tablename__ = "document_chunk" + + id: Mapped[int] = mapped_column(Integer, primary_key=True) + chunk_id: Mapped[str] = mapped_column(String(32), unique=True, index=True) + source_id: Mapped[int] = mapped_column( + Integer, ForeignKey("document_source.id", ondelete="CASCADE"), index=True + ) + chunk_index: Mapped[int] = mapped_column(Integer, nullable=False) + content: Mapped[str] = mapped_column(Text, nullable=False) + embedding = mapped_column(Vector(1536), nullable=True) # Dimension from settings + token_count: Mapped[int] = mapped_column(Integer, nullable=False) + metadata_: Mapped[dict[str, Any] | None] = mapped_column("metadata", JSONB, nullable=True) + + # Relationship + source: Mapped[DocumentSource] = relationship(back_populates="chunks") + + __table_args__ = ( + UniqueConstraint("source_id", "chunk_index", name="uq_source_chunk_index"), + Index( + "ix_chunk_embedding_hnsw", + "embedding", + postgresql_using="hnsw", + postgresql_with={"m": 16, "ef_construction": 64}, + postgresql_ops={"embedding": "vector_cosine_ops"}, + ), + Index("ix_chunk_metadata_gin", "metadata", postgresql_using="gin"), + ) +``` + +#### Pydantic Schemas (schemas.py) + +```python +"""Pydantic schemas for RAG API contracts.""" +from datetime import datetime +from typing import Any, Literal +from pydantic import BaseModel, ConfigDict, Field, field_validator + + +class IndexRequest(BaseModel): + """Request to index a document.""" + model_config = ConfigDict(extra="forbid") + + source_type: Literal["markdown", "openapi"] = Field( + ..., description="Type of document to index" + ) + source_path: str = Field(..., min_length=1, max_length=500) + content: str | None = Field(None, description="Optional content override") + metadata: dict[str, Any] | None = Field(None, description="Custom 
metadata") + + +class IndexResponse(BaseModel): + """Response from indexing operation.""" + model_config = ConfigDict(from_attributes=True) + + source_id: str + source_path: str + chunks_created: int + tokens_processed: int + duration_ms: float + status: Literal["indexed", "updated", "unchanged"] + + +class RetrieveRequest(BaseModel): + """Request for semantic search.""" + model_config = ConfigDict(extra="forbid") + + query: str = Field(..., min_length=1, max_length=2000) + top_k: int = Field(default=5, ge=1, le=50) + similarity_threshold: float = Field(default=0.7, ge=0.0, le=1.0) + filters: dict[str, Any] | None = Field(None, description="Metadata filters") + + +class ChunkResult(BaseModel): + """Single chunk in retrieval results.""" + model_config = ConfigDict(from_attributes=True) + + chunk_id: str + source_id: str + source_path: str + source_type: str + content: str + relevance_score: float + metadata: dict[str, Any] | None = None + + +class RetrieveResponse(BaseModel): + """Response from retrieval operation.""" + results: list[ChunkResult] + query_embedding_time_ms: float + search_time_ms: float + total_chunks_searched: int + + +class SourceResponse(BaseModel): + """Source details response.""" + model_config = ConfigDict(from_attributes=True) + + source_id: str + source_type: str + source_path: str + chunk_count: int + content_hash: str + indexed_at: datetime + metadata: dict[str, Any] | None = None + + +class SourceListResponse(BaseModel): + """List of indexed sources.""" + sources: list[SourceResponse] + total_sources: int + total_chunks: int + + +class DeleteResponse(BaseModel): + """Response from delete operation.""" + source_id: str + chunks_deleted: int + status: Literal["deleted"] +``` + +--- + +## Task List + +### Task 1: Add Dependencies to pyproject.toml + +```yaml +MODIFY: pyproject.toml +ADD to dependencies: + - "pgvector>=0.3.0" # pgvector SQLAlchemy support + - "openai>=1.40.0" # OpenAI API client (async) + - "tiktoken>=0.7.0" # Token counting 
for chunk size + - "httpx>=0.28.0" # Already in dev, may need in main for async HTTP +``` + +### Task 2: Add RAG Settings to config.py + +```yaml +MODIFY: app/core/config.py +ADD after "jobs_retention_days" (~line 65): + # RAG Embedding Configuration + rag_embedding_model: str = "text-embedding-3-small" + rag_embedding_dimension: int = 1536 + rag_embedding_batch_size: int = 100 + openai_api_key: str = "" # Required for embeddings + + # RAG Chunking Configuration + rag_chunk_size: int = 512 # tokens + rag_chunk_overlap: int = 50 # tokens + rag_min_chunk_size: int = 100 + + # RAG Retrieval Configuration + rag_top_k: int = 5 + rag_similarity_threshold: float = 0.7 + rag_max_context_tokens: int = 4000 + + # RAG Index Configuration + rag_index_type: Literal["hnsw", "ivfflat"] = "hnsw" + rag_hnsw_m: int = 16 + rag_hnsw_ef_construction: int = 64 +``` + +### Task 3: Create Alembic Migration + +```yaml +CREATE: alembic/versions/xxxx_create_rag_tables.py +PATTERN: Follow app/features/registry migration pattern + +Pseudocode: +def upgrade(): + # Enable pgvector extension + op.execute("CREATE EXTENSION IF NOT EXISTS vector") + + # Create document_source table + op.create_table("document_source", ...) + + # Create document_chunk table with Vector column + op.create_table("document_chunk", + sa.Column("embedding", Vector(1536), nullable=True), + ... 
+ ) + + # Create HNSW index + op.create_index( + "ix_chunk_embedding_hnsw", + "document_chunk", + ["embedding"], + postgresql_using="hnsw", + postgresql_with={"m": 16, "ef_construction": 64}, + postgresql_ops={"embedding": "vector_cosine_ops"}, + ) +``` + +### Task 4: Create ORM Models + +```yaml +CREATE: app/features/rag/models.py +MIRROR: app/features/registry/models.py pattern +CRITICAL: + - Use pgvector.sqlalchemy.Vector for embedding column + - Add HNSW index in __table_args__ + - Use TimestampMixin + - Cascade delete from source to chunks +``` + +### Task 5: Create Pydantic Schemas + +```yaml +CREATE: app/features/rag/schemas.py +MIRROR: app/features/registry/schemas.py pattern +INCLUDE: + - IndexRequest, IndexResponse + - RetrieveRequest, RetrieveResponse, ChunkResult + - SourceResponse, SourceListResponse + - DeleteResponse +``` + +### Task 6: Create Chunker Classes + +```yaml +CREATE: app/features/rag/chunkers.py + +Classes: + BaseChunker (ABC): + - chunk(content: str) -> list[ChunkData] + + MarkdownChunker(BaseChunker): + - Split on heading boundaries (# ## ###) + - Respect chunk_size and chunk_overlap from settings + - Extract heading hierarchy for metadata + - Use tiktoken cl100k_base for token counting + + OpenAPIChunker(BaseChunker): + - Parse OpenAPI JSON/YAML + - One chunk per endpoint (path + method) + - Include operation summary, description, parameters + +CRITICAL: + - Use tiktoken for token counting (cl100k_base encoding) + - Never exceed 8191 tokens per chunk (OpenAI limit) +``` + +### Task 7: Create Embedding Service + +```yaml +CREATE: app/features/rag/embeddings.py + +Class EmbeddingService: + __init__(self): + - Load settings (api_key, model, dimension, batch_size) + - Initialize AsyncOpenAI client + + async def embed_texts(self, texts: list[str]) -> list[list[float]]: + - Batch texts into groups of batch_size + - Call OpenAI embeddings API for each batch + - Handle rate limits with exponential backoff + - Return embeddings in same order as 
input + + async def embed_query(self, query: str) -> list[float]: + - Single text embedding for retrieval queries + +CRITICAL: + - Use openai.AsyncOpenAI for async calls + - Validate token count before API call + - Log token usage for cost tracking +``` + +### Task 8: Create RAG Service + +```yaml +CREATE: app/features/rag/service.py +MIRROR: app/features/registry/service.py pattern + +Class RAGService: + async def index_document(self, db, request: IndexRequest) -> IndexResponse: + - Read content from source_path (or use provided content) + - Compute SHA-256 content hash + - Check if source exists with same hash (skip if unchanged) + - Chunk content using appropriate chunker + - Generate embeddings for all chunks + - Upsert source record + - Delete old chunks, insert new chunks + - Return IndexResponse with stats + + async def retrieve(self, db, request: RetrieveRequest) -> RetrieveResponse: + - Generate query embedding + - Build pgvector similarity query with cosine_distance + - Apply metadata filters if provided + - Execute query, compute relevance scores + - Return top-k results above threshold + + async def list_sources(self, db) -> SourceListResponse: + - Query all sources with chunk counts + - Return all sources plus total_sources and total_chunks (SourceListResponse has no pagination fields) + + async def delete_source(self, db, source_id: str) -> DeleteResponse: + - Find source by source_id + - Delete (cascades to chunks) + - Return delete count + +CRITICAL: + - Use cosine_distance for similarity (NOT l2_distance) + - Relevance score = 1 - cosine_distance (i.e. cosine similarity; cosine_distance ranges 0-2, so clamp the score to 0-1 before comparing with similarity_threshold) + - Handle source not found with 404 +``` + +### Task 9: Create FastAPI Routes + +```yaml +CREATE: app/features/rag/routes.py +MIRROR: app/features/registry/routes.py pattern + +Routes: + POST /rag/index -> IndexResponse (201 CREATED) + POST /rag/retrieve -> RetrieveResponse (200 OK) + GET /rag/sources -> SourceListResponse (200 OK) + DELETE /rag/sources/{source_id} -> DeleteResponse (200 OK) + +CRITICAL: + - Use structured logging with rag.* event prefix + - 
Handle OpenAI API errors gracefully + - Validate source_id format +``` + +### Task 10: Register Router in main.py + +```yaml +MODIFY: app/main.py +ADD import: from app.features.rag.routes import router as rag_router +ADD router: app.include_router(rag_router) +``` + +### Task 11: Create Test Fixtures + +```yaml +CREATE: app/features/rag/tests/conftest.py +MIRROR: app/features/registry/tests/conftest.py + +Fixtures: + - db_session: Async session with cleanup (delete test-* sources) + - client: AsyncClient with db override + - sample_markdown_content: Test markdown with headings + - sample_openapi_content: Test OpenAPI spec + - mock_embedding_service: Mocked EmbeddingService for unit tests +``` + +### Task 12: Create Unit Tests + +```yaml +CREATE: app/features/rag/tests/test_schemas.py + - Test IndexRequest validation + - Test RetrieveRequest validation (query length, threshold bounds) + +CREATE: app/features/rag/tests/test_chunkers.py + - Test MarkdownChunker respects heading boundaries + - Test MarkdownChunker respects chunk_size + - Test MarkdownChunker extracts heading metadata + - Test OpenAPIChunker creates one chunk per endpoint + - Test chunk token counts are within limits + +CREATE: app/features/rag/tests/test_embeddings.py + - Test embed_texts batching logic + - Test embed_query returns correct dimension + - Mock OpenAI API responses + +CREATE: app/features/rag/tests/test_service.py (unit) + - Test content hash computation + - Test idempotent re-indexing logic + - Test relevance score normalization +``` + +### Task 13: Create Integration Tests + +```yaml +CREATE: app/features/rag/tests/test_routes.py +@pytest.mark.integration tests: + - test_index_markdown_creates_chunks + - test_index_same_content_returns_unchanged + - test_index_updated_content_re_indexes + - test_retrieve_returns_relevant_chunks + - test_retrieve_respects_threshold + - test_list_sources_returns_all + - test_delete_source_removes_chunks + - test_delete_nonexistent_returns_404 +``` + +### 
Task 14: Create Examples + +```yaml +CREATE: examples/rag/index_docs.py + - Script to index docs/ directory + +CREATE: examples/rag/query.http + - HTTP client examples for all endpoints +``` + +### Task 15: Update .env.example + +```yaml +MODIFY: .env.example +ADD: + # RAG Configuration + OPENAI_API_KEY=sk-... + RAG_EMBEDDING_MODEL=text-embedding-3-small + RAG_CHUNK_SIZE=512 + RAG_TOP_K=5 +``` + +--- + +## Validation Loop + +### Level 1: Syntax & Style + +```bash +# Run FIRST - fix any errors before proceeding +uv run ruff check app/features/rag/ --fix +uv run ruff format app/features/rag/ + +# Expected: No errors +``` + +### Level 2: Type Checking + +```bash +# MUST be green +uv run mypy app/features/rag/ +uv run pyright app/features/rag/ + +# Expected: 0 errors on both +``` + +### Level 3: Unit Tests + +```bash +# No database required +uv run pytest app/features/rag/tests/ -v -m "not integration" + +# Expected: All pass +# If failing: Read error, fix code, re-run +``` + +### Level 4: Integration Tests + +```bash +# Requires PostgreSQL running +docker-compose up -d + +# Run migrations +uv run alembic upgrade head + +# Run integration tests +uv run pytest app/features/rag/tests/ -v -m integration + +# Expected: All pass +``` + +### Level 5: Manual Smoke Test + +```bash +# Start API +uv run uvicorn app.main:app --reload --port 8123 + +# Index a document +curl -X POST http://localhost:8123/rag/index \ + -H "Content-Type: application/json" \ + -d '{"source_type": "markdown", "source_path": "README.md"}' + +# Expected: {"source_id": "...", "chunks_created": N, ...} + +# Retrieve +curl -X POST http://localhost:8123/rag/retrieve \ + -H "Content-Type: application/json" \ + -d '{"query": "What is ForecastLabAI?", "top_k": 3}' + +# Expected: {"results": [...], ...} + +# List sources +curl http://localhost:8123/rag/sources + +# Delete source +curl -X DELETE http://localhost:8123/rag/sources/{source_id} +``` + +--- + +## Final Validation Checklist + +- [ ] All tests pass: `uv 
run pytest app/features/rag/tests/ -v` +- [ ] No linting errors: `uv run ruff check app/features/rag/` +- [ ] No type errors: `uv run mypy app/features/rag/ && uv run pyright app/features/rag/` +- [ ] Migration applies cleanly: `uv run alembic upgrade head` +- [ ] Manual smoke test successful +- [ ] Structured logging events follow `rag.*` prefix +- [ ] Content hash prevents duplicate embeddings +- [ ] HNSW index used for similarity queries + +--- + +## Anti-Patterns to Avoid + +- ❌ Don't use `l2_distance` when you want cosine similarity +- ❌ Don't forget to enable pgvector extension in migration +- ❌ Don't exceed 8191 tokens per embedding input +- ❌ Don't use sync OpenAI client - use AsyncOpenAI +- ❌ Don't hardcode embedding dimensions - use settings +- ❌ Don't catch all exceptions - be specific +- ❌ Don't skip content hash comparison (wastes API calls) +- ❌ Don't create new patterns when registry patterns work + +--- + +## Confidence Score: 8.5/10 + +**Strengths:** +- Docker already has pgvector image +- Clear patterns from registry module to follow +- Comprehensive documentation available +- ADR decision already made + +**Risks:** +- OpenAI API rate limits during bulk indexing +- HNSW index creation on large datasets may be slow +- tiktoken token counting edge cases + +**Mitigations:** +- Implement exponential backoff for API calls +- Create index after initial data load +- Extensive unit tests for chunking edge cases diff --git a/README.md b/README.md index 82e24494..9d1285a3 100644 --- a/README.md +++ b/README.md @@ -454,6 +454,59 @@ curl -X POST http://localhost:8123/jobs \ - JSONB storage for flexible params and results - Links to model_run for train/backtest jobs +### RAG Knowledge Base + +- `POST /rag/index` - Index a document into the knowledge base +- `POST /rag/retrieve` - Semantic search across indexed documents +- `GET /rag/sources` - List indexed sources +- `DELETE /rag/sources/{source_id}` - Delete a source and its chunks + +**Embedding Providers:** 
+ +The RAG system supports two embedding providers: + +1. **OpenAI** (default): +```bash +RAG_EMBEDDING_PROVIDER=openai +OPENAI_API_KEY=sk-your-key +RAG_EMBEDDING_MODEL=text-embedding-3-small +RAG_EMBEDDING_DIMENSION=1536 +``` + +2. **Ollama** (local/LAN): +```bash +RAG_EMBEDDING_PROVIDER=ollama +OLLAMA_BASE_URL=http://localhost:11434 +OLLAMA_EMBEDDING_MODEL=nomic-embed-text +RAG_EMBEDDING_DIMENSION=768 +``` + +**Example Index Request:** +```bash +curl -X POST http://localhost:8123/rag/index \ + -H "Content-Type: application/json" \ + -d '{ + "source_type": "markdown", + "source_path": "docs/ARCHITECTURE.md" + }' +``` + +**Example Retrieve Request:** +```bash +curl -X POST http://localhost:8123/rag/retrieve \ + -H "Content-Type: application/json" \ + -d '{ + "query": "How does backtesting work?", + "top_k": 5 + }' +``` + +**Features:** +- pgvector for HNSW similarity search +- Idempotent indexing via content hash +- Markdown and OpenAPI chunking strategies +- Configurable embedding dimensions + ### Error Responses (RFC 7807) All error responses follow RFC 7807 Problem Details format with `Content-Type: application/problem+json`: diff --git a/alembic/env.py b/alembic/env.py index b3d317b0..8d9890f3 100644 --- a/alembic/env.py +++ b/alembic/env.py @@ -14,6 +14,7 @@ # Import all models for Alembic autogenerate detection from app.features.data_platform import models as data_platform_models # noqa: F401 from app.features.jobs import models as jobs_models # noqa: F401 +from app.features.rag import models as rag_models # noqa: F401 from app.features.registry import models as registry_models # noqa: F401 # Alembic Config object diff --git a/alembic/versions/b4c8d9e0f123_create_rag_tables.py b/alembic/versions/b4c8d9e0f123_create_rag_tables.py new file mode 100644 index 00000000..e0d76cbc --- /dev/null +++ b/alembic/versions/b4c8d9e0f123_create_rag_tables.py @@ -0,0 +1,153 @@ +"""create_rag_tables + +Revision ID: b4c8d9e0f123 +Revises: 37e16ecef223 +Create Date: 2026-02-01 
12:00:00.000000 + +""" + +from typing import Sequence, Union + +import sqlalchemy as sa +from alembic import op +from pgvector.sqlalchemy import Vector +from sqlalchemy.dialects import postgresql + +# revision identifiers, used by Alembic. +revision: str = "b4c8d9e0f123" +down_revision: Union[str, None] = "37e16ecef223" +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + """Apply migration - create document_source and document_chunk tables with pgvector.""" + # Enable pgvector extension + op.execute("CREATE EXTENSION IF NOT EXISTS vector") + + # Create document_source table + op.create_table( + "document_source", + sa.Column("id", sa.Integer(), nullable=False), + sa.Column("source_id", sa.String(length=32), nullable=False), + sa.Column("source_type", sa.String(length=50), nullable=False), + sa.Column("source_path", sa.Text(), nullable=False), + sa.Column("content_hash", sa.String(length=64), nullable=False), + sa.Column("metadata", postgresql.JSONB(astext_type=sa.Text()), nullable=True), + sa.Column("indexed_at", sa.DateTime(timezone=True), nullable=False), + # Timestamps (from TimestampMixin) + sa.Column( + "created_at", + sa.DateTime(timezone=True), + server_default=sa.text("now()"), + nullable=False, + ), + sa.Column( + "updated_at", + sa.DateTime(timezone=True), + server_default=sa.text("now()"), + nullable=False, + ), + # Constraints + sa.PrimaryKeyConstraint("id"), + sa.UniqueConstraint("source_type", "source_path", name="uq_source_type_path"), + ) + + # Create indexes for document_source + op.create_index( + op.f("ix_document_source_source_id"), + "document_source", + ["source_id"], + unique=True, + ) + op.create_index( + op.f("ix_document_source_source_type"), + "document_source", + ["source_type"], + unique=False, + ) + + # Create document_chunk table with Vector column + op.create_table( + "document_chunk", + sa.Column("id", sa.Integer(), nullable=False), + 
sa.Column("chunk_id", sa.String(length=32), nullable=False), + sa.Column("source_id", sa.Integer(), nullable=False), + sa.Column("chunk_index", sa.Integer(), nullable=False), + sa.Column("content", sa.Text(), nullable=False), + sa.Column("embedding", Vector(1536), nullable=True), + sa.Column("token_count", sa.Integer(), nullable=False), + sa.Column("metadata", postgresql.JSONB(astext_type=sa.Text()), nullable=True), + # Timestamps (from TimestampMixin) + sa.Column( + "created_at", + sa.DateTime(timezone=True), + server_default=sa.text("now()"), + nullable=False, + ), + sa.Column( + "updated_at", + sa.DateTime(timezone=True), + server_default=sa.text("now()"), + nullable=False, + ), + # Constraints + sa.PrimaryKeyConstraint("id"), + sa.ForeignKeyConstraint( + ["source_id"], + ["document_source.id"], + ondelete="CASCADE", + ), + sa.UniqueConstraint("source_id", "chunk_index", name="uq_source_chunk_index"), + ) + + # Create indexes for document_chunk + op.create_index( + op.f("ix_document_chunk_chunk_id"), + "document_chunk", + ["chunk_id"], + unique=True, + ) + op.create_index( + op.f("ix_document_chunk_source_id"), + "document_chunk", + ["source_id"], + unique=False, + ) + + # Create HNSW index for vector similarity search (cosine distance) + op.create_index( + "ix_chunk_embedding_hnsw", + "document_chunk", + ["embedding"], + unique=False, + postgresql_using="hnsw", + postgresql_with={"m": 16, "ef_construction": 64}, + postgresql_ops={"embedding": "vector_cosine_ops"}, + ) + + # Create GIN index for metadata filtering + op.create_index( + "ix_chunk_metadata_gin", + "document_chunk", + ["metadata"], + unique=False, + postgresql_using="gin", + ) + + +def downgrade() -> None: + """Revert migration - drop document_source and document_chunk tables.""" + # Drop document_chunk indexes and table + op.drop_index("ix_chunk_metadata_gin", table_name="document_chunk") + op.drop_index("ix_chunk_embedding_hnsw", table_name="document_chunk") + 
op.drop_index(op.f("ix_document_chunk_source_id"), table_name="document_chunk") + op.drop_index(op.f("ix_document_chunk_chunk_id"), table_name="document_chunk") + op.drop_table("document_chunk") + + # Drop document_source indexes and table + op.drop_index(op.f("ix_document_source_source_type"), table_name="document_source") + op.drop_index(op.f("ix_document_source_source_id"), table_name="document_source") + op.drop_table("document_source") + + # Note: We don't drop the vector extension as it might be used by other tables diff --git a/alembic/versions/c5d9e1f2g345_rag_dynamic_embedding_dimension.py b/alembic/versions/c5d9e1f2g345_rag_dynamic_embedding_dimension.py new file mode 100644 index 00000000..abc976be --- /dev/null +++ b/alembic/versions/c5d9e1f2g345_rag_dynamic_embedding_dimension.py @@ -0,0 +1,84 @@ +"""rag_dynamic_embedding_dimension + +Revision ID: c5d9e1f2g345 +Revises: b4c8d9e0f123 +Create Date: 2026-02-01 12:49:28.000000 + +CRITICAL: This migration alters the embedding column dimension. +This migration is deterministic - it changes from 1536 to 1536 (no-op by default). +To change dimensions, create a NEW migration with the desired target dimension. + +If changing to a different dimension, existing embeddings will be incompatible +and re-indexing is required. +""" + +from __future__ import annotations + +from collections.abc import Sequence + +from alembic import op + +# revision identifiers, used by Alembic. +revision: str = "c5d9e1f2g345" +down_revision: str | None = "b4c8d9e0f123" +branch_labels: str | Sequence[str] | None = None +depends_on: str | Sequence[str] | None = None + +# CRITICAL: Hardcoded dimensions for deterministic, reversible migrations. +# To change dimensions, create a NEW migration with updated values. 
+PREVIOUS_DIMENSION = 1536 # Dimension before this migration +TARGET_DIMENSION = 1536 # Dimension after this migration (change this for new dimension) + + +def upgrade() -> None: + """Apply migration - alter embedding column to target dimension. + + Uses hardcoded TARGET_DIMENSION for deterministic behavior. + WARNING: Changing dimension requires re-indexing all documents. + """ + # Drop the HNSW index first (required before altering column type) + op.drop_index("ix_chunk_embedding_hnsw", table_name="document_chunk") + + # Alter the embedding column type with target dimension + # Note: This will invalidate any existing embeddings if dimension changes + op.execute( + f"ALTER TABLE document_chunk ALTER COLUMN embedding TYPE vector({TARGET_DIMENSION})" + ) + + # Recreate the HNSW index with the target dimension + op.create_index( + "ix_chunk_embedding_hnsw", + "document_chunk", + ["embedding"], + unique=False, + postgresql_using="hnsw", + postgresql_with={"m": 16, "ef_construction": 64}, + postgresql_ops={"embedding": "vector_cosine_ops"}, + ) + + +def downgrade() -> None: + """Revert migration - restore embedding column to previous dimension. + + Uses hardcoded PREVIOUS_DIMENSION for deterministic rollback. + WARNING: This will invalidate any embeddings that were generated + with the target dimension. 
+ """ + # Drop the HNSW index + op.drop_index("ix_chunk_embedding_hnsw", table_name="document_chunk") + + # Restore to previous dimension + op.execute( + f"ALTER TABLE document_chunk ALTER COLUMN embedding TYPE vector({PREVIOUS_DIMENSION})" + ) + + # Recreate the HNSW index + op.create_index( + "ix_chunk_embedding_hnsw", + "document_chunk", + ["embedding"], + unique=False, + postgresql_using="hnsw", + postgresql_with={"m": 16, "ef_construction": 64}, + postgresql_ops={"embedding": "vector_cosine_ops"}, + ) diff --git a/app/core/config.py b/app/core/config.py index 46d5c9c9..ba912fa8 100644 --- a/app/core/config.py +++ b/app/core/config.py @@ -64,6 +64,32 @@ class Settings(BaseSettings): # Jobs jobs_retention_days: int = 30 + # RAG Embedding Configuration + rag_embedding_provider: Literal["openai", "ollama"] = "openai" + openai_api_key: str = "" + rag_embedding_model: str = "text-embedding-3-small" + rag_embedding_dimension: int = 1536 + rag_embedding_batch_size: int = 100 + + # Ollama Configuration (when rag_embedding_provider = "ollama") + ollama_base_url: str = "http://localhost:11434" + ollama_embedding_model: str = "nomic-embed-text" + + # RAG Chunking Configuration + rag_chunk_size: int = 512 # tokens + rag_chunk_overlap: int = 50 # tokens + rag_min_chunk_size: int = 100 # minimum tokens per chunk + + # RAG Retrieval Configuration + rag_top_k: int = 5 + rag_similarity_threshold: float = 0.7 + rag_max_context_tokens: int = 4000 + + # RAG Index Configuration + rag_index_type: Literal["hnsw", "ivfflat"] = "hnsw" + rag_hnsw_m: int = 16 + rag_hnsw_ef_construction: int = 64 + @property def is_development(self) -> bool: """Check if running in development mode.""" diff --git a/app/features/rag/__init__.py b/app/features/rag/__init__.py new file mode 100644 index 00000000..918ac064 --- /dev/null +++ b/app/features/rag/__init__.py @@ -0,0 +1,5 @@ +"""RAG (Retrieval-Augmented Generation) knowledge base feature.""" + +from app.features.rag.routes import router + +__all__ 
= ["router"] diff --git a/app/features/rag/chunkers.py b/app/features/rag/chunkers.py new file mode 100644 index 00000000..15c0ecfd --- /dev/null +++ b/app/features/rag/chunkers.py @@ -0,0 +1,650 @@ +"""Document chunking strategies for RAG indexing. + +Provides heading-aware and content-aware chunking: +- MarkdownChunker: Splits on heading boundaries +- OpenAPIChunker: One chunk per endpoint + +CRITICAL: Uses tiktoken for accurate token counting. +""" + +from __future__ import annotations + +import json +import re +from abc import ABC, abstractmethod +from dataclasses import dataclass, field +from typing import Any + +import tiktoken + +from app.core.config import get_settings + + +@dataclass +class ChunkData: + """Represents a single chunk of document content. + + Args: + content: The text content of the chunk. + index: Position of this chunk in the source document. + token_count: Number of tokens in the content. + metadata: Additional context (heading, section_path, etc.). + """ + + content: str + index: int + token_count: int + metadata: dict[str, Any] = field(default_factory=lambda: {}) + + +class BaseChunker(ABC): + """Abstract base class for document chunkers. + + All chunkers must: + - Use tiktoken for token counting (cl100k_base encoding) + - Respect chunk_size and chunk_overlap settings + - Never exceed 8191 tokens per chunk (OpenAI limit) + """ + + MAX_TOKENS_PER_CHUNK = 8191 # OpenAI embedding input limit + + def __init__(self) -> None: + """Initialize chunker with settings and tokenizer.""" + self.settings = get_settings() + self.chunk_size = self.settings.rag_chunk_size + self.chunk_overlap = self.settings.rag_chunk_overlap + self.min_chunk_size = self.settings.rag_min_chunk_size + self._encoder = tiktoken.get_encoding("cl100k_base") + + def count_tokens(self, text: str) -> int: + """Count tokens in text using tiktoken. + + Args: + text: Text to count tokens for. + + Returns: + Number of tokens. 
+ """ + return len(self._encoder.encode(text)) + + def _truncate_to_tokens(self, text: str, max_tokens: int) -> str: + """Truncate text to a maximum number of tokens. + + Args: + text: Text to truncate. + max_tokens: Maximum number of tokens. + + Returns: + Truncated text. + """ + tokens = self._encoder.encode(text) + if len(tokens) <= max_tokens: + return text + return self._encoder.decode(tokens[:max_tokens]) + + @abstractmethod + def chunk(self, content: str) -> list[ChunkData]: + """Split content into chunks. + + Args: + content: Full document content. + + Returns: + List of ChunkData objects. + """ + pass + + +class MarkdownChunker(BaseChunker): + """Chunks markdown documents by heading boundaries. + + Splits content at heading boundaries (# ## ### etc.) while: + - Respecting chunk_size limits + - Including heading hierarchy in metadata + - Preserving context through overlap + """ + + # Regex to match markdown headings + HEADING_PATTERN = re.compile(r"^(#{1,6})\s+(.+)$", re.MULTILINE) + + def chunk(self, content: str) -> list[ChunkData]: + """Split markdown content into heading-aware chunks. + + Args: + content: Markdown document content. + + Returns: + List of ChunkData with heading metadata. 
+ """ + chunks: list[ChunkData] = [] + sections = self._split_by_headings(content) + + current_chunk = "" + current_heading_path: list[str] = [] + chunk_index = 0 + + for section in sections: + section_content = section["content"] + heading = section.get("heading") + level = section.get("level", 0) + + # Update heading path based on level + if heading: + current_heading_path = self._update_heading_path( + current_heading_path, heading, level + ) + + section_tokens = self.count_tokens(section_content) + + # If section alone exceeds chunk size, split it further + if section_tokens > self.chunk_size: + # Flush current chunk if any + if current_chunk.strip(): + chunks.append( + self._create_chunk( + current_chunk.strip(), chunk_index, current_heading_path.copy() + ) + ) + chunk_index += 1 + current_chunk = "" + + # Split large section into smaller chunks + sub_chunks = self._split_large_section(section_content, current_heading_path.copy()) + for sub_chunk in sub_chunks: + sub_chunk.index = chunk_index + chunks.append(sub_chunk) + chunk_index += 1 + continue + + # Check if adding this section exceeds chunk size + combined = current_chunk + section_content + combined_tokens = self.count_tokens(combined) + + if combined_tokens > self.chunk_size: + # Save current chunk and start new one + if current_chunk.strip(): + chunks.append( + self._create_chunk( + current_chunk.strip(), chunk_index, current_heading_path.copy() + ) + ) + chunk_index += 1 + + # Add overlap from previous chunk + overlap_text = self._get_overlap_text(current_chunk) + current_chunk = overlap_text + section_content + else: + current_chunk = combined + + # Don't forget the last chunk + # Include it even if small when it's the only content + if current_chunk.strip(): + token_count = self.count_tokens(current_chunk.strip()) + # Include small chunks if: we have no other chunks OR it meets min size + if len(chunks) == 0 or token_count >= self.min_chunk_size: + chunks.append( + self._create_chunk( + 
current_chunk.strip(), chunk_index, current_heading_path.copy() + ) + ) + + return chunks + + def _split_by_headings(self, content: str) -> list[dict[str, Any]]: + """Split content at heading boundaries. + + Args: + content: Markdown content. + + Returns: + List of sections with heading info. + """ + sections: list[dict[str, Any]] = [] + lines = content.split("\n") + current_section: dict[str, Any] = {"content": "", "heading": None, "level": 0} + + for line in lines: + match = self.HEADING_PATTERN.match(line) + if match: + # Save current section if it has content + if current_section["content"].strip(): + sections.append(current_section) + + # Start new section with this heading + level = len(match.group(1)) + heading = match.group(2).strip() + current_section = { + "content": line + "\n", + "heading": heading, + "level": level, + } + else: + current_section["content"] += line + "\n" + + # Add final section + if current_section["content"].strip(): + sections.append(current_section) + + return sections + + def _update_heading_path(self, current_path: list[str], heading: str, level: int) -> list[str]: + """Update the heading path based on the new heading level. + + Args: + current_path: Current list of headings. + heading: New heading text. + level: Heading level (1-6). + + Returns: + Updated heading path. + """ + # Truncate path to current level and add new heading + new_path = current_path[: level - 1] + new_path.append(heading) + return new_path + + def _split_large_section(self, content: str, heading_path: list[str]) -> list[ChunkData]: + """Split a large section into smaller chunks by sentences/paragraphs. + + Args: + content: Section content that exceeds chunk size. + heading_path: Current heading hierarchy. + + Returns: + List of smaller chunks. 
+ """ + chunks: list[ChunkData] = [] + paragraphs = content.split("\n\n") + current_chunk = "" + + for para in paragraphs: + para = para.strip() + if not para: + continue + + para_tokens = self.count_tokens(para) + + # If single paragraph exceeds limit, split by sentences + if para_tokens > self.chunk_size: + if current_chunk.strip(): + chunks.append(self._create_chunk(current_chunk.strip(), 0, heading_path)) + current_chunk = "" + + sentence_chunks = self._split_by_sentences(para, heading_path) + chunks.extend(sentence_chunks) + continue + + combined = current_chunk + "\n\n" + para if current_chunk else para + combined_tokens = self.count_tokens(combined) + + if combined_tokens > self.chunk_size: + if current_chunk.strip(): + chunks.append(self._create_chunk(current_chunk.strip(), 0, heading_path)) + current_chunk = para + else: + current_chunk = combined + + if current_chunk.strip(): + chunks.append(self._create_chunk(current_chunk.strip(), 0, heading_path)) + + return chunks + + def _split_by_sentences(self, text: str, heading_path: list[str]) -> list[ChunkData]: + """Split text by sentences when paragraphs are too large. + + Args: + text: Text to split. + heading_path: Current heading hierarchy. + + Returns: + List of sentence-based chunks. + """ + chunks: list[ChunkData] = [] + # Simple sentence splitting (handles . ? !) 
+ sentences = re.split(r"(?<=[.!?])\s+", text) + current_chunk = "" + + for sentence in sentences: + sentence = sentence.strip() + if not sentence: + continue + + sentence_tokens = self.count_tokens(sentence) + + # If single sentence exceeds limit, truncate it + if sentence_tokens > self.MAX_TOKENS_PER_CHUNK: + if current_chunk.strip(): + chunks.append(self._create_chunk(current_chunk.strip(), 0, heading_path)) + current_chunk = "" + + truncated = self._truncate_to_tokens(sentence, self.MAX_TOKENS_PER_CHUNK) + chunks.append(self._create_chunk(truncated, 0, heading_path)) + continue + + combined = current_chunk + " " + sentence if current_chunk else sentence + combined_tokens = self.count_tokens(combined) + + if combined_tokens > self.chunk_size: + if current_chunk.strip(): + chunks.append(self._create_chunk(current_chunk.strip(), 0, heading_path)) + current_chunk = sentence + else: + current_chunk = combined + + if current_chunk.strip(): + chunks.append(self._create_chunk(current_chunk.strip(), 0, heading_path)) + + return chunks + + def _get_overlap_text(self, text: str) -> str: + """Get the last N tokens of text for overlap. + + Args: + text: Text to get overlap from. + + Returns: + Overlap text. + """ + if not text or self.chunk_overlap <= 0: + return "" + + tokens = self._encoder.encode(text) + if len(tokens) <= self.chunk_overlap: + return text + + overlap_tokens = tokens[-self.chunk_overlap :] + return self._encoder.decode(overlap_tokens) + + def _create_chunk(self, content: str, index: int, heading_path: list[str]) -> ChunkData: + """Create a ChunkData object with metadata. + + Args: + content: Chunk content. + index: Chunk index. + heading_path: Heading hierarchy. + + Returns: + ChunkData instance. 
+ """ + token_count = self.count_tokens(content) + metadata: dict[str, Any] = {} + + if heading_path: + metadata["heading"] = heading_path[-1] + metadata["section_path"] = heading_path + + return ChunkData( + content=content, + index=index, + token_count=token_count, + metadata=metadata, + ) + + +class OpenAPIChunker(BaseChunker): + """Chunks OpenAPI specifications by endpoint. + + Creates one chunk per endpoint containing: + - Path and method + - Operation summary and description + - Parameters and request body schema + - Response schemas + """ + + def chunk(self, content: str) -> list[ChunkData]: + """Split OpenAPI spec into endpoint-based chunks. + + Args: + content: OpenAPI JSON/YAML content. + + Returns: + List of ChunkData, one per endpoint. + """ + chunks: list[ChunkData] = [] + + spec_data: dict[str, Any] + try: + spec_data = json.loads(content) + except json.JSONDecodeError: + # Try YAML if JSON fails + try: + import yaml # type: ignore[import-untyped] + + parsed = yaml.safe_load(content) + # yaml.safe_load can return non-dict for simple strings + if not isinstance(parsed, dict): + return MarkdownChunker().chunk(content) + spec_data = parsed # pyright: ignore[reportUnknownVariableType] + except Exception: + # Fall back to treating as markdown + return MarkdownChunker().chunk(content) + + paths: dict[str, Any] = spec_data.get("paths", {}) + chunk_index = 0 + + # Also include info section as first chunk + info: dict[str, Any] = spec_data.get("info", {}) + if info: + servers: list[dict[str, Any]] = spec_data.get("servers", []) + info_chunk = self._create_info_chunk(info, servers) + info_chunk.index = chunk_index + chunks.append(info_chunk) + chunk_index += 1 + + # Create chunk for each endpoint + for path_key, methods in paths.items(): + path: str = str(path_key) + if not isinstance(methods, dict): + continue + + methods_dict: dict[str, Any] = dict(methods) # pyright: ignore[reportUnknownArgumentType] + for method_name, operation in methods_dict.items(): + if 
method_name.startswith("x-") or not isinstance(operation, dict): + continue + + operation_dict: dict[str, Any] = dict(operation) # pyright: ignore[reportUnknownArgumentType] + chunk = self._create_endpoint_chunk(path, method_name, operation_dict, spec_data) + chunk.index = chunk_index + chunks.append(chunk) + chunk_index += 1 + + return chunks + + def _create_info_chunk(self, info: dict[str, Any], servers: list[dict[str, Any]]) -> ChunkData: + """Create a chunk for API info section. + + Args: + info: OpenAPI info object. + servers: OpenAPI servers array. + + Returns: + ChunkData for API overview. + """ + parts: list[str] = [] + title = info.get("title", "API") + version = info.get("version", "") + + parts.append(f"# {title}") + if version: + parts.append(f"Version: {version}") + if info.get("description"): + parts.append(f"\n{info['description']}") + if servers: + parts.append("\n## Servers") + for server in servers: + url = server.get("url", "") + desc = server.get("description", "") + parts.append(f"- {url}" + (f" ({desc})" if desc else "")) + + content = "\n".join(parts) + return ChunkData( + content=content, + index=0, + token_count=self.count_tokens(content), + metadata={"type": "api_info", "title": title}, + ) + + def _create_endpoint_chunk( + self, + path: str, + method: str, + operation: dict[str, Any], + spec: dict[str, Any], + ) -> ChunkData: + """Create a chunk for a single API endpoint. + + Args: + path: Endpoint path. + method: HTTP method. + operation: OpenAPI operation object. + spec: Full OpenAPI spec (for dereferencing). + + Returns: + ChunkData for the endpoint. 
+ """ + parts: list[str] = [] + + # Endpoint header + operation_id = operation.get("operationId", f"{method}_{path}") + summary = operation.get("summary", "") + parts.append(f"## {method.upper()} {path}") + if summary: + parts.append(f"**{summary}**") + + # Description + if operation.get("description"): + parts.append(f"\n{operation['description']}") + + # Tags + tags = operation.get("tags", []) + if tags: + parts.append(f"\nTags: {', '.join(tags)}") + + # Parameters + params = operation.get("parameters", []) + if params: + parts.append("\n### Parameters") + for param in params: + name = param.get("name", "") + location = param.get("in", "") + required = param.get("required", False) + desc = param.get("description", "") + req_str = " (required)" if required else "" + parts.append(f"- `{name}` ({location}){req_str}: {desc}") + + # Request body + request_body = operation.get("requestBody", {}) + if request_body: + parts.append("\n### Request Body") + content_types = request_body.get("content", {}) + for ct, schema_info in content_types.items(): + parts.append(f"Content-Type: {ct}") + if "schema" in schema_info: + schema_str = self._format_schema(schema_info["schema"], spec) + parts.append(f"```json\n{schema_str}\n```") + + # Responses + responses = operation.get("responses", {}) + if responses: + parts.append("\n### Responses") + for status, response in responses.items(): + desc = response.get("description", "") + parts.append(f"- **{status}**: {desc}") + + content = "\n".join(parts) + + # Ensure we don't exceed token limit + token_count = self.count_tokens(content) + if token_count > self.MAX_TOKENS_PER_CHUNK: + content = self._truncate_to_tokens(content, self.MAX_TOKENS_PER_CHUNK) + token_count = self.count_tokens(content) + + return ChunkData( + content=content, + index=0, + token_count=token_count, + metadata={ + "type": "endpoint", + "path": path, + "method": method.upper(), + "operation_id": operation_id, + "tags": tags, + }, + ) + + def _format_schema(self, 
schema: dict[str, Any], spec: dict[str, Any], depth: int = 0) -> str: + """Format a JSON schema for display. + + Args: + schema: JSON schema object. + spec: Full OpenAPI spec (for $ref resolution). + depth: Current recursion depth. + + Returns: + Formatted schema string. + """ + if depth > 3: # Prevent deep recursion + return "{...}" + + # Handle $ref + if "$ref" in schema: + ref = schema["$ref"] + resolved = self._resolve_ref(ref, spec) + if resolved: + return self._format_schema(resolved, spec, depth + 1) + return f'{{"$ref": "{ref}"}}' + + # Simple formatting + try: + return json.dumps(schema, indent=2)[:500] # Limit size + except (TypeError, ValueError): + return str(schema)[:500] + + def _resolve_ref(self, ref: str, spec: dict[str, Any]) -> dict[str, Any] | None: + """Resolve a $ref pointer in the OpenAPI spec. + + Args: + ref: Reference string (e.g., "#/components/schemas/User"). + spec: Full OpenAPI spec. + + Returns: + Resolved schema or None. + """ + if not ref.startswith("#/"): + return None + + parts = ref[2:].split("/") + current: Any = spec + + for part in parts: + if isinstance(current, dict) and part in current: + current = current[part] # pyright: ignore[reportUnknownVariableType] + else: + return None + + if isinstance(current, dict): + return dict(current) # pyright: ignore[reportUnknownArgumentType] + return None + + +def get_chunker(source_type: str) -> BaseChunker: + """Factory function to get the appropriate chunker. + + Args: + source_type: Type of source (markdown, openapi). + + Returns: + Appropriate chunker instance. + + Raises: + ValueError: If source_type is not supported. 
+ """ + chunkers = { + "markdown": MarkdownChunker, + "openapi": OpenAPIChunker, + } + + if source_type not in chunkers: + raise ValueError(f"Unsupported source type: {source_type}") + + return chunkers[source_type]() diff --git a/app/features/rag/embeddings.py b/app/features/rag/embeddings.py new file mode 100644 index 00000000..cffa1b1d --- /dev/null +++ b/app/features/rag/embeddings.py @@ -0,0 +1,536 @@ +"""Embedding providers for RAG knowledge base. + +Provides async embedding generation with multiple backends: +- OpenAI API (default): Batch processing with rate limit handling +- Ollama: Local/LAN embedding generation via HTTP API + +CRITICAL: Provider selection via RAG_EMBEDDING_PROVIDER config. +""" + +from __future__ import annotations + +import asyncio +from abc import ABC, abstractmethod +from typing import TYPE_CHECKING + +import httpx +import structlog +import tiktoken +from openai import AsyncOpenAI, RateLimitError + +from app.core.config import get_settings + +if TYPE_CHECKING: + pass + +logger = structlog.get_logger() + + +class EmbeddingError(Exception): + """Error during embedding generation.""" + + pass + + +class EmbeddingProvider(ABC): + """Abstract base class for embedding providers. + + Defines the interface for generating text embeddings. + All providers must implement embed_texts, embed_query, and dimension. + """ + + @abstractmethod + async def embed_texts(self, texts: list[str]) -> list[list[float]]: + """Generate embeddings for multiple texts. + + Args: + texts: List of texts to embed. + + Returns: + List of embedding vectors in same order as input texts. + + Raises: + EmbeddingError: If embedding generation fails. + """ + ... + + @abstractmethod + async def embed_query(self, query: str) -> list[float]: + """Generate embedding for a single query. + + Args: + query: Query text to embed. + + Returns: + Embedding vector. + + Raises: + EmbeddingError: If embedding generation fails. + """ + ... 
+ + @property + @abstractmethod + def dimension(self) -> int: + """Return the embedding dimension for this provider. + + Returns: + Embedding dimension (e.g., 1536 for OpenAI, 768 for nomic-embed-text). + """ + ... + + +class OpenAIEmbeddingProvider(EmbeddingProvider): + """Embedding provider using OpenAI API. + + Handles: + - Async batch embedding generation + - Rate limit handling with exponential backoff + - Token counting and validation + - Cost tracking via logging + + CRITICAL: OpenAI embedding input limit is 8192 tokens per text. + """ + + MAX_TOKENS_PER_INPUT = 8191 # OpenAI limit + MAX_INPUTS_PER_BATCH = 2048 # OpenAI batch limit + + def __init__(self) -> None: + """Initialize OpenAI embedding provider.""" + self.settings = get_settings() + self._encoder = tiktoken.get_encoding("cl100k_base") + self._client: AsyncOpenAI | None = None + + def _get_client(self) -> AsyncOpenAI: + """Get or create the async OpenAI client. + + Returns: + AsyncOpenAI client instance. + + Raises: + EmbeddingError: If OpenAI API key is not configured. + """ + if self._client is None: + if not self.settings.openai_api_key: + raise EmbeddingError( + "OpenAI API key not configured. Set OPENAI_API_KEY environment variable." + ) + self._client = AsyncOpenAI(api_key=self.settings.openai_api_key) + return self._client + + @property + def dimension(self) -> int: + """Return configured embedding dimension. + + Returns: + Embedding dimension from settings. + """ + return self.settings.rag_embedding_dimension + + def count_tokens(self, text: str) -> int: + """Count tokens in text using tiktoken. + + Args: + text: Text to count tokens for. + + Returns: + Number of tokens. + """ + return len(self._encoder.encode(text)) + + def truncate_to_tokens(self, text: str, max_tokens: int) -> str: + """Truncate text to a maximum number of tokens. + + Args: + text: Text to truncate. + max_tokens: Maximum number of tokens. + + Returns: + Truncated text. 
async def embed_texts(
    self,
    texts: list[str],
    max_retries: int = 3,
    retry_delay: float = 1.0,
) -> list[list[float]]:
    """Generate embeddings for multiple texts.

    Texts longer than MAX_TOKENS_PER_INPUT tokens are truncated (with a
    warning), then sent in batches via ``_embed_batch``.

    Args:
        texts: List of texts to embed.
        max_retries: Maximum retry attempts per batch.
        retry_delay: Initial delay between retries (doubles each retry).

    Returns:
        List of embeddings in same order as input texts.

    Raises:
        EmbeddingError: If embedding generation fails after retries.
    """
    if not texts:
        return []

    client = self._get_client()

    # Clamp to at least 1: a configured batch size of 0 would make range()
    # raise ValueError, and a negative one would produce an empty range and
    # silently return no embeddings for non-empty input.
    batch_size = max(
        1,
        min(self.settings.rag_embedding_batch_size, self.MAX_INPUTS_PER_BATCH),
    )

    # Validate and truncate texts if needed
    validated_texts: list[str] = []
    total_tokens = 0

    for text in texts:
        original_token_count = self.count_tokens(text)
        if original_token_count > self.MAX_TOKENS_PER_INPUT:
            text = self.truncate_to_tokens(text, self.MAX_TOKENS_PER_INPUT)
            # Recount after truncation so total_tokens reflects what is sent.
            token_count = self.count_tokens(text)
            logger.warning(
                "rag.embedding_text_truncated",
                original_tokens=original_token_count,
                truncated_to=self.MAX_TOKENS_PER_INPUT,
            )
        else:
            token_count = original_token_count
        validated_texts.append(text)
        total_tokens += token_count

    embeddings: list[list[float]] = []

    # Process in batches, preserving input order
    for i in range(0, len(validated_texts), batch_size):
        batch = validated_texts[i : i + batch_size]
        batch_embeddings = await self._embed_batch(client, batch, max_retries, retry_delay)
        embeddings.extend(batch_embeddings)

    logger.info(
        "rag.embeddings_generated",
        text_count=len(texts),
        total_tokens=total_tokens,
        model=self.settings.rag_embedding_model,
        provider="openai",
    )

    return embeddings
list[float]: + """Generate embedding for a single query. + + Optimized for single query embedding (no batching overhead). + + Args: + query: Query text to embed. + + Returns: + Embedding vector. + + Raises: + EmbeddingError: If embedding generation fails. + """ + embeddings = await self.embed_texts([query]) + return embeddings[0] + + async def _embed_batch( + self, + client: AsyncOpenAI, + texts: list[str], + max_retries: int, + retry_delay: float, + ) -> list[list[float]]: + """Embed a single batch of texts with retry logic. + + Args: + client: OpenAI async client. + texts: Batch of texts to embed. + max_retries: Maximum retry attempts. + retry_delay: Initial delay between retries. + + Returns: + List of embeddings. + + Raises: + EmbeddingError: If all retries fail. + """ + last_error: Exception | None = None + + for attempt in range(max_retries + 1): + try: + response = await client.embeddings.create( + model=self.settings.rag_embedding_model, + input=texts, + dimensions=self.settings.rag_embedding_dimension, + ) + + # Extract embeddings in order + embeddings = [item.embedding for item in response.data] + + # Log token usage + if response.usage: + logger.debug( + "rag.embedding_batch_completed", + batch_size=len(texts), + prompt_tokens=response.usage.prompt_tokens, + total_tokens=response.usage.total_tokens, + ) + + return embeddings + + except RateLimitError as e: + last_error = e + if attempt < max_retries: + wait_time = retry_delay * (2**attempt) + logger.warning( + "rag.embedding_rate_limit", + attempt=attempt + 1, + max_retries=max_retries, + wait_seconds=wait_time, + ) + await asyncio.sleep(wait_time) + continue + + except Exception as e: + last_error = e + logger.error( + "rag.embedding_error", + error=str(e), + error_type=type(e).__name__, + batch_size=len(texts), + ) + raise EmbeddingError(f"Failed to generate embeddings: {e}") from e + + raise EmbeddingError( + f"Failed to generate embeddings after {max_retries} retries: {last_error}" + ) + + +class 
async def embed_texts(
    self,
    texts: list[str],
    max_retries: int = 3,
    retry_delay: float = 1.0,
) -> list[list[float]]:
    """Generate embeddings for multiple texts via Ollama's OpenAI-compatible API.

    Posts all texts in one request to /v1/embeddings with the configured
    ``dimensions`` value. HTTP 5xx responses are retried with exponential
    backoff; 404, other HTTP errors, connection errors and unexpected
    errors fail immediately.

    Args:
        texts: List of texts to embed.
        max_retries: Maximum retry attempts.
        retry_delay: Initial delay between retries (doubles each retry).

    Returns:
        List of embeddings in same order as input texts.

    Raises:
        EmbeddingError: If embedding generation fails.
    """
    if not texts:
        return []

    client = self._get_client()
    last_error: Exception | None = None

    for attempt in range(max_retries + 1):
        try:
            # Use OpenAI-compatible endpoint with dimensions parameter
            response = await client.post(
                "/v1/embeddings",
                json={
                    "model": self.settings.ollama_embedding_model,
                    "input": texts,
                    "dimensions": self.settings.rag_embedding_dimension,
                },
            )
            response.raise_for_status()

            data = response.json()

            # OpenAI-compatible response format: {"data": [{"embedding": [...], "index": 0}, ...]}
            embedding_data = data.get("data", [])

            if len(embedding_data) != len(texts):
                raise EmbeddingError(
                    f"Embedding count mismatch: expected {len(texts)}, got {len(embedding_data)}"
                )

            # Sort by index to ensure correct order and extract embeddings
            sorted_data = sorted(embedding_data, key=lambda x: x.get("index", 0))
            embeddings: list[list[float]] = [item["embedding"] for item in sorted_data]

            logger.info(
                "rag.embeddings_generated",
                text_count=len(texts),
                model=self.settings.ollama_embedding_model,
                dimension=self.settings.rag_embedding_dimension,
                provider="ollama",
            )

            return embeddings

        except EmbeddingError:
            # Already a domain error (count mismatch above): propagate as-is
            # instead of letting the generic handler log and re-wrap it.
            raise

        except httpx.HTTPStatusError as e:
            last_error = e
            if e.response.status_code == 404:
                # Model not found - don't retry
                raise EmbeddingError(
                    f"Ollama model '{self.settings.ollama_embedding_model}' not found. "
                    f"Run: ollama pull {self.settings.ollama_embedding_model}"
                ) from e
            if e.response.status_code >= 500 and attempt < max_retries:
                # Server error - retry
                wait_time = retry_delay * (2**attempt)
                logger.warning(
                    "rag.ollama_server_error",
                    attempt=attempt + 1,
                    max_retries=max_retries,
                    wait_seconds=wait_time,
                    status_code=e.response.status_code,
                )
                await asyncio.sleep(wait_time)
                continue
            logger.error(
                "rag.embedding_error",
                error=str(e),
                error_type=type(e).__name__,
                status_code=e.response.status_code,
            )
            raise EmbeddingError(f"Ollama API error: {e}") from e

        except httpx.ConnectError as e:
            last_error = e
            logger.error(
                "rag.ollama_connection_error",
                error=str(e),
                base_url=self.settings.ollama_base_url,
            )
            raise EmbeddingError(
                f"Failed to connect to Ollama at {self.settings.ollama_base_url}. "
                "Ensure Ollama is running."
            ) from e

        except Exception as e:
            last_error = e
            logger.error(
                "rag.embedding_error",
                error=str(e),
                error_type=type(e).__name__,
            )
            raise EmbeddingError(f"Failed to generate embeddings: {e}") from e

    # Only reached when the loop body never ran (negative max_retries).
    raise EmbeddingError(
        f"Failed to generate embeddings after {max_retries} retries: {last_error}"
    )
def get_embedding_service() -> EmbeddingProvider:
    """Return the process-wide embedding provider, creating it on first call.

    The provider is chosen from ``settings.rag_embedding_provider``:
    - "ollama": OllamaEmbeddingProvider
    - anything else: OpenAIEmbeddingProvider

    Returns:
        EmbeddingProvider instance (the same object on every later call
        until the singleton is reset).
    """
    global _embedding_provider

    # Fast path: already built.
    if _embedding_provider is not None:
        return _embedding_provider

    cfg = get_settings()
    wants_ollama = cfg.rag_embedding_provider == "ollama"

    if not wants_ollama:
        _embedding_provider = OpenAIEmbeddingProvider()
        logger.info(
            "rag.embedding_provider_initialized",
            provider="openai",
            model=cfg.rag_embedding_model,
        )
        return _embedding_provider

    _embedding_provider = OllamaEmbeddingProvider()
    logger.info(
        "rag.embedding_provider_initialized",
        provider="ollama",
        base_url=cfg.ollama_base_url,
        model=cfg.ollama_embedding_model,
    )
    return _embedding_provider
class DocumentChunk(TimestampMixin, Base):
    """Indexed document chunk with embedding.

    One row per chunk of a DocumentSource. Rows are unique per
    (source_id, chunk_index) and reference their parent with
    ON DELETE CASCADE.

    CRITICAL: Stores vector embeddings for semantic similarity search.

    Attributes:
        id: Primary key.
        chunk_id: Unique external identifier (UUID hex, 32 chars).
        source_id: Integer foreign key to document_source.id (the parent's
            primary key, not its 32-char external source_id).
        chunk_index: Position within the source document.
        content: Chunk text content.
        embedding: Vector embedding; the column is declared as Vector(1536)
            and is nullable.
        token_count: Number of tokens in the chunk.
        metadata_: Heading hierarchy, section path, etc. Stored in the
            database column named "metadata".
        source: Related document source.
    """

    __tablename__ = "document_chunk"

    id: Mapped[int] = mapped_column(Integer, primary_key=True)
    chunk_id: Mapped[str] = mapped_column(String(32), unique=True, index=True)
    source_id: Mapped[int] = mapped_column(
        Integer, ForeignKey("document_source.id", ondelete="CASCADE"), index=True
    )
    chunk_index: Mapped[int] = mapped_column(Integer, nullable=False)
    content: Mapped[str] = mapped_column(Text, nullable=False)
    # Vector column for embeddings. The dimension is hard-coded to 1536 in this
    # declaration; it is NOT read from the RAG_EMBEDDING_DIMENSION setting.
    # NOTE(review): a provider configured for another dimension (e.g. 768)
    # would presumably fail on insert - confirm and align with settings/migration.
    embedding: Mapped[list[float] | None] = mapped_column(Vector(1536), nullable=True)
    token_count: Mapped[int] = mapped_column(Integer, nullable=False)
    # Attribute is named metadata_ while the DB column is "metadata".
    metadata_: Mapped[dict[str, Any] | None] = mapped_column("metadata", JSONB, nullable=True)

    # Relationship to source
    source: Mapped[DocumentSource] = relationship(back_populates="chunks")

    __table_args__ = (
        UniqueConstraint("source_id", "chunk_index", name="uq_source_chunk_index"),
        # HNSW index for cosine similarity search. m and ef_construction are
        # literal values here, not taken from the RAG_HNSW_* settings.
        Index(
            "ix_chunk_embedding_hnsw",
            "embedding",
            postgresql_using="hnsw",
            postgresql_with={"m": 16, "ef_construction": 64},
            postgresql_ops={"embedding": "vector_cosine_ops"},
        ),
        # GIN index for metadata filtering (references the column name "metadata")
        Index("ix_chunk_metadata_gin", "metadata", postgresql_using="gin"),
    )
for document indexing and semantic retrieval.""" + +from fastapi import APIRouter, Depends, HTTPException, status +from sqlalchemy.exc import SQLAlchemyError +from sqlalchemy.ext.asyncio import AsyncSession + +from app.core.database import get_db +from app.core.exceptions import DatabaseError +from app.core.logging import get_logger +from app.features.rag.embeddings import EmbeddingError +from app.features.rag.schemas import ( + DeleteResponse, + IndexRequest, + IndexResponse, + RetrieveRequest, + RetrieveResponse, + SourceListResponse, +) +from app.features.rag.service import RAGService, SourceNotFoundError + +logger = get_logger(__name__) + +router = APIRouter(prefix="/rag", tags=["rag"]) + + +# ============================================================================= +# Index Endpoint +# ============================================================================= + + +@router.post( + "/index", + response_model=IndexResponse, + status_code=status.HTTP_201_CREATED, + summary="Index a document", + description=""" +Index a document into the RAG knowledge base. + +**Source Types:** +- `markdown`: Markdown documents (split by headings) +- `openapi`: OpenAPI specifications (split by endpoint) + +**Content Source:** +- Provide `content` directly in the request, OR +- Provide `source_path` to read from file system + +**Idempotent Updates:** +- Documents are identified by `source_type` + `source_path` +- Content hash is compared to detect changes +- If unchanged, returns `status: "unchanged"` without re-indexing +- If changed, old chunks are deleted and new ones created + +**Returns:** +- `source_id`: Unique identifier for the indexed source +- `chunks_created`: Number of chunks created +- `tokens_processed`: Total tokens processed +- `status`: "indexed", "updated", or "unchanged" +""", +) +async def index_document( + request: IndexRequest, + db: AsyncSession = Depends(get_db), +) -> IndexResponse: + """Index a document into the knowledge base. 
@router.post(
    "/retrieve",
    response_model=RetrieveResponse,
    summary="Semantic search",
    description="""
Perform semantic search across indexed documents.

**Query:**
- Natural language query (1-2000 characters)
- Converted to embedding for similarity search

**Parameters:**
- `top_k`: Number of results (1-50, default: 5)
- `similarity_threshold`: Minimum similarity (0.0-1.0, default: 0.7)
- `filters`: Optional metadata filters

**Filters:**
- `source_type`: List of source types to search
- `category`: Category from source metadata

**Returns:**
- List of matching chunks with relevance scores
- Performance metrics (embedding time, search time)
- Total chunks searched

**Evidence-Grounded:**
Returns raw chunks with citations - no answer generation.
""",
)
async def retrieve(
    request: RetrieveRequest,
    db: AsyncSession = Depends(get_db),
) -> RetrieveResponse:
    """Run a semantic search over the indexed chunks.

    Delegates to ``RAGService.retrieve`` and translates its failures into
    API errors.

    Args:
        request: Retrieval request with query and filters.
        db: Async database session from dependency.

    Returns:
        Search results with relevance scores.

    Raises:
        HTTPException: 502 if embedding generation fails.
        DatabaseError: If database operation fails.
    """
    logger.info(
        "rag.retrieve_request_received",
        query_length=len(request.query),
        top_k=request.top_k,
        threshold=request.similarity_threshold,
        has_filters=request.filters is not None,
    )

    rag = RAGService()

    try:
        result = await rag.retrieve(db=db, request=request)

        logger.info(
            "rag.retrieve_request_completed",
            results_count=len(result.results),
            query_embedding_time_ms=result.query_embedding_time_ms,
            search_time_ms=result.search_time_ms,
        )

        return result

    except (EmbeddingError, SQLAlchemyError) as e:
        # Both failure kinds are logged identically; only the raised error differs.
        logger.error(
            "rag.retrieve_request_failed",
            error=str(e),
            error_type=type(e).__name__,
            exc_info=True,
        )

        if isinstance(e, EmbeddingError):
            raise HTTPException(
                status_code=status.HTTP_502_BAD_GATEWAY,
                detail=f"Embedding generation failed: {e}",
            ) from e

        raise DatabaseError(
            message="Failed to retrieve documents",
            details={"error": str(e)},
        ) from e
@router.delete(
    "/sources/{source_id}",
    response_model=DeleteResponse,
    summary="Delete a source",
    description="""
Delete an indexed source and all its chunks.

**Cascade Delete:**
All chunks belonging to the source are automatically deleted.

**Returns:**
- `source_id`: Deleted source identifier
- `chunks_deleted`: Number of chunks removed
- `status`: Always "deleted"
""",
)
async def delete_source(
    source_id: str,
    db: AsyncSession = Depends(get_db),
) -> DeleteResponse:
    """Remove one indexed source together with its chunks.

    Delegates to ``RAGService.delete_source`` and translates its failures
    into API errors.

    Args:
        source_id: Source identifier.
        db: Async database session from dependency.

    Returns:
        Deletion result.

    Raises:
        HTTPException: 404 if source not found.
        DatabaseError: If database operation fails.
    """
    logger.info("rag.delete_source_request_received", source_id=source_id)

    rag = RAGService()

    try:
        outcome = await rag.delete_source(db=db, source_id=source_id)

        logger.info(
            "rag.delete_source_request_completed",
            source_id=source_id,
            chunks_deleted=outcome.chunks_deleted,
        )

        return outcome

    except SourceNotFoundError as e:
        # Expected client error: warning level, no traceback.
        not_found_message = str(e)
        logger.warning(
            "rag.delete_source_request_failed",
            source_id=source_id,
            error=not_found_message,
            error_type=type(e).__name__,
        )
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail=not_found_message,
        ) from e

    except SQLAlchemyError as e:
        db_message = str(e)
        logger.error(
            "rag.delete_source_request_failed",
            source_id=source_id,
            error=db_message,
            error_type=type(e).__name__,
            exc_info=True,
        )
        raise DatabaseError(
            message="Failed to delete source",
            details={"error": db_message},
        ) from e
# Request body accepted by the POST /rag/index route (see routes.py).
class IndexRequest(BaseModel):
    """Request to index a document into the knowledge base.

    Args:
        source_type: Type of document to index (markdown or openapi).
        source_path: Path to the document or identifier.
        content: Optional content override (if not reading from path).
        metadata: Custom metadata to attach to the source.
    """

    # extra="forbid": keys not declared on this model fail validation.
    model_config = ConfigDict(extra="forbid")

    # Only these two literals are accepted.
    source_type: Literal["markdown", "openapi"] = Field(
        ..., description="Type of document to index"
    )
    # Required, 1-500 characters.
    source_path: str = Field(
        ...,
        min_length=1,
        max_length=500,
        description="Path to the document or unique identifier",
    )
    # Optional; no length bound is declared on this field.
    content: str | None = Field(
        None, description="Optional content override (if not reading from path)"
    )
    # Optional free-form mapping; values are not validated beyond being a dict.
    metadata: dict[str, Any] | None = Field(
        None, description="Custom metadata to attach to the source"
    )
# One entry of RetrieveResponse.results.
class ChunkResult(BaseModel):
    """Single chunk in retrieval results with citation metadata.

    CRITICAL: Provides evidence-grounded context with stable citations.

    Args:
        chunk_id: Unique identifier for the chunk.
        source_id: Identifier of the parent source.
        source_path: Path of the source document.
        source_type: Type of source document.
        content: Chunk text content.
        relevance_score: Similarity score (0.0-1.0).
        metadata: Heading hierarchy, section path, etc.
    """

    # NOTE(review): from_attributes is enabled, but DocumentChunk in models.py
    # declares an integer source_id, names its metadata attribute metadata_, and
    # has no source_path/source_type/relevance_score attributes - presumably
    # this model is built field-by-field by the service; confirm.
    model_config = ConfigDict(from_attributes=True)

    chunk_id: str
    source_id: str
    source_path: str
    source_type: str
    content: str
    # Validation rejects values outside [0.0, 1.0].
    relevance_score: float = Field(..., ge=0.0, le=1.0)
    metadata: dict[str, Any] | None = None
# One entry of SourceListResponse.sources.
class SourceResponse(BaseModel):
    """Details of an indexed document source.

    Args:
        source_id: Unique identifier for the source.
        source_type: Type of document (markdown, openapi).
        source_path: Path to the document.
        chunk_count: Number of chunks from this source.
        content_hash: SHA-256 hash for change detection.
        indexed_at: When the source was last indexed.
        metadata: Custom metadata attached to the source.
    """

    # NOTE(review): from_attributes is enabled, but DocumentSource in models.py
    # exposes its JSONB column as the attribute metadata_ (not metadata) and has
    # no chunk_count attribute - presumably the service fills these fields
    # explicitly rather than validating the ORM object directly; confirm.
    model_config = ConfigDict(from_attributes=True)

    source_id: str
    source_type: str
    source_path: str
    chunk_count: int
    content_hash: str
    indexed_at: datetime
    metadata: dict[str, Any] | None = None
class SourceNotFoundError(ValueError):
    """Raised when a requested source does not exist in the knowledge base."""
+ """ + self.settings = get_settings() + self._embedding_service = embedding_service or get_embedding_service() + # Set base directory for path validation (mirrors registry/storage.py pattern) + if base_dir is None: + self._base_dir = Path.cwd().resolve() + elif isinstance(base_dir, str): + self._base_dir = Path(base_dir).resolve() + else: + self._base_dir = base_dir.resolve() + + def _compute_content_hash(self, content: str) -> str: + """Compute SHA-256 hash of content for change detection. + + Args: + content: Document content. + + Returns: + 64-character hex string hash. + """ + return hashlib.sha256(content.encode()).hexdigest() + + def _read_content_from_path(self, source_path: str) -> str: + """Read content from a file path with path traversal protection. + + CRITICAL: Validates path is within base directory to prevent + directory traversal attacks. Mirrors pattern from registry/storage.py. + + Args: + source_path: Path to the file. + + Returns: + File content. + + Raises: + FileNotFoundError: If file doesn't exist or path traversal attempted. + """ + # Resolve the source path + resolved_path = Path(source_path).resolve() + + # Security: ensure path is within base directory + try: + resolved_path.relative_to(self._base_dir) + except ValueError: + logger.warning( + "rag.path_traversal_attempt", + source_path=source_path, + base_dir=str(self._base_dir), + ) + raise FileNotFoundError( + f"Source file not found or access denied: {source_path}" + ) from None + + if not resolved_path.exists(): + raise FileNotFoundError(f"Source file not found: {source_path}") + + return resolved_path.read_text(encoding="utf-8") + + async def index_document( + self, + db: AsyncSession, + request: IndexRequest, + ) -> IndexResponse: + """Index a document into the knowledge base. 
+ + Handles: + - Content reading (from path or request) + - Content hash comparison for idempotent updates + - Chunking based on source type + - Embedding generation for all chunks + - Database upsert (source + chunks) + + Args: + db: Database session. + request: Index request with source info. + + Returns: + Indexing result with statistics. + """ + start_time = time.time() + + logger.info( + "rag.index_document_started", + source_type=request.source_type, + source_path=request.source_path, + ) + + # Get content (from request or file) + if request.content: + content = request.content + else: + content = self._read_content_from_path(request.source_path) + + # Compute content hash + content_hash = self._compute_content_hash(content) + + # Check if source already exists + existing_source = await self._find_source_by_path( + db, request.source_type, request.source_path + ) + + if existing_source and existing_source.content_hash == content_hash: + # Content unchanged - skip re-indexing + chunk_count = await self._get_chunk_count(db, existing_source.id) + duration_ms = (time.time() - start_time) * 1000 + + logger.info( + "rag.index_document_unchanged", + source_id=existing_source.source_id, + source_path=request.source_path, + ) + + return IndexResponse( + source_id=existing_source.source_id, + source_path=request.source_path, + chunks_created=chunk_count, + tokens_processed=0, + duration_ms=duration_ms, + status="unchanged", + ) + + # Chunk the content + chunker = get_chunker(request.source_type) + chunks = chunker.chunk(content) + + if not chunks: + logger.warning( + "rag.index_document_no_chunks", + source_path=request.source_path, + ) + chunks = [] + + # Generate embeddings for all chunks + chunk_texts = [chunk.content for chunk in chunks] + embeddings: list[list[float]] = [] + + if chunk_texts: + embeddings = await self._embedding_service.embed_texts(chunk_texts) + + # Calculate total tokens + total_tokens = sum(chunk.token_count for chunk in chunks) + + # Upsert 
source and chunks + source_id = existing_source.source_id if existing_source else uuid.uuid4().hex + status: Literal["indexed", "updated", "unchanged"] = ( + "updated" if existing_source else "indexed" + ) + + await self._upsert_source_and_chunks( + db=db, + source_id=source_id, + source_type=request.source_type, + source_path=request.source_path, + content_hash=content_hash, + metadata=request.metadata, + chunks=chunks, + embeddings=embeddings, + existing_source=existing_source, + ) + + duration_ms = (time.time() - start_time) * 1000 + + logger.info( + "rag.index_document_completed", + source_id=source_id, + source_path=request.source_path, + chunks_created=len(chunks), + tokens_processed=total_tokens, + duration_ms=duration_ms, + status=status, + ) + + return IndexResponse( + source_id=source_id, + source_path=request.source_path, + chunks_created=len(chunks), + tokens_processed=total_tokens, + duration_ms=duration_ms, + status=status, + ) + + async def retrieve( + self, + db: AsyncSession, + request: RetrieveRequest, + ) -> RetrieveResponse: + """Perform semantic search across indexed documents. + + Uses pgvector cosine_distance for similarity ranking: + - relevance_score = 1 - cosine_distance (normalized to 0-1) + - Filters by similarity threshold + - Supports metadata filtering + + Args: + db: Database session. + request: Retrieval request with query and filters. + + Returns: + Search results with relevance scores. 
+ """ + embed_start = time.time() + + logger.info( + "rag.retrieve_started", + query_length=len(request.query), + top_k=request.top_k, + threshold=request.similarity_threshold, + ) + + # Generate query embedding + query_embedding = await self._embedding_service.embed_query(request.query) + embed_time_ms = (time.time() - embed_start) * 1000 + + search_start = time.time() + + # Get total chunk count for statistics + total_chunks = await self._get_total_chunk_count(db) + + # Build similarity search query + # CRITICAL: cosine_distance returns values 0-2, so relevance = 1 - distance/2 + # But for cosine similarity on normalized vectors, distance is 0-1 + results = await self._search_similar_chunks( + db=db, + query_embedding=query_embedding, + top_k=request.top_k, + threshold=request.similarity_threshold, + filters=request.filters, + ) + + search_time_ms = (time.time() - search_start) * 1000 + + logger.info( + "rag.retrieve_completed", + results_count=len(results), + query_embedding_time_ms=embed_time_ms, + search_time_ms=search_time_ms, + ) + + return RetrieveResponse( + results=results, + query_embedding_time_ms=embed_time_ms, + search_time_ms=search_time_ms, + total_chunks_searched=total_chunks, + ) + + async def list_sources( + self, + db: AsyncSession, + ) -> SourceListResponse: + """List all indexed sources with statistics. + + Args: + db: Database session. + + Returns: + List of sources with chunk counts. 
+ """ + # Get sources with chunk counts + stmt = ( + select( + DocumentSource, + func.count(DocumentChunk.id).label("chunk_count"), + ) + .outerjoin(DocumentChunk, DocumentSource.id == DocumentChunk.source_id) + .group_by(DocumentSource.id) + .order_by(DocumentSource.indexed_at.desc()) + ) + + result = await db.execute(stmt) + rows = result.all() + + sources: list[SourceResponse] = [] + total_chunks = 0 + + for source, chunk_count in rows: + sources.append( + SourceResponse( + source_id=source.source_id, + source_type=source.source_type, + source_path=source.source_path, + chunk_count=chunk_count, + content_hash=source.content_hash, + indexed_at=source.indexed_at, + metadata=source.metadata_, + ) + ) + total_chunks += chunk_count + + return SourceListResponse( + sources=sources, + total_sources=len(sources), + total_chunks=total_chunks, + ) + + async def delete_source( + self, + db: AsyncSession, + source_id: str, + ) -> DeleteResponse: + """Delete a source and all its chunks. + + Args: + db: Database session. + source_id: Source identifier. + + Returns: + Deletion result with chunk count. + + Raises: + SourceNotFoundError: If source not found. 
+ """ + logger.info("rag.delete_source_started", source_id=source_id) + + # Find source + stmt = select(DocumentSource).where(DocumentSource.source_id == source_id) + result = await db.execute(stmt) + source = result.scalar_one_or_none() + + if source is None: + raise SourceNotFoundError(f"Source not found: {source_id}") + + # Count chunks before deletion + chunk_count = await self._get_chunk_count(db, source.id) + + # Delete source (cascades to chunks) + await db.delete(source) + await db.flush() + + logger.info( + "rag.delete_source_completed", + source_id=source_id, + chunks_deleted=chunk_count, + ) + + return DeleteResponse( + source_id=source_id, + chunks_deleted=chunk_count, + status="deleted", + ) + + async def _find_source_by_path( + self, + db: AsyncSession, + source_type: str, + source_path: str, + ) -> DocumentSource | None: + """Find source by type and path. + + Args: + db: Database session. + source_type: Source type. + source_path: Source path. + + Returns: + Source or None. + """ + stmt = select(DocumentSource).where( + (DocumentSource.source_type == source_type) + & (DocumentSource.source_path == source_path) + ) + result = await db.execute(stmt) + return result.scalar_one_or_none() + + async def _get_chunk_count(self, db: AsyncSession, source_id: int) -> int: + """Get number of chunks for a source. + + Args: + db: Database session. + source_id: Source internal ID. + + Returns: + Chunk count. + """ + stmt = ( + select(func.count()) + .select_from(DocumentChunk) + .where(DocumentChunk.source_id == source_id) + ) + result = await db.execute(stmt) + return result.scalar_one() + + async def _get_total_chunk_count(self, db: AsyncSession) -> int: + """Get total number of chunks across all sources. + + Args: + db: Database session. + + Returns: + Total chunk count. 
+ """ + stmt = select(func.count()).select_from(DocumentChunk) + result = await db.execute(stmt) + return result.scalar_one() + + async def _upsert_source_and_chunks( + self, + db: AsyncSession, + source_id: str, + source_type: str, + source_path: str, + content_hash: str, + metadata: dict[str, Any] | None, + chunks: list[ChunkData], + embeddings: list[list[float]], + existing_source: DocumentSource | None, + ) -> None: + """Upsert source and chunks in database. + + Args: + db: Database session. + source_id: External source identifier. + source_type: Type of source. + source_path: Path to source. + content_hash: SHA-256 hash of content. + metadata: Custom metadata. + chunks: Chunked content. + embeddings: Embeddings for each chunk. + existing_source: Existing source if updating. + """ + now = datetime.now(UTC) + + if existing_source: + # Update existing source + existing_source.content_hash = content_hash + existing_source.metadata_ = metadata + existing_source.indexed_at = now + + # Delete old chunks + await db.execute( + delete(DocumentChunk).where(DocumentChunk.source_id == existing_source.id) + ) + source_internal_id = existing_source.id + else: + # Create new source + source = DocumentSource( + source_id=source_id, + source_type=source_type, + source_path=source_path, + content_hash=content_hash, + metadata_=metadata, + indexed_at=now, + ) + db.add(source) + await db.flush() + source_internal_id = source.id + + # Create new chunks + for i, (chunk, embedding) in enumerate(zip(chunks, embeddings, strict=True)): + chunk_obj = DocumentChunk( + chunk_id=uuid.uuid4().hex, + source_id=source_internal_id, + chunk_index=i, + content=chunk.content, + embedding=embedding, + token_count=chunk.token_count, + metadata_=chunk.metadata if chunk.metadata else None, + ) + db.add(chunk_obj) + + await db.flush() + + async def _search_similar_chunks( + self, + db: AsyncSession, + query_embedding: list[float], + top_k: int, + threshold: float, + filters: dict[str, Any] | None, + ) 
-> list[ChunkResult]: + """Search for similar chunks using cosine distance. + + Args: + db: Database session. + query_embedding: Query embedding vector. + top_k: Maximum results to return. + threshold: Minimum similarity threshold. + filters: Optional metadata filters. + + Returns: + List of chunk results with relevance scores. + """ + # CRITICAL: Use cosine_distance method from pgvector + # cosine_distance returns 1 - cosine_similarity for normalized vectors + distance = DocumentChunk.embedding.cosine_distance(query_embedding) + + # Build query with distance calculation + stmt = ( + select( + DocumentChunk, + DocumentSource, + distance.label("distance"), + ) + .join(DocumentSource, DocumentChunk.source_id == DocumentSource.id) + .where(DocumentChunk.embedding.isnot(None)) + .order_by(distance) + .limit(top_k * 2) # Fetch extra to filter by threshold + ) + + # Apply metadata filters if provided + if filters: + if "source_type" in filters: + source_types = filters["source_type"] + if isinstance(source_types, str): + source_types = [source_types] + stmt = stmt.where(DocumentSource.source_type.in_(source_types)) + + if "category" in filters: + # Filter by metadata category + stmt = stmt.where( + DocumentSource.metadata_.op("->>")("category") == filters["category"] + ) + + result = await db.execute(stmt) + rows = result.all() + + results: list[ChunkResult] = [] + for chunk, source, dist in rows: + # Convert distance to similarity score + # For cosine distance: similarity = 1 - distance + relevance_score = 1.0 - float(dist) + + # Apply threshold filter + if relevance_score < threshold: + continue + + results.append( + ChunkResult( + chunk_id=chunk.chunk_id, + source_id=source.source_id, + source_path=source.source_path, + source_type=source.source_type, + content=chunk.content, + relevance_score=round(relevance_score, 4), + metadata=chunk.metadata_, + ) + ) + + # Stop if we have enough results + if len(results) >= top_k: + break + + return results diff --git 
# Dimension of the fake vectors; matches the 1536 used throughout these fixtures.
_EMBEDDING_DIM = 1536


# =============================================================================
# Database Fixtures for Integration Tests
# =============================================================================


@pytest.fixture
async def db_session() -> AsyncGenerator[AsyncSession, None]:
    """Provide an async database session for integration tests.

    Opens a session against ``settings.database_url`` and, on teardown,
    deletes every source whose ``source_path`` starts with ``test-``.
    Table creation is NOT performed here; the schema must already exist.
    Requires PostgreSQL to be running (docker-compose up -d).
    """
    settings = get_settings()
    engine = create_async_engine(settings.database_url, echo=False)

    session_factory = async_sessionmaker(
        engine,
        class_=AsyncSession,
        expire_on_commit=False,
    )

    try:
        async with session_factory() as session:
            try:
                yield session
            finally:
                # A test that failed mid-transaction leaves the session
                # unusable until it is rolled back; do that before cleanup.
                await session.rollback()
                await session.execute(
                    delete(DocumentSource).where(DocumentSource.source_path.like("test-%"))
                )
                await session.commit()
    finally:
        # Always release pooled connections, even if cleanup itself failed.
        await engine.dispose()


@pytest.fixture
async def client(db_session: AsyncSession) -> AsyncGenerator[AsyncClient, None]:
    """Create a test client whose ``get_db`` dependency yields ``db_session``."""

    async def override_get_db() -> AsyncGenerator[AsyncSession, None]:
        try:
            yield db_session
            await db_session.commit()
        except Exception:
            await db_session.rollback()
            raise

    app.dependency_overrides[get_db] = override_get_db

    try:
        async with AsyncClient(
            transport=ASGITransport(app=app),
            base_url="http://test",
        ) as ac:
            yield ac
    finally:
        # Remove only the override installed here so overrides registered by
        # other fixtures are left alone.
        app.dependency_overrides.pop(get_db, None)


# =============================================================================
# Mock Embedding Service
# =============================================================================


@pytest.fixture
def mock_embedding_service() -> EmbeddingService:
    """Create a mocked EmbeddingService for unit tests.

    Every text and query maps to the same constant 1536-dimensional vector, so
    no embedding API is called. ``count_tokens`` counts whitespace-separated
    words and ``truncate_to_tokens`` returns its input unchanged.
    """
    service = MagicMock(spec=EmbeddingService)

    async def mock_embed_texts(texts, **kwargs):
        # One constant vector per input text.
        return [[0.1] * _EMBEDDING_DIM for _ in texts]

    async def mock_embed_query(query):
        return [0.1] * _EMBEDDING_DIM

    service.embed_texts = AsyncMock(side_effect=mock_embed_texts)
    service.embed_query = AsyncMock(side_effect=mock_embed_query)
    service.count_tokens = MagicMock(side_effect=lambda text: len(text.split()))
    service.truncate_to_tokens = MagicMock(side_effect=lambda text, max_tokens: text)

    return service


# =============================================================================
# Sample Content Fixtures
# =============================================================================


@pytest.fixture
def sample_markdown_content() -> str:
    """Sample markdown content with three heading levels for testing."""
    return """# Main Title

This is the introduction paragraph with some content.

## Section One

First section content goes here. It has multiple sentences.
This is the second sentence. And a third one.

### Subsection 1.1

Subsection content with details about the topic.

### Subsection 1.2

More subsection content here.

## Section Two

Second section with different content.

### Subsection 2.1

Final subsection content.
"""


@pytest.fixture
def sample_openapi_content() -> str:
    """Sample OpenAPI 3.0 JSON document with GET and POST on /users."""
    return """{
    "openapi": "3.0.0",
    "info": {
        "title": "Test API",
        "version": "1.0.0",
        "description": "A test API for unit testing"
    },
    "servers": [
        {"url": "https://api.example.com", "description": "Production"}
    ],
    "paths": {
        "/users": {
            "get": {
                "operationId": "listUsers",
                "summary": "List all users",
                "description": "Returns a paginated list of users",
                "tags": ["users"],
                "parameters": [
                    {
                        "name": "page",
                        "in": "query",
                        "description": "Page number",
                        "required": false
                    }
                ],
                "responses": {
                    "200": {"description": "Success"}
                }
            },
            "post": {
                "operationId": "createUser",
                "summary": "Create a user",
                "tags": ["users"],
                "requestBody": {
                    "content": {
                        "application/json": {
                            "schema": {"type": "object", "properties": {"name": {"type": "string"}}}
                        }
                    }
                },
                "responses": {
                    "201": {"description": "Created"}
                }
            }
        }
    }
}"""


@pytest.fixture
def sample_large_markdown_content() -> str:
    """Markdown with 50 similarly shaped sections, large enough to need several chunks."""
    return "\n".join(
        f"## Section {i}\n\n"
        f"This is paragraph {i} with enough content to make it substantial. "
        f"It contains multiple sentences to ensure proper chunking behavior. "
        f"The content is designed to test the chunker's ability to handle large documents. "
        f"Each section has similar structure but different section numbers.\n"
        for i in range(50)
    )


# =============================================================================
# Schema Fixtures
# =============================================================================


@pytest.fixture
def sample_index_request() -> IndexRequest:
    """Sample index request with inline markdown content."""
    return IndexRequest(
        source_type="markdown",
        # The "test-" prefix is what db_session's teardown matches on.
        source_path="test-document.md",
        content="# Test\n\nThis is test content.",
        metadata={"category": "testing"},
    )


@pytest.fixture
def sample_retrieve_request() -> RetrieveRequest:
    """Sample retrieve request for testing."""
    return RetrieveRequest(
        query="What is the test about?",
        top_k=5,
        similarity_threshold=0.7,
    )


# =============================================================================
# Model Fixtures
# =============================================================================


@pytest.fixture
def sample_document_source() -> DocumentSource:
    """Sample DocumentSource ORM object (not added to any session)."""
    return DocumentSource(
        source_id="test123456789012345678901234",
        source_type="markdown",
        source_path="test-sample.md",
        content_hash="a" * 64,
        metadata_={"category": "testing"},
        indexed_at=datetime.now(UTC),
    )


@pytest.fixture
def sample_document_chunk() -> DocumentChunk:
    """Sample DocumentChunk ORM object (not added to any session)."""
    return DocumentChunk(
        chunk_id="chunk12345678901234567890123",
        source_id=1,
        chunk_index=0,
        content="Test chunk content",
        embedding=[0.1] * _EMBEDDING_DIM,
        token_count=3,
        metadata_={"heading": "Test"},
    )
class TestMarkdownChunker:
    """Behavioural checks for MarkdownChunker."""

    def test_chunk_simple_document(self, sample_markdown_content):
        """A small document yields non-empty, token-counted ChunkData items."""
        produced = MarkdownChunker().chunk(sample_markdown_content)

        assert len(produced) > 0
        for item in produced:
            assert isinstance(item, ChunkData)
            assert item.content
            assert item.token_count > 0

    def test_chunk_respects_heading_boundaries(self):
        """Section titles survive chunking when the chunk size is generous."""
        document = (
            "# Title\n\n"
            "Introduction.\n\n"
            "## Section One\n\n"
            "Content one.\n\n"
            "## Section Two\n\n"
            "Content two.\n"
        )
        splitter = MarkdownChunker()
        splitter.chunk_size = 1000  # Large enough to not split within sections

        # Re-join the chunk texts and look for every heading in the result.
        full_content = "\n".join(piece.content for piece in splitter.chunk(document))

        assert "# Title" in full_content or "Title" in full_content
        assert "Section One" in full_content
        assert "Section Two" in full_content

    def test_chunk_extracts_heading_metadata(self):
        """At least one chunk carries a heading; section_path, if set, is a list."""
        document = "# Main\n\n## Sub\n\nContent here.\n"
        produced = MarkdownChunker().chunk(document)

        with_heading = [piece for piece in produced if piece.metadata.get("heading")]
        assert len(with_heading) > 0

        for piece in with_heading:
            if piece.metadata.get("section_path"):
                assert isinstance(piece.metadata["section_path"], list)

    def test_chunk_respects_chunk_size(self, sample_large_markdown_content):
        """No chunk is more than twice the configured chunk size."""
        splitter = MarkdownChunker()
        splitter.chunk_size = 200  # Small chunk size
        produced = splitter.chunk(sample_large_markdown_content)

        # The 2x allowance leaves room for overlap and heading context.
        ceiling = splitter.chunk_size * 2
        for piece in produced:
            assert piece.token_count <= ceiling

    def test_chunk_handles_empty_content(self):
        """Empty input yields no chunks."""
        assert len(MarkdownChunker().chunk("")) == 0

    def test_chunk_handles_content_without_headings(self):
        """Heading-less text comes back as a first chunk equal to the input."""
        plain = "This is just plain text without any headings. It has multiple sentences."
        produced = MarkdownChunker().chunk(plain)

        assert len(produced) >= 1
        assert produced[0].content.strip() == plain.strip()

    def test_chunk_updates_heading_path_correctly(self):
        """Nested headings produce at least one non-empty section_path."""
        document = (
            "# Level 1\n\n"
            "## Level 2\n\n"
            "### Level 3\n\n"
            "Back to level 2 content.\n\n"
            "## Another Level 2\n\n"
            "Content here.\n"
        )
        produced = MarkdownChunker().chunk(document)

        section_paths = [
            piece.metadata.get("section_path")
            for piece in produced
            if piece.metadata.get("section_path")
        ]

        assert len(section_paths) > 0

    def test_chunk_token_counting(self):
        """Token count is positive and smaller than the character count."""
        sentence = "Hello, this is a test."
        counted = MarkdownChunker().count_tokens(sentence)

        assert counted > 0
        assert counted < len(sentence)  # Tokens should be fewer than characters

    def test_chunk_indices_are_sequential(self):
        """Chunk indices run 0..n-1 in order."""
        document = (
            "# One\n\n"
            "Content one.\n\n"
            "# Two\n\n"
            "Content two.\n\n"
            "# Three\n\n"
            "Content three.\n"
        )
        produced = MarkdownChunker().chunk(document)

        assert [piece.index for piece in produced] == list(range(len(produced)))

    def test_overlap_text_extraction(self):
        """Overlap text is non-empty and taken from the source text."""
        splitter = MarkdownChunker()
        splitter.chunk_overlap = 10

        sentence = "This is a longer piece of text that we want to extract overlap from."
        tail = splitter._get_overlap_text(sentence)

        assert len(tail) > 0
        assert sentence.endswith(tail) or tail in sentence


class TestOpenAPIChunker:
    """Behavioural checks for OpenAPIChunker."""

    @staticmethod
    def _endpoint_chunks(produced):
        """Return only the chunks whose metadata type is "endpoint"."""
        return [piece for piece in produced if piece.metadata.get("type") == "endpoint"]

    def test_chunk_openapi_json(self, sample_openapi_content):
        """The sample spec yields an info chunk plus both /users operations."""
        produced = OpenAPIChunker().chunk(sample_openapi_content)

        assert len(produced) >= 2  # At least info + endpoints
        assert len(self._endpoint_chunks(produced)) >= 2  # GET and POST /users

    def test_chunk_creates_info_chunk(self, sample_openapi_content):
        """Exactly one api_info chunk exists and it names the API."""
        produced = OpenAPIChunker().chunk(sample_openapi_content)

        info = [piece for piece in produced if piece.metadata.get("type") == "api_info"]
        assert len(info) == 1
        assert "Test API" in info[0].content

    def test_chunk_extracts_endpoint_metadata(self, sample_openapi_content):
        """GET /users is present once and carries its operationId."""
        produced = OpenAPIChunker().chunk(sample_openapi_content)

        matches = [
            piece
            for piece in self._endpoint_chunks(produced)
            if piece.metadata.get("path") == "/users" and piece.metadata.get("method") == "GET"
        ]
        assert len(matches) == 1
        assert matches[0].metadata.get("operation_id") == "listUsers"

    def test_chunk_includes_parameters(self, sample_openapi_content):
        """The GET endpoint chunk text lists its parameters."""
        produced = OpenAPIChunker().chunk(sample_openapi_content)

        get_chunk = next(
            piece
            for piece in self._endpoint_chunks(produced)
            if piece.metadata.get("method") == "GET"
        )

        assert "Parameters" in get_chunk.content
        assert "page" in get_chunk.content

    def test_chunk_handles_invalid_json(self):
        """Non-JSON input still produces at least one chunk."""
        produced = OpenAPIChunker().chunk("not valid json")

        assert len(produced) >= 1

    def test_chunk_handles_minimal_spec(self):
        """A spec with no paths still produces at least one chunk."""
        spec_text = json.dumps(
            {
                "openapi": "3.0.0",
                "info": {"title": "Minimal", "version": "1.0"},
                "paths": {},
            }
        )
        produced = OpenAPIChunker().chunk(spec_text)

        assert len(produced) >= 1

    def test_chunk_respects_token_limit(self, sample_openapi_content):
        """No chunk exceeds BaseChunker.MAX_TOKENS_PER_CHUNK."""
        produced = OpenAPIChunker().chunk(sample_openapi_content)

        for piece in produced:
            assert piece.token_count <= BaseChunker.MAX_TOKENS_PER_CHUNK


class TestGetChunker:
    """Checks for the get_chunker factory."""

    def test_get_markdown_chunker(self):
        """ "markdown" maps to MarkdownChunker."""
        assert isinstance(get_chunker("markdown"), MarkdownChunker)

    def test_get_openapi_chunker(self):
        """ "openapi" maps to OpenAPIChunker."""
        assert isinstance(get_chunker("openapi"), OpenAPIChunker)

    def test_invalid_source_type_raises(self):
        """An unknown source type raises ValueError with a descriptive message."""
        with pytest.raises(ValueError) as caught:
            get_chunker("invalid_type")
        assert "Unsupported source type" in str(caught.value)
token_count=1, + ) + assert chunk.metadata == {} diff --git a/app/features/rag/tests/test_embeddings.py b/app/features/rag/tests/test_embeddings.py new file mode 100644 index 00000000..2eb59b70 --- /dev/null +++ b/app/features/rag/tests/test_embeddings.py @@ -0,0 +1,452 @@ +"""Unit tests for RAG embedding providers.""" + +from unittest.mock import AsyncMock, MagicMock, patch + +import httpx +import pytest + +from app.features.rag.embeddings import ( + EmbeddingError, + EmbeddingProvider, + EmbeddingService, + OllamaEmbeddingProvider, + OpenAIEmbeddingProvider, + get_embedding_service, + reset_embedding_service, +) + + +class TestEmbeddingProvider: + """Tests for EmbeddingProvider abstract base class.""" + + def test_cannot_instantiate_directly(self): + """Test that EmbeddingProvider cannot be instantiated directly.""" + with pytest.raises(TypeError): + EmbeddingProvider() # type: ignore[abstract] + + +class TestOpenAIEmbeddingProvider: + """Tests for OpenAIEmbeddingProvider.""" + + def test_init_without_api_key(self): + """Test initialization without API key.""" + with patch("app.features.rag.embeddings.get_settings") as mock_settings: + mock_settings.return_value.openai_api_key = "" + mock_settings.return_value.rag_embedding_dimension = 1536 + provider = OpenAIEmbeddingProvider() + # Should not raise during init + assert provider._client is None + + def test_get_client_raises_without_api_key(self): + """Test _get_client raises when no API key configured.""" + with patch("app.features.rag.embeddings.get_settings") as mock_settings: + mock_settings.return_value.openai_api_key = "" + provider = OpenAIEmbeddingProvider() + + with pytest.raises(EmbeddingError) as exc_info: + provider._get_client() + assert "API key not configured" in str(exc_info.value) + + def test_dimension_property(self): + """Test dimension property returns configured value.""" + with patch("app.features.rag.embeddings.get_settings") as mock_settings: + mock_settings.return_value.openai_api_key = 
"test-key" + mock_settings.return_value.rag_embedding_dimension = 768 + provider = OpenAIEmbeddingProvider() + + assert provider.dimension == 768 + + def test_count_tokens(self): + """Test token counting.""" + with patch("app.features.rag.embeddings.get_settings") as mock_settings: + mock_settings.return_value.openai_api_key = "test-key" + mock_settings.return_value.rag_embedding_model = "text-embedding-3-small" + mock_settings.return_value.rag_embedding_dimension = 1536 + mock_settings.return_value.rag_embedding_batch_size = 100 + + provider = OpenAIEmbeddingProvider() + + count = provider.count_tokens("Hello, world!") + assert count > 0 + assert count < 20 # Should be a reasonable count + + def test_count_tokens_empty_string(self): + """Test token counting for empty string.""" + with patch("app.features.rag.embeddings.get_settings") as mock_settings: + mock_settings.return_value.openai_api_key = "test-key" + provider = OpenAIEmbeddingProvider() + + count = provider.count_tokens("") + assert count == 0 + + def test_truncate_to_tokens(self): + """Test token truncation.""" + with patch("app.features.rag.embeddings.get_settings") as mock_settings: + mock_settings.return_value.openai_api_key = "test-key" + provider = OpenAIEmbeddingProvider() + + long_text = "This is a longer piece of text that will be truncated." 
+ truncated = provider.truncate_to_tokens(long_text, 5) + + assert len(truncated) < len(long_text) + assert provider.count_tokens(truncated) <= 5 + + def test_truncate_to_tokens_no_truncation_needed(self): + """Test truncation when text is already within limit.""" + with patch("app.features.rag.embeddings.get_settings") as mock_settings: + mock_settings.return_value.openai_api_key = "test-key" + provider = OpenAIEmbeddingProvider() + + short_text = "Hi" + truncated = provider.truncate_to_tokens(short_text, 100) + + assert truncated == short_text + + @pytest.mark.asyncio + async def test_embed_texts_empty_list(self): + """Test embedding empty list returns empty list.""" + with patch("app.features.rag.embeddings.get_settings") as mock_settings: + mock_settings.return_value.openai_api_key = "test-key" + provider = OpenAIEmbeddingProvider() + + result = await provider.embed_texts([]) + assert result == [] + + @pytest.mark.asyncio + async def test_embed_texts_batching(self): + """Test that texts are batched correctly.""" + with patch("app.features.rag.embeddings.get_settings") as mock_settings: + mock_settings.return_value.openai_api_key = "test-key" + mock_settings.return_value.rag_embedding_model = "text-embedding-3-small" + mock_settings.return_value.rag_embedding_dimension = 1536 + mock_settings.return_value.rag_embedding_batch_size = 2 + + provider = OpenAIEmbeddingProvider() + + # Mock the client + mock_client = MagicMock() + + # Need to adjust mock to handle multiple calls + mock_response_1 = MagicMock() + mock_response_1.data = [ + MagicMock(embedding=[0.1] * 1536), + MagicMock(embedding=[0.2] * 1536), + ] + mock_response_1.usage = MagicMock(prompt_tokens=10, total_tokens=10) + + mock_response_2 = MagicMock() + mock_response_2.data = [ + MagicMock(embedding=[0.3] * 1536), + MagicMock(embedding=[0.4] * 1536), + ] + mock_response_2.usage = MagicMock(prompt_tokens=10, total_tokens=10) + + mock_client.embeddings.create = AsyncMock( + side_effect=[mock_response_1, 
mock_response_2] + ) + provider._client = mock_client + + # Test with 4 texts (should be 2 batches) + texts = ["text1", "text2", "text3", "text4"] + result = await provider.embed_texts(texts) + + assert len(result) == 4 + assert mock_client.embeddings.create.call_count == 2 + + @pytest.mark.asyncio + async def test_embed_query_returns_single_embedding(self): + """Test embed_query returns single embedding.""" + with patch("app.features.rag.embeddings.get_settings") as mock_settings: + mock_settings.return_value.openai_api_key = "test-key" + mock_settings.return_value.rag_embedding_model = "text-embedding-3-small" + mock_settings.return_value.rag_embedding_dimension = 1536 + mock_settings.return_value.rag_embedding_batch_size = 100 + + provider = OpenAIEmbeddingProvider() + + # Mock the client + mock_client = MagicMock() + mock_response = MagicMock() + mock_response.data = [MagicMock(embedding=[0.1] * 1536)] + mock_response.usage = MagicMock(prompt_tokens=5, total_tokens=5) + mock_client.embeddings.create = AsyncMock(return_value=mock_response) + provider._client = mock_client + + result = await provider.embed_query("test query") + + assert len(result) == 1536 + assert result == [0.1] * 1536 + + @pytest.mark.asyncio + async def test_embed_texts_truncates_long_input(self): + """Test that long inputs are truncated.""" + with patch("app.features.rag.embeddings.get_settings") as mock_settings: + mock_settings.return_value.openai_api_key = "test-key" + mock_settings.return_value.rag_embedding_model = "text-embedding-3-small" + mock_settings.return_value.rag_embedding_dimension = 1536 + mock_settings.return_value.rag_embedding_batch_size = 100 + + provider = OpenAIEmbeddingProvider() + + # Mock the client + mock_client = MagicMock() + mock_response = MagicMock() + mock_response.data = [MagicMock(embedding=[0.1] * 1536)] + mock_response.usage = MagicMock(prompt_tokens=100, total_tokens=100) + mock_client.embeddings.create = AsyncMock(return_value=mock_response) + 
provider._client = mock_client + + # (In reality, truncation happens before API call) + result = await provider.embed_texts(["short text"]) + + assert len(result) == 1 + + +class TestOllamaEmbeddingProvider: + """Tests for OllamaEmbeddingProvider.""" + + def test_init(self): + """Test initialization.""" + with patch("app.features.rag.embeddings.get_settings") as mock_settings: + mock_settings.return_value.ollama_base_url = "http://localhost:11434" + mock_settings.return_value.ollama_embedding_model = "nomic-embed-text" + mock_settings.return_value.rag_embedding_dimension = 768 + + provider = OllamaEmbeddingProvider() + assert provider._client is None + + def test_dimension_property(self): + """Test dimension property returns configured value.""" + with patch("app.features.rag.embeddings.get_settings") as mock_settings: + mock_settings.return_value.ollama_base_url = "http://localhost:11434" + mock_settings.return_value.ollama_embedding_model = "nomic-embed-text" + mock_settings.return_value.rag_embedding_dimension = 768 + + provider = OllamaEmbeddingProvider() + assert provider.dimension == 768 + + @pytest.mark.asyncio + async def test_embed_texts_empty_list(self): + """Test embedding empty list returns empty list.""" + with patch("app.features.rag.embeddings.get_settings") as mock_settings: + mock_settings.return_value.ollama_base_url = "http://localhost:11434" + mock_settings.return_value.ollama_embedding_model = "nomic-embed-text" + mock_settings.return_value.rag_embedding_dimension = 768 + + provider = OllamaEmbeddingProvider() + result = await provider.embed_texts([]) + assert result == [] + + @pytest.mark.asyncio + async def test_embed_texts_success(self): + """Test successful embedding generation.""" + with patch("app.features.rag.embeddings.get_settings") as mock_settings: + mock_settings.return_value.ollama_base_url = "http://localhost:11434" + mock_settings.return_value.ollama_embedding_model = "nomic-embed-text" + 
mock_settings.return_value.rag_embedding_dimension = 768 + + provider = OllamaEmbeddingProvider() + + # Mock the HTTP client with OpenAI-compatible response format + mock_response = MagicMock() + mock_response.json.return_value = { + "data": [ + {"embedding": [0.1] * 768, "index": 0}, + {"embedding": [0.2] * 768, "index": 1}, + ] + } + mock_response.raise_for_status = MagicMock() + + mock_client = MagicMock(spec=httpx.AsyncClient) + mock_client.post = AsyncMock(return_value=mock_response) + provider._client = mock_client + + result = await provider.embed_texts(["text1", "text2"]) + + assert len(result) == 2 + assert result[0] == [0.1] * 768 + assert result[1] == [0.2] * 768 + mock_client.post.assert_called_once_with( + "/v1/embeddings", + json={ + "model": "nomic-embed-text", + "input": ["text1", "text2"], + "dimensions": 768, + }, + ) + + @pytest.mark.asyncio + async def test_embed_query_returns_single_embedding(self): + """Test embed_query returns single embedding.""" + with patch("app.features.rag.embeddings.get_settings") as mock_settings: + mock_settings.return_value.ollama_base_url = "http://localhost:11434" + mock_settings.return_value.ollama_embedding_model = "nomic-embed-text" + mock_settings.return_value.rag_embedding_dimension = 768 + + provider = OllamaEmbeddingProvider() + + # Mock the HTTP client with OpenAI-compatible response format + mock_response = MagicMock() + mock_response.json.return_value = {"data": [{"embedding": [0.5] * 768, "index": 0}]} + mock_response.raise_for_status = MagicMock() + + mock_client = MagicMock(spec=httpx.AsyncClient) + mock_client.post = AsyncMock(return_value=mock_response) + provider._client = mock_client + + result = await provider.embed_query("test query") + + assert len(result) == 768 + assert result == [0.5] * 768 + + @pytest.mark.asyncio + async def test_embed_texts_model_not_found(self): + """Test error handling when model not found.""" + with patch("app.features.rag.embeddings.get_settings") as mock_settings: + 
mock_settings.return_value.ollama_base_url = "http://localhost:11434" + mock_settings.return_value.ollama_embedding_model = "nonexistent-model" + mock_settings.return_value.rag_embedding_dimension = 768 + + provider = OllamaEmbeddingProvider() + + # Mock 404 response + mock_response = MagicMock() + mock_response.status_code = 404 + error = httpx.HTTPStatusError( + "Not Found", + request=MagicMock(), + response=mock_response, + ) + + mock_client = MagicMock(spec=httpx.AsyncClient) + mock_client.post = AsyncMock(side_effect=error) + provider._client = mock_client + + with pytest.raises(EmbeddingError) as exc_info: + await provider.embed_texts(["test"]) + assert "not found" in str(exc_info.value).lower() + assert "ollama pull" in str(exc_info.value) + + @pytest.mark.asyncio + async def test_embed_texts_connection_error(self): + """Test error handling when Ollama not reachable.""" + with patch("app.features.rag.embeddings.get_settings") as mock_settings: + mock_settings.return_value.ollama_base_url = "http://localhost:11434" + mock_settings.return_value.ollama_embedding_model = "nomic-embed-text" + mock_settings.return_value.rag_embedding_dimension = 768 + + provider = OllamaEmbeddingProvider() + + # Mock connection error + mock_client = MagicMock(spec=httpx.AsyncClient) + mock_client.post = AsyncMock(side_effect=httpx.ConnectError("Connection refused")) + provider._client = mock_client + + with pytest.raises(EmbeddingError) as exc_info: + await provider.embed_texts(["test"]) + assert "Failed to connect to Ollama" in str(exc_info.value) + + @pytest.mark.asyncio + async def test_embed_texts_count_mismatch(self): + """Test error when embedding count doesn't match input count.""" + with patch("app.features.rag.embeddings.get_settings") as mock_settings: + mock_settings.return_value.ollama_base_url = "http://localhost:11434" + mock_settings.return_value.ollama_embedding_model = "nomic-embed-text" + mock_settings.return_value.rag_embedding_dimension = 768 + + provider = 
OllamaEmbeddingProvider() + + # Mock response with wrong count (OpenAI-compatible format) + mock_response = MagicMock() + mock_response.json.return_value = { + "data": [{"embedding": [0.1] * 768, "index": 0}] # Only 1 embedding for 2 texts + } + mock_response.raise_for_status = MagicMock() + + mock_client = MagicMock(spec=httpx.AsyncClient) + mock_client.post = AsyncMock(return_value=mock_response) + provider._client = mock_client + + with pytest.raises(EmbeddingError) as exc_info: + await provider.embed_texts(["text1", "text2"]) + assert "mismatch" in str(exc_info.value).lower() + + @pytest.mark.asyncio + async def test_close(self): + """Test close method properly closes HTTP client.""" + with patch("app.features.rag.embeddings.get_settings") as mock_settings: + mock_settings.return_value.ollama_base_url = "http://localhost:11434" + mock_settings.return_value.ollama_embedding_model = "nomic-embed-text" + mock_settings.return_value.rag_embedding_dimension = 768 + + provider = OllamaEmbeddingProvider() + + # Mock client + mock_client = MagicMock(spec=httpx.AsyncClient) + mock_client.aclose = AsyncMock() + provider._client = mock_client + + await provider.close() + + mock_client.aclose.assert_called_once() + assert provider._client is None + + +class TestGetEmbeddingService: + """Tests for get_embedding_service factory.""" + + def test_returns_openai_by_default(self): + """Test that OpenAI provider is returned by default.""" + reset_embedding_service() + + with patch("app.features.rag.embeddings.get_settings") as mock_settings: + mock_settings.return_value.rag_embedding_provider = "openai" + mock_settings.return_value.openai_api_key = "" + mock_settings.return_value.rag_embedding_model = "text-embedding-3-small" + mock_settings.return_value.rag_embedding_dimension = 1536 + mock_settings.return_value.rag_embedding_batch_size = 100 + + provider = get_embedding_service() + assert isinstance(provider, OpenAIEmbeddingProvider) + + reset_embedding_service() + + def 
test_returns_ollama_when_configured(self): + """Test that Ollama provider is returned when configured.""" + reset_embedding_service() + + with patch("app.features.rag.embeddings.get_settings") as mock_settings: + mock_settings.return_value.rag_embedding_provider = "ollama" + mock_settings.return_value.ollama_base_url = "http://localhost:11434" + mock_settings.return_value.ollama_embedding_model = "nomic-embed-text" + mock_settings.return_value.rag_embedding_dimension = 768 + + provider = get_embedding_service() + assert isinstance(provider, OllamaEmbeddingProvider) + + reset_embedding_service() + + def test_returns_same_instance(self): + """Test that singleton returns same instance.""" + reset_embedding_service() + + with patch("app.features.rag.embeddings.get_settings") as mock_settings: + mock_settings.return_value.rag_embedding_provider = "openai" + mock_settings.return_value.openai_api_key = "" + mock_settings.return_value.rag_embedding_model = "text-embedding-3-small" + mock_settings.return_value.rag_embedding_dimension = 1536 + mock_settings.return_value.rag_embedding_batch_size = 100 + + provider1 = get_embedding_service() + provider2 = get_embedding_service() + assert provider1 is provider2 + + reset_embedding_service() + + +class TestEmbeddingServiceAlias: + """Tests for backwards compatibility alias.""" + + def test_embedding_service_is_openai_provider(self): + """Test that EmbeddingService alias points to OpenAIEmbeddingProvider.""" + assert EmbeddingService is OpenAIEmbeddingProvider diff --git a/app/features/rag/tests/test_routes.py b/app/features/rag/tests/test_routes.py new file mode 100644 index 00000000..ce09a05a --- /dev/null +++ b/app/features/rag/tests/test_routes.py @@ -0,0 +1,433 @@ +"""Integration tests for RAG API routes. + +These tests require: +- PostgreSQL running with pgvector extension (docker-compose up -d) +- Migrations applied (uv run alembic upgrade head) + +Note: These tests mock the OpenAI embedding service to avoid API calls. 
+""" + +from unittest.mock import AsyncMock, MagicMock, patch + +import pytest +from httpx import AsyncClient + +from app.features.rag.embeddings import EmbeddingService + +# ============================================================================= +# Mock Embedding Service for Integration Tests +# ============================================================================= + + +def create_mock_embedding_service() -> EmbeddingService: + """Create a mock embedding service for integration tests.""" + service = MagicMock(spec=EmbeddingService) + + async def mock_embed_texts(texts, **kwargs): + return [[0.1 + i * 0.01] * 1536 for i, _ in enumerate(texts)] + + async def mock_embed_query(query): + return [0.1] * 1536 + + service.embed_texts = AsyncMock(side_effect=mock_embed_texts) + service.embed_query = AsyncMock(side_effect=mock_embed_query) + service.count_tokens = MagicMock(side_effect=lambda text: len(text.split())) + service.truncate_to_tokens = MagicMock(side_effect=lambda text, max_tokens: text) + + return service + + +# ============================================================================= +# Index Endpoint Tests +# ============================================================================= + + +@pytest.mark.integration +class TestIndexEndpoint: + """Integration tests for POST /rag/index endpoint.""" + + @pytest.mark.asyncio + async def test_index_markdown_creates_chunks(self, client: AsyncClient): + """Test that indexing markdown creates chunks in database.""" + mock_service = create_mock_embedding_service() + + with patch( + "app.features.rag.service.get_embedding_service", + return_value=mock_service, + ): + response = await client.post( + "/rag/index", + json={ + "source_type": "markdown", + "source_path": "test-index-md-001", + "content": "# Test Document\n\nThis is test content for indexing.", + "metadata": {"category": "testing"}, + }, + ) + + assert response.status_code == 201 + data = response.json() + assert data["status"] == "indexed" + 
assert data["chunks_created"] >= 1 + assert data["source_path"] == "test-index-md-001" + assert "source_id" in data + + @pytest.mark.asyncio + async def test_index_same_content_returns_unchanged(self, client: AsyncClient): + """Test that re-indexing unchanged content returns 'unchanged' status.""" + mock_service = create_mock_embedding_service() + + content = "# Unchanged\n\nSame content twice." + + with patch( + "app.features.rag.service.get_embedding_service", + return_value=mock_service, + ): + # First index + response1 = await client.post( + "/rag/index", + json={ + "source_type": "markdown", + "source_path": "test-unchanged-001", + "content": content, + }, + ) + assert response1.status_code == 201 + assert response1.json()["status"] == "indexed" + + # Second index with same content + response2 = await client.post( + "/rag/index", + json={ + "source_type": "markdown", + "source_path": "test-unchanged-001", + "content": content, + }, + ) + assert response2.status_code == 201 + assert response2.json()["status"] == "unchanged" + + @pytest.mark.asyncio + async def test_index_updated_content_re_indexes(self, client: AsyncClient): + """Test that updated content triggers re-indexing.""" + mock_service = create_mock_embedding_service() + + with patch( + "app.features.rag.service.get_embedding_service", + return_value=mock_service, + ): + # First index + response1 = await client.post( + "/rag/index", + json={ + "source_type": "markdown", + "source_path": "test-updated-001", + "content": "# Original\n\nOriginal content.", + }, + ) + assert response1.status_code == 201 + source_id = response1.json()["source_id"] + + # Second index with different content + response2 = await client.post( + "/rag/index", + json={ + "source_type": "markdown", + "source_path": "test-updated-001", + "content": "# Updated\n\nNew updated content.", + }, + ) + assert response2.status_code == 201 + assert response2.json()["status"] == "updated" + assert response2.json()["source_id"] == source_id + 
+ @pytest.mark.asyncio + async def test_index_invalid_source_type(self, client: AsyncClient): + """Test that invalid source type returns 422.""" + response = await client.post( + "/rag/index", + json={ + "source_type": "invalid", + "source_path": "test.txt", + "content": "test", + }, + ) + assert response.status_code == 422 + + @pytest.mark.asyncio + async def test_index_file_not_found(self, client: AsyncClient): + """Test that missing file returns 404.""" + response = await client.post( + "/rag/index", + json={ + "source_type": "markdown", + "source_path": "/nonexistent/path/file.md", + }, + ) + assert response.status_code == 404 + + +# ============================================================================= +# Retrieve Endpoint Tests +# ============================================================================= + + +@pytest.mark.integration +class TestRetrieveEndpoint: + """Integration tests for POST /rag/retrieve endpoint.""" + + @pytest.mark.asyncio + async def test_retrieve_returns_relevant_chunks(self, client: AsyncClient): + """Test that retrieval returns matching chunks.""" + mock_service = create_mock_embedding_service() + + with patch( + "app.features.rag.service.get_embedding_service", + return_value=mock_service, + ): + # First, index a document + await client.post( + "/rag/index", + json={ + "source_type": "markdown", + "source_path": "test-retrieve-001", + "content": "# Backtesting Guide\n\nBacktesting prevents data leakage by using time-based splits.", + }, + ) + + # Then retrieve + response = await client.post( + "/rag/retrieve", + json={ + "query": "How does backtesting prevent leakage?", + "top_k": 5, + "similarity_threshold": 0.0, # Low threshold to ensure results + }, + ) + + assert response.status_code == 200 + data = response.json() + assert "results" in data + assert "query_embedding_time_ms" in data + assert "search_time_ms" in data + assert "total_chunks_searched" in data + + @pytest.mark.asyncio + async def 
test_retrieve_respects_threshold(self, client: AsyncClient): + """Test that retrieval respects similarity threshold.""" + mock_service = create_mock_embedding_service() + + with patch( + "app.features.rag.service.get_embedding_service", + return_value=mock_service, + ): + # Index a document + await client.post( + "/rag/index", + json={ + "source_type": "markdown", + "source_path": "test-threshold-001", + "content": "# Test Content\n\nSome test content here.", + }, + ) + + # Retrieve with very high threshold + response = await client.post( + "/rag/retrieve", + json={ + "query": "unrelated query", + "top_k": 5, + "similarity_threshold": 0.99, # Very high threshold + }, + ) + + assert response.status_code == 200 + # With high threshold and mock embeddings, results may be empty + data = response.json() + assert isinstance(data["results"], list) + + @pytest.mark.asyncio + async def test_retrieve_empty_database(self, client: AsyncClient): + """Test retrieval on empty database returns empty results.""" + mock_service = create_mock_embedding_service() + + with patch( + "app.features.rag.service.get_embedding_service", + return_value=mock_service, + ): + response = await client.post( + "/rag/retrieve", + json={ + "query": "anything", + "top_k": 5, + }, + ) + + assert response.status_code == 200 + data = response.json() + assert isinstance(data["results"], list) + + @pytest.mark.asyncio + async def test_retrieve_validates_query(self, client: AsyncClient): + """Test that empty query is rejected.""" + response = await client.post( + "/rag/retrieve", + json={ + "query": "", + "top_k": 5, + }, + ) + assert response.status_code == 422 + + +# ============================================================================= +# Sources Endpoint Tests +# ============================================================================= + + +@pytest.mark.integration +class TestSourcesEndpoint: + """Integration tests for /rag/sources endpoints.""" + + @pytest.mark.asyncio + async def 
test_list_sources_returns_all(self, client: AsyncClient): + """Test listing all indexed sources.""" + mock_service = create_mock_embedding_service() + + with patch( + "app.features.rag.service.get_embedding_service", + return_value=mock_service, + ): + # Index a couple of documents + await client.post( + "/rag/index", + json={ + "source_type": "markdown", + "source_path": "test-list-001", + "content": "# First Doc", + }, + ) + await client.post( + "/rag/index", + json={ + "source_type": "markdown", + "source_path": "test-list-002", + "content": "# Second Doc", + }, + ) + + # List sources + response = await client.get("/rag/sources") + + assert response.status_code == 200 + data = response.json() + assert "sources" in data + assert "total_sources" in data + assert "total_chunks" in data + assert data["total_sources"] >= 2 + + @pytest.mark.asyncio + async def test_delete_source_removes_chunks(self, client: AsyncClient): + """Test that deleting a source removes all its chunks.""" + mock_service = create_mock_embedding_service() + + with patch( + "app.features.rag.service.get_embedding_service", + return_value=mock_service, + ): + # Index a document + index_response = await client.post( + "/rag/index", + json={ + "source_type": "markdown", + "source_path": "test-delete-001", + "content": "# Delete Me\n\nThis will be deleted.", + }, + ) + source_id = index_response.json()["source_id"] + + # Delete the source + delete_response = await client.delete(f"/rag/sources/{source_id}") + + assert delete_response.status_code == 200 + data = delete_response.json() + assert data["status"] == "deleted" + assert data["chunks_deleted"] >= 1 + + @pytest.mark.asyncio + async def test_delete_nonexistent_returns_404(self, client: AsyncClient): + """Test that deleting non-existent source returns 404.""" + response = await client.delete("/rag/sources/nonexistent123456789012") + assert response.status_code == 404 + + @pytest.mark.asyncio + async def test_source_not_in_list_after_delete(self, 
client: AsyncClient): + """Test that deleted source no longer appears in list.""" + mock_service = create_mock_embedding_service() + + with patch( + "app.features.rag.service.get_embedding_service", + return_value=mock_service, + ): + # Index a document + index_response = await client.post( + "/rag/index", + json={ + "source_type": "markdown", + "source_path": "test-delete-verify-001", + "content": "# Verify Delete", + }, + ) + source_id = index_response.json()["source_id"] + + # Delete the source + await client.delete(f"/rag/sources/{source_id}") + + # Verify not in list + list_response = await client.get("/rag/sources") + source_ids = [s["source_id"] for s in list_response.json()["sources"]] + assert source_id not in source_ids + + +# ============================================================================= +# OpenAPI Indexing Tests +# ============================================================================= + + +@pytest.mark.integration +class TestOpenAPIIndexing: + """Integration tests for OpenAPI document indexing.""" + + @pytest.mark.asyncio + async def test_index_openapi_creates_endpoint_chunks(self, client: AsyncClient): + """Test that OpenAPI spec creates endpoint-based chunks.""" + mock_service = create_mock_embedding_service() + + openapi_spec = """{ + "openapi": "3.0.0", + "info": {"title": "Test API", "version": "1.0"}, + "paths": { + "/users": { + "get": {"summary": "List users", "operationId": "listUsers", "responses": {"200": {"description": "OK"}}}, + "post": {"summary": "Create user", "operationId": "createUser", "responses": {"201": {"description": "Created"}}} + } + } + }""" + + with patch( + "app.features.rag.service.get_embedding_service", + return_value=mock_service, + ): + response = await client.post( + "/rag/index", + json={ + "source_type": "openapi", + "source_path": "test-openapi-001", + "content": openapi_spec, + }, + ) + + assert response.status_code == 201 + data = response.json() + # Should have at least: info chunk + 2 
endpoint chunks + assert data["chunks_created"] >= 3 diff --git a/app/features/rag/tests/test_schemas.py b/app/features/rag/tests/test_schemas.py new file mode 100644 index 00000000..a3bb0292 --- /dev/null +++ b/app/features/rag/tests/test_schemas.py @@ -0,0 +1,345 @@ +"""Unit tests for RAG schemas.""" + +import pytest +from pydantic import ValidationError + +from app.features.rag.schemas import ( + ChunkResult, + DeleteResponse, + IndexRequest, + IndexResponse, + RetrieveRequest, + RetrieveResponse, + SourceListResponse, + SourceResponse, +) + + +class TestIndexRequest: + """Tests for IndexRequest schema.""" + + def test_valid_markdown_request(self): + """Test valid markdown index request.""" + request = IndexRequest( + source_type="markdown", + source_path="docs/README.md", + content="# Hello\n\nWorld", + metadata={"category": "docs"}, + ) + assert request.source_type == "markdown" + assert request.source_path == "docs/README.md" + assert request.content == "# Hello\n\nWorld" + assert request.metadata == {"category": "docs"} + + def test_valid_openapi_request(self): + """Test valid openapi index request.""" + request = IndexRequest( + source_type="openapi", + source_path="api/openapi.json", + ) + assert request.source_type == "openapi" + assert request.content is None + assert request.metadata is None + + def test_invalid_source_type(self): + """Test invalid source type is rejected.""" + with pytest.raises(ValidationError) as exc_info: + IndexRequest( + source_type="invalid", # type: ignore[arg-type] + source_path="test.txt", + ) + assert "source_type" in str(exc_info.value) + + def test_empty_source_path_rejected(self): + """Test empty source path is rejected.""" + with pytest.raises(ValidationError) as exc_info: + IndexRequest( + source_type="markdown", + source_path="", + ) + assert "source_path" in str(exc_info.value) + + def test_source_path_max_length(self): + """Test source path max length is enforced.""" + with pytest.raises(ValidationError) as exc_info: 
+ IndexRequest( + source_type="markdown", + source_path="x" * 501, + ) + assert "source_path" in str(exc_info.value) + + def test_extra_fields_rejected(self): + """Test extra fields are rejected.""" + with pytest.raises(ValidationError) as exc_info: + IndexRequest( + source_type="markdown", + source_path="test.md", + extra_field="not allowed", # type: ignore[call-arg] + ) + assert "extra_field" in str(exc_info.value) + + +class TestRetrieveRequest: + """Tests for RetrieveRequest schema.""" + + def test_valid_request_defaults(self): + """Test valid request with defaults.""" + request = RetrieveRequest(query="What is forecasting?") + assert request.query == "What is forecasting?" + assert request.top_k == 5 + assert request.similarity_threshold == 0.7 + assert request.filters is None + + def test_valid_request_custom_params(self): + """Test valid request with custom parameters.""" + request = RetrieveRequest( + query="How does backtesting work?", + top_k=10, + similarity_threshold=0.8, + filters={"source_type": ["markdown"]}, + ) + assert request.top_k == 10 + assert request.similarity_threshold == 0.8 + assert request.filters == {"source_type": ["markdown"]} + + def test_empty_query_rejected(self): + """Test empty query is rejected.""" + with pytest.raises(ValidationError) as exc_info: + RetrieveRequest(query="") + assert "query" in str(exc_info.value) + + def test_query_max_length(self): + """Test query max length is enforced.""" + with pytest.raises(ValidationError) as exc_info: + RetrieveRequest(query="x" * 2001) + assert "query" in str(exc_info.value) + + def test_top_k_bounds(self): + """Test top_k bounds are enforced.""" + # Below minimum + with pytest.raises(ValidationError): + RetrieveRequest(query="test", top_k=0) + + # Above maximum + with pytest.raises(ValidationError): + RetrieveRequest(query="test", top_k=51) + + # Valid bounds + request_min = RetrieveRequest(query="test", top_k=1) + assert request_min.top_k == 1 + + request_max = 
RetrieveRequest(query="test", top_k=50) + assert request_max.top_k == 50 + + def test_similarity_threshold_bounds(self): + """Test similarity threshold bounds are enforced.""" + # Below minimum + with pytest.raises(ValidationError): + RetrieveRequest(query="test", similarity_threshold=-0.1) + + # Above maximum + with pytest.raises(ValidationError): + RetrieveRequest(query="test", similarity_threshold=1.1) + + # Valid bounds + request_min = RetrieveRequest(query="test", similarity_threshold=0.0) + assert request_min.similarity_threshold == 0.0 + + request_max = RetrieveRequest(query="test", similarity_threshold=1.0) + assert request_max.similarity_threshold == 1.0 + + +class TestIndexResponse: + """Tests for IndexResponse schema.""" + + def test_indexed_status(self): + """Test indexed status response.""" + response = IndexResponse( + source_id="abc123", + source_path="test.md", + chunks_created=5, + tokens_processed=1000, + duration_ms=123.45, + status="indexed", + ) + assert response.status == "indexed" + assert response.chunks_created == 5 + + def test_updated_status(self): + """Test updated status response.""" + response = IndexResponse( + source_id="abc123", + source_path="test.md", + chunks_created=3, + tokens_processed=500, + duration_ms=50.0, + status="updated", + ) + assert response.status == "updated" + + def test_unchanged_status(self): + """Test unchanged status response.""" + response = IndexResponse( + source_id="abc123", + source_path="test.md", + chunks_created=5, + tokens_processed=0, + duration_ms=10.0, + status="unchanged", + ) + assert response.status == "unchanged" + assert response.tokens_processed == 0 + + +class TestChunkResult: + """Tests for ChunkResult schema.""" + + def test_valid_chunk_result(self): + """Test valid chunk result.""" + result = ChunkResult( + chunk_id="chunk123", + source_id="src123", + source_path="docs/test.md", + source_type="markdown", + content="This is chunk content", + relevance_score=0.95, + metadata={"heading": 
"Introduction"}, + ) + assert result.relevance_score == 0.95 + assert result.metadata == {"heading": "Introduction"} + + def test_relevance_score_bounds(self): + """Test relevance score bounds.""" + # Valid bounds + result_zero = ChunkResult( + chunk_id="c1", + source_id="s1", + source_path="test.md", + source_type="markdown", + content="test", + relevance_score=0.0, + ) + assert result_zero.relevance_score == 0.0 + + result_one = ChunkResult( + chunk_id="c1", + source_id="s1", + source_path="test.md", + source_type="markdown", + content="test", + relevance_score=1.0, + ) + assert result_one.relevance_score == 1.0 + + # Out of bounds + with pytest.raises(ValidationError): + ChunkResult( + chunk_id="c1", + source_id="s1", + source_path="test.md", + source_type="markdown", + content="test", + relevance_score=1.5, + ) + + +class TestRetrieveResponse: + """Tests for RetrieveResponse schema.""" + + def test_valid_response(self): + """Test valid retrieve response.""" + response = RetrieveResponse( + results=[ + ChunkResult( + chunk_id="c1", + source_id="s1", + source_path="test.md", + source_type="markdown", + content="test content", + relevance_score=0.9, + ) + ], + query_embedding_time_ms=45.5, + search_time_ms=12.3, + total_chunks_searched=100, + ) + assert len(response.results) == 1 + assert response.total_chunks_searched == 100 + + def test_empty_results(self): + """Test response with no results.""" + response = RetrieveResponse( + results=[], + query_embedding_time_ms=50.0, + search_time_ms=10.0, + total_chunks_searched=0, + ) + assert len(response.results) == 0 + + +class TestSourceResponse: + """Tests for SourceResponse schema.""" + + def test_valid_source_response(self): + """Test valid source response.""" + from datetime import UTC, datetime + + response = SourceResponse( + source_id="src123", + source_type="markdown", + source_path="docs/README.md", + chunk_count=10, + content_hash="a" * 64, + indexed_at=datetime.now(UTC), + metadata={"category": "docs"}, + ) 
+ assert response.chunk_count == 10 + assert response.source_type == "markdown" + + +class TestSourceListResponse: + """Tests for SourceListResponse schema.""" + + def test_valid_list_response(self): + """Test valid source list response.""" + from datetime import UTC, datetime + + response = SourceListResponse( + sources=[ + SourceResponse( + source_id="src1", + source_type="markdown", + source_path="doc1.md", + chunk_count=5, + content_hash="a" * 64, + indexed_at=datetime.now(UTC), + ) + ], + total_sources=1, + total_chunks=5, + ) + assert response.total_sources == 1 + assert response.total_chunks == 5 + + def test_empty_list_response(self): + """Test empty source list response.""" + response = SourceListResponse( + sources=[], + total_sources=0, + total_chunks=0, + ) + assert len(response.sources) == 0 + + +class TestDeleteResponse: + """Tests for DeleteResponse schema.""" + + def test_valid_delete_response(self): + """Test valid delete response.""" + response = DeleteResponse( + source_id="src123", + chunks_deleted=10, + status="deleted", + ) + assert response.status == "deleted" + assert response.chunks_deleted == 10 diff --git a/app/features/rag/tests/test_service.py b/app/features/rag/tests/test_service.py new file mode 100644 index 00000000..52a7afc2 --- /dev/null +++ b/app/features/rag/tests/test_service.py @@ -0,0 +1,273 @@ +"""Unit tests for RAG service.""" + +import hashlib +from unittest.mock import AsyncMock, MagicMock, patch + +import pytest + +from app.features.rag.schemas import IndexRequest, RetrieveRequest +from app.features.rag.service import RAGService, SourceNotFoundError + + +class TestRAGServiceUnit: + """Unit tests for RAGService (no database).""" + + def test_compute_content_hash(self): + """Test content hash computation.""" + service = RAGService() + + content = "Test content" + hash1 = service._compute_content_hash(content) + + # Should be SHA-256 hex (64 characters) + assert len(hash1) == 64 + assert all(c in "0123456789abcdef" for c in 
hash1) + + # Same content should produce same hash + hash2 = service._compute_content_hash(content) + assert hash1 == hash2 + + # Different content should produce different hash + hash3 = service._compute_content_hash("Different content") + assert hash1 != hash3 + + def test_compute_content_hash_deterministic(self): + """Test hash is deterministic.""" + service = RAGService() + + content = "# Test\n\nWith some content." + expected = hashlib.sha256(content.encode()).hexdigest() + + result = service._compute_content_hash(content) + assert result == expected + + def test_read_content_from_path_not_found(self, tmp_path): + """Test reading from non-existent path raises.""" + service = RAGService() + + with pytest.raises(FileNotFoundError): + service._read_content_from_path("/nonexistent/path.md") + + def test_read_content_from_path_success(self, tmp_path): + """Test reading from existing path.""" + # Pass tmp_path as base_dir to allow test files in tmp directory + service = RAGService(base_dir=tmp_path) + + # Create test file + test_file = tmp_path / "test.md" + test_file.write_text("# Test Content") + + content = service._read_content_from_path(str(test_file)) + assert content == "# Test Content" + + def test_read_content_from_path_traversal_blocked(self, tmp_path): + """Test that path traversal attempts are blocked.""" + # Set base_dir to tmp_path + service = RAGService(base_dir=tmp_path) + + # Try to read file outside base_dir (should fail) + with pytest.raises(FileNotFoundError, match="not found or access denied"): + service._read_content_from_path("/etc/passwd") + + +class TestRAGServiceIndexDocument: + """Tests for index_document method.""" + + @pytest.mark.asyncio + async def test_index_with_content_provided(self, mock_embedding_service): + """Test indexing when content is provided directly.""" + service = RAGService(embedding_service=mock_embedding_service) + + request = IndexRequest( + source_type="markdown", + source_path="test-direct-content.md", + content="# 
Test\n\nDirect content.", + ) + + # Mock database session + mock_db = AsyncMock() + mock_db.execute = AsyncMock( + return_value=MagicMock(scalar_one_or_none=MagicMock(return_value=None)) + ) + mock_db.flush = AsyncMock() + mock_db.add = MagicMock() + + with patch.object(service, "_find_source_by_path", return_value=None): + with patch.object(service, "_upsert_source_and_chunks", new_callable=AsyncMock): + response = await service.index_document(db=mock_db, request=request) + + assert response.status == "indexed" + assert response.source_path == "test-direct-content.md" + assert response.chunks_created > 0 + + @pytest.mark.asyncio + async def test_index_unchanged_content(self, mock_embedding_service): + """Test that unchanged content returns 'unchanged' status.""" + service = RAGService(embedding_service=mock_embedding_service) + + content = "# Test\n\nContent." + content_hash = service._compute_content_hash(content) + + request = IndexRequest( + source_type="markdown", + source_path="test-unchanged.md", + content=content, + ) + + # Mock existing source with same hash + mock_source = MagicMock() + mock_source.source_id = "existing123" + mock_source.content_hash = content_hash + + mock_db = AsyncMock() + + with patch.object(service, "_find_source_by_path", return_value=mock_source): + with patch.object(service, "_get_chunk_count", return_value=5): + response = await service.index_document(db=mock_db, request=request) + + assert response.status == "unchanged" + assert response.tokens_processed == 0 + assert response.chunks_created == 5 + + @pytest.mark.asyncio + async def test_index_updated_content(self, mock_embedding_service): + """Test that changed content returns 'updated' status.""" + service = RAGService(embedding_service=mock_embedding_service) + + request = IndexRequest( + source_type="markdown", + source_path="test-updated.md", + content="# Updated\n\nNew content.", + ) + + # Mock existing source with different hash + mock_source = MagicMock() + 
mock_source.source_id = "existing123" + mock_source.content_hash = "different_hash" + + mock_db = AsyncMock() + + with patch.object(service, "_find_source_by_path", return_value=mock_source): + with patch.object(service, "_upsert_source_and_chunks", new_callable=AsyncMock): + response = await service.index_document(db=mock_db, request=request) + + assert response.status == "updated" + assert response.source_id == "existing123" + + +class TestRAGServiceRetrieve: + """Tests for retrieve method.""" + + @pytest.mark.asyncio + async def test_retrieve_calls_embedding_service(self, mock_embedding_service): + """Test that retrieve calls embedding service for query.""" + service = RAGService(embedding_service=mock_embedding_service) + + request = RetrieveRequest( + query="Test query", + top_k=5, + similarity_threshold=0.7, + ) + + mock_db = AsyncMock() + + with patch.object(service, "_get_total_chunk_count", return_value=100): + with patch.object(service, "_search_similar_chunks", return_value=[]): + response = await service.retrieve(db=mock_db, request=request) + + # Verify embedding service was called + mock_embedding_service.embed_query.assert_called_once_with("Test query") + + assert response.total_chunks_searched == 100 + assert len(response.results) == 0 + + @pytest.mark.asyncio + async def test_retrieve_returns_results(self, mock_embedding_service): + """Test that retrieve returns search results.""" + from app.features.rag.schemas import ChunkResult + + service = RAGService(embedding_service=mock_embedding_service) + + request = RetrieveRequest( + query="Test query", + top_k=5, + ) + + mock_db = AsyncMock() + + mock_results = [ + ChunkResult( + chunk_id="chunk1", + source_id="src1", + source_path="test.md", + source_type="markdown", + content="Result content", + relevance_score=0.95, + ) + ] + + with patch.object(service, "_get_total_chunk_count", return_value=50): + with patch.object(service, "_search_similar_chunks", return_value=mock_results): + response = await 
service.retrieve(db=mock_db, request=request) + + assert len(response.results) == 1 + assert response.results[0].relevance_score == 0.95 + + +class TestRAGServiceListSources: + """Tests for list_sources method.""" + + @pytest.mark.asyncio + async def test_list_sources_empty(self): + """Test listing sources when none exist.""" + service = RAGService() + + mock_db = AsyncMock() + mock_result = MagicMock() + mock_result.all.return_value = [] + mock_db.execute = AsyncMock(return_value=mock_result) + + response = await service.list_sources(db=mock_db) + + assert response.total_sources == 0 + assert response.total_chunks == 0 + assert len(response.sources) == 0 + + +class TestRAGServiceDeleteSource: + """Tests for delete_source method.""" + + @pytest.mark.asyncio + async def test_delete_source_not_found(self): + """Test deleting non-existent source raises.""" + service = RAGService() + + mock_db = AsyncMock() + mock_result = MagicMock() + mock_result.scalar_one_or_none.return_value = None + mock_db.execute = AsyncMock(return_value=mock_result) + + with pytest.raises(SourceNotFoundError): + await service.delete_source(db=mock_db, source_id="nonexistent") + + @pytest.mark.asyncio + async def test_delete_source_success(self): + """Test successful source deletion.""" + service = RAGService() + + mock_source = MagicMock() + mock_source.id = 1 + + mock_db = AsyncMock() + mock_result = MagicMock() + mock_result.scalar_one_or_none.return_value = mock_source + mock_db.execute = AsyncMock(return_value=mock_result) + mock_db.delete = AsyncMock() + mock_db.flush = AsyncMock() + + with patch.object(service, "_get_chunk_count", return_value=10): + response = await service.delete_source(db=mock_db, source_id="test123") + + assert response.status == "deleted" + assert response.chunks_deleted == 10 + mock_db.delete.assert_called_once_with(mock_source) diff --git a/app/main.py b/app/main.py index 4b425db3..323c7987 100644 --- a/app/main.py +++ b/app/main.py @@ -17,6 +17,7 @@ from 
app.features.forecasting.routes import router as forecasting_router from app.features.ingest.routes import router as ingest_router from app.features.jobs.routes import router as jobs_router +from app.features.rag.routes import router as rag_router from app.features.registry.routes import router as registry_router logger = get_logger(__name__) @@ -82,6 +83,7 @@ def create_app() -> FastAPI: app.include_router(forecasting_router) app.include_router(backtesting_router) app.include_router(registry_router) + app.include_router(rag_router) return app diff --git a/docs/DAILY-FLOW.md b/docs/DAILY-FLOW.md index 66521dbc..72622625 100644 --- a/docs/DAILY-FLOW.md +++ b/docs/DAILY-FLOW.md @@ -162,21 +162,29 @@ gh run watch --- -## Következő Phase: Forecasting (PRP-5) +## Következő Phases (INITIAL-9 → INITIAL-11) -```bash -# Kezdés -git checkout dev -git pull origin dev -git checkout -b feat/prp-5-forecasting +A projekt a moduláris három-fázisú roadmap szerint halad: -# Fejlesztés... -# PR → dev → main → release → phase-4 snapshot +```text +Phase 8: RAG Knowledge Base ("The Memory") + ↓ +Phase 9: Agentic Layer ("The Brain") + ↓ +Phase 10: ForecastLab Dashboard ("The Face") ``` -### PRP-5 Scope (INITIAL-5) -- Model zoo: naive, seasonal naive, moving average -- Unified model interface: fit/predict, serialize/load -- Scikit-learn Pipeline: Scaling → Encoding → Regressor -- Joblib-based ModelBundle persistence -- Multi-horizon recursive forecasting +### Phase 8: RAG Knowledge Base (INITIAL-9) +- pgvector embeddings + semantic retrieval +- Markdown/OpenAPI chunking +- POST /rag/index, POST /rag/retrieve endpoints + +### Phase 9: Agentic Layer (INITIAL-10) +- PydanticAI agents (Experiment Orchestrator, RAG Assistant) +- Tool orchestration + structured outputs +- WebSocket streaming + +### Phase 10: Dashboard (INITIAL-11) +- React 19 + Vite + shadcn/ui +- Data tables + time series charts +- Agent chat interface diff --git a/docs/PHASE-index.md b/docs/PHASE-index.md index 
280fa43b..836c63ef 100644 --- a/docs/PHASE-index.md +++ b/docs/PHASE-index.md @@ -16,9 +16,9 @@ This document indexes all implementation phases of the ForecastLabAI project. | 5 | Backtesting | Completed | PRP-6 | [5-BACKTESTING.md](./PHASE/5-BACKTESTING.md) | | 6 | Model Registry | Completed | PRP-7 | [6-MODEL_REGISTRY.md](./PHASE/6-MODEL_REGISTRY.md) | | 7 | Serving Layer | Completed | PRP-8 | [7-SERVING_LAYER.md](./PHASE/7-SERVING_LAYER.md) | -| 8 | RAG Knowledge Base | Pending | PRP-9 | - | -| 9 | Dashboard | Pending | PRP-10 | - | -| 10 | Agentic Layer | Pending | - | - | +| 8 | RAG Knowledge Base | Completed | PRP-9 | [8-RAG_KNOWLEDGE_BASE.md](./PHASE/8-RAG_KNOWLEDGE_BASE.md) | +| 9 | Agentic Layer | Pending | PRP-10 | - | +| 10 | ForecastLab Dashboard | Pending | PRP-11 | - | --- @@ -273,18 +273,66 @@ jobs_retention_days: int = 30 - Pyright: 0 errors - Pytest: 426 unit tests passed ---- +### [Phase 8: RAG Knowledge Base](./PHASE/8-RAG_KNOWLEDGE_BASE.md) -## Pending Phases +**Date Completed**: 2026-02-01 + +**Summary**: RAG Knowledge Base with pgvector and multiple embedding providers: +- PostgreSQL pgvector for HNSW similarity search +- Embedding Provider Pattern: OpenAI (default) and Ollama (local/LAN) +- Ollama uses `/v1/embeddings` OpenAI-compatible endpoint with `dimensions` parameter +- Markdown-aware and OpenAPI endpoint-aware chunking +- Idempotent indexing via SHA-256 content hash +- Configurable embedding dimensions (1536 default, 768 for nomic-embed-text, etc.) 
+ +**Key Deliverables**: +- `app/features/rag/embeddings.py` - EmbeddingProvider, OpenAIEmbeddingProvider, OllamaEmbeddingProvider +- `app/features/rag/chunkers.py` - MarkdownChunker, OpenAPIChunker +- `app/features/rag/models.py` - DocumentSource, DocumentChunk ORM models +- `app/features/rag/service.py` - RAGService (index, retrieve, list, delete) +- `app/features/rag/routes.py` - API endpoints +- `alembic/versions/b4c8d9e0f123_create_rag_tables.py` - Base RAG tables +- `alembic/versions/c5d9e1f2g345_rag_dynamic_embedding_dimension.py` - Dynamic dimension + +**API Endpoints**: +- `POST /rag/index` - Index document into knowledge base +- `POST /rag/retrieve` - Semantic search with similarity threshold +- `GET /rag/sources` - List indexed sources +- `DELETE /rag/sources/{source_id}` - Delete source and chunks + +**Configuration (Settings)**: +```python +rag_embedding_provider: Literal["openai", "ollama"] = "openai" +rag_embedding_dimension: int = 1536 +ollama_base_url: str = "http://localhost:11434" +ollama_embedding_model: str = "nomic-embed-text" +``` -### Phase 8: RAG Knowledge Base -pgvector embeddings with evidence-grounded answers and citations. +**Validation Results**: +- Ruff: All checks passed +- MyPy: 0 errors (117 source files) +- Pyright: 0 errors +- Pytest: 82 unit tests + 14 integration tests + +--- -### Phase 9: Dashboard -React + Vite + shadcn/ui frontend with data tables and visualizations. +## Pending Phases -### Phase 10: Agentic Layer (Optional) -PydanticAI integration for experiment orchestration. +### Phase 9: Agentic Layer ("The Brain") +Autonomous decision-making, tool orchestration, and structured outputs using PydanticAI. 
+- Experiment Orchestrator Agent (backtest → compare → deploy workflow) +- RAG Assistant Agent (query → retrieve → answer with citations) +- Human-in-the-loop approval for sensitive operations +- WebSocket streaming for real-time responses +- Endpoints: POST /agents/experiment/run, POST /agents/rag/query, WS /agents/stream + +### Phase 10: ForecastLab Dashboard ("The Face") +User interface, data visualization, and agent interaction. +- React 19 + Vite + shadcn/ui + Tailwind CSS 4 +- TanStack Table for server-side data grids +- TanStack Query for data fetching and caching +- Recharts for time series visualization +- Agent chat interface with streaming and citations --- @@ -331,3 +379,4 @@ Each phase document (`docs/PHASE/X-PHASE_NAME.md`) contains: | 2026-01-31 | 5 | Backtesting module with time-series CV completed | | 2026-02-01 | 6 | Model Registry with run tracking and deployment aliases completed | | 2026-02-01 | 7 | Serving Layer with RFC 7807, dimensions, analytics, and jobs completed | +| 2026-02-01 | 8 | RAG Knowledge Base with pgvector and Ollama embedding provider completed | diff --git a/docs/PHASE/8-RAG_KNOWLEDGE_BASE.md b/docs/PHASE/8-RAG_KNOWLEDGE_BASE.md new file mode 100644 index 00000000..aec1f984 --- /dev/null +++ b/docs/PHASE/8-RAG_KNOWLEDGE_BASE.md @@ -0,0 +1,398 @@ +# Phase 8: RAG Knowledge Base + +**Date Completed**: 2026-02-01 +**PRP**: PRP-9 +**Status**: ✅ Completed + +--- + +## Executive Summary + +Phase 8 implements the RAG (Retrieval-Augmented Generation) Knowledge Base for ForecastLabAI with PostgreSQL pgvector for semantic similarity search, multiple embedding providers (OpenAI and Ollama), and evidence-grounded retrieval with citations. + +### Objectives Achieved + +1. **pgvector Integration** - HNSW index for fast cosine similarity search +2. **Embedding Provider Pattern** - Abstract base class with OpenAI and Ollama implementations +3. **Document Indexing** - Markdown and OpenAPI-aware chunking with content hash for idempotency +4. 
**Semantic Retrieval** - Configurable top-k retrieval with similarity threshold +5. **Source Management** - List, index, and delete document sources + +--- + +## Deliverables + +### 1. Embedding Provider Pattern + +**File**: `app/features/rag/embeddings.py` + +Implements abstract `EmbeddingProvider` base class with two concrete implementations: + +```python +class EmbeddingProvider(ABC): + """Abstract base class for embedding providers.""" + + @abstractmethod + async def embed_texts(self, texts: list[str]) -> list[list[float]]: ... + + @abstractmethod + async def embed_query(self, query: str) -> list[float]: ... + + @property + @abstractmethod + def dimension(self) -> int: ... +``` + +**Providers**: + +| Provider | Endpoint | Features | +|----------|----------|----------| +| `OpenAIEmbeddingProvider` | OpenAI API | Batch processing, rate limit handling, token validation | +| `OllamaEmbeddingProvider` | `/v1/embeddings` | OpenAI-compatible, configurable dimensions, local/LAN | + +**Factory Function**: + +```python +def get_embedding_service() -> EmbeddingProvider: + """Returns provider based on RAG_EMBEDDING_PROVIDER config.""" + settings = get_settings() + if settings.rag_embedding_provider == "ollama": + return OllamaEmbeddingProvider() + return OpenAIEmbeddingProvider() +``` + +### 2. Document Chunking + +**File**: `app/features/rag/chunkers.py` + +| Chunker | Source Type | Strategy | +|---------|-------------|----------| +| `MarkdownChunker` | `markdown` | Respects heading boundaries, extracts heading hierarchy metadata | +| `OpenAPIChunker` | `openapi` | Chunks by endpoint, extracts method/path/parameters metadata | + +**ChunkData Structure**: + +```python +@dataclass +class ChunkData: + content: str # Chunk text + token_count: int # Token count for the chunk + chunk_index: int # Position in source document + metadata: dict | None # Heading path, endpoint info, etc. +``` + +### 3. 
RAG Service + +**File**: `app/features/rag/service.py` + +| Method | Description | +|--------|-------------| +| `index_document()` | Index document with chunking and embedding | +| `retrieve()` | Semantic search with similarity scoring | +| `list_sources()` | List indexed sources with statistics | +| `delete_source()` | Delete source and its chunks | + +**Idempotent Indexing**: +- SHA-256 content hash for change detection +- Returns `"unchanged"` status if content matches existing source +- Re-indexes only when content changes + +### 4. ORM Models + +**File**: `app/features/rag/models.py` + +```python +class DocumentSource(TimestampMixin, Base): + """Registry of indexed document sources.""" + __tablename__ = "document_source" + + id: Mapped[int] + source_id: Mapped[str] # UUID hex (32 chars) + source_type: Mapped[str] # markdown, openapi + source_path: Mapped[str] # File path or identifier + content_hash: Mapped[str] # SHA-256 for change detection + metadata_: Mapped[dict] # JSONB custom metadata + indexed_at: Mapped[datetime] + + +class DocumentChunk(TimestampMixin, Base): + """Indexed document chunk with embedding.""" + __tablename__ = "document_chunk" + + id: Mapped[int] + chunk_id: Mapped[str] # UUID hex (32 chars) + source_id: Mapped[int] # FK to document_source + chunk_index: Mapped[int] # Position in document + content: Mapped[str] # Chunk text + embedding: Mapped[list[float]] # Vector(dimension) + token_count: Mapped[int] + metadata_: Mapped[dict] # Heading hierarchy, etc. +``` + +### 5. 
API Endpoints + +**File**: `app/features/rag/routes.py` + +| Method | Path | Description | +|--------|------|-------------| +| POST | `/rag/index` | Index a document into the knowledge base | +| POST | `/rag/retrieve` | Semantic search across indexed documents | +| GET | `/rag/sources` | List all indexed sources | +| DELETE | `/rag/sources/{source_id}` | Delete source and its chunks | + +--- + +## Configuration + +### New Settings in `app/core/config.py` + +```python +# Embedding Provider +rag_embedding_provider: Literal["openai", "ollama"] = "openai" + +# OpenAI Configuration +openai_api_key: str = "" +rag_embedding_model: str = "text-embedding-3-small" + +# Ollama Configuration +ollama_base_url: str = "http://localhost:11434" +ollama_embedding_model: str = "nomic-embed-text" + +# Common Embedding Settings +rag_embedding_dimension: int = 1536 +rag_embedding_batch_size: int = 100 + +# Chunking Configuration +rag_chunk_size: int = 512 # tokens +rag_chunk_overlap: int = 50 # tokens +rag_min_chunk_size: int = 100 # minimum tokens per chunk + +# Retrieval Configuration +rag_top_k: int = 5 +rag_similarity_threshold: float = 0.7 +rag_max_context_tokens: int = 4000 + +# Index Configuration +rag_index_type: Literal["hnsw", "ivfflat"] = "hnsw" +rag_hnsw_m: int = 16 +rag_hnsw_ef_construction: int = 64 +``` + +### Environment Variables + +**OpenAI Provider (default)**: +```bash +RAG_EMBEDDING_PROVIDER=openai +OPENAI_API_KEY=sk-your-key +RAG_EMBEDDING_MODEL=text-embedding-3-small +RAG_EMBEDDING_DIMENSION=1536 +``` + +**Ollama Provider (local/LAN)**: +```bash +RAG_EMBEDDING_PROVIDER=ollama +OLLAMA_BASE_URL=http://localhost:11434 +OLLAMA_EMBEDDING_MODEL=nomic-embed-text +RAG_EMBEDDING_DIMENSION=768 +``` + +--- + +## Database Changes + +### Migration: `b4c8d9e0f123_create_rag_tables.py` + +Creates base RAG tables with pgvector: + +**Tables**: +- `document_source` - Source registry with content hash +- `document_chunk` - Chunks with vector embeddings + +**Indexes**: +- 
`ix_document_source_source_id` (unique) +- `ix_document_source_source_type` +- `ix_document_chunk_chunk_id` (unique) +- `ix_document_chunk_source_id` +- `ix_chunk_embedding_hnsw` - HNSW index for cosine similarity +- `ix_chunk_metadata_gin` - GIN index for metadata filtering + +### Migration: `c5d9e1f2g345_rag_dynamic_embedding_dimension.py` + +Enables configurable embedding dimension: + +```python +def upgrade() -> None: + dimension = int(os.environ.get("RAG_EMBEDDING_DIMENSION", "1536")) + op.drop_index("ix_chunk_embedding_hnsw") + op.execute(f"ALTER TABLE document_chunk ALTER COLUMN embedding TYPE vector({dimension})") + op.create_index("ix_chunk_embedding_hnsw", ...) +``` + +**Note**: Changing dimension requires re-indexing all documents. + +--- + +## Integration + +### Router Registration in `app/main.py` + +```python +from app.features.rag.routes import router as rag_router + +# In create_app(): +app.include_router(rag_router) +``` + +### Alembic Model Import in `alembic/env.py` + +```python +from app.features.rag import models as rag_models # noqa: F401 +``` + +--- + +## Test Coverage + +### Test Files + +| File | Tests | Description | +|------|-------|-------------| +| `test_embeddings.py` | 25 | Provider pattern, OpenAI, Ollama, factory | +| `test_chunkers.py` | 22 | Markdown and OpenAPI chunking | +| `test_schemas.py` | 22 | Request/response validation | +| `test_service.py` | 12 | Service unit tests | +| `test_routes.py` | 14 | Integration tests (require DB) | + +### Validation Results + +``` +Ruff: All checks passed +MyPy: 0 errors (117 source files) +Pyright: 0 errors +Pytest: 82 unit tests passed + 14 integration tests +``` + +--- + +## Directory Structure + +``` +app/ +├── core/ +│ └── config.py # MODIFIED: Added RAG and Ollama settings +├── features/ +│ └── rag/ # NEW: RAG Knowledge Base +│ ├── __init__.py +│ ├── models.py # DocumentSource, DocumentChunk ORM +│ ├── schemas.py # Request/response Pydantic schemas +│ ├── embeddings.py # 
EmbeddingProvider, OpenAI, Ollama +│ ├── chunkers.py # MarkdownChunker, OpenAPIChunker +│ ├── service.py # RAGService +│ ├── routes.py # API endpoints +│ └── tests/ +│ ├── __init__.py +│ ├── conftest.py +│ ├── test_embeddings.py +│ ├── test_chunkers.py +│ ├── test_schemas.py +│ ├── test_service.py +│ └── test_routes.py +└── main.py # MODIFIED: Router registration + +alembic/ +├── env.py # MODIFIED: RAG model import +└── versions/ + ├── b4c8d9e0f123_create_rag_tables.py # NEW + └── c5d9e1f2g345_rag_dynamic_embedding_dimension.py # NEW +``` + +--- + +## API Usage Examples + +### Index Documents + +```bash +# Index a markdown file +curl -X POST http://localhost:8123/rag/index \ + -H "Content-Type: application/json" \ + -d '{ + "source_type": "markdown", + "source_path": "docs/ARCHITECTURE.md" + }' + +# Index with inline content +curl -X POST http://localhost:8123/rag/index \ + -H "Content-Type: application/json" \ + -d '{ + "source_type": "markdown", + "source_path": "inline/readme", + "content": "# Project Overview\n\nThis is the project readme...", + "metadata": {"category": "documentation"} + }' + +# Index OpenAPI spec +curl -X POST http://localhost:8123/rag/index \ + -H "Content-Type: application/json" \ + -d '{ + "source_type": "openapi", + "source_path": "openapi.json" + }' +``` + +### Semantic Retrieval + +```bash +# Basic query +curl -X POST http://localhost:8123/rag/retrieve \ + -H "Content-Type: application/json" \ + -d '{ + "query": "How does backtesting work?" + }' + +# Query with filters +curl -X POST http://localhost:8123/rag/retrieve \ + -H "Content-Type: application/json" \ + -d '{ + "query": "API endpoints for forecasting", + "top_k": 10, + "similarity_threshold": 0.8, + "filters": { + "source_type": "openapi" + } + }' +``` + +### Source Management + +```bash +# List all sources +curl http://localhost:8123/rag/sources + +# Delete a source +curl -X DELETE http://localhost:8123/rag/sources/abc123def456... 
+``` + +--- + +## Embedding Provider Comparison + +| Feature | OpenAI | Ollama | +|---------|--------|--------| +| Endpoint | OpenAI API | `/v1/embeddings` | +| Authentication | API key required | None | +| Rate Limiting | Yes, with backoff | No | +| Token Validation | Yes (8191 max) | No | +| Batch Size | Configurable (2048 max) | Native batch support | +| Dimensions | 1536 (text-embedding-3-small) | Model-dependent | +| Network | Internet required | Local/LAN | + +--- + +## Next Phase Preparation + +Phase 9 (Agentic Layer) will build on this RAG infrastructure to: +- Create RAG Assistant Agent for evidence-grounded Q&A +- Implement citation formatting with source references +- Add WebSocket streaming for real-time responses +- Integrate with Experiment Orchestrator Agent diff --git a/examples/rag/index_docs.py b/examples/rag/index_docs.py new file mode 100644 index 00000000..7ce2902d --- /dev/null +++ b/examples/rag/index_docs.py @@ -0,0 +1,172 @@ +#!/usr/bin/env python +"""Example: Index documentation into RAG knowledge base. + +This script demonstrates how to index markdown documentation +from the docs/ directory into the RAG knowledge base. + +Usage: + # Make sure the API is running + uv run uvicorn app.main:app --reload --port 8123 + + # Run this script + uv run python examples/rag/index_docs.py + +Requirements: + - OPENAI_API_KEY environment variable must be set + - PostgreSQL with pgvector must be running (docker-compose up -d) + - Migrations applied (uv run alembic upgrade head) +""" + +import asyncio +from pathlib import Path + +import httpx + + +async def index_markdown_docs(base_url: str = "http://localhost:8123") -> None: + """Index all markdown docs from docs/ directory. + + Args: + base_url: Base URL of the API server. 
+ """ + docs_dir = Path("docs") + + if not docs_dir.exists(): + print(f"Error: {docs_dir} directory not found") + return + + async with httpx.AsyncClient(base_url=base_url, timeout=60.0) as client: + # Find all markdown files + md_files = list(docs_dir.rglob("*.md")) + print(f"Found {len(md_files)} markdown files to index") + + total_chunks = 0 + total_tokens = 0 + indexed = 0 + unchanged = 0 + failed = 0 + + for md_file in md_files: + try: + # Read file content + content = md_file.read_text(encoding="utf-8") + + # Index the document + response = await client.post( + "/rag/index", + json={ + "source_type": "markdown", + "source_path": str(md_file), + "content": content, + "metadata": { + "category": "documentation", + "file_type": "markdown", + }, + }, + ) + + if response.status_code in (200, 201): + result = response.json() + status = result["status"] + + if status == "unchanged": + unchanged += 1 + print(f" [unchanged] {md_file}") + else: + indexed += 1 + total_chunks += result["chunks_created"] + total_tokens += result["tokens_processed"] + print( + f" [{status}] {md_file}: " + f"{result['chunks_created']} chunks, " + f"{result['tokens_processed']} tokens" + ) + else: + failed += 1 + print(f" [FAILED] {md_file}: {response.status_code} - {response.text}") + + except Exception as e: + failed += 1 + print(f" [ERROR] {md_file}: {e}") + + print("\n" + "=" * 50) + print("Indexing Summary:") + print(f" Indexed: {indexed}") + print(f" Unchanged: {unchanged}") + print(f" Failed: {failed}") + print(f" Total chunks created: {total_chunks}") + print(f" Total tokens processed: {total_tokens}") + + +async def index_readme(base_url: str = "http://localhost:8123") -> None: + """Index the main README.md file. + + Args: + base_url: Base URL of the API server. 
+ """ + readme_path = Path("README.md") + + if not readme_path.exists(): + print("README.md not found") + return + + async with httpx.AsyncClient(base_url=base_url, timeout=60.0) as client: + content = readme_path.read_text(encoding="utf-8") + + response = await client.post( + "/rag/index", + json={ + "source_type": "markdown", + "source_path": str(readme_path), + "content": content, + "metadata": {"category": "overview"}, + }, + ) + + if response.status_code == 201: + result = response.json() + print(f"README.md indexed: {result['chunks_created']} chunks ({result['status']})") + else: + print(f"Failed to index README.md: {response.status_code}") + + +async def list_sources(base_url: str = "http://localhost:8123") -> None: + """List all indexed sources. + + Args: + base_url: Base URL of the API server. + """ + async with httpx.AsyncClient(base_url=base_url) as client: + response = await client.get("/rag/sources") + + if response.status_code == 200: + data = response.json() + print(f"\nIndexed Sources: {data['total_sources']}") + print(f"Total Chunks: {data['total_chunks']}") + print("\nSources:") + for source in data["sources"]: + print(f" - {source['source_path']} ({source['chunk_count']} chunks)") + else: + print(f"Failed to list sources: {response.status_code}") + + +async def main() -> None: + """Main entry point.""" + print("RAG Knowledge Base - Document Indexer") + print("=" * 50) + + # Index README first + print("\n1. Indexing README.md...") + await index_readme() + + # Index documentation + print("\n2. Indexing docs/ directory...") + await index_markdown_docs() + + # List all sources + print("\n3. 
Listing indexed sources...") + await list_sources() + + +if __name__ == "__main__": + asyncio.run(main()) diff --git a/examples/rag/query.http b/examples/rag/query.http new file mode 100644 index 00000000..04937945 --- /dev/null +++ b/examples/rag/query.http @@ -0,0 +1,123 @@ +### RAG Knowledge Base - HTTP Client Examples +### Use with VS Code REST Client or similar tools + +### ============================================================================= +### Index Endpoints +### ============================================================================= + +### Index a markdown document (with content) +POST http://localhost:8123/rag/index +Content-Type: application/json + +{ + "source_type": "markdown", + "source_path": "docs/example.md", + "content": "# Example Document\n\nThis is an example markdown document for testing the RAG indexing pipeline.\n\n## Section One\n\nFirst section with some content about forecasting.\n\n## Section Two\n\nSecond section about backtesting strategies.", + "metadata": { + "category": "documentation", + "author": "test" + } +} + +### Index a markdown document (read from file path) +POST http://localhost:8123/rag/index +Content-Type: application/json + +{ + "source_type": "markdown", + "source_path": "README.md" +} + +### Index an OpenAPI specification +POST http://localhost:8123/rag/index +Content-Type: application/json + +{ + "source_type": "openapi", + "source_path": "api/openapi.json", + "content": "{\"openapi\":\"3.0.0\",\"info\":{\"title\":\"Test API\",\"version\":\"1.0\"},\"paths\":{\"/users\":{\"get\":{\"summary\":\"List users\",\"operationId\":\"listUsers\",\"responses\":{\"200\":{\"description\":\"OK\"}}}}}}" +} + +### ============================================================================= +### Retrieve Endpoints +### ============================================================================= + +### Semantic search - basic query +POST http://localhost:8123/rag/retrieve +Content-Type: application/json + +{ + 
"query": "How does backtesting prevent data leakage?", + "top_k": 5, + "similarity_threshold": 0.7 +} + +### Semantic search - with filters +POST http://localhost:8123/rag/retrieve +Content-Type: application/json + +{ + "query": "What forecasting models are available?", + "top_k": 10, + "similarity_threshold": 0.6, + "filters": { + "source_type": ["markdown"], + "category": "documentation" + } +} + +### Semantic search - lower threshold for more results +POST http://localhost:8123/rag/retrieve +Content-Type: application/json + +{ + "query": "time series cross validation", + "top_k": 20, + "similarity_threshold": 0.5 +} + +### ============================================================================= +### Sources Endpoints +### ============================================================================= + +### List all indexed sources +GET http://localhost:8123/rag/sources + +### Delete a specific source (replace source_id with actual value) +DELETE http://localhost:8123/rag/sources/abc123def456789012345678901234 + +### ============================================================================= +### Example Workflows +### ============================================================================= + +### Workflow 1: Index and then query +### Step 1: Index a document +POST http://localhost:8123/rag/index +Content-Type: application/json + +{ + "source_type": "markdown", + "source_path": "test-workflow.md", + "content": "# Backtesting Guide\n\nBacktesting is a method to evaluate forecasting models using historical data.\n\n## Time-Based Splits\n\nWe use expanding or sliding window strategies to prevent data leakage.\n\n## Metrics\n\nKey metrics include MAE, sMAPE, WAPE, and Bias." 
+} + +### Step 2: Query the indexed content +POST http://localhost:8123/rag/retrieve +Content-Type: application/json + +{ + "query": "What metrics are used in backtesting?", + "top_k": 3, + "similarity_threshold": 0.6 +} + +### Workflow 2: Re-index with updated content +### (Using same source_path will update existing chunks) +POST http://localhost:8123/rag/index +Content-Type: application/json + +{ + "source_type": "markdown", + "source_path": "test-workflow.md", + "content": "# Backtesting Guide (Updated)\n\nBacktesting evaluates forecasting models.\n\n## Time-Based Splits\n\nWe use expanding or sliding window strategies.\n\n## Metrics\n\nKey metrics: MAE, sMAPE, WAPE, Bias, and Stability Index." +} diff --git a/pyproject.toml b/pyproject.toml index 187facf4..a5c70231 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -20,6 +20,12 @@ dependencies = [ "numpy>=2.4.1", "scikit-learn>=1.6.0", "joblib>=1.4.0", + # RAG dependencies + "pgvector>=0.3.0", + "openai>=1.40.0", + "tiktoken>=0.7.0", + "httpx>=0.28.0", + "pyyaml>=6.0.0", ] [project.optional-dependencies] diff --git a/uv.lock b/uv.lock index 85d3d0c8..7451e80c 100644 --- a/uv.lock +++ b/uv.lock @@ -104,6 +104,63 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/e6/ad/3cc14f097111b4de0040c83a525973216457bbeeb63739ef1ed275c1c021/certifi-2026.1.4-py3-none-any.whl", hash = "sha256:9943707519e4add1115f44c2bc244f782c0249876bf51b6599fee1ffbedd685c", size = 152900, upload-time = "2026-01-04T02:42:40.15Z" }, ] +[[package]] +name = "charset-normalizer" +version = "3.4.4" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/13/69/33ddede1939fdd074bce5434295f38fae7136463422fe4fd3e0e89b98062/charset_normalizer-3.4.4.tar.gz", hash = "sha256:94537985111c35f28720e43603b8e7b43a6ecfb2ce1d3058bbe955b73404e21a", size = 129418, upload-time = "2025-10-14T04:42:32.879Z" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/f3/85/1637cd4af66fa687396e757dec650f28025f2a2f5a5531a3208dc0ec43f2/charset_normalizer-3.4.4-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:0a98e6759f854bd25a58a73fa88833fba3b7c491169f86ce1180c948ab3fd394", size = 208425, upload-time = "2025-10-14T04:40:53.353Z" }, + { url = "https://files.pythonhosted.org/packages/9d/6a/04130023fef2a0d9c62d0bae2649b69f7b7d8d24ea5536feef50551029df/charset_normalizer-3.4.4-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b5b290ccc2a263e8d185130284f8501e3e36c5e02750fc6b6bdeb2e9e96f1e25", size = 148162, upload-time = "2025-10-14T04:40:54.558Z" }, + { url = "https://files.pythonhosted.org/packages/78/29/62328d79aa60da22c9e0b9a66539feae06ca0f5a4171ac4f7dc285b83688/charset_normalizer-3.4.4-cp312-cp312-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:74bb723680f9f7a6234dcf67aea57e708ec1fbdf5699fb91dfd6f511b0a320ef", size = 144558, upload-time = "2025-10-14T04:40:55.677Z" }, + { url = "https://files.pythonhosted.org/packages/86/bb/b32194a4bf15b88403537c2e120b817c61cd4ecffa9b6876e941c3ee38fe/charset_normalizer-3.4.4-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:f1e34719c6ed0b92f418c7c780480b26b5d9c50349e9a9af7d76bf757530350d", size = 161497, upload-time = "2025-10-14T04:40:57.217Z" }, + { url = "https://files.pythonhosted.org/packages/19/89/a54c82b253d5b9b111dc74aca196ba5ccfcca8242d0fb64146d4d3183ff1/charset_normalizer-3.4.4-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:2437418e20515acec67d86e12bf70056a33abdacb5cb1655042f6538d6b085a8", size = 159240, upload-time = "2025-10-14T04:40:58.358Z" }, + { url = 
"https://files.pythonhosted.org/packages/c0/10/d20b513afe03acc89ec33948320a5544d31f21b05368436d580dec4e234d/charset_normalizer-3.4.4-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:11d694519d7f29d6cd09f6ac70028dba10f92f6cdd059096db198c283794ac86", size = 153471, upload-time = "2025-10-14T04:40:59.468Z" }, + { url = "https://files.pythonhosted.org/packages/61/fa/fbf177b55bdd727010f9c0a3c49eefa1d10f960e5f09d1d887bf93c2e698/charset_normalizer-3.4.4-cp312-cp312-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:ac1c4a689edcc530fc9d9aa11f5774b9e2f33f9a0c6a57864e90908f5208d30a", size = 150864, upload-time = "2025-10-14T04:41:00.623Z" }, + { url = "https://files.pythonhosted.org/packages/05/12/9fbc6a4d39c0198adeebbde20b619790e9236557ca59fc40e0e3cebe6f40/charset_normalizer-3.4.4-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:21d142cc6c0ec30d2efee5068ca36c128a30b0f2c53c1c07bd78cb6bc1d3be5f", size = 150647, upload-time = "2025-10-14T04:41:01.754Z" }, + { url = "https://files.pythonhosted.org/packages/ad/1f/6a9a593d52e3e8c5d2b167daf8c6b968808efb57ef4c210acb907c365bc4/charset_normalizer-3.4.4-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:5dbe56a36425d26d6cfb40ce79c314a2e4dd6211d51d6d2191c00bed34f354cc", size = 145110, upload-time = "2025-10-14T04:41:03.231Z" }, + { url = "https://files.pythonhosted.org/packages/30/42/9a52c609e72471b0fc54386dc63c3781a387bb4fe61c20231a4ebcd58bdd/charset_normalizer-3.4.4-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:5bfbb1b9acf3334612667b61bd3002196fe2a1eb4dd74d247e0f2a4d50ec9bbf", size = 162839, upload-time = "2025-10-14T04:41:04.715Z" }, + { url = "https://files.pythonhosted.org/packages/c4/5b/c0682bbf9f11597073052628ddd38344a3d673fda35a36773f7d19344b23/charset_normalizer-3.4.4-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:d055ec1e26e441f6187acf818b73564e6e6282709e9bcb5b63f5b23068356a15", size = 150667, upload-time = "2025-10-14T04:41:05.827Z" }, 
+ { url = "https://files.pythonhosted.org/packages/e4/24/a41afeab6f990cf2daf6cb8c67419b63b48cf518e4f56022230840c9bfb2/charset_normalizer-3.4.4-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:af2d8c67d8e573d6de5bc30cdb27e9b95e49115cd9baad5ddbd1a6207aaa82a9", size = 160535, upload-time = "2025-10-14T04:41:06.938Z" }, + { url = "https://files.pythonhosted.org/packages/2a/e5/6a4ce77ed243c4a50a1fecca6aaaab419628c818a49434be428fe24c9957/charset_normalizer-3.4.4-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:780236ac706e66881f3b7f2f32dfe90507a09e67d1d454c762cf642e6e1586e0", size = 154816, upload-time = "2025-10-14T04:41:08.101Z" }, + { url = "https://files.pythonhosted.org/packages/a8/ef/89297262b8092b312d29cdb2517cb1237e51db8ecef2e9af5edbe7b683b1/charset_normalizer-3.4.4-cp312-cp312-win32.whl", hash = "sha256:5833d2c39d8896e4e19b689ffc198f08ea58116bee26dea51e362ecc7cd3ed26", size = 99694, upload-time = "2025-10-14T04:41:09.23Z" }, + { url = "https://files.pythonhosted.org/packages/3d/2d/1e5ed9dd3b3803994c155cd9aacb60c82c331bad84daf75bcb9c91b3295e/charset_normalizer-3.4.4-cp312-cp312-win_amd64.whl", hash = "sha256:a79cfe37875f822425b89a82333404539ae63dbdddf97f84dcbc3d339aae9525", size = 107131, upload-time = "2025-10-14T04:41:10.467Z" }, + { url = "https://files.pythonhosted.org/packages/d0/d9/0ed4c7098a861482a7b6a95603edce4c0d9db2311af23da1fb2b75ec26fc/charset_normalizer-3.4.4-cp312-cp312-win_arm64.whl", hash = "sha256:376bec83a63b8021bb5c8ea75e21c4ccb86e7e45ca4eb81146091b56599b80c3", size = 100390, upload-time = "2025-10-14T04:41:11.915Z" }, + { url = "https://files.pythonhosted.org/packages/97/45/4b3a1239bbacd321068ea6e7ac28875b03ab8bc0aa0966452db17cd36714/charset_normalizer-3.4.4-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:e1f185f86a6f3403aa2420e815904c67b2f9ebc443f045edd0de921108345794", size = 208091, upload-time = "2025-10-14T04:41:13.346Z" }, + { url = 
"https://files.pythonhosted.org/packages/7d/62/73a6d7450829655a35bb88a88fca7d736f9882a27eacdca2c6d505b57e2e/charset_normalizer-3.4.4-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6b39f987ae8ccdf0d2642338faf2abb1862340facc796048b604ef14919e55ed", size = 147936, upload-time = "2025-10-14T04:41:14.461Z" }, + { url = "https://files.pythonhosted.org/packages/89/c5/adb8c8b3d6625bef6d88b251bbb0d95f8205831b987631ab0c8bb5d937c2/charset_normalizer-3.4.4-cp313-cp313-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:3162d5d8ce1bb98dd51af660f2121c55d0fa541b46dff7bb9b9f86ea1d87de72", size = 144180, upload-time = "2025-10-14T04:41:15.588Z" }, + { url = "https://files.pythonhosted.org/packages/91/ed/9706e4070682d1cc219050b6048bfd293ccf67b3d4f5a4f39207453d4b99/charset_normalizer-3.4.4-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:81d5eb2a312700f4ecaa977a8235b634ce853200e828fbadf3a9c50bab278328", size = 161346, upload-time = "2025-10-14T04:41:16.738Z" }, + { url = "https://files.pythonhosted.org/packages/d5/0d/031f0d95e4972901a2f6f09ef055751805ff541511dc1252ba3ca1f80cf5/charset_normalizer-3.4.4-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:5bd2293095d766545ec1a8f612559f6b40abc0eb18bb2f5d1171872d34036ede", size = 158874, upload-time = "2025-10-14T04:41:17.923Z" }, + { url = "https://files.pythonhosted.org/packages/f5/83/6ab5883f57c9c801ce5e5677242328aa45592be8a00644310a008d04f922/charset_normalizer-3.4.4-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a8a8b89589086a25749f471e6a900d3f662d1d3b6e2e59dcecf787b1cc3a1894", size = 153076, upload-time = "2025-10-14T04:41:19.106Z" }, + { url = 
"https://files.pythonhosted.org/packages/75/1e/5ff781ddf5260e387d6419959ee89ef13878229732732ee73cdae01800f2/charset_normalizer-3.4.4-cp313-cp313-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:bc7637e2f80d8530ee4a78e878bce464f70087ce73cf7c1caf142416923b98f1", size = 150601, upload-time = "2025-10-14T04:41:20.245Z" }, + { url = "https://files.pythonhosted.org/packages/d7/57/71be810965493d3510a6ca79b90c19e48696fb1ff964da319334b12677f0/charset_normalizer-3.4.4-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:f8bf04158c6b607d747e93949aa60618b61312fe647a6369f88ce2ff16043490", size = 150376, upload-time = "2025-10-14T04:41:21.398Z" }, + { url = "https://files.pythonhosted.org/packages/e5/d5/c3d057a78c181d007014feb7e9f2e65905a6c4ef182c0ddf0de2924edd65/charset_normalizer-3.4.4-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:554af85e960429cf30784dd47447d5125aaa3b99a6f0683589dbd27e2f45da44", size = 144825, upload-time = "2025-10-14T04:41:22.583Z" }, + { url = "https://files.pythonhosted.org/packages/e6/8c/d0406294828d4976f275ffbe66f00266c4b3136b7506941d87c00cab5272/charset_normalizer-3.4.4-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:74018750915ee7ad843a774364e13a3db91682f26142baddf775342c3f5b1133", size = 162583, upload-time = "2025-10-14T04:41:23.754Z" }, + { url = "https://files.pythonhosted.org/packages/d7/24/e2aa1f18c8f15c4c0e932d9287b8609dd30ad56dbe41d926bd846e22fb8d/charset_normalizer-3.4.4-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:c0463276121fdee9c49b98908b3a89c39be45d86d1dbaa22957e38f6321d4ce3", size = 150366, upload-time = "2025-10-14T04:41:25.27Z" }, + { url = "https://files.pythonhosted.org/packages/e4/5b/1e6160c7739aad1e2df054300cc618b06bf784a7a164b0f238360721ab86/charset_normalizer-3.4.4-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:362d61fd13843997c1c446760ef36f240cf81d3ebf74ac62652aebaf7838561e", size = 160300, upload-time = "2025-10-14T04:41:26.725Z" }, + { url = 
"https://files.pythonhosted.org/packages/7a/10/f882167cd207fbdd743e55534d5d9620e095089d176d55cb22d5322f2afd/charset_normalizer-3.4.4-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:9a26f18905b8dd5d685d6d07b0cdf98a79f3c7a918906af7cc143ea2e164c8bc", size = 154465, upload-time = "2025-10-14T04:41:28.322Z" }, + { url = "https://files.pythonhosted.org/packages/89/66/c7a9e1b7429be72123441bfdbaf2bc13faab3f90b933f664db506dea5915/charset_normalizer-3.4.4-cp313-cp313-win32.whl", hash = "sha256:9b35f4c90079ff2e2edc5b26c0c77925e5d2d255c42c74fdb70fb49b172726ac", size = 99404, upload-time = "2025-10-14T04:41:29.95Z" }, + { url = "https://files.pythonhosted.org/packages/c4/26/b9924fa27db384bdcd97ab83b4f0a8058d96ad9626ead570674d5e737d90/charset_normalizer-3.4.4-cp313-cp313-win_amd64.whl", hash = "sha256:b435cba5f4f750aa6c0a0d92c541fb79f69a387c91e61f1795227e4ed9cece14", size = 107092, upload-time = "2025-10-14T04:41:31.188Z" }, + { url = "https://files.pythonhosted.org/packages/af/8f/3ed4bfa0c0c72a7ca17f0380cd9e4dd842b09f664e780c13cff1dcf2ef1b/charset_normalizer-3.4.4-cp313-cp313-win_arm64.whl", hash = "sha256:542d2cee80be6f80247095cc36c418f7bddd14f4a6de45af91dfad36d817bba2", size = 100408, upload-time = "2025-10-14T04:41:32.624Z" }, + { url = "https://files.pythonhosted.org/packages/2a/35/7051599bd493e62411d6ede36fd5af83a38f37c4767b92884df7301db25d/charset_normalizer-3.4.4-cp314-cp314-macosx_10_13_universal2.whl", hash = "sha256:da3326d9e65ef63a817ecbcc0df6e94463713b754fe293eaa03da99befb9a5bd", size = 207746, upload-time = "2025-10-14T04:41:33.773Z" }, + { url = "https://files.pythonhosted.org/packages/10/9a/97c8d48ef10d6cd4fcead2415523221624bf58bcf68a802721a6bc807c8f/charset_normalizer-3.4.4-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:8af65f14dc14a79b924524b1e7fffe304517b2bff5a58bf64f30b98bbc5079eb", size = 147889, upload-time = "2025-10-14T04:41:34.897Z" }, + { url = 
"https://files.pythonhosted.org/packages/10/bf/979224a919a1b606c82bd2c5fa49b5c6d5727aa47b4312bb27b1734f53cd/charset_normalizer-3.4.4-cp314-cp314-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:74664978bb272435107de04e36db5a9735e78232b85b77d45cfb38f758efd33e", size = 143641, upload-time = "2025-10-14T04:41:36.116Z" }, + { url = "https://files.pythonhosted.org/packages/ba/33/0ad65587441fc730dc7bd90e9716b30b4702dc7b617e6ba4997dc8651495/charset_normalizer-3.4.4-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:752944c7ffbfdd10c074dc58ec2d5a8a4cd9493b314d367c14d24c17684ddd14", size = 160779, upload-time = "2025-10-14T04:41:37.229Z" }, + { url = "https://files.pythonhosted.org/packages/67/ed/331d6b249259ee71ddea93f6f2f0a56cfebd46938bde6fcc6f7b9a3d0e09/charset_normalizer-3.4.4-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:d1f13550535ad8cff21b8d757a3257963e951d96e20ec82ab44bc64aeb62a191", size = 159035, upload-time = "2025-10-14T04:41:38.368Z" }, + { url = "https://files.pythonhosted.org/packages/67/ff/f6b948ca32e4f2a4576aa129d8bed61f2e0543bf9f5f2b7fc3758ed005c9/charset_normalizer-3.4.4-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ecaae4149d99b1c9e7b88bb03e3221956f68fd6d50be2ef061b2381b61d20838", size = 152542, upload-time = "2025-10-14T04:41:39.862Z" }, + { url = "https://files.pythonhosted.org/packages/16/85/276033dcbcc369eb176594de22728541a925b2632f9716428c851b149e83/charset_normalizer-3.4.4-cp314-cp314-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:cb6254dc36b47a990e59e1068afacdcd02958bdcce30bb50cc1700a8b9d624a6", size = 149524, upload-time = "2025-10-14T04:41:41.319Z" }, + { url = "https://files.pythonhosted.org/packages/9e/f2/6a2a1f722b6aba37050e626530a46a68f74e63683947a8acff92569f979a/charset_normalizer-3.4.4-cp314-cp314-musllinux_1_2_aarch64.whl", hash = 
"sha256:c8ae8a0f02f57a6e61203a31428fa1d677cbe50c93622b4149d5c0f319c1d19e", size = 150395, upload-time = "2025-10-14T04:41:42.539Z" }, + { url = "https://files.pythonhosted.org/packages/60/bb/2186cb2f2bbaea6338cad15ce23a67f9b0672929744381e28b0592676824/charset_normalizer-3.4.4-cp314-cp314-musllinux_1_2_armv7l.whl", hash = "sha256:47cc91b2f4dd2833fddaedd2893006b0106129d4b94fdb6af1f4ce5a9965577c", size = 143680, upload-time = "2025-10-14T04:41:43.661Z" }, + { url = "https://files.pythonhosted.org/packages/7d/a5/bf6f13b772fbb2a90360eb620d52ed8f796f3c5caee8398c3b2eb7b1c60d/charset_normalizer-3.4.4-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:82004af6c302b5d3ab2cfc4cc5f29db16123b1a8417f2e25f9066f91d4411090", size = 162045, upload-time = "2025-10-14T04:41:44.821Z" }, + { url = "https://files.pythonhosted.org/packages/df/c5/d1be898bf0dc3ef9030c3825e5d3b83f2c528d207d246cbabe245966808d/charset_normalizer-3.4.4-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:2b7d8f6c26245217bd2ad053761201e9f9680f8ce52f0fcd8d0755aeae5b2152", size = 149687, upload-time = "2025-10-14T04:41:46.442Z" }, + { url = "https://files.pythonhosted.org/packages/a5/42/90c1f7b9341eef50c8a1cb3f098ac43b0508413f33affd762855f67a410e/charset_normalizer-3.4.4-cp314-cp314-musllinux_1_2_s390x.whl", hash = "sha256:799a7a5e4fb2d5898c60b640fd4981d6a25f1c11790935a44ce38c54e985f828", size = 160014, upload-time = "2025-10-14T04:41:47.631Z" }, + { url = "https://files.pythonhosted.org/packages/76/be/4d3ee471e8145d12795ab655ece37baed0929462a86e72372fd25859047c/charset_normalizer-3.4.4-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:99ae2cffebb06e6c22bdc25801d7b30f503cc87dbd283479e7b606f70aff57ec", size = 154044, upload-time = "2025-10-14T04:41:48.81Z" }, + { url = "https://files.pythonhosted.org/packages/b0/6f/8f7af07237c34a1defe7defc565a9bc1807762f672c0fde711a4b22bf9c0/charset_normalizer-3.4.4-cp314-cp314-win32.whl", hash = "sha256:f9d332f8c2a2fcbffe1378594431458ddbef721c1769d78e2cbc06280d8155f9", 
size = 99940, upload-time = "2025-10-14T04:41:49.946Z" }, + { url = "https://files.pythonhosted.org/packages/4b/51/8ade005e5ca5b0d80fb4aff72a3775b325bdc3d27408c8113811a7cbe640/charset_normalizer-3.4.4-cp314-cp314-win_amd64.whl", hash = "sha256:8a6562c3700cce886c5be75ade4a5db4214fda19fede41d9792d100288d8f94c", size = 107104, upload-time = "2025-10-14T04:41:51.051Z" }, + { url = "https://files.pythonhosted.org/packages/da/5f/6b8f83a55bb8278772c5ae54a577f3099025f9ade59d0136ac24a0df4bde/charset_normalizer-3.4.4-cp314-cp314-win_arm64.whl", hash = "sha256:de00632ca48df9daf77a2c65a484531649261ec9f25489917f09e455cb09ddb2", size = 100743, upload-time = "2025-10-14T04:41:52.122Z" }, + { url = "https://files.pythonhosted.org/packages/0a/4c/925909008ed5a988ccbb72dcc897407e5d6d3bd72410d69e051fc0c14647/charset_normalizer-3.4.4-py3-none-any.whl", hash = "sha256:7a32c560861a02ff789ad905a2fe94e3f840803362c84fecf1851cb4cf3dc37f", size = 53402, upload-time = "2025-10-14T04:42:31.76Z" }, +] + [[package]] name = "click" version = "8.3.1" @@ -199,6 +256,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/d2/db/d291e30fdf7ea617a335531e72294e0c723356d7fdde8fba00610a76bda9/coverage-7.13.2-py3-none-any.whl", hash = "sha256:40ce1ea1e25125556d8e76bd0b61500839a07944cc287ac21d5626f3e620cad5", size = 210943, upload-time = "2026-01-25T13:00:02.388Z" }, ] +[[package]] +name = "distro" +version = "1.9.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/fc/f8/98eea607f65de6527f8a2e8885fc8015d3e6f5775df186e443e0964a11c3/distro-1.9.0.tar.gz", hash = "sha256:2fa77c6fd8940f116ee1d6b94a2f90b13b5ea8d019b98bc8bafdcabcdd9bdbed", size = 60722, upload-time = "2023-12-24T09:54:32.31Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/12/b3/231ffd4ab1fc9d679809f356cebee130ac7daa00d6d6f3206dd4fd137e9e/distro-1.9.0-py3-none-any.whl", hash = "sha256:7bffd925d65168f85027d8da9af6bddab658135b840670a223589bc0c8ef02b2", size = 
20277, upload-time = "2023-12-24T09:54:30.421Z" }, +] + [[package]] name = "fastapi" version = "0.128.0" @@ -216,21 +282,26 @@ wheels = [ [[package]] name = "forecastlabai" -version = "0.1.8" +version = "0.2.1" source = { editable = "." } dependencies = [ { name = "alembic" }, { name = "asyncpg" }, { name = "fastapi" }, + { name = "httpx" }, { name = "joblib" }, { name = "numpy" }, + { name = "openai" }, { name = "pandas" }, + { name = "pgvector" }, { name = "pydantic" }, { name = "pydantic-settings" }, { name = "python-dotenv" }, + { name = "pyyaml" }, { name = "scikit-learn" }, { name = "sqlalchemy", extra = ["asyncio"] }, { name = "structlog" }, + { name = "tiktoken" }, { name = "uvicorn", extra = ["standard"] }, ] @@ -255,11 +326,14 @@ requires-dist = [ { name = "alembic", specifier = ">=1.14.0" }, { name = "asyncpg", specifier = ">=0.30.0" }, { name = "fastapi", specifier = ">=0.115.0" }, + { name = "httpx", specifier = ">=0.28.0" }, { name = "httpx", marker = "extra == 'dev'", specifier = ">=0.28.0" }, { name = "joblib", specifier = ">=1.4.0" }, { name = "mypy", marker = "extra == 'dev'", specifier = ">=1.13.0" }, { name = "numpy", specifier = ">=2.4.1" }, + { name = "openai", specifier = ">=1.40.0" }, { name = "pandas", specifier = ">=3.0.0" }, + { name = "pgvector", specifier = ">=0.3.0" }, { name = "pydantic", specifier = ">=2.10.0" }, { name = "pydantic-settings", specifier = ">=2.6.0" }, { name = "pyright", marker = "extra == 'dev'", specifier = ">=1.1.390" }, @@ -267,10 +341,12 @@ requires-dist = [ { name = "pytest-asyncio", marker = "extra == 'dev'", specifier = ">=0.24.0" }, { name = "pytest-cov", marker = "extra == 'dev'", specifier = ">=6.0.0" }, { name = "python-dotenv", specifier = ">=1.0.1" }, + { name = "pyyaml", specifier = ">=6.0.0" }, { name = "ruff", marker = "extra == 'dev'", specifier = ">=0.8.0" }, { name = "scikit-learn", specifier = ">=1.6.0" }, { name = "sqlalchemy", extras = ["asyncio"], specifier = ">=2.0.36" }, { name = "structlog", 
specifier = ">=24.4.0" }, + { name = "tiktoken", specifier = ">=0.7.0" }, { name = "uvicorn", extras = ["standard"], specifier = ">=0.32.0" }, ] provides-extras = ["dev"] @@ -405,6 +481,74 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/cb/b1/3846dd7f199d53cb17f49cba7e651e9ce294d8497c8c150530ed11865bb8/iniconfig-2.3.0-py3-none-any.whl", hash = "sha256:f631c04d2c48c52b84d0d0549c99ff3859c98df65b3101406327ecc7d53fbf12", size = 7484, upload-time = "2025-10-18T21:55:41.639Z" }, ] +[[package]] +name = "jiter" +version = "0.12.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/45/9d/e0660989c1370e25848bb4c52d061c71837239738ad937e83edca174c273/jiter-0.12.0.tar.gz", hash = "sha256:64dfcd7d5c168b38d3f9f8bba7fc639edb3418abcc74f22fdbe6b8938293f30b", size = 168294, upload-time = "2025-11-09T20:49:23.302Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/92/c9/5b9f7b4983f1b542c64e84165075335e8a236fa9e2ea03a0c79780062be8/jiter-0.12.0-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:305e061fa82f4680607a775b2e8e0bcb071cd2205ac38e6ef48c8dd5ebe1cf37", size = 314449, upload-time = "2025-11-09T20:47:22.999Z" }, + { url = "https://files.pythonhosted.org/packages/98/6e/e8efa0e78de00db0aee82c0cf9e8b3f2027efd7f8a71f859d8f4be8e98ef/jiter-0.12.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:5c1860627048e302a528333c9307c818c547f214d8659b0705d2195e1a94b274", size = 319855, upload-time = "2025-11-09T20:47:24.779Z" }, + { url = "https://files.pythonhosted.org/packages/20/26/894cd88e60b5d58af53bec5c6759d1292bd0b37a8b5f60f07abf7a63ae5f/jiter-0.12.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:df37577a4f8408f7e0ec3205d2a8f87672af8f17008358063a4d6425b6081ce3", size = 350171, upload-time = "2025-11-09T20:47:26.469Z" }, + { url = 
"https://files.pythonhosted.org/packages/f5/27/a7b818b9979ac31b3763d25f3653ec3a954044d5e9f5d87f2f247d679fd1/jiter-0.12.0-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:75fdd787356c1c13a4f40b43c2156276ef7a71eb487d98472476476d803fb2cf", size = 365590, upload-time = "2025-11-09T20:47:27.918Z" }, + { url = "https://files.pythonhosted.org/packages/ba/7e/e46195801a97673a83746170b17984aa8ac4a455746354516d02ca5541b4/jiter-0.12.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1eb5db8d9c65b112aacf14fcd0faae9913d07a8afea5ed06ccdd12b724e966a1", size = 479462, upload-time = "2025-11-09T20:47:29.654Z" }, + { url = "https://files.pythonhosted.org/packages/ca/75/f833bfb009ab4bd11b1c9406d333e3b4357709ed0570bb48c7c06d78c7dd/jiter-0.12.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:73c568cc27c473f82480abc15d1301adf333a7ea4f2e813d6a2c7d8b6ba8d0df", size = 378983, upload-time = "2025-11-09T20:47:31.026Z" }, + { url = "https://files.pythonhosted.org/packages/71/b3/7a69d77943cc837d30165643db753471aff5df39692d598da880a6e51c24/jiter-0.12.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4321e8a3d868919bcb1abb1db550d41f2b5b326f72df29e53b2df8b006eb9403", size = 361328, upload-time = "2025-11-09T20:47:33.286Z" }, + { url = "https://files.pythonhosted.org/packages/b0/ac/a78f90caf48d65ba70d8c6efc6f23150bc39dc3389d65bbec2a95c7bc628/jiter-0.12.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:0a51bad79f8cc9cac2b4b705039f814049142e0050f30d91695a2d9a6611f126", size = 386740, upload-time = "2025-11-09T20:47:34.703Z" }, + { url = "https://files.pythonhosted.org/packages/39/b6/5d31c2cc8e1b6a6bcf3c5721e4ca0a3633d1ab4754b09bc7084f6c4f5327/jiter-0.12.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:2a67b678f6a5f1dd6c36d642d7db83e456bc8b104788262aaefc11a22339f5a9", size = 520875, upload-time = "2025-11-09T20:47:36.058Z" }, + { url = 
"https://files.pythonhosted.org/packages/30/b5/4df540fae4e9f68c54b8dab004bd8c943a752f0b00efd6e7d64aa3850339/jiter-0.12.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:efe1a211fe1fd14762adea941e3cfd6c611a136e28da6c39272dbb7a1bbe6a86", size = 511457, upload-time = "2025-11-09T20:47:37.932Z" }, + { url = "https://files.pythonhosted.org/packages/07/65/86b74010e450a1a77b2c1aabb91d4a91dd3cd5afce99f34d75fd1ac64b19/jiter-0.12.0-cp312-cp312-win32.whl", hash = "sha256:d779d97c834b4278276ec703dc3fc1735fca50af63eb7262f05bdb4e62203d44", size = 204546, upload-time = "2025-11-09T20:47:40.47Z" }, + { url = "https://files.pythonhosted.org/packages/1c/c7/6659f537f9562d963488e3e55573498a442503ced01f7e169e96a6110383/jiter-0.12.0-cp312-cp312-win_amd64.whl", hash = "sha256:e8269062060212b373316fe69236096aaf4c49022d267c6736eebd66bbbc60bb", size = 205196, upload-time = "2025-11-09T20:47:41.794Z" }, + { url = "https://files.pythonhosted.org/packages/21/f4/935304f5169edadfec7f9c01eacbce4c90bb9a82035ac1de1f3bd2d40be6/jiter-0.12.0-cp312-cp312-win_arm64.whl", hash = "sha256:06cb970936c65de926d648af0ed3d21857f026b1cf5525cb2947aa5e01e05789", size = 186100, upload-time = "2025-11-09T20:47:43.007Z" }, + { url = "https://files.pythonhosted.org/packages/3d/a6/97209693b177716e22576ee1161674d1d58029eb178e01866a0422b69224/jiter-0.12.0-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:6cc49d5130a14b732e0612bc76ae8db3b49898732223ef8b7599aa8d9810683e", size = 313658, upload-time = "2025-11-09T20:47:44.424Z" }, + { url = "https://files.pythonhosted.org/packages/06/4d/125c5c1537c7d8ee73ad3d530a442d6c619714b95027143f1b61c0b4dfe0/jiter-0.12.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:37f27a32ce36364d2fa4f7fdc507279db604d27d239ea2e044c8f148410defe1", size = 318605, upload-time = "2025-11-09T20:47:45.973Z" }, + { url = 
"https://files.pythonhosted.org/packages/99/bf/a840b89847885064c41a5f52de6e312e91fa84a520848ee56c97e4fa0205/jiter-0.12.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bbc0944aa3d4b4773e348cda635252824a78f4ba44328e042ef1ff3f6080d1cf", size = 349803, upload-time = "2025-11-09T20:47:47.535Z" }, + { url = "https://files.pythonhosted.org/packages/8a/88/e63441c28e0db50e305ae23e19c1d8fae012d78ed55365da392c1f34b09c/jiter-0.12.0-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:da25c62d4ee1ffbacb97fac6dfe4dcd6759ebdc9015991e92a6eae5816287f44", size = 365120, upload-time = "2025-11-09T20:47:49.284Z" }, + { url = "https://files.pythonhosted.org/packages/0a/7c/49b02714af4343970eb8aca63396bc1c82fa01197dbb1e9b0d274b550d4e/jiter-0.12.0-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:048485c654b838140b007390b8182ba9774621103bd4d77c9c3f6f117474ba45", size = 479918, upload-time = "2025-11-09T20:47:50.807Z" }, + { url = "https://files.pythonhosted.org/packages/69/ba/0a809817fdd5a1db80490b9150645f3aae16afad166960bcd562be194f3b/jiter-0.12.0-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:635e737fbb7315bef0037c19b88b799143d2d7d3507e61a76751025226b3ac87", size = 379008, upload-time = "2025-11-09T20:47:52.211Z" }, + { url = "https://files.pythonhosted.org/packages/5f/c3/c9fc0232e736c8877d9e6d83d6eeb0ba4e90c6c073835cc2e8f73fdeef51/jiter-0.12.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4e017c417b1ebda911bd13b1e40612704b1f5420e30695112efdbed8a4b389ed", size = 361785, upload-time = "2025-11-09T20:47:53.512Z" }, + { url = "https://files.pythonhosted.org/packages/96/61/61f69b7e442e97ca6cd53086ddc1cf59fb830549bc72c0a293713a60c525/jiter-0.12.0-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:89b0bfb8b2bf2351fba36bb211ef8bfceba73ef58e7f0c68fb67b5a2795ca2f9", size = 386108, upload-time = "2025-11-09T20:47:54.893Z" }, + { url = 
"https://files.pythonhosted.org/packages/e9/2e/76bb3332f28550c8f1eba3bf6e5efe211efda0ddbbaf24976bc7078d42a5/jiter-0.12.0-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:f5aa5427a629a824a543672778c9ce0c5e556550d1569bb6ea28a85015287626", size = 519937, upload-time = "2025-11-09T20:47:56.253Z" }, + { url = "https://files.pythonhosted.org/packages/84/d6/fa96efa87dc8bff2094fb947f51f66368fa56d8d4fc9e77b25d7fbb23375/jiter-0.12.0-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:ed53b3d6acbcb0fd0b90f20c7cb3b24c357fe82a3518934d4edfa8c6898e498c", size = 510853, upload-time = "2025-11-09T20:47:58.32Z" }, + { url = "https://files.pythonhosted.org/packages/8a/28/93f67fdb4d5904a708119a6ab58a8f1ec226ff10a94a282e0215402a8462/jiter-0.12.0-cp313-cp313-win32.whl", hash = "sha256:4747de73d6b8c78f2e253a2787930f4fffc68da7fa319739f57437f95963c4de", size = 204699, upload-time = "2025-11-09T20:47:59.686Z" }, + { url = "https://files.pythonhosted.org/packages/c4/1f/30b0eb087045a0abe2a5c9c0c0c8da110875a1d3be83afd4a9a4e548be3c/jiter-0.12.0-cp313-cp313-win_amd64.whl", hash = "sha256:e25012eb0c456fcc13354255d0338cd5397cce26c77b2832b3c4e2e255ea5d9a", size = 204258, upload-time = "2025-11-09T20:48:01.01Z" }, + { url = "https://files.pythonhosted.org/packages/2c/f4/2b4daf99b96bce6fc47971890b14b2a36aef88d7beb9f057fafa032c6141/jiter-0.12.0-cp313-cp313-win_arm64.whl", hash = "sha256:c97b92c54fe6110138c872add030a1f99aea2401ddcdaa21edf74705a646dd60", size = 185503, upload-time = "2025-11-09T20:48:02.35Z" }, + { url = "https://files.pythonhosted.org/packages/39/ca/67bb15a7061d6fe20b9b2a2fd783e296a1e0f93468252c093481a2f00efa/jiter-0.12.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:53839b35a38f56b8be26a7851a48b89bc47e5d88e900929df10ed93b95fea3d6", size = 317965, upload-time = "2025-11-09T20:48:03.783Z" }, + { url = 
"https://files.pythonhosted.org/packages/18/af/1788031cd22e29c3b14bc6ca80b16a39a0b10e611367ffd480c06a259831/jiter-0.12.0-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:94f669548e55c91ab47fef8bddd9c954dab1938644e715ea49d7e117015110a4", size = 345831, upload-time = "2025-11-09T20:48:05.55Z" }, + { url = "https://files.pythonhosted.org/packages/05/17/710bf8472d1dff0d3caf4ced6031060091c1320f84ee7d5dcbed1f352417/jiter-0.12.0-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:351d54f2b09a41600ffea43d081522d792e81dcfb915f6d2d242744c1cc48beb", size = 361272, upload-time = "2025-11-09T20:48:06.951Z" }, + { url = "https://files.pythonhosted.org/packages/fb/f1/1dcc4618b59761fef92d10bcbb0b038b5160be653b003651566a185f1a5c/jiter-0.12.0-cp313-cp313t-win_amd64.whl", hash = "sha256:2a5e90604620f94bf62264e7c2c038704d38217b7465b863896c6d7c902b06c7", size = 204604, upload-time = "2025-11-09T20:48:08.328Z" }, + { url = "https://files.pythonhosted.org/packages/d9/32/63cb1d9f1c5c6632a783c0052cde9ef7ba82688f7065e2f0d5f10a7e3edb/jiter-0.12.0-cp313-cp313t-win_arm64.whl", hash = "sha256:88ef757017e78d2860f96250f9393b7b577b06a956ad102c29c8237554380db3", size = 185628, upload-time = "2025-11-09T20:48:09.572Z" }, + { url = "https://files.pythonhosted.org/packages/a8/99/45c9f0dbe4a1416b2b9a8a6d1236459540f43d7fb8883cff769a8db0612d/jiter-0.12.0-cp314-cp314-macosx_10_12_x86_64.whl", hash = "sha256:c46d927acd09c67a9fb1416df45c5a04c27e83aae969267e98fba35b74e99525", size = 312478, upload-time = "2025-11-09T20:48:10.898Z" }, + { url = "https://files.pythonhosted.org/packages/4c/a7/54ae75613ba9e0f55fcb0bc5d1f807823b5167cc944e9333ff322e9f07dd/jiter-0.12.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:774ff60b27a84a85b27b88cd5583899c59940bcc126caca97eb2a9df6aa00c49", size = 318706, upload-time = "2025-11-09T20:48:12.266Z" }, + { url = 
"https://files.pythonhosted.org/packages/59/31/2aa241ad2c10774baf6c37f8b8e1f39c07db358f1329f4eb40eba179c2a2/jiter-0.12.0-cp314-cp314-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c5433fab222fb072237df3f637d01b81f040a07dcac1cb4a5c75c7aa9ed0bef1", size = 351894, upload-time = "2025-11-09T20:48:13.673Z" }, + { url = "https://files.pythonhosted.org/packages/54/4f/0f2759522719133a9042781b18cc94e335b6d290f5e2d3e6899d6af933e3/jiter-0.12.0-cp314-cp314-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:f8c593c6e71c07866ec6bfb790e202a833eeec885022296aff6b9e0b92d6a70e", size = 365714, upload-time = "2025-11-09T20:48:15.083Z" }, + { url = "https://files.pythonhosted.org/packages/dc/6f/806b895f476582c62a2f52c453151edd8a0fde5411b0497baaa41018e878/jiter-0.12.0-cp314-cp314-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:90d32894d4c6877a87ae00c6b915b609406819dce8bc0d4e962e4de2784e567e", size = 478989, upload-time = "2025-11-09T20:48:16.706Z" }, + { url = "https://files.pythonhosted.org/packages/86/6c/012d894dc6e1033acd8db2b8346add33e413ec1c7c002598915278a37f79/jiter-0.12.0-cp314-cp314-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:798e46eed9eb10c3adbbacbd3bdb5ecd4cf7064e453d00dbef08802dae6937ff", size = 378615, upload-time = "2025-11-09T20:48:18.614Z" }, + { url = "https://files.pythonhosted.org/packages/87/30/d718d599f6700163e28e2c71c0bbaf6dace692e7df2592fd793ac9276717/jiter-0.12.0-cp314-cp314-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b3f1368f0a6719ea80013a4eb90ba72e75d7ea67cfc7846db2ca504f3df0169a", size = 364745, upload-time = "2025-11-09T20:48:20.117Z" }, + { url = "https://files.pythonhosted.org/packages/8f/85/315b45ce4b6ddc7d7fceca24068543b02bdc8782942f4ee49d652e2cc89f/jiter-0.12.0-cp314-cp314-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:65f04a9d0b4406f7e51279710b27484af411896246200e461d80d3ba0caa901a", size = 386502, upload-time = "2025-11-09T20:48:21.543Z" }, + { url = 
"https://files.pythonhosted.org/packages/74/0b/ce0434fb40c5b24b368fe81b17074d2840748b4952256bab451b72290a49/jiter-0.12.0-cp314-cp314-musllinux_1_1_aarch64.whl", hash = "sha256:fd990541982a24281d12b67a335e44f117e4c6cbad3c3b75c7dea68bf4ce3a67", size = 519845, upload-time = "2025-11-09T20:48:22.964Z" }, + { url = "https://files.pythonhosted.org/packages/e8/a3/7a7a4488ba052767846b9c916d208b3ed114e3eb670ee984e4c565b9cf0d/jiter-0.12.0-cp314-cp314-musllinux_1_1_x86_64.whl", hash = "sha256:b111b0e9152fa7df870ecaebb0bd30240d9f7fff1f2003bcb4ed0f519941820b", size = 510701, upload-time = "2025-11-09T20:48:24.483Z" }, + { url = "https://files.pythonhosted.org/packages/c3/16/052ffbf9d0467b70af24e30f91e0579e13ded0c17bb4a8eb2aed3cb60131/jiter-0.12.0-cp314-cp314-win32.whl", hash = "sha256:a78befb9cc0a45b5a5a0d537b06f8544c2ebb60d19d02c41ff15da28a9e22d42", size = 205029, upload-time = "2025-11-09T20:48:25.749Z" }, + { url = "https://files.pythonhosted.org/packages/e4/18/3cf1f3f0ccc789f76b9a754bdb7a6977e5d1d671ee97a9e14f7eb728d80e/jiter-0.12.0-cp314-cp314-win_amd64.whl", hash = "sha256:e1fe01c082f6aafbe5c8faf0ff074f38dfb911d53f07ec333ca03f8f6226debf", size = 204960, upload-time = "2025-11-09T20:48:27.415Z" }, + { url = "https://files.pythonhosted.org/packages/02/68/736821e52ecfdeeb0f024b8ab01b5a229f6b9293bbdb444c27efade50b0f/jiter-0.12.0-cp314-cp314-win_arm64.whl", hash = "sha256:d72f3b5a432a4c546ea4bedc84cce0c3404874f1d1676260b9c7f048a9855451", size = 185529, upload-time = "2025-11-09T20:48:29.125Z" }, + { url = "https://files.pythonhosted.org/packages/30/61/12ed8ee7a643cce29ac97c2281f9ce3956eb76b037e88d290f4ed0d41480/jiter-0.12.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:e6ded41aeba3603f9728ed2b6196e4df875348ab97b28fc8afff115ed42ba7a7", size = 318974, upload-time = "2025-11-09T20:48:30.87Z" }, + { url = 
"https://files.pythonhosted.org/packages/2d/c6/f3041ede6d0ed5e0e79ff0de4c8f14f401bbf196f2ef3971cdbe5fd08d1d/jiter-0.12.0-cp314-cp314t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a947920902420a6ada6ad51892082521978e9dd44a802663b001436e4b771684", size = 345932, upload-time = "2025-11-09T20:48:32.658Z" }, + { url = "https://files.pythonhosted.org/packages/d5/5d/4d94835889edd01ad0e2dbfc05f7bdfaed46292e7b504a6ac7839aa00edb/jiter-0.12.0-cp314-cp314t-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:add5e227e0554d3a52cf390a7635edaffdf4f8fce4fdbcef3cc2055bb396a30c", size = 367243, upload-time = "2025-11-09T20:48:34.093Z" }, + { url = "https://files.pythonhosted.org/packages/fd/76/0051b0ac2816253a99d27baf3dda198663aff882fa6ea7deeb94046da24e/jiter-0.12.0-cp314-cp314t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3f9b1cda8fcb736250d7e8711d4580ebf004a46771432be0ae4796944b5dfa5d", size = 479315, upload-time = "2025-11-09T20:48:35.507Z" }, + { url = "https://files.pythonhosted.org/packages/70/ae/83f793acd68e5cb24e483f44f482a1a15601848b9b6f199dacb970098f77/jiter-0.12.0-cp314-cp314t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:deeb12a2223fe0135c7ff1356a143d57f95bbf1f4a66584f1fc74df21d86b993", size = 380714, upload-time = "2025-11-09T20:48:40.014Z" }, + { url = "https://files.pythonhosted.org/packages/b1/5e/4808a88338ad2c228b1126b93fcd8ba145e919e886fe910d578230dabe3b/jiter-0.12.0-cp314-cp314t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c596cc0f4cb574877550ce4ecd51f8037469146addd676d7c1a30ebe6391923f", size = 365168, upload-time = "2025-11-09T20:48:41.462Z" }, + { url = "https://files.pythonhosted.org/packages/0c/d4/04619a9e8095b42aef436b5aeb4c0282b4ff1b27d1db1508df9f5dc82750/jiter-0.12.0-cp314-cp314t-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:5ab4c823b216a4aeab3fdbf579c5843165756bd9ad87cc6b1c65919c4715f783", size = 387893, upload-time = "2025-11-09T20:48:42.921Z" }, + { url = 
"https://files.pythonhosted.org/packages/17/ea/d3c7e62e4546fdc39197fa4a4315a563a89b95b6d54c0d25373842a59cbe/jiter-0.12.0-cp314-cp314t-musllinux_1_1_aarch64.whl", hash = "sha256:e427eee51149edf962203ff8db75a7514ab89be5cb623fb9cea1f20b54f1107b", size = 520828, upload-time = "2025-11-09T20:48:44.278Z" }, + { url = "https://files.pythonhosted.org/packages/cc/0b/c6d3562a03fd767e31cb119d9041ea7958c3c80cb3d753eafb19b3b18349/jiter-0.12.0-cp314-cp314t-musllinux_1_1_x86_64.whl", hash = "sha256:edb868841f84c111255ba5e80339d386d937ec1fdce419518ce1bd9370fac5b6", size = 511009, upload-time = "2025-11-09T20:48:45.726Z" }, + { url = "https://files.pythonhosted.org/packages/aa/51/2cb4468b3448a8385ebcd15059d325c9ce67df4e2758d133ab9442b19834/jiter-0.12.0-cp314-cp314t-win32.whl", hash = "sha256:8bbcfe2791dfdb7c5e48baf646d37a6a3dcb5a97a032017741dea9f817dca183", size = 205110, upload-time = "2025-11-09T20:48:47.033Z" }, + { url = "https://files.pythonhosted.org/packages/b2/c5/ae5ec83dec9c2d1af805fd5fe8f74ebded9c8670c5210ec7820ce0dbeb1e/jiter-0.12.0-cp314-cp314t-win_amd64.whl", hash = "sha256:2fa940963bf02e1d8226027ef461e36af472dea85d36054ff835aeed944dd873", size = 205223, upload-time = "2025-11-09T20:48:49.076Z" }, + { url = "https://files.pythonhosted.org/packages/97/9a/3c5391907277f0e55195550cf3fa8e293ae9ee0c00fb402fec1e38c0c82f/jiter-0.12.0-cp314-cp314t-win_arm64.whl", hash = "sha256:506c9708dd29b27288f9f8f1140c3cb0e3d8ddb045956d7757b1fa0e0f39a473", size = 185564, upload-time = "2025-11-09T20:48:50.376Z" }, + { url = "https://files.pythonhosted.org/packages/cb/f5/12efb8ada5f5c9edc1d4555fe383c1fb2eac05ac5859258a72d61981d999/jiter-0.12.0-graalpy312-graalpy250_312_native-macosx_10_12_x86_64.whl", hash = "sha256:e8547883d7b96ef2e5fe22b88f8a4c8725a56e7f4abafff20fd5272d634c7ecb", size = 309974, upload-time = "2025-11-09T20:49:17.187Z" }, + { url = 
"https://files.pythonhosted.org/packages/85/15/d6eb3b770f6a0d332675141ab3962fd4a7c270ede3515d9f3583e1d28276/jiter-0.12.0-graalpy312-graalpy250_312_native-macosx_11_0_arm64.whl", hash = "sha256:89163163c0934854a668ed783a2546a0617f71706a2551a4a0666d91ab365d6b", size = 304233, upload-time = "2025-11-09T20:49:18.734Z" }, + { url = "https://files.pythonhosted.org/packages/8c/3e/e7e06743294eea2cf02ced6aa0ff2ad237367394e37a0e2b4a1108c67a36/jiter-0.12.0-graalpy312-graalpy250_312_native-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d96b264ab7d34bbb2312dedc47ce07cd53f06835eacbc16dde3761f47c3a9e7f", size = 338537, upload-time = "2025-11-09T20:49:20.317Z" }, + { url = "https://files.pythonhosted.org/packages/2f/9c/6753e6522b8d0ef07d3a3d239426669e984fb0eba15a315cdbc1253904e4/jiter-0.12.0-graalpy312-graalpy250_312_native-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c24e864cb30ab82311c6425655b0cdab0a98c5d973b065c66a3f020740c2324c", size = 346110, upload-time = "2025-11-09T20:49:21.817Z" }, +] + [[package]] name = "joblib" version = "1.5.3" @@ -653,6 +797,25 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/ad/0d/eca3d962f9eef265f01a8e0d20085c6dd1f443cbffc11b6dede81fd82356/numpy-2.4.1-cp314-cp314t-win_arm64.whl", hash = "sha256:6436cffb4f2bf26c974344439439c95e152c9a527013f26b3577be6c2ca64295", size = 10667121, upload-time = "2026-01-10T06:44:41.644Z" }, ] +[[package]] +name = "openai" +version = "2.16.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "anyio" }, + { name = "distro" }, + { name = "httpx" }, + { name = "jiter" }, + { name = "pydantic" }, + { name = "sniffio" }, + { name = "tqdm" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/b1/6c/e4c964fcf1d527fdf4739e7cc940c60075a4114d50d03871d5d5b1e13a88/openai-2.16.0.tar.gz", hash = "sha256:42eaa22ca0d8ded4367a77374104d7a2feafee5bd60a107c3c11b5243a11cd12", size = 629649, upload-time = 
"2026-01-27T23:28:02.579Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/16/83/0315bf2cfd75a2ce8a7e54188e9456c60cec6c0cf66728ed07bd9859ff26/openai-2.16.0-py3-none-any.whl", hash = "sha256:5f46643a8f42899a84e80c38838135d7038e7718333ce61396994f887b09a59b", size = 1068612, upload-time = "2026-01-27T23:28:00.356Z" }, +] + [[package]] name = "packaging" version = "26.0" @@ -736,6 +899,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/32/2b/121e912bd60eebd623f873fd090de0e84f322972ab25a7f9044c056804ed/pathspec-1.0.3-py3-none-any.whl", hash = "sha256:e80767021c1cc524aa3fb14bedda9c34406591343cc42797b386ce7b9354fb6c", size = 55021, upload-time = "2026-01-09T15:46:44.652Z" }, ] +[[package]] +name = "pgvector" +version = "0.4.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "numpy" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/25/6c/6d8b4b03b958c02fa8687ec6063c49d952a189f8c91ebbe51e877dfab8f7/pgvector-0.4.2.tar.gz", hash = "sha256:322cac0c1dc5d41c9ecf782bd9991b7966685dee3a00bc873631391ed949513a", size = 31354, upload-time = "2025-12-05T01:07:17.87Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/5a/26/6cee8a1ce8c43625ec561aff19df07f9776b7525d9002c86bceb3e0ac970/pgvector-0.4.2-py3-none-any.whl", hash = "sha256:549d45f7a18593783d5eec609ea1684a724ba8405c4cb182a0b2b08aeff04e08", size = 27441, upload-time = "2025-12-05T01:07:16.536Z" }, +] + [[package]] name = "pluggy" version = "1.6.0" @@ -977,6 +1152,109 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/f1/12/de94a39c2ef588c7e6455cfbe7343d3b2dc9d6b6b2f40c4c6565744c873d/pyyaml-6.0.3-cp314-cp314t-win_arm64.whl", hash = "sha256:ebc55a14a21cb14062aa4162f906cd962b28e2e9ea38f9b4391244cd8de4ae0b", size = 149341, upload-time = "2025-09-25T21:32:56.828Z" }, ] +[[package]] +name = "regex" +version = "2026.1.15" +source = { registry = "https://pypi.org/simple" } +sdist = { url = 
"https://files.pythonhosted.org/packages/0b/86/07d5056945f9ec4590b518171c4254a5925832eb727b56d3c38a7476f316/regex-2026.1.15.tar.gz", hash = "sha256:164759aa25575cbc0651bef59a0b18353e54300d79ace8084c818ad8ac72b7d5", size = 414811, upload-time = "2026-01-14T23:18:02.775Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/92/81/10d8cf43c807d0326efe874c1b79f22bfb0fb226027b0b19ebc26d301408/regex-2026.1.15-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:4c8fcc5793dde01641a35905d6731ee1548f02b956815f8f1cab89e515a5bdf1", size = 489398, upload-time = "2026-01-14T23:14:43.741Z" }, + { url = "https://files.pythonhosted.org/packages/90/b0/7c2a74e74ef2a7c32de724658a69a862880e3e4155cba992ba04d1c70400/regex-2026.1.15-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:bfd876041a956e6a90ad7cdb3f6a630c07d491280bfeed4544053cd434901681", size = 291339, upload-time = "2026-01-14T23:14:45.183Z" }, + { url = "https://files.pythonhosted.org/packages/19/4d/16d0773d0c818417f4cc20aa0da90064b966d22cd62a8c46765b5bd2d643/regex-2026.1.15-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:9250d087bc92b7d4899ccd5539a1b2334e44eee85d848c4c1aef8e221d3f8c8f", size = 289003, upload-time = "2026-01-14T23:14:47.25Z" }, + { url = "https://files.pythonhosted.org/packages/c6/e4/1fc4599450c9f0863d9406e944592d968b8d6dfd0d552a7d569e43bceada/regex-2026.1.15-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c8a154cf6537ebbc110e24dabe53095e714245c272da9c1be05734bdad4a61aa", size = 798656, upload-time = "2026-01-14T23:14:48.77Z" }, + { url = "https://files.pythonhosted.org/packages/b2/e6/59650d73a73fa8a60b3a590545bfcf1172b4384a7df2e7fe7b9aab4e2da9/regex-2026.1.15-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:8050ba2e3ea1d8731a549e83c18d2f0999fbc99a5f6bd06b4c91449f55291804", size = 864252, upload-time = "2026-01-14T23:14:50.528Z" }, + { url = 
"https://files.pythonhosted.org/packages/6e/ab/1d0f4d50a1638849a97d731364c9a80fa304fec46325e48330c170ee8e80/regex-2026.1.15-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:0bf065240704cb8951cc04972cf107063917022511273e0969bdb34fc173456c", size = 912268, upload-time = "2026-01-14T23:14:52.952Z" }, + { url = "https://files.pythonhosted.org/packages/dd/df/0d722c030c82faa1d331d1921ee268a4e8fb55ca8b9042c9341c352f17fa/regex-2026.1.15-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c32bef3e7aeee75746748643667668ef941d28b003bfc89994ecf09a10f7a1b5", size = 803589, upload-time = "2026-01-14T23:14:55.182Z" }, + { url = "https://files.pythonhosted.org/packages/66/23/33289beba7ccb8b805c6610a8913d0131f834928afc555b241caabd422a9/regex-2026.1.15-cp312-cp312-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:d5eaa4a4c5b1906bd0d2508d68927f15b81821f85092e06f1a34a4254b0e1af3", size = 775700, upload-time = "2026-01-14T23:14:56.707Z" }, + { url = "https://files.pythonhosted.org/packages/e7/65/bf3a42fa6897a0d3afa81acb25c42f4b71c274f698ceabd75523259f6688/regex-2026.1.15-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:86c1077a3cc60d453d4084d5b9649065f3bf1184e22992bd322e1f081d3117fb", size = 787928, upload-time = "2026-01-14T23:14:58.312Z" }, + { url = "https://files.pythonhosted.org/packages/f4/f5/13bf65864fc314f68cdd6d8ca94adcab064d4d39dbd0b10fef29a9da48fc/regex-2026.1.15-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:2b091aefc05c78d286657cd4db95f2e6313375ff65dcf085e42e4c04d9c8d410", size = 858607, upload-time = "2026-01-14T23:15:00.657Z" }, + { url = "https://files.pythonhosted.org/packages/a3/31/040e589834d7a439ee43fb0e1e902bc81bd58a5ba81acffe586bb3321d35/regex-2026.1.15-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:57e7d17f59f9ebfa9667e6e5a1c0127b96b87cb9cede8335482451ed00788ba4", size = 763729, upload-time = "2026-01-14T23:15:02.248Z" }, + { url = 
"https://files.pythonhosted.org/packages/9b/84/6921e8129687a427edf25a34a5594b588b6d88f491320b9de5b6339a4fcb/regex-2026.1.15-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:c6c4dcdfff2c08509faa15d36ba7e5ef5fcfab25f1e8f85a0c8f45bc3a30725d", size = 850697, upload-time = "2026-01-14T23:15:03.878Z" }, + { url = "https://files.pythonhosted.org/packages/8a/87/3d06143d4b128f4229158f2de5de6c8f2485170c7221e61bf381313314b2/regex-2026.1.15-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:cf8ff04c642716a7f2048713ddc6278c5fd41faa3b9cab12607c7abecd012c22", size = 789849, upload-time = "2026-01-14T23:15:06.102Z" }, + { url = "https://files.pythonhosted.org/packages/77/69/c50a63842b6bd48850ebc7ab22d46e7a2a32d824ad6c605b218441814639/regex-2026.1.15-cp312-cp312-win32.whl", hash = "sha256:82345326b1d8d56afbe41d881fdf62f1926d7264b2fc1537f99ae5da9aad7913", size = 266279, upload-time = "2026-01-14T23:15:07.678Z" }, + { url = "https://files.pythonhosted.org/packages/f2/36/39d0b29d087e2b11fd8191e15e81cce1b635fcc845297c67f11d0d19274d/regex-2026.1.15-cp312-cp312-win_amd64.whl", hash = "sha256:4def140aa6156bc64ee9912383d4038f3fdd18fee03a6f222abd4de6357ce42a", size = 277166, upload-time = "2026-01-14T23:15:09.257Z" }, + { url = "https://files.pythonhosted.org/packages/28/32/5b8e476a12262748851fa8ab1b0be540360692325975b094e594dfebbb52/regex-2026.1.15-cp312-cp312-win_arm64.whl", hash = "sha256:c6c565d9a6e1a8d783c1948937ffc377dd5771e83bd56de8317c450a954d2056", size = 270415, upload-time = "2026-01-14T23:15:10.743Z" }, + { url = "https://files.pythonhosted.org/packages/f8/2e/6870bb16e982669b674cce3ee9ff2d1d46ab80528ee6bcc20fb2292efb60/regex-2026.1.15-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:e69d0deeb977ffe7ed3d2e4439360089f9c3f217ada608f0f88ebd67afb6385e", size = 489164, upload-time = "2026-01-14T23:15:13.962Z" }, + { url = 
"https://files.pythonhosted.org/packages/dc/67/9774542e203849b0286badf67199970a44ebdb0cc5fb739f06e47ada72f8/regex-2026.1.15-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:3601ffb5375de85a16f407854d11cca8fe3f5febbe3ac78fb2866bb220c74d10", size = 291218, upload-time = "2026-01-14T23:15:15.647Z" }, + { url = "https://files.pythonhosted.org/packages/b2/87/b0cda79f22b8dee05f774922a214da109f9a4c0eca5da2c9d72d77ea062c/regex-2026.1.15-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:4c5ef43b5c2d4114eb8ea424bb8c9cec01d5d17f242af88b2448f5ee81caadbc", size = 288895, upload-time = "2026-01-14T23:15:17.788Z" }, + { url = "https://files.pythonhosted.org/packages/3b/6a/0041f0a2170d32be01ab981d6346c83a8934277d82c780d60b127331f264/regex-2026.1.15-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:968c14d4f03e10b2fd960f1d5168c1f0ac969381d3c1fcc973bc45fb06346599", size = 798680, upload-time = "2026-01-14T23:15:19.342Z" }, + { url = "https://files.pythonhosted.org/packages/58/de/30e1cfcdbe3e891324aa7568b7c968771f82190df5524fabc1138cb2d45a/regex-2026.1.15-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:56a5595d0f892f214609c9f76b41b7428bed439d98dc961efafdd1354d42baae", size = 864210, upload-time = "2026-01-14T23:15:22.005Z" }, + { url = "https://files.pythonhosted.org/packages/64/44/4db2f5c5ca0ccd40ff052ae7b1e9731352fcdad946c2b812285a7505ca75/regex-2026.1.15-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:0bf650f26087363434c4e560011f8e4e738f6f3e029b85d4904c50135b86cfa5", size = 912358, upload-time = "2026-01-14T23:15:24.569Z" }, + { url = "https://files.pythonhosted.org/packages/79/b6/e6a5665d43a7c42467138c8a2549be432bad22cbd206f5ec87162de74bd7/regex-2026.1.15-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:18388a62989c72ac24de75f1449d0fb0b04dfccd0a1a7c1c43af5eb503d890f6", size = 
803583, upload-time = "2026-01-14T23:15:26.526Z" }, + { url = "https://files.pythonhosted.org/packages/e7/53/7cd478222169d85d74d7437e74750005e993f52f335f7c04ff7adfda3310/regex-2026.1.15-cp313-cp313-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:6d220a2517f5893f55daac983bfa9fe998a7dbcaee4f5d27a88500f8b7873788", size = 775782, upload-time = "2026-01-14T23:15:29.352Z" }, + { url = "https://files.pythonhosted.org/packages/ca/b5/75f9a9ee4b03a7c009fe60500fe550b45df94f0955ca29af16333ef557c5/regex-2026.1.15-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:c9c08c2fbc6120e70abff5d7f28ffb4d969e14294fb2143b4b5c7d20e46d1714", size = 787978, upload-time = "2026-01-14T23:15:31.295Z" }, + { url = "https://files.pythonhosted.org/packages/72/b3/79821c826245bbe9ccbb54f6eadb7879c722fd3e0248c17bfc90bf54e123/regex-2026.1.15-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:7ef7d5d4bd49ec7364315167a4134a015f61e8266c6d446fc116a9ac4456e10d", size = 858550, upload-time = "2026-01-14T23:15:33.558Z" }, + { url = "https://files.pythonhosted.org/packages/4a/85/2ab5f77a1c465745bfbfcb3ad63178a58337ae8d5274315e2cc623a822fa/regex-2026.1.15-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:6e42844ad64194fa08d5ccb75fe6a459b9b08e6d7296bd704460168d58a388f3", size = 763747, upload-time = "2026-01-14T23:15:35.206Z" }, + { url = "https://files.pythonhosted.org/packages/6d/84/c27df502d4bfe2873a3e3a7cf1bdb2b9cc10284d1a44797cf38bed790470/regex-2026.1.15-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:cfecdaa4b19f9ca534746eb3b55a5195d5c95b88cac32a205e981ec0a22b7d31", size = 850615, upload-time = "2026-01-14T23:15:37.523Z" }, + { url = "https://files.pythonhosted.org/packages/7d/b7/658a9782fb253680aa8ecb5ccbb51f69e088ed48142c46d9f0c99b46c575/regex-2026.1.15-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:08df9722d9b87834a3d701f3fca570b2be115654dbfd30179f30ab2f39d606d3", size = 789951, upload-time = "2026-01-14T23:15:39.582Z" }, + { url = 
"https://files.pythonhosted.org/packages/fc/2a/5928af114441e059f15b2f63e188bd00c6529b3051c974ade7444b85fcda/regex-2026.1.15-cp313-cp313-win32.whl", hash = "sha256:d426616dae0967ca225ab12c22274eb816558f2f99ccb4a1d52ca92e8baf180f", size = 266275, upload-time = "2026-01-14T23:15:42.108Z" }, + { url = "https://files.pythonhosted.org/packages/4f/16/5bfbb89e435897bff28cf0352a992ca719d9e55ebf8b629203c96b6ce4f7/regex-2026.1.15-cp313-cp313-win_amd64.whl", hash = "sha256:febd38857b09867d3ed3f4f1af7d241c5c50362e25ef43034995b77a50df494e", size = 277145, upload-time = "2026-01-14T23:15:44.244Z" }, + { url = "https://files.pythonhosted.org/packages/56/c1/a09ff7392ef4233296e821aec5f78c51be5e91ffde0d163059e50fd75835/regex-2026.1.15-cp313-cp313-win_arm64.whl", hash = "sha256:8e32f7896f83774f91499d239e24cebfadbc07639c1494bb7213983842348337", size = 270411, upload-time = "2026-01-14T23:15:45.858Z" }, + { url = "https://files.pythonhosted.org/packages/3c/38/0cfd5a78e5c6db00e6782fdae70458f89850ce95baa5e8694ab91d89744f/regex-2026.1.15-cp313-cp313t-macosx_10_13_universal2.whl", hash = "sha256:ec94c04149b6a7b8120f9f44565722c7ae31b7a6d2275569d2eefa76b83da3be", size = 492068, upload-time = "2026-01-14T23:15:47.616Z" }, + { url = "https://files.pythonhosted.org/packages/50/72/6c86acff16cb7c959c4355826bbf06aad670682d07c8f3998d9ef4fee7cd/regex-2026.1.15-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:40c86d8046915bb9aeb15d3f3f15b6fd500b8ea4485b30e1bbc799dab3fe29f8", size = 292756, upload-time = "2026-01-14T23:15:49.307Z" }, + { url = "https://files.pythonhosted.org/packages/4e/58/df7fb69eadfe76526ddfce28abdc0af09ffe65f20c2c90932e89d705153f/regex-2026.1.15-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:726ea4e727aba21643205edad8f2187ec682d3305d790f73b7a51c7587b64bdd", size = 291114, upload-time = "2026-01-14T23:15:51.484Z" }, + { url = 
"https://files.pythonhosted.org/packages/ed/6c/a4011cd1cf96b90d2cdc7e156f91efbd26531e822a7fbb82a43c1016678e/regex-2026.1.15-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1cb740d044aff31898804e7bf1181cc72c03d11dfd19932b9911ffc19a79070a", size = 807524, upload-time = "2026-01-14T23:15:53.102Z" }, + { url = "https://files.pythonhosted.org/packages/1d/25/a53ffb73183f69c3e9f4355c4922b76d2840aee160af6af5fac229b6201d/regex-2026.1.15-cp313-cp313t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:05d75a668e9ea16f832390d22131fe1e8acc8389a694c8febc3e340b0f810b93", size = 873455, upload-time = "2026-01-14T23:15:54.956Z" }, + { url = "https://files.pythonhosted.org/packages/66/0b/8b47fc2e8f97d9b4a851736f3890a5f786443aa8901061c55f24c955f45b/regex-2026.1.15-cp313-cp313t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:d991483606f3dbec93287b9f35596f41aa2e92b7c2ebbb935b63f409e243c9af", size = 915007, upload-time = "2026-01-14T23:15:57.041Z" }, + { url = "https://files.pythonhosted.org/packages/c2/fa/97de0d681e6d26fabe71968dbee06dd52819e9a22fdce5dac7256c31ed84/regex-2026.1.15-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:194312a14819d3e44628a44ed6fea6898fdbecb0550089d84c403475138d0a09", size = 812794, upload-time = "2026-01-14T23:15:58.916Z" }, + { url = "https://files.pythonhosted.org/packages/22/38/e752f94e860d429654aa2b1c51880bff8dfe8f084268258adf9151cf1f53/regex-2026.1.15-cp313-cp313t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:fe2fda4110a3d0bc163c2e0664be44657431440722c5c5315c65155cab92f9e5", size = 781159, upload-time = "2026-01-14T23:16:00.817Z" }, + { url = "https://files.pythonhosted.org/packages/e9/a7/d739ffaef33c378fc888302a018d7f81080393d96c476b058b8c64fd2b0d/regex-2026.1.15-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = 
"sha256:124dc36c85d34ef2d9164da41a53c1c8c122cfb1f6e1ec377a1f27ee81deb794", size = 795558, upload-time = "2026-01-14T23:16:03.267Z" }, + { url = "https://files.pythonhosted.org/packages/3e/c4/542876f9a0ac576100fc73e9c75b779f5c31e3527576cfc9cb3009dcc58a/regex-2026.1.15-cp313-cp313t-musllinux_1_2_ppc64le.whl", hash = "sha256:a1774cd1981cd212506a23a14dba7fdeaee259f5deba2df6229966d9911e767a", size = 868427, upload-time = "2026-01-14T23:16:05.646Z" }, + { url = "https://files.pythonhosted.org/packages/fc/0f/d5655bea5b22069e32ae85a947aa564912f23758e112cdb74212848a1a1b/regex-2026.1.15-cp313-cp313t-musllinux_1_2_riscv64.whl", hash = "sha256:b5f7d8d2867152cdb625e72a530d2ccb48a3d199159144cbdd63870882fb6f80", size = 769939, upload-time = "2026-01-14T23:16:07.542Z" }, + { url = "https://files.pythonhosted.org/packages/20/06/7e18a4fa9d326daeda46d471a44ef94201c46eaa26dbbb780b5d92cbfdda/regex-2026.1.15-cp313-cp313t-musllinux_1_2_s390x.whl", hash = "sha256:492534a0ab925d1db998defc3c302dae3616a2fc3fe2e08db1472348f096ddf2", size = 854753, upload-time = "2026-01-14T23:16:10.395Z" }, + { url = "https://files.pythonhosted.org/packages/3b/67/dc8946ef3965e166f558ef3b47f492bc364e96a265eb4a2bb3ca765c8e46/regex-2026.1.15-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:c661fc820cfb33e166bf2450d3dadbda47c8d8981898adb9b6fe24e5e582ba60", size = 799559, upload-time = "2026-01-14T23:16:12.347Z" }, + { url = "https://files.pythonhosted.org/packages/a5/61/1bba81ff6d50c86c65d9fd84ce9699dd106438ee4cdb105bf60374ee8412/regex-2026.1.15-cp313-cp313t-win32.whl", hash = "sha256:99ad739c3686085e614bf77a508e26954ff1b8f14da0e3765ff7abbf7799f952", size = 268879, upload-time = "2026-01-14T23:16:14.049Z" }, + { url = "https://files.pythonhosted.org/packages/e9/5e/cef7d4c5fb0ea3ac5c775fd37db5747f7378b29526cc83f572198924ff47/regex-2026.1.15-cp313-cp313t-win_amd64.whl", hash = "sha256:32655d17905e7ff8ba5c764c43cb124e34a9245e45b83c22e81041e1071aee10", size = 280317, upload-time = "2026-01-14T23:16:15.718Z" }, 
+ { url = "https://files.pythonhosted.org/packages/b4/52/4317f7a5988544e34ab57b4bde0f04944c4786128c933fb09825924d3e82/regex-2026.1.15-cp313-cp313t-win_arm64.whl", hash = "sha256:b2a13dd6a95e95a489ca242319d18fc02e07ceb28fa9ad146385194d95b3c829", size = 271551, upload-time = "2026-01-14T23:16:17.533Z" }, + { url = "https://files.pythonhosted.org/packages/52/0a/47fa888ec7cbbc7d62c5f2a6a888878e76169170ead271a35239edd8f0e8/regex-2026.1.15-cp314-cp314-macosx_10_13_universal2.whl", hash = "sha256:d920392a6b1f353f4aa54328c867fec3320fa50657e25f64abf17af054fc97ac", size = 489170, upload-time = "2026-01-14T23:16:19.835Z" }, + { url = "https://files.pythonhosted.org/packages/ac/c4/d000e9b7296c15737c9301708e9e7fbdea009f8e93541b6b43bdb8219646/regex-2026.1.15-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:b5a28980a926fa810dbbed059547b02783952e2efd9c636412345232ddb87ff6", size = 291146, upload-time = "2026-01-14T23:16:21.541Z" }, + { url = "https://files.pythonhosted.org/packages/f9/b6/921cc61982e538682bdf3bdf5b2c6ab6b34368da1f8e98a6c1ddc503c9cf/regex-2026.1.15-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:621f73a07595d83f28952d7bd1e91e9d1ed7625fb7af0064d3516674ec93a2a2", size = 288986, upload-time = "2026-01-14T23:16:23.381Z" }, + { url = "https://files.pythonhosted.org/packages/ca/33/eb7383dde0bbc93f4fb9d03453aab97e18ad4024ac7e26cef8d1f0a2cff0/regex-2026.1.15-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:3d7d92495f47567a9b1669c51fc8d6d809821849063d168121ef801bbc213846", size = 799098, upload-time = "2026-01-14T23:16:25.088Z" }, + { url = "https://files.pythonhosted.org/packages/27/56/b664dccae898fc8d8b4c23accd853f723bde0f026c747b6f6262b688029c/regex-2026.1.15-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:8dd16fba2758db7a3780a051f245539c4451ca20910f5a5e6ea1c08d06d4a76b", size = 864980, upload-time = "2026-01-14T23:16:27.297Z" }, + { url = 
"https://files.pythonhosted.org/packages/16/40/0999e064a170eddd237bae9ccfcd8f28b3aa98a38bf727a086425542a4fc/regex-2026.1.15-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:1e1808471fbe44c1a63e5f577a1d5f02fe5d66031dcbdf12f093ffc1305a858e", size = 911607, upload-time = "2026-01-14T23:16:29.235Z" }, + { url = "https://files.pythonhosted.org/packages/07/78/c77f644b68ab054e5a674fb4da40ff7bffb2c88df58afa82dbf86573092d/regex-2026.1.15-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0751a26ad39d4f2ade8fe16c59b2bf5cb19eb3d2cd543e709e583d559bd9efde", size = 803358, upload-time = "2026-01-14T23:16:31.369Z" }, + { url = "https://files.pythonhosted.org/packages/27/31/d4292ea8566eaa551fafc07797961c5963cf5235c797cc2ae19b85dfd04d/regex-2026.1.15-cp314-cp314-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:0f0c7684c7f9ca241344ff95a1de964f257a5251968484270e91c25a755532c5", size = 775833, upload-time = "2026-01-14T23:16:33.141Z" }, + { url = "https://files.pythonhosted.org/packages/ce/b2/cff3bf2fea4133aa6fb0d1e370b37544d18c8350a2fa118c7e11d1db0e14/regex-2026.1.15-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:74f45d170a21df41508cb67165456538425185baaf686281fa210d7e729abc34", size = 788045, upload-time = "2026-01-14T23:16:35.005Z" }, + { url = "https://files.pythonhosted.org/packages/8d/99/2cb9b69045372ec877b6f5124bda4eb4253bc58b8fe5848c973f752bc52c/regex-2026.1.15-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:f1862739a1ffb50615c0fde6bae6569b5efbe08d98e59ce009f68a336f64da75", size = 859374, upload-time = "2026-01-14T23:16:36.919Z" }, + { url = "https://files.pythonhosted.org/packages/09/16/710b0a5abe8e077b1729a562d2f297224ad079f3a66dce46844c193416c8/regex-2026.1.15-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:453078802f1b9e2b7303fb79222c054cb18e76f7bdc220f7530fdc85d319f99e", size = 763940, upload-time = "2026-01-14T23:16:38.685Z" }, + { url = 
"https://files.pythonhosted.org/packages/dd/d1/7585c8e744e40eb3d32f119191969b91de04c073fca98ec14299041f6e7e/regex-2026.1.15-cp314-cp314-musllinux_1_2_s390x.whl", hash = "sha256:a30a68e89e5a218b8b23a52292924c1f4b245cb0c68d1cce9aec9bbda6e2c160", size = 850112, upload-time = "2026-01-14T23:16:40.646Z" }, + { url = "https://files.pythonhosted.org/packages/af/d6/43e1dd85df86c49a347aa57c1f69d12c652c7b60e37ec162e3096194a278/regex-2026.1.15-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:9479cae874c81bf610d72b85bb681a94c95722c127b55445285fb0e2c82db8e1", size = 789586, upload-time = "2026-01-14T23:16:42.799Z" }, + { url = "https://files.pythonhosted.org/packages/93/38/77142422f631e013f316aaae83234c629555729a9fbc952b8a63ac91462a/regex-2026.1.15-cp314-cp314-win32.whl", hash = "sha256:d639a750223132afbfb8f429c60d9d318aeba03281a5f1ab49f877456448dcf1", size = 271691, upload-time = "2026-01-14T23:16:44.671Z" }, + { url = "https://files.pythonhosted.org/packages/4a/a9/ab16b4649524ca9e05213c1cdbb7faa85cc2aa90a0230d2f796cbaf22736/regex-2026.1.15-cp314-cp314-win_amd64.whl", hash = "sha256:4161d87f85fa831e31469bfd82c186923070fc970b9de75339b68f0c75b51903", size = 280422, upload-time = "2026-01-14T23:16:46.607Z" }, + { url = "https://files.pythonhosted.org/packages/be/2a/20fd057bf3521cb4791f69f869635f73e0aaf2b9ad2d260f728144f9047c/regex-2026.1.15-cp314-cp314-win_arm64.whl", hash = "sha256:91c5036ebb62663a6b3999bdd2e559fd8456d17e2b485bf509784cd31a8b1705", size = 273467, upload-time = "2026-01-14T23:16:48.967Z" }, + { url = "https://files.pythonhosted.org/packages/ad/77/0b1e81857060b92b9cad239104c46507dd481b3ff1fa79f8e7f865aae38a/regex-2026.1.15-cp314-cp314t-macosx_10_13_universal2.whl", hash = "sha256:ee6854c9000a10938c79238de2379bea30c82e4925a371711af45387df35cab8", size = 492073, upload-time = "2026-01-14T23:16:51.154Z" }, + { url = 
"https://files.pythonhosted.org/packages/70/f3/f8302b0c208b22c1e4f423147e1913fd475ddd6230565b299925353de644/regex-2026.1.15-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:2c2b80399a422348ce5de4fe40c418d6299a0fa2803dd61dc0b1a2f28e280fcf", size = 292757, upload-time = "2026-01-14T23:16:53.08Z" }, + { url = "https://files.pythonhosted.org/packages/bf/f0/ef55de2460f3b4a6da9d9e7daacd0cb79d4ef75c64a2af316e68447f0df0/regex-2026.1.15-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:dca3582bca82596609959ac39e12b7dad98385b4fefccb1151b937383cec547d", size = 291122, upload-time = "2026-01-14T23:16:55.383Z" }, + { url = "https://files.pythonhosted.org/packages/cf/55/bb8ccbacabbc3a11d863ee62a9f18b160a83084ea95cdfc5d207bfc3dd75/regex-2026.1.15-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ef71d476caa6692eea743ae5ea23cde3260677f70122c4d258ca952e5c2d4e84", size = 807761, upload-time = "2026-01-14T23:16:57.251Z" }, + { url = "https://files.pythonhosted.org/packages/8f/84/f75d937f17f81e55679a0509e86176e29caa7298c38bd1db7ce9c0bf6075/regex-2026.1.15-cp314-cp314t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:c243da3436354f4af6c3058a3f81a97d47ea52c9bd874b52fd30274853a1d5df", size = 873538, upload-time = "2026-01-14T23:16:59.349Z" }, + { url = "https://files.pythonhosted.org/packages/b8/d9/0da86327df70349aa8d86390da91171bd3ca4f0e7c1d1d453a9c10344da3/regex-2026.1.15-cp314-cp314t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:8355ad842a7c7e9e5e55653eade3b7d1885ba86f124dd8ab1f722f9be6627434", size = 915066, upload-time = "2026-01-14T23:17:01.607Z" }, + { url = "https://files.pythonhosted.org/packages/2a/5e/f660fb23fc77baa2a61aa1f1fe3a4eea2bbb8a286ddec148030672e18834/regex-2026.1.15-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:f192a831d9575271a22d804ff1a5355355723f94f31d9eef25f0d45a152fdc1a", size 
= 812938, upload-time = "2026-01-14T23:17:04.366Z" }, + { url = "https://files.pythonhosted.org/packages/69/33/a47a29bfecebbbfd1e5cd3f26b28020a97e4820f1c5148e66e3b7d4b4992/regex-2026.1.15-cp314-cp314t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:166551807ec20d47ceaeec380081f843e88c8949780cd42c40f18d16168bed10", size = 781314, upload-time = "2026-01-14T23:17:06.378Z" }, + { url = "https://files.pythonhosted.org/packages/65/ec/7ec2bbfd4c3f4e494a24dec4c6943a668e2030426b1b8b949a6462d2c17b/regex-2026.1.15-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:f9ca1cbdc0fbfe5e6e6f8221ef2309988db5bcede52443aeaee9a4ad555e0dac", size = 795652, upload-time = "2026-01-14T23:17:08.521Z" }, + { url = "https://files.pythonhosted.org/packages/46/79/a5d8651ae131fe27d7c521ad300aa7f1c7be1dbeee4d446498af5411b8a9/regex-2026.1.15-cp314-cp314t-musllinux_1_2_ppc64le.whl", hash = "sha256:b30bcbd1e1221783c721483953d9e4f3ab9c5d165aa709693d3f3946747b1aea", size = 868550, upload-time = "2026-01-14T23:17:10.573Z" }, + { url = "https://files.pythonhosted.org/packages/06/b7/25635d2809664b79f183070786a5552dd4e627e5aedb0065f4e3cf8ee37d/regex-2026.1.15-cp314-cp314t-musllinux_1_2_riscv64.whl", hash = "sha256:2a8d7b50c34578d0d3bf7ad58cde9652b7d683691876f83aedc002862a35dc5e", size = 769981, upload-time = "2026-01-14T23:17:12.871Z" }, + { url = "https://files.pythonhosted.org/packages/16/8b/fc3fcbb2393dcfa4a6c5ffad92dc498e842df4581ea9d14309fcd3c55fb9/regex-2026.1.15-cp314-cp314t-musllinux_1_2_s390x.whl", hash = "sha256:9d787e3310c6a6425eb346be4ff2ccf6eece63017916fd77fe8328c57be83521", size = 854780, upload-time = "2026-01-14T23:17:14.837Z" }, + { url = "https://files.pythonhosted.org/packages/d0/38/dde117c76c624713c8a2842530be9c93ca8b606c0f6102d86e8cd1ce8bea/regex-2026.1.15-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:619843841e220adca114118533a574a9cd183ed8a28b85627d2844c500a2b0db", size = 799778, upload-time = "2026-01-14T23:17:17.369Z" }, + { url = 
"https://files.pythonhosted.org/packages/e3/0d/3a6cfa9ae99606afb612d8fb7a66b245a9d5ff0f29bb347c8a30b6ad561b/regex-2026.1.15-cp314-cp314t-win32.whl", hash = "sha256:e90b8db97f6f2c97eb045b51a6b2c5ed69cedd8392459e0642d4199b94fabd7e", size = 274667, upload-time = "2026-01-14T23:17:19.301Z" }, + { url = "https://files.pythonhosted.org/packages/5b/b2/297293bb0742fd06b8d8e2572db41a855cdf1cae0bf009b1cb74fe07e196/regex-2026.1.15-cp314-cp314t-win_amd64.whl", hash = "sha256:5ef19071f4ac9f0834793af85bd04a920b4407715624e40cb7a0631a11137cdf", size = 284386, upload-time = "2026-01-14T23:17:21.231Z" }, + { url = "https://files.pythonhosted.org/packages/95/e4/a3b9480c78cf8ee86626cb06f8d931d74d775897d44201ccb813097ae697/regex-2026.1.15-cp314-cp314t-win_arm64.whl", hash = "sha256:ca89c5e596fc05b015f27561b3793dc2fa0917ea0d7507eebb448efd35274a70", size = 274837, upload-time = "2026-01-14T23:17:23.146Z" }, +] + +[[package]] +name = "requests" +version = "2.32.5" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "certifi" }, + { name = "charset-normalizer" }, + { name = "idna" }, + { name = "urllib3" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/c9/74/b3ff8e6c8446842c3f5c837e9c3dfcfe2018ea6ecef224c710c85ef728f4/requests-2.32.5.tar.gz", hash = "sha256:dbba0bac56e100853db0ea71b82b4dfd5fe2bf6d3754a8893c3af500cec7d7cf", size = 134517, upload-time = "2025-08-18T20:46:02.573Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/1e/db/4254e3eabe8020b458f1a747140d32277ec7a271daf1d235b70dc0b4e6e3/requests-2.32.5-py3-none-any.whl", hash = "sha256:2462f94637a34fd532264295e186976db0f5d453d1cdd31473c85a6a161affb6", size = 64738, upload-time = "2025-08-18T20:46:00.542Z" }, +] + [[package]] name = "ruff" version = "0.14.14" @@ -1117,6 +1395,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/b7/ce/149a00dd41f10bc29e5921b496af8b574d8413afcd5e30dfa0ed46c2cc5e/six-1.17.0-py2.py3-none-any.whl", hash = 
"sha256:4721f391ed90541fddacab5acf947aa0d3dc7d27b2e1e8eda2be8970586c3274", size = 11050, upload-time = "2024-12-04T17:35:26.475Z" }, ] +[[package]] +name = "sniffio" +version = "1.3.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/a2/87/a6771e1546d97e7e041b6ae58d80074f81b7d5121207425c964ddf5cfdbd/sniffio-1.3.1.tar.gz", hash = "sha256:f4324edc670a0f49750a81b895f35c3adb843cca46f0530f79fc1babb23789dc", size = 20372, upload-time = "2024-02-25T23:20:04.057Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e9/44/75a9c9421471a6c4805dbf2356f7c181a29c1879239abab1ea2cc8f38b40/sniffio-1.3.1-py3-none-any.whl", hash = "sha256:2f6da418d1f1e0fddd844478f41680e794e6051915791a034ff65e5f100525a2", size = 10235, upload-time = "2024-02-25T23:20:01.196Z" }, +] + [[package]] name = "sqlalchemy" version = "2.0.46" @@ -1195,6 +1482,65 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/32/d5/f9a850d79b0851d1d4ef6456097579a9005b31fea68726a4ae5f2d82ddd9/threadpoolctl-3.6.0-py3-none-any.whl", hash = "sha256:43a0b8fd5a2928500110039e43a5eed8480b918967083ea48dc3ab9f13c4a7fb", size = 18638, upload-time = "2025-03-13T13:49:21.846Z" }, ] +[[package]] +name = "tiktoken" +version = "0.12.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "regex" }, + { name = "requests" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/7d/ab/4d017d0f76ec3171d469d80fc03dfbb4e48a4bcaddaa831b31d526f05edc/tiktoken-0.12.0.tar.gz", hash = "sha256:b18ba7ee2b093863978fcb14f74b3707cdc8d4d4d3836853ce7ec60772139931", size = 37806, upload-time = "2025-10-06T20:22:45.419Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a4/85/be65d39d6b647c79800fd9d29241d081d4eeb06271f383bb87200d74cf76/tiktoken-0.12.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:b97f74aca0d78a1ff21b8cd9e9925714c15a9236d6ceacf5c7327c117e6e21e8", size = 1050728, upload-time = "2025-10-06T20:21:52.756Z" }, 
+ { url = "https://files.pythonhosted.org/packages/4a/42/6573e9129bc55c9bf7300b3a35bef2c6b9117018acca0dc760ac2d93dffe/tiktoken-0.12.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:2b90f5ad190a4bb7c3eb30c5fa32e1e182ca1ca79f05e49b448438c3e225a49b", size = 994049, upload-time = "2025-10-06T20:21:53.782Z" }, + { url = "https://files.pythonhosted.org/packages/66/c5/ed88504d2f4a5fd6856990b230b56d85a777feab84e6129af0822f5d0f70/tiktoken-0.12.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:65b26c7a780e2139e73acc193e5c63ac754021f160df919add909c1492c0fb37", size = 1129008, upload-time = "2025-10-06T20:21:54.832Z" }, + { url = "https://files.pythonhosted.org/packages/f4/90/3dae6cc5436137ebd38944d396b5849e167896fc2073da643a49f372dc4f/tiktoken-0.12.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:edde1ec917dfd21c1f2f8046b86348b0f54a2c0547f68149d8600859598769ad", size = 1152665, upload-time = "2025-10-06T20:21:56.129Z" }, + { url = "https://files.pythonhosted.org/packages/a3/fe/26df24ce53ffde419a42f5f53d755b995c9318908288c17ec3f3448313a3/tiktoken-0.12.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:35a2f8ddd3824608b3d650a000c1ef71f730d0c56486845705a8248da00f9fe5", size = 1194230, upload-time = "2025-10-06T20:21:57.546Z" }, + { url = "https://files.pythonhosted.org/packages/20/cc/b064cae1a0e9fac84b0d2c46b89f4e57051a5f41324e385d10225a984c24/tiktoken-0.12.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:83d16643edb7fa2c99eff2ab7733508aae1eebb03d5dfc46f5565862810f24e3", size = 1254688, upload-time = "2025-10-06T20:21:58.619Z" }, + { url = "https://files.pythonhosted.org/packages/81/10/b8523105c590c5b8349f2587e2fdfe51a69544bd5a76295fc20f2374f470/tiktoken-0.12.0-cp312-cp312-win_amd64.whl", hash = "sha256:ffc5288f34a8bc02e1ea7047b8d041104791d2ddbf42d1e5fa07822cbffe16bd", size = 878694, upload-time = "2025-10-06T20:21:59.876Z" }, + { url = 
"https://files.pythonhosted.org/packages/00/61/441588ee21e6b5cdf59d6870f86beb9789e532ee9718c251b391b70c68d6/tiktoken-0.12.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:775c2c55de2310cc1bc9a3ad8826761cbdc87770e586fd7b6da7d4589e13dab3", size = 1050802, upload-time = "2025-10-06T20:22:00.96Z" }, + { url = "https://files.pythonhosted.org/packages/1f/05/dcf94486d5c5c8d34496abe271ac76c5b785507c8eae71b3708f1ad9b45a/tiktoken-0.12.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:a01b12f69052fbe4b080a2cfb867c4de12c704b56178edf1d1d7b273561db160", size = 993995, upload-time = "2025-10-06T20:22:02.788Z" }, + { url = "https://files.pythonhosted.org/packages/a0/70/5163fe5359b943f8db9946b62f19be2305de8c3d78a16f629d4165e2f40e/tiktoken-0.12.0-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:01d99484dc93b129cd0964f9d34eee953f2737301f18b3c7257bf368d7615baa", size = 1128948, upload-time = "2025-10-06T20:22:03.814Z" }, + { url = "https://files.pythonhosted.org/packages/0c/da/c028aa0babf77315e1cef357d4d768800c5f8a6de04d0eac0f377cb619fa/tiktoken-0.12.0-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:4a1a4fcd021f022bfc81904a911d3df0f6543b9e7627b51411da75ff2fe7a1be", size = 1151986, upload-time = "2025-10-06T20:22:05.173Z" }, + { url = "https://files.pythonhosted.org/packages/a0/5a/886b108b766aa53e295f7216b509be95eb7d60b166049ce2c58416b25f2a/tiktoken-0.12.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:981a81e39812d57031efdc9ec59fa32b2a5a5524d20d4776574c4b4bd2e9014a", size = 1194222, upload-time = "2025-10-06T20:22:06.265Z" }, + { url = "https://files.pythonhosted.org/packages/f4/f8/4db272048397636ac7a078d22773dd2795b1becee7bc4922fe6207288d57/tiktoken-0.12.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:9baf52f84a3f42eef3ff4e754a0db79a13a27921b457ca9832cf944c6be4f8f3", size = 1255097, upload-time = "2025-10-06T20:22:07.403Z" }, + { url = 
"https://files.pythonhosted.org/packages/8e/32/45d02e2e0ea2be3a9ed22afc47d93741247e75018aac967b713b2941f8ea/tiktoken-0.12.0-cp313-cp313-win_amd64.whl", hash = "sha256:b8a0cd0c789a61f31bf44851defbd609e8dd1e2c8589c614cc1060940ef1f697", size = 879117, upload-time = "2025-10-06T20:22:08.418Z" }, + { url = "https://files.pythonhosted.org/packages/ce/76/994fc868f88e016e6d05b0da5ac24582a14c47893f4474c3e9744283f1d5/tiktoken-0.12.0-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:d5f89ea5680066b68bcb797ae85219c72916c922ef0fcdd3480c7d2315ffff16", size = 1050309, upload-time = "2025-10-06T20:22:10.939Z" }, + { url = "https://files.pythonhosted.org/packages/f6/b8/57ef1456504c43a849821920d582a738a461b76a047f352f18c0b26c6516/tiktoken-0.12.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:b4e7ed1c6a7a8a60a3230965bdedba8cc58f68926b835e519341413370e0399a", size = 993712, upload-time = "2025-10-06T20:22:12.115Z" }, + { url = "https://files.pythonhosted.org/packages/72/90/13da56f664286ffbae9dbcfadcc625439142675845baa62715e49b87b68b/tiktoken-0.12.0-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:fc530a28591a2d74bce821d10b418b26a094bf33839e69042a6e86ddb7a7fb27", size = 1128725, upload-time = "2025-10-06T20:22:13.541Z" }, + { url = "https://files.pythonhosted.org/packages/05/df/4f80030d44682235bdaecd7346c90f67ae87ec8f3df4a3442cb53834f7e4/tiktoken-0.12.0-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:06a9f4f49884139013b138920a4c393aa6556b2f8f536345f11819389c703ebb", size = 1151875, upload-time = "2025-10-06T20:22:14.559Z" }, + { url = "https://files.pythonhosted.org/packages/22/1f/ae535223a8c4ef4c0c1192e3f9b82da660be9eb66b9279e95c99288e9dab/tiktoken-0.12.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:04f0e6a985d95913cabc96a741c5ffec525a2c72e9df086ff17ebe35985c800e", size = 1194451, upload-time = "2025-10-06T20:22:15.545Z" }, + { url = 
"https://files.pythonhosted.org/packages/78/a7/f8ead382fce0243cb625c4f266e66c27f65ae65ee9e77f59ea1653b6d730/tiktoken-0.12.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:0ee8f9ae00c41770b5f9b0bb1235474768884ae157de3beb5439ca0fd70f3e25", size = 1253794, upload-time = "2025-10-06T20:22:16.624Z" }, + { url = "https://files.pythonhosted.org/packages/93/e0/6cc82a562bc6365785a3ff0af27a2a092d57c47d7a81d9e2295d8c36f011/tiktoken-0.12.0-cp313-cp313t-win_amd64.whl", hash = "sha256:dc2dd125a62cb2b3d858484d6c614d136b5b848976794edfb63688d539b8b93f", size = 878777, upload-time = "2025-10-06T20:22:18.036Z" }, + { url = "https://files.pythonhosted.org/packages/72/05/3abc1db5d2c9aadc4d2c76fa5640134e475e58d9fbb82b5c535dc0de9b01/tiktoken-0.12.0-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:a90388128df3b3abeb2bfd1895b0681412a8d7dc644142519e6f0a97c2111646", size = 1050188, upload-time = "2025-10-06T20:22:19.563Z" }, + { url = "https://files.pythonhosted.org/packages/e3/7b/50c2f060412202d6c95f32b20755c7a6273543b125c0985d6fa9465105af/tiktoken-0.12.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:da900aa0ad52247d8794e307d6446bd3cdea8e192769b56276695d34d2c9aa88", size = 993978, upload-time = "2025-10-06T20:22:20.702Z" }, + { url = "https://files.pythonhosted.org/packages/14/27/bf795595a2b897e271771cd31cb847d479073497344c637966bdf2853da1/tiktoken-0.12.0-cp314-cp314-manylinux_2_28_aarch64.whl", hash = "sha256:285ba9d73ea0d6171e7f9407039a290ca77efcdb026be7769dccc01d2c8d7fff", size = 1129271, upload-time = "2025-10-06T20:22:22.06Z" }, + { url = "https://files.pythonhosted.org/packages/f5/de/9341a6d7a8f1b448573bbf3425fa57669ac58258a667eb48a25dfe916d70/tiktoken-0.12.0-cp314-cp314-manylinux_2_28_x86_64.whl", hash = "sha256:d186a5c60c6a0213f04a7a802264083dea1bbde92a2d4c7069e1a56630aef830", size = 1151216, upload-time = "2025-10-06T20:22:23.085Z" }, + { url = 
"https://files.pythonhosted.org/packages/75/0d/881866647b8d1be4d67cb24e50d0c26f9f807f994aa1510cb9ba2fe5f612/tiktoken-0.12.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:604831189bd05480f2b885ecd2d1986dc7686f609de48208ebbbddeea071fc0b", size = 1194860, upload-time = "2025-10-06T20:22:24.602Z" }, + { url = "https://files.pythonhosted.org/packages/b3/1e/b651ec3059474dab649b8d5b69f5c65cd8fcd8918568c1935bd4136c9392/tiktoken-0.12.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:8f317e8530bb3a222547b85a58583238c8f74fd7a7408305f9f63246d1a0958b", size = 1254567, upload-time = "2025-10-06T20:22:25.671Z" }, + { url = "https://files.pythonhosted.org/packages/80/57/ce64fd16ac390fafde001268c364d559447ba09b509181b2808622420eec/tiktoken-0.12.0-cp314-cp314-win_amd64.whl", hash = "sha256:399c3dd672a6406719d84442299a490420b458c44d3ae65516302a99675888f3", size = 921067, upload-time = "2025-10-06T20:22:26.753Z" }, + { url = "https://files.pythonhosted.org/packages/ac/a4/72eed53e8976a099539cdd5eb36f241987212c29629d0a52c305173e0a68/tiktoken-0.12.0-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:c2c714c72bc00a38ca969dae79e8266ddec999c7ceccd603cc4f0d04ccd76365", size = 1050473, upload-time = "2025-10-06T20:22:27.775Z" }, + { url = "https://files.pythonhosted.org/packages/e6/d7/0110b8f54c008466b19672c615f2168896b83706a6611ba6e47313dbc6e9/tiktoken-0.12.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:cbb9a3ba275165a2cb0f9a83f5d7025afe6b9d0ab01a22b50f0e74fee2ad253e", size = 993855, upload-time = "2025-10-06T20:22:28.799Z" }, + { url = "https://files.pythonhosted.org/packages/5f/77/4f268c41a3957c418b084dd576ea2fad2e95da0d8e1ab705372892c2ca22/tiktoken-0.12.0-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:dfdfaa5ffff8993a3af94d1125870b1d27aed7cb97aa7eb8c1cefdbc87dbee63", size = 1129022, upload-time = "2025-10-06T20:22:29.981Z" }, + { url = 
"https://files.pythonhosted.org/packages/4e/2b/fc46c90fe5028bd094cd6ee25a7db321cb91d45dc87531e2bdbb26b4867a/tiktoken-0.12.0-cp314-cp314t-manylinux_2_28_x86_64.whl", hash = "sha256:584c3ad3d0c74f5269906eb8a659c8bfc6144a52895d9261cdaf90a0ae5f4de0", size = 1150736, upload-time = "2025-10-06T20:22:30.996Z" }, + { url = "https://files.pythonhosted.org/packages/28/c0/3c7a39ff68022ddfd7d93f3337ad90389a342f761c4d71de99a3ccc57857/tiktoken-0.12.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:54c891b416a0e36b8e2045b12b33dd66fb34a4fe7965565f1b482da50da3e86a", size = 1194908, upload-time = "2025-10-06T20:22:32.073Z" }, + { url = "https://files.pythonhosted.org/packages/ab/0d/c1ad6f4016a3968c048545f5d9b8ffebf577774b2ede3e2e352553b685fe/tiktoken-0.12.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:5edb8743b88d5be814b1a8a8854494719080c28faaa1ccbef02e87354fe71ef0", size = 1253706, upload-time = "2025-10-06T20:22:33.385Z" }, + { url = "https://files.pythonhosted.org/packages/af/df/c7891ef9d2712ad774777271d39fdef63941ffba0a9d59b7ad1fd2765e57/tiktoken-0.12.0-cp314-cp314t-win_amd64.whl", hash = "sha256:f61c0aea5565ac82e2ec50a05e02a6c44734e91b51c10510b084ea1b8e633a71", size = 920667, upload-time = "2025-10-06T20:22:34.444Z" }, +] + +[[package]] +name = "tqdm" +version = "4.67.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "colorama", marker = "sys_platform == 'win32'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/27/89/4b0001b2dab8df0a5ee2787dcbe771de75ded01f18f1f8d53dedeea2882b/tqdm-4.67.2.tar.gz", hash = "sha256:649aac53964b2cb8dec76a14b405a4c0d13612cb8933aae547dd144eacc99653", size = 169514, upload-time = "2026-01-30T23:12:06.555Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/f5/e2/31eac96de2915cf20ccaed0225035db149dfb9165a9ed28d4b252ef3f7f7/tqdm-4.67.2-py3-none-any.whl", hash = "sha256:9a12abcbbff58b6036b2167d9d3853042b9d436fe7330f06ae047867f2f8e0a7", size = 78354, upload-time = 
"2026-01-30T23:12:04.368Z" }, +] + [[package]] name = "types-pytz" version = "2025.2.0.20251108" @@ -1234,6 +1580,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/c7/b0/003792df09decd6849a5e39c28b513c06e84436a54440380862b5aeff25d/tzdata-2025.3-py2.py3-none-any.whl", hash = "sha256:06a47e5700f3081aab02b2e513160914ff0694bce9947d6b76ebd6bf57cfc5d1", size = 348521, upload-time = "2025-12-13T17:45:33.889Z" }, ] +[[package]] +name = "urllib3" +version = "2.6.3" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/c7/24/5f1b3bdffd70275f6661c76461e25f024d5a38a46f04aaca912426a2b1d3/urllib3-2.6.3.tar.gz", hash = "sha256:1b62b6884944a57dbe321509ab94fd4d3b307075e0c2eae991ac71ee15ad38ed", size = 435556, upload-time = "2026-01-07T16:24:43.925Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/39/08/aaaad47bc4e9dc8c725e68f9d04865dbcb2052843ff09c97b08904852d84/urllib3-2.6.3-py3-none-any.whl", hash = "sha256:bf272323e553dfb2e87d9bfd225ca7b0f467b919d7bbd355436d3fd37cb0acd4", size = 131584, upload-time = "2026-01-07T16:24:42.685Z" }, +] + [[package]] name = "uvicorn" version = "0.40.0"