diff --git a/INITIAL-13.md b/INITIAL-13.md new file mode 100644 index 00000000..d0595ea2 --- /dev/null +++ b/INITIAL-13.md @@ -0,0 +1,947 @@ +# INITIAL-13.md — Data Seeder Dashboard (The Forge UI) + +## Architectural Role + +**"The Forge UI"** - Admin interface for managing synthetic data generation using The Forge seeder. + +This phase provides the visual layer for: +- Viewing current database state (row counts, date ranges) +- Triggering data generation operations (full-new, append, delete) +- Selecting pre-built scenarios and configuring parameters +- Monitoring operation progress and viewing logs +- Verifying data integrity + +--- + +## RESEARCH PHASE + +### Codebase Analysis + +**Existing Frontend Patterns Reviewed:** +- `frontend/src/pages/admin.tsx` — Tabbed admin panel with Cards, Dialogs, AlertDialogs +- `frontend/src/pages/dashboard.tsx` — KPI cards layout, data hooks pattern +- `frontend/src/components/ui/` — 26 shadcn/ui components already installed +- `frontend/src/hooks/` — TanStack Query patterns for data fetching +- `frontend/src/lib/api.ts` — API client with RFC 7807 error handling + +**Available shadcn/ui Components (Already Installed):** +| Component | Use Case in Forge UI | +|-----------|---------------------| +| `Card` | KPI summary cards, configuration panels | +| `Tabs` | Switch between Operations/Config/Logs views | +| `Button` | Action triggers (Generate, Delete, Append) | +| `AlertDialog` | Confirmation for destructive operations | +| `Dialog` | Configuration modals | +| `Select` | Scenario picker, scope selector | +| `Input` | Seed, stores, products inputs | +| `Calendar` | Date range picker | +| `Progress` | Operation progress indicator | +| `Badge` | Status indicators | +| `Table` | Row count summary, verification results | +| `Accordion` | Collapsible log sections | +| `Skeleton` | Loading states | +| `Sonner` | Toast notifications | + +**Backend API (scripts/seed_random.py):** +- CLI-based (no REST endpoints yet) +- Operations: 
`--full-new`, `--delete`, `--append`, `--status`, `--verify` +- Configuration: `--seed`, `--stores`, `--products`, `--start-date`, `--end-date`, `--scenario` + +--- + +## BRAINSTORM PHASE + +### Core Features (Required) + +1. **Data Status Dashboard** — Current row counts for all 7 tables +2. **Quick Actions** — One-click operations with confirmation +3. **Scenario Selector** — Pre-built scenario presets +4. **Configuration Form** — Custom parameters (seed, counts, dates) +5. **Operation Log** — Real-time output from seeder operations + +### Additional Features (Brainstormed) + +#### Visual Data Summary +- **Date Range Indicator** — Min/max dates in sales_daily +- **Coverage Heatmap** — Store × Product matrix showing data density +- **Trend Preview** — Small sparkline of daily sales totals + +#### Advanced Configuration +- **YAML Editor** — Edit custom configuration inline +- **Preset Manager** — Save/load custom configurations +- **Dry Run Toggle** — Preview changes before executing + +#### Operation Management +- **Progress Streaming** — WebSocket-based real-time progress +- **Cancel Operation** — Abort long-running operations +- **History Log** — Past operations with timestamps and parameters + +#### Data Quality +- **Verification Panel** — FK integrity, constraint checks, gap detection +- **Data Preview** — Sample rows from generated data +- **Export Config** — Download YAML for reproducibility + +--- + +## DECISION PHASE + +### Architecture Decision: Backend Integration + +| Option | Pros | Cons | +|--------|------|------| +| **REST API Endpoints** (Recommended) | Standard patterns, async-ready | Requires new routes | +| Direct CLI Execution | Immediate, no backend changes | Not web-friendly | +| WebSocket Streaming | Real-time progress | Complex implementation | + +**Decision**: Create new REST API endpoints in `app/features/seeder/` feature slice: +- `GET /seeder/status` — Current table row counts +- `POST /seeder/generate` — Trigger full-new generation +- 
`POST /seeder/append` — Append data to existing dataset +- `DELETE /seeder/data` — Delete data with scope +- `POST /seeder/verify` — Run integrity verification +- `GET /seeder/scenarios` — List available scenarios + +### UI Layout Decision + +| Option | Pros | Cons | +|--------|------|------| +| **New Tab in Admin** (Recommended) | Consistent with existing admin | Adds to existing complexity | +| Standalone Page | Clean separation | Navigation overhead | +| Dashboard Widget | Quick access | Limited space | + +**Decision**: Add new "Data Seeder" tab to existing `/admin` page, following the Tabs pattern from RAG Sources and Aliases panels. + +--- + +## FEATURE + +### Data Status Panel + +Real-time view of current database state: + +``` +┌─────────────────────────────────────────────────────────────┐ +│ Current Data Summary [Refresh] │ +├─────────────────────────────────────────────────────────────┤ +│ ┌─────────┬─────────┬─────────┬─────────┬─────────┐ │ +│ │ Stores │Products │Calendar │ Sales │Inventory│ │ +│ │ 10 │ 50 │ 365 │ 127,450 │ 182,500 │ │ +│ │ +0% │ +0% │ +0% │ +12% │ +8% │ │ +│ └─────────┴─────────┴─────────┴─────────┴─────────┘ │ +│ │ +│ Date Range: 2024-01-01 → 2024-12-31 (365 days) │ +│ Last Updated: 2 hours ago │ +└─────────────────────────────────────────────────────────────┘ +``` + +**Components Used:** +- `Card` with `CardHeader`, `CardContent` +- Grid of stat cards with `Badge` for change indicators +- `Skeleton` for loading states + +### Quick Actions Panel + +One-click operations with confirmation dialogs: + +``` +┌─────────────────────────────────────────────────────────────┐ +│ Quick Actions │ +├─────────────────────────────────────────────────────────────┤ +│ [🔄 Generate New Dataset] [➕ Append Data] [🗑️ Delete] │ +│ │ +│ ○ retail_standard (default) │ +│ ○ holiday_rush │ +│ ○ high_variance │ +│ ○ stockout_heavy │ +│ ○ new_launches │ +│ ○ sparse │ +└─────────────────────────────────────────────────────────────┘ +``` + +**Components Used:** +- 
`Button` with variants (default, destructive) +- `AlertDialog` for delete confirmation +- Radio group for scenario selection (using multiple `Button` with `variant="outline"`) + +### Configuration Panel + +Detailed parameter configuration: + +``` +┌─────────────────────────────────────────────────────────────┐ +│ Configuration │ +├─────────────────────────────────────────────────────────────┤ +│ Seed [42 ] Stores [10 ] │ +│ Products [50 ] Batch Size [1000 ] │ +│ │ +│ Date Range │ +│ Start: [📅 2024-01-01] End: [📅 2024-12-31] │ +│ │ +│ Advanced Options │ +│ ┌──────────────────────────────────────────────────────┐ │ +│ │ Sparsity: [0.0 ] (0.0 - 1.0) │ │ +│ │ ☐ Dry Run (preview only) │ │ +│ └──────────────────────────────────────────────────────┘ │ +│ │ +│ [Apply Configuration] │ +└─────────────────────────────────────────────────────────────┘ +``` + +**Components Used:** +- `Input` for numeric values +- `Calendar` + `Popover` for date pickers +- `Checkbox` for dry run toggle +- `Accordion` for advanced options +- `Button` for apply action + +### Operation Log Panel + +Real-time operation output: + +``` +┌─────────────────────────────────────────────────────────────┐ +│ Operation Log [Clear] │ +├─────────────────────────────────────────────────────────────┤ +│ ▶ Running: Generate New Dataset (seed: 42) │ +│ ████████████████████░░░░░░░░░░ 65% - Generating sales... │ +│ ──────────────────────────────────────────────────────── │ +│ 2026-02-02 10:30:15 ✓ Generated 10 stores │ +│ 2026-02-02 10:30:16 ✓ Generated 50 products │ +│ 2026-02-02 10:30:17 ✓ Generated 365 calendar days │ +│ 2026-02-02 10:30:45 ⏳ Generating sales records... │ +│ 2026-02-02 10:30:45 Batch 1/128 complete │ +│ 2026-02-02 10:30:46 Batch 2/128 complete │ +│ ... 
│ +└─────────────────────────────────────────────────────────────┘ +``` + +**Components Used:** +- `Card` with fixed height and `ScrollArea` +- `Progress` bar for operation progress +- `Badge` for status indicators (success, error, pending) +- Monospace font for log entries + +### Verification Panel + +Data integrity check results: + +``` +┌─────────────────────────────────────────────────────────────┐ +│ Data Verification [Run Check] │ +├─────────────────────────────────────────────────────────────┤ +│ Last Run: 2026-02-02 10:35:00 │ +│ │ +│ ✓ Foreign Key Integrity PASSED │ +│ ✓ Non-Negative Constraints PASSED │ +│ ✓ Date Range Coverage PASSED │ +│ ✓ Unique Constraints PASSED │ +│ ⚠ Data Gaps Detected 2 gaps found │ +│ │ +│ Gaps: │ +│ - Store S003, Product P012: 2024-03-15 to 2024-03-17 │ +│ - Store S007, Product P045: 2024-08-01 to 2024-08-03 │ +└─────────────────────────────────────────────────────────────┘ +``` + +**Components Used:** +- `Table` for check results +- `Badge` with variants (success, warning, destructive) +- `Accordion` for expandable details + +--- + +## PAGE STRUCTURE + +### /admin (Extended with Seeder Tab) + +``` +┌─────────────────────────────────────────────────────────────┐ +│ Admin Panel │ +├─────────────────────────────────────────────────────────────┤ +│ [📚 RAG Sources] [🏷️ Deployment Aliases] [🔥 Data Seeder] │ +├─────────────────────────────────────────────────────────────┤ +│ │ +│ ┌─ Data Status ──────────────────────────────────────────┐ │ +│ │ [Store: 10] [Products: 50] [Days: 365] [Sales: 127K] │ │ +│ │ Date Range: 2024-01-01 → 2024-12-31 │ │ +│ └────────────────────────────────────────────────────────┘ │ +│ │ +│ ┌─ Quick Actions ────────────────────────────────────────┐ │ +│ │ [🔄 Generate] [➕ Append] [🗑️ Delete] [✓ Verify] │ │ +│ │ │ │ +│ │ Scenario: [retail_standard ▼] │ │ +│ └────────────────────────────────────────────────────────┘ │ +│ │ +│ ┌─ Configuration ────────────────────────────────────────┐ │ +│ │ Seed: [42] Stores: 
[10] Products: [50] │ │ +│ │ Dates: [2024-01-01] to [2024-12-31] │ │ +│ │ [▸ Advanced Options] │ │ +│ └────────────────────────────────────────────────────────┘ │ +│ │ +│ ┌─ Operation Log ────────────────────────────────────────┐ │ +│ │ ████████████░░░░░░░░ 60% Generating sales... │ │ +│ │ 10:30:15 ✓ Generated 10 stores │ │ +│ │ 10:30:16 ✓ Generated 50 products │ │ +│ └────────────────────────────────────────────────────────┘ │ +│ │ +└─────────────────────────────────────────────────────────────┘ +``` + +--- + +## COMPONENTS + +### SeederPanel (Main Component) + +```tsx +// components/seeder/seeder-panel.tsx +import { useState } from 'react' +import { Flame, Plus, Trash2, CheckCircle, RefreshCw } from 'lucide-react' +import { useSeederStatus, useGenerateData, useDeleteData, useVerifyData } from '@/hooks/use-seeder' +import { Card, CardContent, CardDescription, CardHeader, CardTitle } from '@/components/ui/card' +import { Button } from '@/components/ui/button' +import { Progress } from '@/components/ui/progress' +import { Badge } from '@/components/ui/badge' +import { Select, SelectContent, SelectItem, SelectTrigger, SelectValue } from '@/components/ui/select' +import { AlertDialog, AlertDialogAction, AlertDialogCancel, AlertDialogContent, AlertDialogDescription, AlertDialogFooter, AlertDialogHeader, AlertDialogTitle, AlertDialogTrigger } from '@/components/ui/alert-dialog' + +const SCENARIOS = [ + { value: 'retail_standard', label: 'Retail Standard', description: 'Normal retail patterns' }, + { value: 'holiday_rush', label: 'Holiday Rush', description: 'Q4 surge with peaks' }, + { value: 'high_variance', label: 'High Variance', description: 'Noisy, unpredictable' }, + { value: 'stockout_heavy', label: 'Stockout Heavy', description: 'Frequent stockouts' }, + { value: 'new_launches', label: 'New Launches', description: 'Product launch ramps' }, + { value: 'sparse', label: 'Sparse', description: 'Missing data patterns' }, +] + +export function SeederPanel() { + const 
[scenario, setScenario] = useState('retail_standard') + const { data: status, isLoading, refetch } = useSeederStatus() + const generateMutation = useGenerateData() + const deleteMutation = useDeleteData() + + return ( +
+ + generateMutation.mutate({ scenario })} + onDelete={() => deleteMutation.mutate({ scope: 'all' })} + isGenerating={generateMutation.isPending} + isDeleting={deleteMutation.isPending} + /> + + +
+ ) +} +``` + +### DataStatusCard + +```tsx +// components/seeder/data-status-card.tsx +interface DataStatusCardProps { + status: SeederStatus | undefined + isLoading: boolean + onRefresh: () => void +} + +export function DataStatusCard({ status, isLoading, onRefresh }: DataStatusCardProps) { + if (isLoading) { + return ( + + + Current Data Summary + + +
+ {[...Array(5)].map((_, i) => ( + + ))} +
+
+
+ ) + } + + const stats = [ + { label: 'Stores', value: status?.stores ?? 0, icon: Store }, + { label: 'Products', value: status?.products ?? 0, icon: Package }, + { label: 'Calendar', value: status?.calendar ?? 0, icon: Calendar }, + { label: 'Sales', value: status?.sales ?? 0, icon: TrendingUp }, + { label: 'Inventory', value: status?.inventory ?? 0, icon: Warehouse }, + ] + + return ( + + +
+ Current Data Summary + + {status?.date_range_start} → {status?.date_range_end} + +
+ +
+ +
+ {stats.map((stat) => ( +
+ +

{stat.value.toLocaleString()}

+

{stat.label}

+
+ ))} +
+
+
+ ) +} +``` + +### QuickActionsCard + +```tsx +// components/seeder/quick-actions-card.tsx +interface QuickActionsCardProps { + scenario: string + onScenarioChange: (scenario: string) => void + onGenerate: () => void + onDelete: () => void + isGenerating: boolean + isDeleting: boolean +} + +export function QuickActionsCard({ + scenario, + onScenarioChange, + onGenerate, + onDelete, + isGenerating, + isDeleting, +}: QuickActionsCardProps) { + return ( + + + Quick Actions + + Generate, append, or delete synthetic data + + + +
+ + + + + + + + + + + Delete All Data? + + This will permanently delete all generated data from the database. + This action cannot be undone. + + + + Cancel + + Delete All Data + + + + + + +
+ +
+ + +
+
+
+ ) +} +``` + +--- + +## API HOOKS + +```tsx +// hooks/use-seeder.ts +import { useQuery, useMutation, useQueryClient } from '@tanstack/react-query' +import { api } from '@/lib/api' + +interface SeederStatus { + stores: number + products: number + calendar: number + sales: number + inventory: number + price_history: number + promotions: number + date_range_start: string | null + date_range_end: string | null + last_updated: string | null +} + +interface GenerateParams { + scenario?: string + seed?: number + stores?: number + products?: number + start_date?: string + end_date?: string + dry_run?: boolean +} + +interface DeleteParams { + scope: 'all' | 'facts' | 'dimensions' +} + +interface VerifyResult { + passed: boolean + checks: Array<{ + name: string + status: 'passed' | 'warning' | 'failed' + message: string + details?: string[] + }> +} + +export function useSeederStatus() { + return useQuery({ + queryKey: ['seeder', 'status'], + queryFn: () => api.get('/seeder/status'), + refetchInterval: 30000, // Refresh every 30 seconds + }) +} + +export function useGenerateData() { + const queryClient = useQueryClient() + + return useMutation({ + mutationFn: (params: GenerateParams) => + api.post('/seeder/generate', params), + onSuccess: () => { + queryClient.invalidateQueries({ queryKey: ['seeder', 'status'] }) + queryClient.invalidateQueries({ queryKey: ['analytics'] }) + }, + }) +} + +export function useAppendData() { + const queryClient = useQueryClient() + + return useMutation({ + mutationFn: (params: GenerateParams) => + api.post('/seeder/append', params), + onSuccess: () => { + queryClient.invalidateQueries({ queryKey: ['seeder', 'status'] }) + }, + }) +} + +export function useDeleteData() { + const queryClient = useQueryClient() + + return useMutation({ + mutationFn: (params: DeleteParams) => + api.delete('/seeder/data', { data: params }), + onSuccess: () => { + queryClient.invalidateQueries({ queryKey: ['seeder', 'status'] }) + queryClient.invalidateQueries({ 
queryKey: ['analytics'] }) + }, + }) +} + +export function useVerifyData() { + return useMutation({ + mutationFn: () => api.post('/seeder/verify'), + }) +} + +export function useSeederScenarios() { + return useQuery({ + queryKey: ['seeder', 'scenarios'], + queryFn: () => api.get<Array<{ name: string; description: string; stores: number; products: number }>>('/seeder/scenarios'), + staleTime: Infinity, // Scenarios don't change + }) +} +``` + +--- + +## BACKEND API ENDPOINTS (New Feature Slice) + +### Routes + +```python +# app/features/seeder/routes.py +from fastapi import APIRouter, Depends +from sqlalchemy.ext.asyncio import AsyncSession + +from app.core.database import get_db +from app.features.seeder import schemas, service + +router = APIRouter(prefix="/seeder", tags=["seeder"]) + + +@router.get("/status", response_model=schemas.SeederStatus) +async def get_status(db: AsyncSession = Depends(get_db)) -> schemas.SeederStatus: + """Get current database row counts and date range.""" + return await service.get_status(db) + + +@router.get("/scenarios", response_model=list[schemas.ScenarioInfo]) +async def list_scenarios() -> list[schemas.ScenarioInfo]: + """List available scenario presets.""" + return service.list_scenarios() + + +@router.post("/generate", response_model=schemas.GenerateResult) +async def generate_data( + params: schemas.GenerateParams, + db: AsyncSession = Depends(get_db), +) -> schemas.GenerateResult: + """Generate new synthetic dataset.""" + return await service.generate_data(db, params) + + +@router.post("/append", response_model=schemas.GenerateResult) +async def append_data( + params: schemas.AppendParams, + db: AsyncSession = Depends(get_db), +) -> schemas.GenerateResult: + """Append data to existing dataset.""" + return await service.append_data(db, params) + + +@router.delete("/data", response_model=schemas.DeleteResult) +async def delete_data( + params: schemas.DeleteParams, + db: AsyncSession = Depends(get_db), +) -> schemas.DeleteResult: + """Delete data with specified scope.""" + return await 
service.delete_data(db, params) + + +@router.post("/verify", response_model=schemas.VerifyResult) +async def verify_data( + db: AsyncSession = Depends(get_db), +) -> schemas.VerifyResult: + """Run data integrity verification.""" + return await service.verify_data(db) +``` + +### Schemas + +```python +# app/features/seeder/schemas.py +from datetime import date +from typing import Literal + +from pydantic import BaseModel, Field + + +class SeederStatus(BaseModel): + """Current database state.""" + stores: int + products: int + calendar: int + sales: int + inventory: int + price_history: int + promotions: int + date_range_start: date | None + date_range_end: date | None + last_updated: str | None + + +class ScenarioInfo(BaseModel): + """Scenario preset information.""" + name: str + description: str + stores: int + products: int + + +class GenerateParams(BaseModel): + """Parameters for data generation.""" + scenario: str = "retail_standard" + seed: int = Field(default=42, ge=0) + stores: int = Field(default=10, ge=1, le=100) + products: int = Field(default=50, ge=1, le=500) + start_date: date = Field(default_factory=lambda: date(2024, 1, 1)) + end_date: date = Field(default_factory=lambda: date(2024, 12, 31)) + sparsity: float = Field(default=0.0, ge=0.0, le=1.0) + dry_run: bool = False + + +class AppendParams(BaseModel): + """Parameters for appending data.""" + start_date: date + end_date: date + seed: int = Field(default=43, ge=0) + + +class DeleteParams(BaseModel): + """Parameters for data deletion.""" + scope: Literal["all", "facts", "dimensions"] = "all" + + +class GenerateResult(BaseModel): + """Result of generation operation.""" + success: bool + records_created: dict[str, int] + duration_seconds: float + message: str + + +class DeleteResult(BaseModel): + """Result of deletion operation.""" + success: bool + records_deleted: dict[str, int] + message: str + + +class VerifyCheck(BaseModel): + """Single verification check result.""" + name: str + status: 
Literal["passed", "warning", "failed"] + message: str + details: list[str] | None = None + + +class VerifyResult(BaseModel): + """Data verification result.""" + passed: bool + checks: list[VerifyCheck] +``` + +--- + +## CONFIGURATION + +### Environment Variables + +```env +# Seeder Configuration (already in CLAUDE.md) +SEEDER_DEFAULT_SEED=42 +SEEDER_DEFAULT_STORES=10 +SEEDER_DEFAULT_PRODUCTS=50 +SEEDER_BATCH_SIZE=1000 +SEEDER_ENABLE_PROGRESS=True +SEEDER_ALLOW_PRODUCTION=False +SEEDER_REQUIRE_CONFIRM=True +``` + +### Frontend Environment + +```env +# frontend/.env +VITE_API_BASE_URL=http://localhost:8123 +VITE_ENABLE_SEEDER_PANEL=true # Feature flag for seeder UI +``` + +--- + +## EXAMPLES + +### examples/ui/seeder-panel.md + +```markdown +# Data Seeder Panel + +## Quick Start + +1. Navigate to Admin Panel: http://localhost:5173/admin +2. Click "Data Seeder" tab +3. Select a scenario (e.g., "Holiday Rush") +4. Click "Generate New" to create synthetic data +5. View progress in the Operation Log panel + +## Scenario Selection + +| Scenario | Best For | +|----------|----------| +| retail_standard | General development and testing | +| holiday_rush | Seasonal forecasting models | +| high_variance | Robustness testing | +| stockout_heavy | Inventory optimization | +| new_launches | Product launch forecasting | +| sparse | Gap handling validation | + +## Verification + +After generating data, run "Verify" to check: +- Foreign key integrity +- Non-negative constraints +- Date range coverage +- Unique constraint compliance +``` + +--- + +## SUCCESS CRITERIA + +### Functional Requirements +- [ ] Status panel shows accurate row counts for all 7 tables +- [ ] Date range displays correctly (min/max from sales_daily) +- [ ] Generate button triggers full-new with selected scenario +- [ ] Delete button shows confirmation dialog before executing +- [ ] Append button adds data without affecting existing records +- [ ] Verify button runs all integrity checks +- [ ] Scenario selector 
updates generation parameters +- [ ] Configuration form validates input ranges + +### UX Requirements +- [ ] Loading states shown during API calls +- [ ] Success/error toasts for all operations +- [ ] Progress indicator during long operations +- [ ] Disabled states for buttons during pending operations +- [ ] Responsive layout on tablet and mobile +- [ ] Keyboard accessible (focus states, enter to submit) + +### Performance +- [ ] Status query returns in < 500ms +- [ ] UI remains responsive during generation +- [ ] No memory leaks from polling/subscriptions + +### Safety +- [ ] Delete requires explicit confirmation +- [ ] Production environment check (SEEDER_ALLOW_PRODUCTION) +- [ ] Clear error messages for failed operations + +--- + +## CROSS-MODULE INTEGRATION + +| Direction | Module | Integration Point | +|-----------|--------|-------------------| +| **← Data Platform** | Phase 1 | Queries all 7 tables for status | +| **← Shared Seeder** | Phase 12 | Uses existing DataSeeder orchestrator | +| **→ Analytics** | Phase 7 | Invalidates KPI cache after generation | +| **→ Dashboard** | Phase 10 | Refreshes dashboard data after changes | +| **→ Explorer** | Phase 10 | New data appears in explorer tables | +| **→ Admin** | Phase 10 | Extends existing admin page with new tab | + +--- + +## DOCUMENTATION LINKS + +### shadcn/ui Components Used +- [Card](https://ui.shadcn.com/docs/components/card) — Container layout +- [Tabs](https://ui.shadcn.com/docs/components/tabs) — Admin panel navigation +- [Button](https://ui.shadcn.com/docs/components/button) — Action triggers +- [AlertDialog](https://ui.shadcn.com/docs/components/alert-dialog) — Destructive confirmations +- [Select](https://ui.shadcn.com/docs/components/select) — Scenario picker +- [Input](https://ui.shadcn.com/docs/components/input) — Configuration values +- [Progress](https://ui.shadcn.com/docs/components/progress) — Operation progress +- [Badge](https://ui.shadcn.com/docs/components/badge) — Status indicators 
+- [Skeleton](https://ui.shadcn.com/docs/components/skeleton) — Loading states +- [Accordion](https://ui.shadcn.com/docs/components/accordion) — Collapsible sections +- [Calendar](https://ui.shadcn.com/docs/components/calendar) — Date picker +- [Popover](https://ui.shadcn.com/docs/components/popover) — Date picker container + +### TanStack +- [TanStack Query Mutations](https://tanstack.com/query/latest/docs/react/guides/mutations) +- [Query Invalidation](https://tanstack.com/query/latest/docs/react/guides/query-invalidation) + +### Project References +- [CLAUDE.md](./CLAUDE.md) — Project coding standards +- [INITIAL-12.md](./INITIAL-12.md) — Data Seeder backend (The Forge) +- [docs/DATA-SEEDER.md](./docs/DATA-SEEDER.md) — Seeder documentation +- [app/shared/seeder/](./app/shared/seeder/) — Seeder implementation + +--- + +## OTHER CONSIDERATIONS + +### Best Practices + +1. **Reuse Admin Patterns** — Follow existing RagSourcesPanel and AliasesPanel patterns +2. **Optimistic UI** — Show immediate feedback, rollback on error +3. **Cache Invalidation** — Invalidate analytics queries after data changes +4. **Feature Flag** — `VITE_ENABLE_SEEDER_PANEL` to hide in production + +### Security + +- **Production Guard** — Check `SEEDER_ALLOW_PRODUCTION` before any mutation +- **Confirmation Required** — AlertDialog for all destructive operations +- **Rate Limiting** — Prevent rapid repeated generation requests +- **Audit Logging** — Log all seeder operations with user context + +### Observability + +- **Structured Logging** — Log events: `seeder.generate_started`, `seeder.generate_completed` +- **Metrics** — Track generation duration, records created +- **Error Tracking** — Capture and display detailed error messages + +### Future Enhancements + +- WebSocket streaming for real-time progress +- YAML configuration editor +- Data preview (sample rows) +- Generation history with rollback +- Scheduled data refresh +- Multi-tenant isolation + +--- + +## IMPLEMENTATION ORDER + +1. 
**Backend API** — Create `app/features/seeder/` feature slice with routes/schemas/service +2. **Frontend Hooks** — Add `hooks/use-seeder.ts` with TanStack Query +3. **Status Panel** — DataStatusCard component with row counts +4. **Quick Actions** — QuickActionsCard with generate/delete buttons +5. **Configuration** — ConfigurationCard with form inputs +6. **Operation Log** — OperationLogCard with progress display +7. **Admin Integration** — Add Seeder tab to existing admin page +8. **Verification** — VerifyResult display component +9. **Testing** — Unit tests for hooks and components +10. **Documentation** — Update README and add examples + +--- + +*Phase 13: The Forge UI — Where developers interact with the data factory.* diff --git a/PRPs/PRP-13-data-seeder-dashboard.md b/PRPs/PRP-13-data-seeder-dashboard.md new file mode 100644 index 00000000..c197bdbf --- /dev/null +++ b/PRPs/PRP-13-data-seeder-dashboard.md @@ -0,0 +1,855 @@ +# PRP-13: Data Seeder Dashboard (The Forge UI) + +**Phase**: 13 +**Status**: Ready for Implementation +**PRP Score**: 9/10 (High confidence for one-pass implementation) +**Estimated Complexity**: Medium + +--- + +## Goal + +Build the frontend admin interface for The Forge data seeder - a dashboard panel that allows users to: +1. View current database state (row counts for all 7 tables, date ranges) +2. Generate new synthetic datasets with scenario presets +3. Append data to existing datasets +4. Delete data with scope selection +5. Run data integrity verification + +The backend API already exists at `/seeder/*` endpoints - this PRP focuses exclusively on the **frontend implementation**. 
+ +--- + +## Why + +- **Developer Experience**: Provides visual interface for synthetic data operations instead of CLI +- **Testing Workflow**: Quick data generation for development and testing scenarios +- **Integration**: Extends existing Admin panel with consistent UI patterns +- **Safety**: Built-in confirmations for destructive operations, dry-run previews + +--- + +## What + +### User-Visible Behavior + +1. New "Data Seeder" tab in Admin panel (`/admin`) +2. Status cards showing row counts for stores, products, calendar, sales, inventory, price_history, promotions +3. Date range display (earliest to latest sales date) +4. Scenario dropdown with 6 presets: retail_standard, holiday_rush, high_variance, stockout_heavy, new_launches, sparse +5. Action buttons: Generate, Append, Delete, Verify +6. Configuration form: seed, stores, products, dates, sparsity +7. AlertDialog confirmations for destructive actions +8. Loading states, success/error toasts + +### Success Criteria + +- [ ] Status panel displays all 7 table counts correctly +- [ ] Date range shows min/max from sales_daily (or "No data" when empty) +- [ ] Scenario selector lists 6 presets with descriptions +- [ ] Generate button creates dataset with selected scenario and config +- [ ] Delete button shows AlertDialog confirmation before executing +- [ ] Verify button runs integrity checks and displays results +- [ ] Loading spinners shown during API operations +- [ ] Toast notifications on success/error +- [ ] Tab integrates seamlessly with existing RAG Sources and Aliases tabs +- [ ] All TypeScript types are correct (no `any`) +- [ ] ESLint passes with no errors + +--- + +## All Needed Context + +### Documentation & References + +```yaml +# MUST READ - Existing Patterns to Follow +- file: frontend/src/pages/admin.tsx + why: Pattern for admin tabs, RagSourcesPanel and AliasesPanel show exact component structure + +- file: frontend/src/hooks/use-runs.ts + why: TanStack Query patterns for useQuery and 
useMutation with cache invalidation + +- file: frontend/src/hooks/use-rag-sources.ts + why: Mutation pattern with refetch, shows useIndexDocument pattern + +- file: frontend/src/lib/api.ts + why: API client usage - api(endpoint, config) + +- file: frontend/src/types/api.ts + why: Type definition patterns, add seeder types here + +# Backend API Reference (already implemented) +- file: app/features/seeder/schemas.py + why: Exact response shapes - SeederStatus, ScenarioInfo, GenerateResult, DeleteResult, VerifyResult + +- file: app/features/seeder/routes.py + why: API endpoint signatures and HTTP methods/status codes + +# shadcn/ui Components (all already installed) +- url: https://ui.shadcn.com/docs/components/tabs + why: Tab structure pattern + +- url: https://ui.shadcn.com/docs/components/card + why: Status cards, configuration panels + +- url: https://ui.shadcn.com/docs/components/alert-dialog + why: Destructive action confirmation + +- url: https://ui.shadcn.com/docs/components/select + why: Scenario picker + +- url: https://ui.shadcn.com/docs/components/input + why: Configuration form inputs + +- url: https://ui.shadcn.com/docs/components/badge + why: Status indicators + +- url: https://ui.shadcn.com/docs/components/calendar + why: Date picker in Popover + +- url: https://ui.shadcn.com/docs/components/progress + why: Operation progress indicator +``` + +### Current Codebase Tree (Frontend Focus) + +```bash +frontend/src/ +├── App.tsx # Routes - no changes needed +├── components/ +│ ├── common/ +│ │ ├── error-display.tsx # ErrorDisplay component to reuse +│ │ └── loading-state.tsx # LoadingState component to reuse +│ └── ui/ # 26 shadcn/ui components (all needed are installed) +│ ├── alert-dialog.tsx +│ ├── badge.tsx +│ ├── button.tsx +│ ├── calendar.tsx +│ ├── card.tsx +│ ├── checkbox.tsx +│ ├── collapsible.tsx +│ ├── dialog.tsx +│ ├── input.tsx +│ ├── popover.tsx +│ ├── progress.tsx +│ ├── select.tsx +│ ├── skeleton.tsx +│ ├── sonner.tsx +│ └── tabs.tsx +├── 
hooks/ +│ ├── index.ts # Export all hooks - ADD use-seeder export +│ ├── use-runs.ts # Pattern: useQuery/useMutation with cache invalidation +│ └── use-rag-sources.ts # Pattern: mutations with refetch +├── lib/ +│ └── api.ts # api function +├── pages/ +│ └── admin.tsx # ADD SeederPanel to Tabs +└── types/ + └── api.ts # ADD seeder types +``` + +### Desired Codebase Tree (New/Modified Files) + +```bash +frontend/src/ +├── hooks/ +│ ├── index.ts # MODIFY: Add "export * from './use-seeder'" +│ └── use-seeder.ts # CREATE: TanStack Query hooks for seeder API +├── pages/ +│ └── admin.tsx # MODIFY: Add SeederPanel tab +└── types/ + └── api.ts # MODIFY: Add Seeder* type interfaces +``` + +### Known Gotchas & Library Quirks + +```typescript +// CRITICAL: TanStack Query v5 patterns +// useMutation uses isPending (not isLoading) +const mutation = useMutation({ mutationFn: ... }) +mutation.isPending // ✅ correct in v5 +mutation.isLoading // ❌ removed in v5 (renamed to isPending) + +// CRITICAL: API client returns Promise<T>, not { data: T } +const data = await api('/seeder/status') // ✅ +// data is directly the response, not wrapped + +// CRITICAL: DELETE with body requires special handling +// Our api() function supports body in DELETE: +api('/seeder/data', { + method: 'DELETE', + body: { scope: 'all', dry_run: false } +}) + +// CRITICAL: Date handling for API +// Backend expects date format: YYYY-MM-DD (ISO date string) +// Use format(date, 'yyyy-MM-dd') from date-fns + +// CRITICAL: POST returns 201 for /generate and /append +// The api() function handles this transparently + +// CRITICAL: void queryClient.invalidateQueries() +// Always use void prefix to satisfy TypeScript/ESLint for floating promises +``` + +--- + +## Implementation Blueprint + +### Data Models (TypeScript Types) + +Add to `frontend/src/types/api.ts`: + +```typescript +// === Seeder === +export interface SeederStatus { + stores: number + products: number + calendar: number + sales: number + inventory: number + price_history: number + 
/**
 * Response body of `POST /seeder/generate` and `POST /seeder/append`.
 *
 * `records_created` maps a table name (e.g. "sales", "stores") to the number
 * of rows written for that table, mirroring the backend's `dict[str, int]`.
 */
export interface GenerateResult {
  success: boolean
  records_created: Record<string, number>
  duration_seconds: number
  message: string
  seed: number
}

/**
 * Response body of `DELETE /seeder/data`.
 *
 * `records_deleted` maps a table name to the number of rows removed.
 * `dry_run` echoes whether the request was a preview only.
 */
export interface DeleteResult {
  success: boolean
  records_deleted: Record<string, number>
  message: string
  dry_run: boolean
}
// frontend/src/hooks/use-seeder.ts
import { useQuery, useMutation, useQueryClient } from '@tanstack/react-query'
import type { QueryClient } from '@tanstack/react-query'
import { api } from '@/lib/api'
import type {
  SeederStatus,
  ScenarioInfo,
  GenerateParams,
  GenerateResult,
  AppendParams,
  DeleteParams,
  DeleteResult,
  VerifyResult,
} from '@/types/api'

// NOTE(review): `api` is assumed to be generic over the response type
// (`api<T>(path, options): Promise<T>`) — confirm against frontend/src/lib/api.ts.

/**
 * Marks every query that depends on seeded tables as stale.
 *
 * Called after any mutation that changes row counts: the seeder status card
 * plus the analytics and KPI queries.
 */
function invalidateSeededData(queryClient: QueryClient): void {
  // `void` marks these as intentional fire-and-forget promises.
  void queryClient.invalidateQueries({ queryKey: ['seeder', 'status'] })
  void queryClient.invalidateQueries({ queryKey: ['analytics'] })
  void queryClient.invalidateQueries({ queryKey: ['kpis'] })
}

/** Query: current database status (row counts, date range). */
export function useSeederStatus() {
  return useQuery({
    queryKey: ['seeder', 'status'],
    queryFn: () => api<SeederStatus>('/seeder/status'),
    // Refresh every 30 seconds to catch external changes
    refetchInterval: 30000,
  })
}

/** Query: available scenario presets (cached indefinitely - they don't change). */
export function useSeederScenarios() {
  return useQuery({
    queryKey: ['seeder', 'scenarios'],
    queryFn: () => api<ScenarioInfo[]>('/seeder/scenarios'),
    staleTime: Infinity,
  })
}

/** Mutation: generate a new dataset, then refresh dependent queries. */
export function useGenerateData() {
  const queryClient = useQueryClient()
  return useMutation({
    mutationFn: (params: GenerateParams) =>
      api<GenerateResult>('/seeder/generate', { method: 'POST', body: params }),
    onSuccess: () => invalidateSeededData(queryClient),
  })
}

/** Mutation: append data for a new date range, then refresh dependent queries. */
export function useAppendData() {
  const queryClient = useQueryClient()
  return useMutation({
    mutationFn: (params: AppendParams) =>
      api<GenerateResult>('/seeder/append', { method: 'POST', body: params }),
    onSuccess: () => invalidateSeededData(queryClient),
  })
}

/** Mutation: delete data for the given scope, then refresh dependent queries. */
export function useDeleteData() {
  const queryClient = useQueryClient()
  return useMutation({
    mutationFn: (params: DeleteParams) =>
      api<DeleteResult>('/seeder/data', { method: 'DELETE', body: params }),
    onSuccess: () => invalidateSeededData(queryClient),
  })
}

/** Mutation: run integrity checks. Read-only on the server, so nothing is invalidated. */
export function useVerifyData() {
  return useMutation({
    mutationFn: () => api<VerifyResult>('/seeder/verify', { method: 'POST' }),
  })
}
+ +#### 4a: Add Imports (at top of file) + +```typescript +// ADD to existing imports +import { format } from 'date-fns' +import { + Flame, // Generate icon + Plus, // Append icon + CheckCircle, // Verify icon + RefreshCw, // Refresh icon + Store, // Stats icon + Package, // Stats icon + Calendar, // Stats icon + TrendingUp, // Stats icon (sales) + Warehouse, // Stats icon (inventory) + History, // Stats icon (price history) + Percent, // Stats icon (promotions) +} from 'lucide-react' +import { + useSeederStatus, + useSeederScenarios, + useGenerateData, + useDeleteData, + useVerifyData, +} from '@/hooks/use-seeder' +import { Badge } from '@/components/ui/badge' +import { Skeleton } from '@/components/ui/skeleton' +import { Progress } from '@/components/ui/progress' +import { toast } from 'sonner' +import type { + ScenarioInfo, + VerifyCheck, + VerifyCheckStatus, +} from '@/types/api' +``` + +#### 4b: Add Tab to AdminPage Component + +```tsx +// MODIFY the Tabs component - ADD new TabsTrigger and TabsContent + + + + + RAG Sources + + + + Deployment Aliases + + {/* ADD THIS NEW TAB */} + + + Data Seeder + + + + + + + + + + + + {/* ADD THIS NEW TAB CONTENT */} + + + + +``` + +#### 4c: Create SeederPanel Component (new function in same file) + +```tsx +function SeederPanel() { + const { data: status, isLoading, error, refetch } = useSeederStatus() + const { data: scenarios } = useSeederScenarios() + const generateMutation = useGenerateData() + const deleteMutation = useDeleteData() + const verifyMutation = useVerifyData() + + const [selectedScenario, setSelectedScenario] = useState('retail_standard') + const [deleteDialogOpen, setDeleteDialogOpen] = useState(false) + const [verifyResult, setVerifyResult] = useState<{ + passed: boolean + checks: VerifyCheck[] + passed_count: number + warning_count: number + failed_count: number + } | null>(null) + + const handleGenerate = async () => { + try { + const result = await generateMutation.mutateAsync({ + scenario: 
selectedScenario, + }) + toast.success(`Generated ${result.records_created.sales?.toLocaleString() ?? 0} sales records in ${result.duration_seconds.toFixed(1)}s`) + } catch (err) { + toast.error(err instanceof Error ? err.message : 'Generation failed') + } + } + + const handleDelete = async () => { + try { + const result = await deleteMutation.mutateAsync({ scope: 'all' }) + setDeleteDialogOpen(false) + toast.success(result.message) + } catch (err) { + toast.error(err instanceof Error ? err.message : 'Delete failed') + } + } + + const handleVerify = async () => { + try { + const result = await verifyMutation.mutateAsync() + setVerifyResult(result) + if (result.passed) { + toast.success('All integrity checks passed') + } else { + toast.warning(`${result.failed_count} checks failed`) + } + } catch (err) { + toast.error(err instanceof Error ? err.message : 'Verification failed') + } + } + + if (error) { + return + } + + return ( +
+ {/* Status Card */} + + +
+ Current Data Summary + + {status?.date_range_start && status?.date_range_end + ? `${status.date_range_start} → ${status.date_range_end}` + : 'No data yet'} + +
+ +
+ + {isLoading ? ( +
+ {Array.from({ length: 7 }).map((_, i) => ( + + ))} +
+ ) : ( +
+ + + + + + + +
+ )} +
+
+ + {/* Actions Card */} + + + Quick Actions + Generate, delete, or verify synthetic data + + +
+ + + + + + + + + Delete All Data? + + This will permanently delete all {status?.sales?.toLocaleString() ?? 0} sales records, + {' '}{status?.stores ?? 0} stores, and {status?.products ?? 0} products. + This action cannot be undone. + + + + Cancel + + Delete All Data + + + + + + +
+ +
+ + +
+
+
+ + {/* Verification Results */} + {verifyResult && ( + + + + Verification Results + + {verifyResult.passed ? 'Passed' : 'Failed'} + + + + {verifyResult.passed_count} passed • {verifyResult.warning_count} warnings • {verifyResult.failed_count} failed + + + +
+ {verifyResult.checks.map((check: VerifyCheck, idx: number) => ( +
+
+

{check.name}

+

{check.message}

+
+ + {check.status} + +
+ ))} +
+
+
+ )} +
+ ) +} + +// Helper component for stat cards +function StatCard({ + icon: Icon, + label, + value, +}: { + icon: React.ComponentType<{ className?: string }> + label: string + value: number +}) { + return ( +
+ +

{value.toLocaleString()}

+

{label}

+
// Helper for scenario names
/**
 * Converts a snake_case scenario identifier into a Title Case label,
 * e.g. `retail_standard` becomes `Retail Standard`.
 *
 * Empty segments produced by leading, trailing or doubled underscores are
 * dropped so the label never contains stray or repeated spaces.
 *
 * @param name - Scenario identifier as returned by the seeder API.
 * @returns The human-readable label; an empty string for empty input.
 */
function formatScenarioLabel(name: string): string {
  return name
    .split('_')
    .filter((word) => word.length > 0)
    .map((word) => word.charAt(0).toUpperCase() + word.slice(1))
    .join(' ')
}
Verify counts reset to 0 +``` + +### Level 4: API Verification + +```bash +# Test endpoints directly +curl http://localhost:8123/seeder/status | jq +curl http://localhost:8123/seeder/scenarios | jq + +# Expected: JSON responses matching SeederStatus and ScenarioInfo[] types +``` + +--- + +## Final Validation Checklist + +- [ ] `pnpm tsc --noEmit` passes (no TypeScript errors) +- [ ] `pnpm lint` passes (no ESLint errors) +- [ ] `pnpm build` succeeds +- [ ] Data Seeder tab appears in Admin panel +- [ ] Status cards display row counts correctly +- [ ] Scenario dropdown shows 6 presets with descriptions +- [ ] Generate button works with loading state +- [ ] Delete shows confirmation dialog +- [ ] Verify displays check results with badges +- [ ] Toast notifications appear on success/error +- [ ] No console errors in browser DevTools + +--- + +## Anti-Patterns to Avoid + +- ❌ Don't use `any` type - all types are defined +- ❌ Don't use `isLoading` on mutations - use `isPending` (TanStack Query v5) +- ❌ Don't forget `void` prefix on `queryClient.invalidateQueries()` +- ❌ Don't create separate component files - follow existing admin.tsx pattern +- ❌ Don't add new routes - SeederPanel goes inside existing admin page +- ❌ Don't skip AlertDialog for delete - always confirm destructive actions +- ❌ Don't forget toast imports from 'sonner' + +--- + +## Cross-Module Integration + +| Direction | Module | Integration Point | +|-----------|--------|-------------------| +| **← Backend** | Phase 12 | Uses existing `/seeder/*` REST endpoints | +| **← Admin** | Phase 10 | Extends existing admin.tsx with new tab | +| **→ Analytics** | Phase 7 | Invalidates KPI/analytics cache after data changes | +| **← Types** | Common | Adds types to shared api.ts | + +--- + +## Files Changed Summary + +| File | Action | Lines Changed | +|------|--------|---------------| +| `frontend/src/types/api.ts` | MODIFY | +50 lines (seeder types) | +| `frontend/src/hooks/use-seeder.ts` | CREATE | ~70 lines | +| 
`frontend/src/hooks/index.ts` | MODIFY | +1 line (export) | +| `frontend/src/pages/admin.tsx` | MODIFY | +200 lines (SeederPanel) | + +**Total**: ~320 lines of new/modified code + +--- + +*PRP-13: The Forge UI - Visual control for synthetic data generation* diff --git a/README.md b/README.md index 45b02926..1dbe35ed 100644 --- a/README.md +++ b/README.md @@ -11,6 +11,7 @@ Portfolio-grade end-to-end retail demand forecasting system. - **Dashboard**: React 19 + Vite + Tailwind CSS 4 + shadcn/ui for data exploration and model management - **RAG Knowledge Base**: Postgres pgvector embeddings + evidence-grounded answers with citations - **Agentic Layer**: PydanticAI agents for autonomous experimentation and evidence-grounded Q&A with human-in-the-loop approval +- **Data Seeder (The Forge)**: Reproducible synthetic data generator with realistic time-series patterns, scenario presets, and retail effects ## Quick Start @@ -154,7 +155,9 @@ pnpm preview ``` app/ # FastAPI backend ├── core/ # Config, database, logging, middleware, exceptions -├── shared/ # Pagination, timestamps, error schemas +├── shared/ +│ ├── seeder/ # The Forge - randomized database seeder +│ └── ... # Pagination, timestamps, error schemas ├── features/ │ ├── data_platform/ # Store, product, calendar, sales tables │ ├── ingest/ # Batch upsert endpoints for sales data @@ -187,6 +190,7 @@ examples/ ├── queries/ # Example SQL queries ├── models/ # Baseline model examples (naive, seasonal_naive, moving_average) ├── backtest/ # Backtesting examples (run_backtest, inspect_splits, metrics_demo) +├── seed/ # Data seeder configs and examples (YAML scenarios) ├── compute_features_demo.py # Feature engineering demo └── registry_demo.py # Model registry workflow demo scripts/ # Utility scripts @@ -640,6 +644,52 @@ AGENT_APPROVAL_TIMEOUT_MINUTES=60 AGENT_ENABLE_STREAMING=true ``` +### Data Seeder (The Forge) + +Generate reproducible synthetic test data with realistic time-series patterns. 
+ +**CLI Commands:** +```bash +# Generate complete dataset +uv run python scripts/seed_random.py --full-new --seed 42 --confirm + +# Delete all data +uv run python scripts/seed_random.py --delete --confirm + +# Append data for new date range +uv run python scripts/seed_random.py --append --start-date 2025-01-01 --end-date 2025-03-31 + +# Run pre-built scenario +uv run python scripts/seed_random.py --full-new --scenario holiday_rush --confirm + +# Show current data counts +uv run python scripts/seed_random.py --status + +# Verify data integrity +uv run python scripts/seed_random.py --verify +``` + +**Scenario Presets:** + +| Scenario | Description | +|----------|-------------| +| `retail_standard` | Normal retail patterns with mild seasonality | +| `holiday_rush` | Q4 surge with Black Friday/Christmas peaks | +| `high_variance` | Noisy data with anomalies for robustness testing | +| `stockout_heavy` | Frequent stockouts (25% probability) | +| `new_launches` | 100 products with launch ramp patterns | +| `sparse` | 50% missing combinations, random gaps | + +**Features:** +- Deterministic generation with configurable seeds for reproducibility +- Realistic time-series patterns (trend, weekly/monthly seasonality, noise, anomalies) +- Retail effects (promotions, stockouts, price elasticity) +- YAML configuration support for custom scenarios +- Safe deletion with scope control (all/facts/dimensions) +- Dry-run mode for previewing changes + +See [examples/seed/README.md](examples/seed/README.md) for detailed configuration options. + ### Error Responses (RFC 7807) All error responses follow RFC 7807 Problem Details format with `Content-Type: application/problem+json`: diff --git a/app/features/seeder/__init__.py b/app/features/seeder/__init__.py new file mode 100644 index 00000000..992331b2 --- /dev/null +++ b/app/features/seeder/__init__.py @@ -0,0 +1,9 @@ +"""Seeder feature module for managing synthetic data generation via REST API. 
def _check_seeder_enabled() -> None:
    """Reject the request when seeding is not permitted in this environment.

    Seeding is permitted when the ``seeder_allow_production`` setting is
    truthy, or when ``app_env`` is anything other than ``"production"``.

    Raises:
        HTTPException: 403 Forbidden when running in production without the
            explicit opt-in.
    """
    cfg = get_settings()

    # Explicit opt-in wins regardless of environment.
    if cfg.seeder_allow_production:
        return

    # Every non-production environment may seed freely.
    if cfg.app_env != "production":
        return

    raise HTTPException(
        status_code=status.HTTP_403_FORBIDDEN,
        detail="Seeder operations are not allowed in production environment. "
        "Set SEEDER_ALLOW_PRODUCTION=true to enable (not recommended).",
    )
@router.post(
    "/generate",
    response_model=schemas.GenerateResult,
    status_code=status.HTTP_201_CREATED,
    summary="Generate new dataset",
    description="Generate a complete synthetic dataset. Requires confirmation in non-dev environments.",
)
async def generate_data(
    params: schemas.GenerateParams,
    db: AsyncSession = Depends(get_db),
) -> schemas.GenerateResult:
    """Create a fresh synthetic dataset by delegating to the seeder service.

    Args:
        params: Generation parameters including scenario and seed.
        db: Async database session injected by FastAPI.

    Returns:
        GenerateResult with counts of created records.

    Raises:
        HTTPException: 403 when the environment guard blocks seeding, 400 when
            the service raises ``ValueError``, 500 for any other failure.
    """
    # The guard runs outside the try block so its 403 is not re-wrapped below.
    _check_seeder_enabled()

    try:
        outcome = await service.generate_data(db, params)
    except ValueError as e:
        # The service signals a rejected request with ValueError.
        failure_fields = {"error": str(e), "error_type": type(e).__name__}
        logger.error("seeder.generate.failed", **failure_fields)
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail=str(e),
        ) from e
    except Exception as e:
        # Unexpected failure: log with traceback and surface a 500.
        failure_fields = {"error": str(e), "error_type": type(e).__name__}
        logger.error("seeder.generate.failed", exc_info=True, **failure_fields)
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=f"Generation failed: {e}",
        ) from e

    return outcome
+ """ + _check_seeder_enabled() + + try: + return await service.append_data(db, params) + except ValueError as e: + logger.error( + "seeder.append.failed", + error=str(e), + error_type=type(e).__name__, + ) + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail=str(e), + ) from e + except Exception as e: + logger.error( + "seeder.append.failed", + error=str(e), + error_type=type(e).__name__, + exc_info=True, + ) + raise HTTPException( + status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, + detail=f"Append failed: {e}", + ) from e + + +@router.delete( + "/data", + response_model=schemas.DeleteResult, + summary="Delete data", + description="Delete data with specified scope. Supports dry_run preview.", +) +async def delete_data( + params: schemas.DeleteParams, + db: AsyncSession = Depends(get_db), +) -> schemas.DeleteResult: + """Delete data with specified scope. + + Scopes: + - `all`: Delete everything (dimensions + facts) + - `facts`: Delete only fact tables (sales, inventory, prices, promotions) + - `dimensions`: Delete dimension tables (also deletes facts due to FK constraints) + + Use `dry_run=true` to preview what would be deleted without executing. + + Args: + params: Delete parameters with scope and dry_run flag. + + Returns: + DeleteResult with counts of deleted records. + + Raises: + HTTPException: If operation is blocked or fails. 
+ """ + _check_seeder_enabled() + + try: + return await service.delete_data(db, params) + except ValueError as e: + logger.error( + "seeder.delete.failed", + error=str(e), + error_type=type(e).__name__, + ) + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail=str(e), + ) from e + except Exception as e: + logger.error( + "seeder.delete.failed", + error=str(e), + error_type=type(e).__name__, + exc_info=True, + ) + raise HTTPException( + status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, + detail=f"Delete failed: {e}", + ) from e + + +@router.post( + "/verify", + response_model=schemas.VerifyResult, + summary="Verify data integrity", + description="Run data integrity checks on current database content.", +) +async def verify_data( + db: AsyncSession = Depends(get_db), +) -> schemas.VerifyResult: + """Run data integrity verification. + + Checks performed: + - Foreign key integrity (sales reference valid stores/products/dates) + - Non-negative constraints (quantities, prices >= 0) + - Calendar date coverage (no gaps in date sequence) + - Data presence (sales data exists) + - Dimension completeness (stores, products, calendar populated) + + Returns: + VerifyResult with pass/fail status for each check. 
+ """ + try: + return await service.verify_data(db) + except Exception as e: + logger.error( + "seeder.verify.failed", + error=str(e), + error_type=type(e).__name__, + exc_info=True, + ) + raise HTTPException( + status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, + detail=f"Verification failed: {e}", + ) from e diff --git a/app/features/seeder/schemas.py b/app/features/seeder/schemas.py new file mode 100644 index 00000000..6c925114 --- /dev/null +++ b/app/features/seeder/schemas.py @@ -0,0 +1,158 @@ +"""Pydantic schemas for the seeder feature.""" + +from datetime import date, datetime +from typing import Literal + +from pydantic import BaseModel, Field + + +class SeederStatus(BaseModel): + """Current database state with row counts and metadata.""" + + stores: int = Field(description="Number of store records") + products: int = Field(description="Number of product records") + calendar: int = Field(description="Number of calendar day records") + sales: int = Field(description="Number of sales_daily records") + inventory: int = Field(description="Number of inventory_snapshot_daily records") + price_history: int = Field(description="Number of price_history records") + promotions: int = Field(description="Number of promotion records") + date_range_start: date | None = Field( + default=None, + description="Earliest date in sales_daily", + ) + date_range_end: date | None = Field( + default=None, + description="Latest date in sales_daily", + ) + last_updated: datetime | None = Field( + default=None, + description="Timestamp of last data modification", + ) + + +class ScenarioInfo(BaseModel): + """Information about a scenario preset.""" + + name: str = Field(description="Scenario preset name") + description: str = Field(description="Human-readable description") + stores: int = Field(description="Default number of stores") + products: int = Field(description="Default number of products") + start_date: date = Field(description="Default start date") + end_date: date = 
Field(description="Default end date") + + +class GenerateParams(BaseModel): + """Parameters for generating a new dataset.""" + + scenario: str = Field( + default="retail_standard", + description="Scenario preset name", + ) + seed: int = Field( + default=42, + ge=0, + description="Random seed for reproducibility", + ) + stores: int = Field( + default=10, + ge=1, + le=100, + description="Number of stores to generate", + ) + products: int = Field( + default=50, + ge=1, + le=500, + description="Number of products to generate", + ) + start_date: date = Field( + default_factory=lambda: date(2024, 1, 1), + description="Start of date range", + ) + end_date: date = Field( + default_factory=lambda: date(2024, 12, 31), + description="End of date range", + ) + sparsity: float = Field( + default=0.0, + ge=0.0, + le=1.0, + description="Fraction of missing store/product combinations", + ) + dry_run: bool = Field( + default=False, + description="Preview only, do not execute", + ) + + +class AppendParams(BaseModel): + """Parameters for appending data to existing dataset.""" + + start_date: date = Field(description="Start of new date range") + end_date: date = Field(description="End of new date range") + seed: int = Field( + default=43, + ge=0, + description="Random seed for reproducibility", + ) + + +class DeleteParams(BaseModel): + """Parameters for deleting data.""" + + scope: Literal["all", "facts", "dimensions"] = Field( + default="all", + description="What to delete: all, facts (sales/inventory/etc), or dimensions (store/product/calendar)", + ) + dry_run: bool = Field( + default=False, + description="Preview what would be deleted without executing", + ) + + +class GenerateResult(BaseModel): + """Result of a generation or append operation.""" + + success: bool = Field(description="Whether the operation succeeded") + records_created: dict[str, int] = Field( + description="Count of records created per table", + ) + duration_seconds: float = Field(description="Time taken in 
seconds") + message: str = Field(description="Human-readable result message") + seed: int = Field(description="Random seed used") + + +class DeleteResult(BaseModel): + """Result of a delete operation.""" + + success: bool = Field(description="Whether the operation succeeded") + records_deleted: dict[str, int] = Field( + description="Count of records deleted per table", + ) + message: str = Field(description="Human-readable result message") + dry_run: bool = Field(description="Whether this was a preview only") + + +class VerifyCheck(BaseModel): + """Single verification check result.""" + + name: str = Field(description="Check name") + status: Literal["passed", "warning", "failed"] = Field( + description="Check status", + ) + message: str = Field(description="Human-readable result") + details: list[str] | None = Field( + default=None, + description="Additional details if applicable", + ) + + +class VerifyResult(BaseModel): + """Result of data verification.""" + + passed: bool = Field(description="Whether all critical checks passed") + checks: list[VerifyCheck] = Field(description="Individual check results") + total_checks: int = Field(description="Number of checks performed") + passed_count: int = Field(description="Number of passed checks") + warning_count: int = Field(description="Number of warnings") + failed_count: int = Field(description="Number of failures") diff --git a/app/features/seeder/service.py b/app/features/seeder/service.py new file mode 100644 index 00000000..a7696aec --- /dev/null +++ b/app/features/seeder/service.py @@ -0,0 +1,539 @@ +"""Service layer for seeder operations.""" + +from __future__ import annotations + +import time +from datetime import date, datetime + +from sqlalchemy import func, select +from sqlalchemy.ext.asyncio import AsyncSession + +from app.core.config import get_settings +from app.core.logging import get_logger +from app.features.data_platform.models import ( + Calendar, + InventorySnapshotDaily, + PriceHistory, + Product, + 
def _build_config_from_params(params: schemas.GenerateParams) -> SeederConfig:
    """Build SeederConfig from API parameters.

    When ``params.scenario`` names a known preset, the preset is the baseline
    and only fields the client explicitly sent override it. Fields left at
    their schema defaults no longer clobber the preset, so a request carrying
    only ``scenario`` keeps that preset's own dimensions and date range.

    When the scenario name is unknown, a default config is built from the
    request values, schema defaults included.

    Args:
        params: Generation parameters from API request.

    Returns:
        Configured SeederConfig instance, with batch size and progress
        reporting taken from application settings.
    """
    preset = _get_scenario_preset(params.scenario)

    if preset:
        # Start from scenario preset and override with explicit params
        config = SeederConfig.from_scenario(preset, seed=params.seed)

        # Pydantic records which fields the client actually supplied; a
        # defaulted field is absent from this set.
        explicit = params.model_fields_set

        if "stores" in explicit or "products" in explicit:
            # NOTE(review): assumes DimensionConfig exposes `stores` and
            # `products` attributes matching its constructor keywords - confirm.
            config.dimensions = DimensionConfig(
                stores=params.stores if "stores" in explicit else config.dimensions.stores,
                products=(
                    params.products if "products" in explicit else config.dimensions.products
                ),
            )
        if "start_date" in explicit:
            config.start_date = params.start_date
        if "end_date" in explicit:
            config.end_date = params.end_date
        if params.sparsity > 0:
            config.sparsity = SparsityConfig(missing_combinations_pct=params.sparsity)
    else:
        # Use default config with provided params
        config = SeederConfig(
            seed=params.seed,
            start_date=params.start_date,
            end_date=params.end_date,
            dimensions=DimensionConfig(
                stores=params.stores,
                products=params.products,
            ),
            sparsity=SparsityConfig(missing_combinations_pct=params.sparsity),
        )

    # Operational knobs always come from application settings, not the request.
    settings = get_settings()
    config.batch_size = settings.seeder_batch_size
    config.enable_progress = settings.seeder_enable_progress

    return config
+ + Returns: + SeederStatus with current counts and metadata. + """ + logger.info("seeder.status.fetching") + + # Fetch counts for all tables + tables = [ + ("stores", Store), + ("products", Product), + ("calendar", Calendar), + ("sales", SalesDaily), + ("inventory", InventorySnapshotDaily), + ("price_history", PriceHistory), + ("promotions", Promotion), + ] + + counts: dict[str, int] = {} + for name, model in tables: + result = await db.execute(select(func.count()).select_from(model)) + counts[name] = result.scalar() or 0 + + # Get date range from sales_daily + date_range_start: date | None = None + date_range_end: date | None = None + + if counts["sales"] > 0: + result = await db.execute(select(func.min(SalesDaily.date), func.max(SalesDaily.date))) + row = result.fetchone() + if row: + date_range_start = row[0] + date_range_end = row[1] + + # Get last update time from most recent sale + last_updated: datetime | None = None + if counts["sales"] > 0: + result = await db.execute(select(func.max(SalesDaily.updated_at))) + scalar_result = result.scalar() + if isinstance(scalar_result, datetime): + last_updated = scalar_result + + status = schemas.SeederStatus( + stores=counts["stores"], + products=counts["products"], + calendar=counts["calendar"], + sales=counts["sales"], + inventory=counts["inventory"], + price_history=counts["price_history"], + promotions=counts["promotions"], + date_range_start=date_range_start, + date_range_end=date_range_end, + last_updated=last_updated, + ) + + logger.info( + "seeder.status.fetched", + total_records=sum(counts.values()), + has_data=counts["sales"] > 0, + ) + + return status + + +def list_scenarios() -> list[schemas.ScenarioInfo]: + """List available scenario presets. + + Returns: + List of ScenarioInfo with preset details. 
def list_scenarios() -> list[schemas.ScenarioInfo]:
    """Describe the scenario presets offered by the seeder API.

    Returns:
        List of ScenarioInfo, one entry per preset, in a fixed order.
    """
    full_year = (date(2024, 1, 1), date(2024, 12, 31))
    fourth_quarter = (date(2024, 10, 1), date(2024, 12, 31))

    # (name, description, products, (start_date, end_date)); every preset uses 10 stores
    catalog = [
        (
            "retail_standard",
            "Normal retail patterns with mild seasonality and linear trend",
            50,
            full_year,
        ),
        (
            "holiday_rush",
            "Q4 surge with Black Friday/Christmas peaks and high stockout risk",
            50,
            fourth_quarter,
        ),
        (
            "high_variance",
            "Noisy, unpredictable data with frequent anomalies for robustness testing",
            50,
            full_year,
        ),
        (
            "stockout_heavy",
            "Frequent stockouts (25% probability) for inventory modeling",
            50,
            full_year,
        ),
        (
            "new_launches",
            "100 products with gradual launch ramp patterns",
            100,
            full_year,
        ),
        (
            "sparse",
            "50% missing combinations and random date gaps for gap handling",
            50,
            full_year,
        ),
    ]

    scenarios = [
        schemas.ScenarioInfo(
            name=name,
            description=description,
            stores=10,
            products=products,
            start_date=period[0],
            end_date=period[1],
        )
        for name, description, products, period in catalog
    ]

    logger.info("seeder.scenarios.listed", count=len(scenarios))
    return scenarios
async def generate_data(
    db: AsyncSession,
    params: schemas.GenerateParams,
) -> schemas.GenerateResult:
    """Create a fresh synthetic dataset, or preview one when ``dry_run`` is set.

    Args:
        db: Async database session.
        params: Generation parameters.

    Returns:
        GenerateResult with per-table counts, elapsed seconds and the seed.
        For a dry run only stores, products and calendar days are derived
        from the parameters; the remaining counts are reported as 0.

    Raises:
        ValueError: If running in production without ``seeder_allow_production``.
    """
    settings = get_settings()

    # Production guard: refuse unless the settings explicitly allow it
    in_production = settings.app_env == "production"
    if in_production and not settings.seeder_allow_production:
        logger.warning("seeder.generate.blocked", reason="production_guard")
        raise ValueError("Seeder operations are not allowed in production environment")

    if params.dry_run:
        logger.info(
            "seeder.generate.dry_run",
            scenario=params.scenario,
            seed=params.seed,
            stores=params.stores,
            products=params.products,
        )
        # Inclusive number of days in the requested range
        calendar_days = (params.end_date - params.start_date).days + 1
        preview_counts = {
            "stores": params.stores,
            "products": params.products,
            "calendar": calendar_days,
            "sales": 0,  # Would be calculated
            "price_history": 0,
            "promotions": 0,
            "inventory": 0,
        }
        return schemas.GenerateResult(
            success=True,
            records_created=preview_counts,
            duration_seconds=0.0,
            message=f"Dry run: would generate data with scenario '{params.scenario}'",
            seed=params.seed,
        )

    logger.info(
        "seeder.generate.started",
        scenario=params.scenario,
        seed=params.seed,
        stores=params.stores,
        products=params.products,
        start_date=str(params.start_date),
        end_date=str(params.end_date),
    )

    started_at = time.perf_counter()
    seeder = DataSeeder(_build_config_from_params(params))
    outcome = await seeder.generate_full(db)
    elapsed = round(time.perf_counter() - started_at, 2)

    logger.info(
        "seeder.generate.completed",
        seed=params.seed,
        duration_seconds=elapsed,
        total_records=outcome.sales_count + outcome.inventory_count,
    )

    created = {
        "stores": outcome.stores_count,
        "products": outcome.products_count,
        "calendar": outcome.calendar_days,
        "sales": outcome.sales_count,
        "price_history": outcome.price_history_count,
        "promotions": outcome.promotions_count,
        "inventory": outcome.inventory_count,
    }
    return schemas.GenerateResult(
        success=True,
        records_created=created,
        duration_seconds=elapsed,
        message=f"Successfully generated {outcome.sales_count:,} sales records with seed {params.seed}",
        seed=params.seed,
    )
async def append_data(
    db: AsyncSession,
    params: schemas.AppendParams,
) -> schemas.GenerateResult:
    """Extend the existing dataset with data for an additional date range.

    Args:
        db: Async database session.
        params: Append parameters (seed and the date range to add).

    Returns:
        GenerateResult with per-table counts, elapsed seconds and the seed.

    Raises:
        ValueError: If running in production without ``seeder_allow_production``.
            NOTE(review): the previous docstring also listed "no existing
            dimensions found"; that would come from the seeder itself and is
            not visible in this function.
    """
    settings = get_settings()

    # Production guard: refuse unless the settings explicitly allow it
    in_production = settings.app_env == "production"
    if in_production and not settings.seeder_allow_production:
        logger.warning("seeder.append.blocked", reason="production_guard")
        raise ValueError("Seeder operations are not allowed in production environment")

    logger.info(
        "seeder.append.started",
        seed=params.seed,
        start_date=str(params.start_date),
        end_date=str(params.end_date),
    )

    started_at = time.perf_counter()
    seeder = DataSeeder(
        SeederConfig(
            seed=params.seed,
            start_date=params.start_date,
            end_date=params.end_date,
            batch_size=settings.seeder_batch_size,
        )
    )
    outcome = await seeder.append_data(db, params.start_date, params.end_date)
    elapsed = round(time.perf_counter() - started_at, 2)

    logger.info(
        "seeder.append.completed",
        seed=params.seed,
        duration_seconds=elapsed,
        sales_appended=outcome.sales_count,
    )

    created = {
        "stores": outcome.stores_count,
        "products": outcome.products_count,
        "calendar": outcome.calendar_days,
        "sales": outcome.sales_count,
        "price_history": outcome.price_history_count,
        "promotions": outcome.promotions_count,
        "inventory": outcome.inventory_count,
    }
    return schemas.GenerateResult(
        success=True,
        records_created=created,
        duration_seconds=elapsed,
        message=f"Appended {outcome.sales_count:,} sales records for date range {params.start_date} to {params.end_date}",
        seed=params.seed,
    )
async def delete_data(
    db: AsyncSession,
    params: schemas.DeleteParams,
) -> schemas.DeleteResult:
    """Remove seeded data for the requested scope, or preview the removal.

    Args:
        db: Async database session.
        params: Delete parameters (scope and dry-run flag).

    Returns:
        DeleteResult with the per-table counts reported by the seeder and a
        human-readable summary message.

    Raises:
        ValueError: If running in production without ``seeder_allow_production``.
    """
    settings = get_settings()

    # Production guard: refuse unless the settings explicitly allow it
    in_production = settings.app_env == "production"
    if in_production and not settings.seeder_allow_production:
        logger.warning("seeder.delete.blocked", reason="production_guard")
        raise ValueError("Seeder operations are not allowed in production environment")

    logger.info(
        "seeder.delete.started",
        scope=params.scope,
        dry_run=params.dry_run,
    )

    seeder = DataSeeder(SeederConfig(batch_size=settings.seeder_batch_size))
    counts = await seeder.delete_data(db, scope=params.scope, dry_run=params.dry_run)
    total_deleted = sum(counts.values())

    message = (
        f"Dry run: would delete {total_deleted:,} records (scope: {params.scope})"
        if params.dry_run
        else f"Deleted {total_deleted:,} records (scope: {params.scope})"
    )

    logger.info(
        "seeder.delete.completed",
        scope=params.scope,
        dry_run=params.dry_run,
        total_deleted=total_deleted,
    )

    return schemas.DeleteResult(
        success=True,
        records_deleted=counts,
        message=message,
        dry_run=params.dry_run,
    )
async def verify_data(db: AsyncSession) -> schemas.VerifyResult:
    """Run data integrity verification.

    The seeder's integrity errors are grouped by keyword into foreign-key,
    non-negative and calendar checks, followed by data-presence and
    dimension-completeness checks based on the current table counts. Errors
    that match none of the keyword groups are reported in an extra failed
    check instead of being dropped, so the overall result cannot pass while
    the seeder reported problems.

    Args:
        db: Async database session.

    Returns:
        VerifyResult with check results. ``passed`` is True when no check has
        status "failed" (warnings do not fail verification).
    """
    logger.info("seeder.verify.started")

    checks: list[schemas.VerifyCheck] = []
    settings = get_settings()

    config = SeederConfig(batch_size=settings.seeder_batch_size)
    seeder = DataSeeder(config)

    # Run basic integrity checks
    errors = await seeder.verify_data_integrity(db)

    # Check 1: Foreign key integrity
    fk_errors = [e for e in errors if "foreign key" in e.lower()]
    checks.append(
        schemas.VerifyCheck(
            name="Foreign Key Integrity",
            status="failed" if fk_errors else "passed",
            message="All foreign key references are valid" if not fk_errors else fk_errors[0],
            details=fk_errors if fk_errors else None,
        )
    )

    # Check 2: Non-negative constraints
    neg_errors = [e for e in errors if "negative" in e.lower()]
    checks.append(
        schemas.VerifyCheck(
            name="Non-Negative Constraints",
            status="failed" if neg_errors else "passed",
            message="All quantities and prices are non-negative"
            if not neg_errors
            else neg_errors[0],
            details=neg_errors if neg_errors else None,
        )
    )

    # Check 3: Calendar coverage (reported as a warning, not a failure)
    calendar_errors = [e for e in errors if "calendar" in e.lower() or "gap" in e.lower()]
    checks.append(
        schemas.VerifyCheck(
            name="Calendar Date Coverage",
            status="warning" if calendar_errors else "passed",
            message="Calendar has gaps in date sequence"
            if calendar_errors
            else "Calendar dates are contiguous",
            details=calendar_errors if calendar_errors else None,
        )
    )

    # Check 4: Data presence
    status = await get_status(db)
    has_data = status.sales > 0
    checks.append(
        schemas.VerifyCheck(
            name="Data Presence",
            status="passed" if has_data else "warning",
            message=f"{status.sales:,} sales records found" if has_data else "No sales data found",
        )
    )

    # Check 5: Dimension completeness
    has_dimensions = status.stores > 0 and status.products > 0 and status.calendar > 0
    checks.append(
        schemas.VerifyCheck(
            name="Dimension Completeness",
            status="passed" if has_dimensions else "warning",
            message="All dimension tables populated"
            if has_dimensions
            else "Missing dimension data",
            details=[
                f"Stores: {status.stores}",
                f"Products: {status.products}",
                f"Calendar: {status.calendar}",
            ],
        )
    )

    # Errors matching none of the keyword groups above must still fail the
    # verification. The check is only added when such errors exist, so a clean
    # run keeps exactly the five checks above in their original positions.
    categorized = [*fk_errors, *neg_errors, *calendar_errors]
    other_errors = [e for e in errors if e not in categorized]
    if other_errors:
        checks.append(
            schemas.VerifyCheck(
                name="Other Integrity Errors",
                status="failed",
                message=other_errors[0],
                details=other_errors,
            )
        )

    # Calculate summary
    passed_count = sum(1 for c in checks if c.status == "passed")
    warning_count = sum(1 for c in checks if c.status == "warning")
    failed_count = sum(1 for c in checks if c.status == "failed")

    # Overall pass if no failures
    passed = failed_count == 0

    logger.info(
        "seeder.verify.completed",
        passed=passed,
        total_checks=len(checks),
        passed_count=passed_count,
        warning_count=warning_count,
        failed_count=failed_count,
    )

    return schemas.VerifyResult(
        passed=passed,
        checks=checks,
        total_checks=len(checks),
        passed_count=passed_count,
        warning_count=warning_count,
        failed_count=failed_count,
    )
@pytest.fixture(autouse=True)
def reset_settings_cache():
    """Clear the cached application settings before and after every test."""
    from app.core import config as app_config

    clear_cache = app_config.get_settings.cache_clear

    clear_cache()
    yield
    clear_cache()
@pytest.fixture
def client():
    """Provide a TestClient bound to the FastAPI application."""
    return TestClient(app)


@pytest.fixture
def mock_settings():
    """Patch the routes' settings lookup so the production guard allows seeding."""
    with patch("app.features.seeder.routes.get_settings") as settings_factory:
        settings_factory.return_value.seeder_allow_production = True
        settings_factory.return_value.app_env = "testing"
        yield settings_factory


@pytest.fixture
def mock_db():
    """Replace the routes' database dependency with an AsyncMock session.

    Patching ``routes.get_db`` alone only swaps the module attribute; a
    dependency captured by ``Depends(...)`` when the routes were defined is
    not affected. The session is therefore also registered through
    ``app.dependency_overrides``, keyed by the real dependency, and removed
    again on teardown so other tests are not influenced.
    NOTE(review): assumes the routes declare the session via
    ``Depends(get_db)`` -- confirm against routes.py.

    Yields:
        The AsyncMock standing in for the database session.
    """
    from app.features.seeder import routes

    mock_session = AsyncMock()
    # Capture the real dependency before patch() replaces the attribute
    real_get_db = routes.get_db

    async def _override_get_db():
        yield mock_session

    with patch("app.features.seeder.routes.get_db") as mock:
        mock.return_value = mock_session
        app.dependency_overrides[real_get_db] = _override_get_db
        try:
            yield mock_session
        finally:
            app.dependency_overrides.pop(real_get_db, None)


class TestGetStatus:
    """Tests for GET /seeder/status endpoint."""

    def test_returns_status(self, client, mock_db):
        """Status endpoint returns the counts produced by the service."""
        mock_status = schemas.SeederStatus(
            stores=10,
            products=50,
            calendar=365,
            sales=182500,
            inventory=182500,
            price_history=1500,
            promotions=500,
            date_range_start=date(2024, 1, 1),
            date_range_end=date(2024, 12, 31),
        )

        with patch("app.features.seeder.routes.service.get_status", return_value=mock_status):
            response = client.get("/seeder/status")

        assert response.status_code == status.HTTP_200_OK
        data = response.json()
        assert data["stores"] == 10
        assert data["products"] == 50
        assert data["sales"] == 182500


class TestListScenarios:
    """Tests for GET /seeder/scenarios endpoint."""

    def test_returns_scenarios(self, client):
        """Scenarios endpoint returns all six presets."""
        response = client.get("/seeder/scenarios")

        assert response.status_code == status.HTTP_200_OK
        data = response.json()
        assert len(data) == 6

        names = [s["name"] for s in data]
        assert "retail_standard" in names
        assert "holiday_rush" in names

    def test_scenario_structure(self, client):
        """Each scenario entry exposes the documented fields."""
        response = client.get("/seeder/scenarios")

        data = response.json()
        scenario = data[0]

        assert "name" in scenario
        assert "description" in scenario
        assert "stores" in scenario
        assert "products" in scenario
        assert "start_date" in scenario
        assert "end_date" in scenario
class TestGenerate:
    """Tests for POST /seeder/generate endpoint."""

    def test_generate_with_defaults(self, client, mock_settings, mock_db):
        """An empty body is accepted and the service result is returned."""
        canned = schemas.GenerateResult(
            success=True,
            records_created={"stores": 10, "products": 50, "sales": 182500},
            duration_seconds=45.5,
            message="Successfully generated data",
            seed=42,
        )

        with patch("app.features.seeder.routes.service.generate_data", return_value=canned):
            response = client.post("/seeder/generate", json={})

        assert response.status_code == status.HTTP_201_CREATED
        body = response.json()
        assert body["success"] is True
        assert body["seed"] == 42

    def test_generate_with_scenario(self, client, mock_settings, mock_db):
        """An explicit scenario with custom parameters is accepted."""
        canned = schemas.GenerateResult(
            success=True,
            records_created={"stores": 10, "sales": 50000},
            duration_seconds=30.0,
            message="Success",
            seed=123,
        )
        request_body = {
            "scenario": "holiday_rush",
            "seed": 123,
            "stores": 20,
            "products": 100,
        }

        with patch("app.features.seeder.routes.service.generate_data", return_value=canned):
            response = client.post("/seeder/generate", json=request_body)

        assert response.status_code == status.HTTP_201_CREATED

    def test_generate_dry_run(self, client, mock_settings, mock_db):
        """The dry_run flag is accepted and the preview result is returned."""
        canned = schemas.GenerateResult(
            success=True,
            records_created={"stores": 10},
            duration_seconds=0.0,
            message="Dry run preview",
            seed=42,
        )

        with patch("app.features.seeder.routes.service.generate_data", return_value=canned):
            response = client.post("/seeder/generate", json={"dry_run": True})

        assert response.status_code == status.HTTP_201_CREATED
        body = response.json()
        assert body["duration_seconds"] == 0.0

    def test_generate_validation_error(self, client, mock_settings):
        """Out-of-range parameters are rejected with 422."""
        # Invalid - stores must be >= 1
        invalid_body = {"stores": 0}

        response = client.post("/seeder/generate", json=invalid_body)

        assert response.status_code == status.HTTP_422_UNPROCESSABLE_ENTITY

    def test_generate_blocked_in_production(self, client, mock_db):
        """Generation is refused with 403 when the production guard applies."""
        with patch("app.features.seeder.routes.get_settings") as settings_factory:
            production = settings_factory.return_value
            production.seeder_allow_production = False
            production.app_env = "production"

            response = client.post("/seeder/generate", json={})

        assert response.status_code == status.HTTP_403_FORBIDDEN
class TestAppend:
    """Tests for POST /seeder/append endpoint."""

    def test_append_data(self, client, mock_settings, mock_db):
        """A valid date range is accepted and the service result is returned."""
        canned = schemas.GenerateResult(
            success=True,
            records_created={"calendar": 90, "sales": 45000},
            duration_seconds=15.0,
            message="Appended data",
            seed=43,
        )
        request_body = {
            "start_date": "2025-01-01",
            "end_date": "2025-03-31",
            "seed": 43,
        }

        with patch("app.features.seeder.routes.service.append_data", return_value=canned):
            response = client.post("/seeder/append", json=request_body)

        assert response.status_code == status.HTTP_201_CREATED
        body = response.json()
        assert body["success"] is True

    def test_append_requires_dates(self, client, mock_settings):
        """Omitting start_date and end_date is rejected with 422."""
        response = client.post("/seeder/append", json={})

        assert response.status_code == status.HTTP_422_UNPROCESSABLE_ENTITY
class TestDelete:
    """Tests for DELETE /seeder/data endpoint."""

    def test_delete_all(self, client, mock_settings, mock_db):
        """Scope 'all' is accepted and the service result is returned."""
        canned = schemas.DeleteResult(
            success=True,
            records_deleted={"sales_daily": 182500, "store": 10},
            message="Deleted all data",
            dry_run=False,
        )

        with patch("app.features.seeder.routes.service.delete_data", return_value=canned):
            response = client.request("DELETE", "/seeder/data", json={"scope": "all"})

        assert response.status_code == status.HTTP_200_OK
        body = response.json()
        assert body["success"] is True

    def test_delete_facts_only(self, client, mock_settings, mock_db):
        """Scope 'facts' is accepted."""
        canned = schemas.DeleteResult(
            success=True,
            records_deleted={"sales_daily": 182500},
            message="Deleted facts",
            dry_run=False,
        )

        with patch("app.features.seeder.routes.service.delete_data", return_value=canned):
            response = client.request("DELETE", "/seeder/data", json={"scope": "facts"})

        assert response.status_code == status.HTTP_200_OK

    def test_delete_dry_run(self, client, mock_settings, mock_db):
        """The dry_run flag is echoed back in the response."""
        canned = schemas.DeleteResult(
            success=True,
            records_deleted={"sales_daily": 182500},
            message="Dry run: would delete",
            dry_run=True,
        )
        request_body = {"scope": "all", "dry_run": True}

        with patch("app.features.seeder.routes.service.delete_data", return_value=canned):
            response = client.request("DELETE", "/seeder/data", json=request_body)

        assert response.status_code == status.HTTP_200_OK
        body = response.json()
        assert body["dry_run"] is True
class TestVerify:
    """Tests for POST /seeder/verify endpoint."""

    def test_verify_success(self, client, mock_db):
        """A passing verification result is returned unchanged."""
        fk_check = schemas.VerifyCheck(
            name="FK Integrity",
            status="passed",
            message="All FKs valid",
        )
        canned = schemas.VerifyResult(
            passed=True,
            checks=[fk_check],
            total_checks=1,
            passed_count=1,
            warning_count=0,
            failed_count=0,
        )

        with patch("app.features.seeder.routes.service.verify_data", return_value=canned):
            response = client.post("/seeder/verify")

        assert response.status_code == status.HTTP_200_OK
        body = response.json()
        assert body["passed"] is True
        assert body["total_checks"] == 1

    def test_verify_with_failures(self, client, mock_db):
        """A failing verification still answers 200 and reports the failure."""
        fk_check = schemas.VerifyCheck(
            name="FK Integrity",
            status="failed",
            message="5 orphaned records",
            details=["Detail 1"],
        )
        canned = schemas.VerifyResult(
            passed=False,
            checks=[fk_check],
            total_checks=1,
            passed_count=0,
            warning_count=0,
            failed_count=1,
        )

        with patch("app.features.seeder.routes.service.verify_data", return_value=canned):
            response = client.post("/seeder/verify")

        assert response.status_code == status.HTTP_200_OK
        body = response.json()
        assert body["passed"] is False
        assert body["failed_count"] == 1
class TestGenerateParams:
    """Tests for GenerateParams schema."""

    def test_default_values(self):
        """A bare GenerateParams carries the documented defaults."""
        defaults = schemas.GenerateParams()

        assert defaults.scenario == "retail_standard"
        assert defaults.seed == 42
        assert defaults.stores == 10
        assert defaults.products == 50
        assert defaults.sparsity == 0.0
        assert defaults.dry_run is False

    def test_custom_values(self):
        """Explicitly supplied values are stored unchanged."""
        first_day = date(2025, 1, 1)
        last_day = date(2025, 6, 30)

        custom = schemas.GenerateParams(
            scenario="holiday_rush",
            seed=123,
            stores=20,
            products=100,
            start_date=first_day,
            end_date=last_day,
            sparsity=0.3,
            dry_run=True,
        )

        assert custom.scenario == "holiday_rush"
        assert custom.seed == 123
        assert custom.stores == 20
        assert custom.products == 100
        assert custom.start_date == date(2025, 1, 1)
        assert custom.end_date == date(2025, 6, 30)
        assert custom.sparsity == 0.3
        assert custom.dry_run is True

    def test_stores_validation_min(self):
        """stores=0 is below the minimum."""
        with pytest.raises(ValueError):
            schemas.GenerateParams(stores=0)

    def test_stores_validation_max(self):
        """stores=101 is above the maximum."""
        with pytest.raises(ValueError):
            schemas.GenerateParams(stores=101)

    def test_products_validation_min(self):
        """products=0 is below the minimum."""
        with pytest.raises(ValueError):
            schemas.GenerateParams(products=0)

    def test_products_validation_max(self):
        """products=501 is above the maximum."""
        with pytest.raises(ValueError):
            schemas.GenerateParams(products=501)

    def test_sparsity_validation_min(self):
        """A negative sparsity is rejected."""
        with pytest.raises(ValueError):
            schemas.GenerateParams(sparsity=-0.1)

    def test_sparsity_validation_max(self):
        """A sparsity of 1.1 is rejected."""
        with pytest.raises(ValueError):
            schemas.GenerateParams(sparsity=1.1)

    def test_seed_validation(self):
        """A negative seed is rejected."""
        with pytest.raises(ValueError):
            schemas.GenerateParams(seed=-1)


class TestAppendParams:
    """Tests for AppendParams schema."""

    def test_required_dates(self):
        """Dates are stored as given and the seed falls back to its default."""
        first_day, last_day = date(2025, 1, 1), date(2025, 3, 31)

        params = schemas.AppendParams(start_date=first_day, end_date=last_day)

        assert params.start_date == date(2025, 1, 1)
        assert params.end_date == date(2025, 3, 31)
        assert params.seed == 43  # default

    def test_custom_seed(self):
        """An explicit seed replaces the default."""
        params = schemas.AppendParams(
            start_date=date(2025, 1, 1),
            end_date=date(2025, 3, 31),
            seed=999,
        )

        assert params.seed == 999
class TestDeleteParams:
    """Tests for DeleteParams schema."""

    def test_default_scope(self):
        """Without arguments the scope is 'all' and dry_run is off."""
        defaults = schemas.DeleteParams()

        assert defaults.scope == "all"
        assert defaults.dry_run is False

    def test_valid_scopes(self):
        """Every supported scope value is accepted."""
        # Literal arguments are spelled out one by one to satisfy the type checker
        assert schemas.DeleteParams(scope="all").scope == "all"
        assert schemas.DeleteParams(scope="facts").scope == "facts"
        assert schemas.DeleteParams(scope="dimensions").scope == "dimensions"

    def test_dry_run_flag(self):
        """dry_run can be combined with an explicit scope."""
        params = schemas.DeleteParams(scope="facts", dry_run=True)

        assert params.scope == "facts"
        assert params.dry_run is True


class TestSeederStatus:
    """Tests for SeederStatus schema."""

    def test_all_fields(self):
        """Counts and date range are stored as given."""
        populated = schemas.SeederStatus(
            stores=10,
            products=50,
            calendar=365,
            sales=182500,
            inventory=182500,
            price_history=1500,
            promotions=500,
            date_range_start=date(2024, 1, 1),
            date_range_end=date(2024, 12, 31),
        )

        assert populated.stores == 10
        assert populated.products == 50
        assert populated.calendar == 365
        assert populated.sales == 182500
        assert populated.inventory == 182500
        assert populated.price_history == 1500
        assert populated.promotions == 500
        assert populated.date_range_start == date(2024, 1, 1)
        assert populated.date_range_end == date(2024, 12, 31)

    def test_optional_dates(self):
        """Date and timestamp fields default to None."""
        zero_counts = {
            "stores": 0,
            "products": 0,
            "calendar": 0,
            "sales": 0,
            "inventory": 0,
            "price_history": 0,
            "promotions": 0,
        }

        empty = schemas.SeederStatus(**zero_counts)

        assert empty.date_range_start is None
        assert empty.date_range_end is None
        assert empty.last_updated is None
class TestScenarioInfo:
    """Tests for ScenarioInfo schema."""

    def test_scenario_fields(self):
        """All scenario fields are stored as given."""
        info = schemas.ScenarioInfo(
            name="holiday_rush",
            description="Q4 surge with peaks",
            stores=10,
            products=50,
            start_date=date(2024, 10, 1),
            end_date=date(2024, 12, 31),
        )

        assert info.name == "holiday_rush"
        assert info.description == "Q4 surge with peaks"
        assert info.stores == 10
        assert info.products == 50
        assert info.start_date == date(2024, 10, 1)
        assert info.end_date == date(2024, 12, 31)


class TestGenerateResult:
    """Tests for GenerateResult schema."""

    def test_result_fields(self):
        """Counts, duration and seed are stored as given."""
        created = {
            "stores": 10,
            "products": 50,
            "sales": 182500,
        }

        outcome = schemas.GenerateResult(
            success=True,
            records_created=created,
            duration_seconds=45.5,
            message="Successfully generated data",
            seed=42,
        )

        assert outcome.success is True
        assert outcome.records_created["stores"] == 10
        assert outcome.duration_seconds == 45.5
        assert outcome.seed == 42


class TestDeleteResult:
    """Tests for DeleteResult schema."""

    def test_delete_result_fields(self):
        """Deleted counts and the dry_run flag are stored as given."""
        outcome = schemas.DeleteResult(
            success=True,
            records_deleted={"sales": 1000, "inventory": 500},
            message="Deleted 1500 records",
            dry_run=False,
        )

        assert outcome.success is True
        assert outcome.records_deleted["sales"] == 1000
        assert outcome.dry_run is False


class TestVerifyResult:
    """Tests for VerifyResult schema."""

    def test_verify_result_fields(self):
        """Checks and summary counters are stored as given."""
        fk_check = schemas.VerifyCheck(
            name="FK Integrity",
            status="passed",
            message="All FKs valid",
        )
        gap_check = schemas.VerifyCheck(
            name="Data Gaps",
            status="warning",
            message="2 gaps found",
            details=["Gap 1", "Gap 2"],
        )

        outcome = schemas.VerifyResult(
            passed=True,
            checks=[fk_check, gap_check],
            total_checks=2,
            passed_count=1,
            warning_count=1,
            failed_count=0,
        )

        assert outcome.passed is True
        assert len(outcome.checks) == 2
        assert outcome.total_checks == 2
        assert outcome.passed_count == 1
        assert outcome.warning_count == 1
        assert outcome.failed_count == 0
class TestListScenarios:
    """Tests for list_scenarios function."""

    def test_returns_all_scenarios(self):
        """All six presets are listed."""
        listed = service.list_scenarios()

        assert len(listed) == 6

        names = [item.name for item in listed]
        for expected in (
            "retail_standard",
            "holiday_rush",
            "high_variance",
            "stockout_heavy",
            "new_launches",
            "sparse",
        ):
            assert expected in names

    def test_scenario_info_structure(self):
        """Every entry carries correctly typed fields."""
        for item in service.list_scenarios():
            assert isinstance(item.name, str)
            assert isinstance(item.description, str)
            assert isinstance(item.stores, int)
            assert isinstance(item.products, int)
            assert isinstance(item.start_date, date)
            assert isinstance(item.end_date, date)

    def test_new_launches_has_more_products(self):
        """The new_launches scenario advertises 100 products."""
        by_name = (item for item in service.list_scenarios() if item.name == "new_launches")

        new_launches = next(by_name)
        assert new_launches.products == 100

    def test_holiday_rush_date_range(self):
        """The holiday_rush scenario spans the fourth quarter."""
        by_name = (item for item in service.list_scenarios() if item.name == "holiday_rush")

        holiday = next(by_name)
        assert holiday.start_date == date(2024, 10, 1)
        assert holiday.end_date == date(2024, 12, 31)


class TestGetScenarioPreset:
    """Tests for _get_scenario_preset helper."""

    def test_valid_scenarios(self):
        """Each known scenario name resolves to the preset with that value."""
        known_names = (
            "retail_standard",
            "holiday_rush",
            "high_variance",
            "stockout_heavy",
            "new_launches",
            "sparse",
        )

        for scenario_name in known_names:
            resolved = service._get_scenario_preset(scenario_name)
            assert resolved is not None
            assert resolved.value == scenario_name

    def test_invalid_scenario(self):
        """An unknown name resolves to None."""
        assert service._get_scenario_preset("invalid_scenario") is None

    def test_empty_string(self):
        """An empty name resolves to None."""
        assert service._get_scenario_preset("") is None
class TestBuildConfigFromParams:
    """Tests for _build_config_from_params helper."""

    def test_default_params(self):
        """Default parameters produce the default seed and dimensions."""
        built = service._build_config_from_params(schemas.GenerateParams())

        assert built.seed == 42
        assert built.dimensions.stores == 10
        assert built.dimensions.products == 50

    def test_custom_scenario(self):
        """Request values are applied on top of the chosen preset."""
        request = schemas.GenerateParams(
            scenario="holiday_rush",
            seed=123,
            stores=20,
            products=100,
        )

        built = service._build_config_from_params(request)

        assert built.seed == 123
        assert built.dimensions.stores == 20
        assert built.dimensions.products == 100
        # Holiday rush has exponential trend
        assert built.time_series.trend == "exponential"

    def test_unknown_scenario_uses_defaults(self):
        """An unrecognised scenario falls back to the plain config."""
        request = schemas.GenerateParams(
            scenario="unknown_scenario",
            seed=999,
        )

        built = service._build_config_from_params(request)

        assert built.seed == 999
        # Default config has no trend
        assert built.time_series.trend == "none"

    def test_sparsity_config(self):
        """A positive sparsity reaches the sparsity config."""
        built = service._build_config_from_params(schemas.GenerateParams(sparsity=0.5))

        assert built.sparsity.missing_combinations_pct == 0.5

    def test_date_range_override(self):
        """Requested dates replace the config's date range."""
        request = schemas.GenerateParams(
            start_date=date(2025, 1, 1),
            end_date=date(2025, 6, 30),
        )

        built = service._build_config_from_params(request)

        assert built.start_date == date(2025, 1, 1)
        assert built.end_date == date(2025, 6, 30)


class TestGetStatus:
    """Tests for get_status function."""

    @pytest.mark.asyncio
    async def test_returns_status(self):
        """Counts from the mocked queries appear in the status."""
        session = AsyncMock()

        # One scalar result per table, in the order get_status queries them
        table_counts = [10, 50, 365, 182500, 182500, 1500, 500]
        count_results = [
            MagicMock(scalar=MagicMock(return_value=value)) for value in table_counts
        ]
        # Date range query
        range_result = MagicMock(
            fetchone=MagicMock(return_value=(date(2024, 1, 1), date(2024, 12, 31)))
        )
        # Updated_at query
        updated_result = MagicMock(scalar=MagicMock(return_value=None))
        session.execute.side_effect = [*count_results, range_result, updated_result]

        result = await service.get_status(session)

        assert result.stores == 10
        assert result.products == 50
        assert result.calendar == 365
        assert result.sales == 182500

    @pytest.mark.asyncio
    async def test_empty_database(self):
        """With zero rows everywhere no date range is reported."""
        session = AsyncMock()

        # Only the seven count queries are expected for an empty database
        session.execute.side_effect = [
            MagicMock(scalar=MagicMock(return_value=0)) for _ in range(7)
        ]

        result = await service.get_status(session)

        assert result.stores == 0
        assert result.products == 0
        assert result.sales == 0
        assert result.date_range_start is None
        assert result.date_range_end is None
result = await service.generate_data(mock_db, params) + + assert result.success is True + assert result.duration_seconds == 0.0 + assert "Dry run" in result.message + # Verify no database operations occurred + mock_db.execute.assert_not_called() + + @pytest.mark.asyncio + async def test_production_guard_blocks(self): + """Test production guard blocks operations.""" + mock_db = AsyncMock() + params = schemas.GenerateParams() + + with patch.object(service, "get_settings") as mock_settings: + mock_settings.return_value.seeder_allow_production = False + mock_settings.return_value.app_env = "production" + + with pytest.raises(ValueError, match="production"): + await service.generate_data(mock_db, params) + + +class TestDeleteData: + """Tests for delete_data function.""" + + @pytest.mark.asyncio + async def test_dry_run_returns_preview(self): + """Test dry_run returns preview without deleting.""" + mock_db = AsyncMock() + params = schemas.DeleteParams(scope="all", dry_run=True) + + with patch.object(service, "get_settings") as mock_settings: + mock_settings.return_value.seeder_allow_production = True + mock_settings.return_value.app_env = "development" + mock_settings.return_value.seeder_batch_size = 1000 + + # Mock DataSeeder.delete_data to return counts + with patch("app.features.seeder.service.DataSeeder") as MockSeeder: + mock_seeder = MockSeeder.return_value + mock_seeder.delete_data = AsyncMock(return_value={"sales_daily": 1000, "store": 10}) + + result = await service.delete_data(mock_db, params) + + assert result.success is True + assert result.dry_run is True + assert "Dry run" in result.message + + @pytest.mark.asyncio + async def test_production_guard_blocks(self): + """Test production guard blocks delete operations.""" + mock_db = AsyncMock() + params = schemas.DeleteParams(scope="all") + + with patch.object(service, "get_settings") as mock_settings: + mock_settings.return_value.seeder_allow_production = False + mock_settings.return_value.app_env = 
"production" + + with pytest.raises(ValueError, match="production"): + await service.delete_data(mock_db, params) + + +class TestVerifyData: + """Tests for verify_data function.""" + + @pytest.mark.asyncio + async def test_returns_checks(self): + """Test verify returns check results.""" + mock_db = AsyncMock() + + with patch.object(service, "get_settings") as mock_settings: + mock_settings.return_value.seeder_batch_size = 1000 + + # Mock DataSeeder.verify_data_integrity + with patch("app.features.seeder.service.DataSeeder") as MockSeeder: + mock_seeder = MockSeeder.return_value + mock_seeder.verify_data_integrity = AsyncMock(return_value=[]) + + # Mock get_status + with patch.object(service, "get_status") as mock_status: + mock_status.return_value = schemas.SeederStatus( + stores=10, + products=50, + calendar=365, + sales=182500, + inventory=182500, + price_history=1500, + promotions=500, + ) + + result = await service.verify_data(mock_db) + + assert result.total_checks == 5 + assert result.passed is True + + @pytest.mark.asyncio + async def test_detects_failures(self): + """Test verify detects integrity failures.""" + mock_db = AsyncMock() + + with patch.object(service, "get_settings") as mock_settings: + mock_settings.return_value.seeder_batch_size = 1000 + + # Mock DataSeeder.verify_data_integrity with errors + with patch("app.features.seeder.service.DataSeeder") as MockSeeder: + mock_seeder = MockSeeder.return_value + mock_seeder.verify_data_integrity = AsyncMock( + return_value=["Found 5 sales with invalid foreign keys"] + ) + + # Mock get_status + with patch.object(service, "get_status") as mock_status: + mock_status.return_value = schemas.SeederStatus( + stores=10, + products=50, + calendar=365, + sales=182500, + inventory=182500, + price_history=1500, + promotions=500, + ) + + result = await service.verify_data(mock_db) + + assert result.passed is False + assert result.failed_count > 0 diff --git a/app/main.py b/app/main.py index 052da077..6a26cf42 100644 
--- a/app/main.py +++ b/app/main.py @@ -22,6 +22,7 @@ from app.features.jobs.routes import router as jobs_router from app.features.rag.routes import router as rag_router from app.features.registry.routes import router as registry_router +from app.features.seeder.routes import router as seeder_router logger = get_logger(__name__) @@ -109,6 +110,7 @@ def create_app() -> FastAPI: app.include_router(rag_router) app.include_router(agents_router) app.include_router(agents_ws_router) + app.include_router(seeder_router) return app diff --git a/docs/ARCHITECTURE.md b/docs/ARCHITECTURE.md index cc53c1b3..9a975371 100644 --- a/docs/ARCHITECTURE.md +++ b/docs/ARCHITECTURE.md @@ -153,7 +153,9 @@ Backend follows **vertical slice architecture**: ``` app/ ├── core/ # config, database, logging, middleware, health, exceptions -├── shared/ # pagination, timestamps, error schemas +├── shared/ +│ ├── seeder/ # The Forge - randomized database seeder +│ └── ... # pagination, timestamps, error schemas └── features/ ├── data_platform/ # core tables (store, product, calendar, sales) ├── ingest/ # idempotent ingest endpoints (sales_daily / sales_txn) @@ -706,20 +708,146 @@ agent_enable_streaming: bool = True --- -## 11) Dashboard (React + Vite) — Pending +## 11) Dashboard (React + Vite) — ✅ IMPLEMENTED -The UI is intentionally **table-first**: -- Data Explorer -- Model Runs (leaderboard + compare) -- Train & Predict (forms + status) -- Predictions (tabular forecasts) -- **Agent Chat Interface** with streaming and citations +**Implemented via PRP-11** - Modern React dashboard with shadcn/ui components: + +### 11.1 Technology Stack + +| Technology | Version | Purpose | +|------------|---------|---------| +| React | 19 | UI framework | +| Vite | 7 | Build tool and dev server | +| TypeScript | 5.9 | Type safety (strict mode) | +| Tailwind CSS | 4 | Utility-first styling | +| shadcn/ui | New York | Component library (26 components) | +| TanStack Query | 5 | Server state management | +| TanStack 
Table | 8 | Data tables with sorting/filtering | +| React Router | 7 | Client-side routing | +| Recharts | 2 | Charts and visualizations | + +### 11.2 Pages + +- **Dashboard**: KPI summary, recent sales, quick actions +- **Data Explorer**: Store/product browsing with pagination and search +- **Visualizations**: Time series charts, backtest comparisons +- **Agent Chat**: Streaming chat interface with tool call displays +- **Admin**: Configuration and system status + +### 11.3 Component Architecture + +``` +frontend/src/components/ +├── ui/ # shadcn/ui components (26 components) +├── charts/ # KPI cards, time series, backtest charts +├── data-table/ # TanStack Table wrapper components +├── layout/ # AppShell, TopNav, ThemeToggle +├── common/ # DateRangePicker, StatusBadge, LoadingState +└── chat/ # Chat input, messages, tool displays +``` + +### 11.4 Location + +- Source: `frontend/src/` +- Configuration: `frontend/vite.config.ts`, `frontend/components.json` +- Environment: `frontend/.env.example` Decision reference: `docs/ADR/ADR-0002-frontend-architecture-vite-spa-first.md` --- -## 12) Quality, CI, and Review Rules +## 12) Data Seeder (The Forge) — ✅ IMPLEMENTED + +**Implemented via PRP-12** - Reproducible synthetic data generator for development and testing: + +### 12.1 Core Features + +**Reproducible Generation:** +- Deterministic output with configurable seed parameter +- Same seed = identical data across runs + +**Time-Series Patterns:** +- Trend components: none, linear, exponential +- Weekly seasonality (day-of-week multipliers) +- Monthly seasonality (optional month multipliers) +- Holiday effects with custom dates and multipliers +- Gaussian noise with configurable variance +- Random anomalies (spikes/dips) for robustness testing + +**Retail Effects:** +- Promotions with demand lift +- Stockouts (zero or partial sales) +- Price elasticity +- New product launch ramps + +**Scenario Presets:** +| Scenario | Description | +|----------|-------------| +| 
`retail_standard` | Normal retail patterns with mild seasonality | +| `holiday_rush` | Q4 surge with Black Friday/Christmas peaks | +| `high_variance` | Noisy data with anomalies | +| `stockout_heavy` | Frequent stockouts (25% probability) | +| `new_launches` | 100 products with launch ramp patterns | +| `sparse` | 50% missing combinations, random gaps | + +### 12.2 Architecture + +``` +app/shared/seeder/ +├── config.py # Configuration dataclasses, scenario presets +├── core.py # DataSeeder orchestrator class +├── rag_scenario.py # RAG-specific seeding +├── generators/ +│ ├── calendar.py # CalendarGenerator (dates, holidays) +│ ├── dimensions.py # StoreGenerator, ProductGenerator +│ ├── facts.py # SalesDailyGenerator, time-series logic +│ ├── inventory.py # InventorySnapshotGenerator +│ ├── price.py # PriceHistoryGenerator +│ └── promotions.py # PromotionGenerator +└── tests/ # 77 unit tests + integration tests +``` + +### 12.3 CLI Commands + +```bash +# Generate complete dataset +uv run python scripts/seed_random.py --full-new --seed 42 --confirm + +# Use scenario preset +uv run python scripts/seed_random.py --full-new --scenario holiday_rush --confirm + +# Append data for new date range +uv run python scripts/seed_random.py --append --start-date 2025-01-01 --end-date 2025-03-31 + +# Delete data +uv run python scripts/seed_random.py --delete --scope facts --confirm + +# Verify data integrity +uv run python scripts/seed_random.py --verify +``` + +### 12.4 Configuration (Settings) + +```python +seeder_default_seed: int = 42 +seeder_default_stores: int = 10 +seeder_default_products: int = 50 +seeder_batch_size: int = 1000 +seeder_enable_progress: bool = True +seeder_allow_production: bool = False +seeder_require_confirm: bool = True +``` + +### 12.5 Location + +- Module: `app/shared/seeder/` +- CLI: `scripts/seed_random.py` +- Examples: `examples/seed/` (YAML configs, README) +- Documentation: `docs/DATA-SEEDER.md` + +--- + +## 13) Quality, CI, and Review Rules The 
repo standards live in `docs/validation/` and are treated as merge gates: - Ruff lint/format @@ -792,9 +920,7 @@ The repo standards live in `docs/validation/` and are treated as merge gates: - Tool integration with Registry, Backtesting, Forecasting, and RAG - PRP-10 -### Pending Phases - -- **Phase 10**: ForecastLab Dashboard (Pending) +- **Phase 10**: ForecastLab Dashboard ✅ - React 19 + Vite + shadcn/ui + Tailwind CSS 4 - TanStack Table for server-side data grids - TanStack Query for data fetching and caching @@ -802,6 +928,16 @@ The repo standards live in `docs/validation/` and are treated as merge gates: - Agent chat interface with streaming and citations - PRP-11 +- **Phase 12**: Data Seeder (The Forge) ✅ + - Reproducible synthetic data generator + - Scenario presets (retail_standard, holiday_rush, etc.) + - Time-series patterns (trend, seasonality, noise, anomalies) + - Retail effects (promotions, stockouts, price elasticity) + - YAML configuration support + - PRP-12 + +### Pending Phases + - **Phase 11**: ML Models (Future) - Advanced models (XGBoost, Prophet, etc.) - Richer exogenous features diff --git a/docs/DATA-SEEDER.md b/docs/DATA-SEEDER.md new file mode 100644 index 00000000..8f94c8f3 --- /dev/null +++ b/docs/DATA-SEEDER.md @@ -0,0 +1,298 @@ +# Data Seeder: The Forge + +**The Forge** is ForecastLabAI's randomized database seeder for generating reproducible synthetic test data with realistic time-series patterns. + +## Overview + +The seeder generates synthetic retail demand data that mimics real-world patterns, enabling: + +- **Development**: Populate local databases with realistic test data +- **Testing**: Create deterministic datasets for reproducible test suites +- **Demos**: Generate visually compelling data for demonstrations +- **Benchmarking**: Compare model performance across standardized scenarios + +## Architecture + +``` +app/shared/seeder/ +├── __init__.py # Public exports (DataSeeder, SeederConfig, etc.) 
+├── config.py # Configuration dataclasses and scenario presets +├── core.py # DataSeeder orchestrator class +├── rag_scenario.py # RAG-specific seeding for knowledge base +├── generators/ +│ ├── __init__.py # Generator exports +│ ├── calendar.py # CalendarGenerator (dates, holidays) +│ ├── dimensions.py # StoreGenerator, ProductGenerator +│ ├── facts.py # SalesDailyGenerator, time-series logic +│ ├── inventory.py # InventorySnapshotGenerator +│ ├── price.py # PriceHistoryGenerator +│ ├── product.py # ProductGenerator with SKU allocation +│ ├── promotions.py # PromotionGenerator +│ └── store.py # StoreGenerator with code allocation +└── tests/ + ├── conftest.py # Test fixtures + ├── test_config.py # Configuration tests + ├── test_core.py # Orchestrator tests + ├── test_generators.py # Generator unit tests + └── test_integration.py # Full database integration tests +``` + +## Quick Start + +```bash +# 1. Start PostgreSQL +docker-compose up -d + +# 2. Apply migrations +uv run alembic upgrade head + +# 3. Generate test data +uv run python scripts/seed_random.py --full-new --seed 42 --confirm + +# 4. 
Verify data +uv run python scripts/seed_random.py --status +``` + +## CLI Reference + +### Operations + +| Flag | Description | +|------|-------------| +| `--full-new` | Generate complete dataset (dimensions + facts) | +| `--delete` | Delete data (use with `--scope`) | +| `--append` | Append fact data for new date range | +| `--status` | Show current table row counts | +| `--verify` | Validate data integrity | + +### Configuration Options + +| Option | Default | Description | +|--------|---------|-------------| +| `--seed` | 42 | Random seed for reproducibility | +| `--stores` | 10 | Number of stores to generate | +| `--products` | 50 | Number of products to generate | +| `--start-date` | 2024-01-01 | Start of date range | +| `--end-date` | 2024-12-31 | End of date range | +| `--sparsity` | 0.0 | Fraction of missing store/product combinations | +| `--scenario` | — | Pre-built scenario name | +| `--config` | — | Path to YAML configuration file | +| `--scope` | all | Deletion scope: `all`, `facts`, `dimensions` | +| `--batch-size` | 1000 | Records per INSERT statement | + +### Safety Flags + +| Flag | Description | +|------|-------------| +| `--confirm` | Required for destructive operations | +| `--dry-run` | Preview changes without executing | + +## Scenario Presets + +Pre-configured scenarios for common testing patterns: + +| Scenario | Description | Key Settings | +|----------|-------------|--------------| +| `retail_standard` | Normal retail patterns | Linear trend, 15% noise, 10% promotions | +| `holiday_rush` | Q4 holiday surge | Oct-Dec, exponential trend, 1.8x December multiplier | +| `high_variance` | Noisy, unpredictable | 40% noise, 5% anomalies, 3x anomaly magnitude | +| `stockout_heavy` | Frequent stockouts | 25% stockout probability | +| `new_launches` | Product launch ramps | 100 products, 30-day ramp period | +| `sparse` | Missing data patterns | 50% missing combinations, random gaps | + +### Usage + +```bash +# Use built-in scenario +uv run python 
scripts/seed_random.py --full-new --scenario holiday_rush --confirm + +# Override scenario parameters +uv run python scripts/seed_random.py --full-new --scenario holiday_rush --stores 20 --confirm +``` + +## YAML Configuration + +For complex scenarios, use YAML configuration files: + +```yaml +# examples/seed/config_custom.yaml +dimensions: + stores: + count: 15 + regions: ["North", "South", "East", "West", "Central"] + types: ["supermarket", "express", "warehouse", "online"] + products: + count: 100 + categories: ["Beverage", "Snack", "Dairy", "Frozen", "Fresh"] + brands: ["PremiumCo", "ValueMax", "Generic", "Organic"] + +date_range: + start: "2024-01-01" + end: "2024-12-31" + +time_series: + base_demand: 100 + trend: "linear" + trend_slope: 0.001 + weekly_seasonality: [0.8, 0.9, 1.0, 1.0, 1.1, 1.3, 1.2] + monthly_seasonality: + 11: 1.2 # November + 12: 1.5 # December + noise_sigma: 0.15 + anomaly_probability: 0.01 + anomaly_magnitude: 2.0 + +retail: + promotion_probability: 0.1 + promotion_lift: 1.3 + stockout_probability: 0.02 + stockout_behavior: "zero" # or "partial" + price_elasticity: -0.5 + +sparsity: + missing_combinations_pct: 0.1 + random_gaps_per_series: 2 + +holidays: + - date: "2024-11-29" + name: "Black Friday" + multiplier: 2.0 + - date: "2024-12-25" + name: "Christmas Day" + multiplier: 0.3 + +seed: 42 +``` + +```bash +uv run python scripts/seed_random.py --full-new --config examples/seed/config_custom.yaml --confirm +``` + +## Time-Series Patterns + +### Trend Components + +- **none**: Stationary demand (no trend) +- **linear**: `demand * (1 + slope * days_from_start)` +- **exponential**: `demand * (1 + slope) ^ days_from_start` + +### Seasonality + +**Weekly**: Day-of-week multipliers (Mon=0.8, Sat=1.3, etc.) 
+ +**Monthly**: Optional month-specific multipliers + +**Holidays**: US federal holidays + custom dates with multipliers + +### Noise & Anomalies + +- Gaussian noise with configurable sigma +- Random anomalies (spikes/dips) with configurable probability and magnitude + +### Retail Effects + +- **Promotions**: Random promotional periods with demand lift +- **Stockouts**: Zero or partial sales during stockout events +- **Price Elasticity**: Demand adjustment based on price changes +- **New Product Ramps**: Gradual demand increase for new launches + +## Data Integrity + +The seeder enforces data integrity: + +1. **Foreign Keys**: All fact records reference valid dimension records +2. **Non-Negative Values**: Quantities and prices are always non-negative +3. **Date Coverage**: Calendar table covers entire date range +4. **Uniqueness**: Store codes and product SKUs are unique + +Verify with: +```bash +uv run python scripts/seed_random.py --verify +``` + +## Reproducibility + +Same seed = identical data: + +```bash +# These produce identical datasets +uv run python scripts/seed_random.py --full-new --seed 42 --confirm +uv run python scripts/seed_random.py --delete --confirm +uv run python scripts/seed_random.py --full-new --seed 42 --confirm +``` + +## Environment Variables + +Configure defaults via settings: + +```bash +SEEDER_DEFAULT_SEED=42 # Default random seed +SEEDER_DEFAULT_STORES=10 # Default store count +SEEDER_DEFAULT_PRODUCTS=50 # Default product count +SEEDER_BATCH_SIZE=1000 # Records per INSERT +SEEDER_ENABLE_PROGRESS=True # Show progress bar +SEEDER_ALLOW_PRODUCTION=False # Block in production +SEEDER_REQUIRE_CONFIRM=True # Require --confirm flag +``` + +## Integration Tests + +The seeder includes a comprehensive test suite: + +```bash +# Unit tests (no database required) +uv run pytest app/shared/seeder/tests/ -v -m "not integration" + +# Integration tests (requires PostgreSQL + explicit opt-in) +APP_ENV=testing uv run pytest 
app/shared/seeder/tests/test_integration.py -v +``` + +**Safety Guard**: Integration tests require explicit opt-in via `APP_ENV=testing` or `ALLOW_DESTRUCTIVE_TEST_DB=true` to prevent accidental data loss. + +## Common Workflows + +### Development Setup + +```bash +# Quick start with standard data +uv run python scripts/seed_random.py --full-new --confirm +``` + +### Seasonal Testing + +```bash +# Test holiday forecasting +uv run python scripts/seed_random.py --full-new --scenario holiday_rush --confirm +``` + +### Missing Data Testing + +```bash +# Test gap handling +uv run python scripts/seed_random.py --full-new --scenario sparse --confirm +``` + +### Extending Data + +```bash +# Add Q1 2025 data +uv run python scripts/seed_random.py --append \ + --start-date 2025-01-01 \ + --end-date 2025-03-31 \ + --seed 43 +``` + +### Clean Slate + +```bash +# Delete everything and regenerate +uv run python scripts/seed_random.py --delete --confirm +uv run python scripts/seed_random.py --full-new --seed 42 --confirm +``` + +## Related Documentation + +- [CLI Reference](../examples/seed/README.md) - Detailed CLI options and examples +- [YAML Configs](../examples/seed/) - Example configuration files +- [Architecture](./ARCHITECTURE.md) - Overall system design +- [Data Platform](./PHASE/1-DATA_PLATFORM.md) - Database schema details diff --git a/frontend/src/hooks/index.ts b/frontend/src/hooks/index.ts index 0069c8b1..b6d05698 100644 --- a/frontend/src/hooks/index.ts +++ b/frontend/src/hooks/index.ts @@ -6,3 +6,4 @@ export * from './use-runs' export * from './use-jobs' export * from './use-rag-sources' export * from './use-websocket' +export * from './use-seeder' diff --git a/frontend/src/hooks/use-seeder.ts b/frontend/src/hooks/use-seeder.ts new file mode 100644 index 00000000..d32c1f2a --- /dev/null +++ b/frontend/src/hooks/use-seeder.ts @@ -0,0 +1,82 @@ +import { useQuery, useMutation, useQueryClient } from '@tanstack/react-query' +import { api } from '@/lib/api' +import type 
{ + SeederStatus, + ScenarioInfo, + GenerateParams, + GenerateResult, + AppendParams, + DeleteParams, + DeleteResult, + VerifyResult, +} from '@/types/api' + +// Query: Get database status (row counts, date range) +export function useSeederStatus() { + return useQuery({ + queryKey: ['seeder', 'status'], + queryFn: () => api('/seeder/status'), + // Refresh every 30 seconds to catch external changes + refetchInterval: 30000, + }) +} + +// Query: Get available scenarios (cached indefinitely - they don't change) +export function useSeederScenarios() { + return useQuery({ + queryKey: ['seeder', 'scenarios'], + queryFn: () => api('/seeder/scenarios'), + staleTime: Infinity, + }) +} + +// Mutation: Generate new dataset +export function useGenerateData() { + const queryClient = useQueryClient() + return useMutation({ + mutationFn: (params: GenerateParams) => + api('/seeder/generate', { method: 'POST', body: params }), + onSuccess: () => { + // Invalidate status to refresh counts + void queryClient.invalidateQueries({ queryKey: ['seeder', 'status'] }) + // Also invalidate analytics as data changed + void queryClient.invalidateQueries({ queryKey: ['analytics'] }) + void queryClient.invalidateQueries({ queryKey: ['kpis'] }) + }, + }) +} + +// Mutation: Append data to existing dataset +export function useAppendData() { + const queryClient = useQueryClient() + return useMutation({ + mutationFn: (params: AppendParams) => + api('/seeder/append', { method: 'POST', body: params }), + onSuccess: () => { + void queryClient.invalidateQueries({ queryKey: ['seeder', 'status'] }) + void queryClient.invalidateQueries({ queryKey: ['analytics'] }) + void queryClient.invalidateQueries({ queryKey: ['kpis'] }) + }, + }) +} + +// Mutation: Delete data +export function useDeleteData() { + const queryClient = useQueryClient() + return useMutation({ + mutationFn: (params: DeleteParams) => + api('/seeder/data', { method: 'DELETE', body: params }), + onSuccess: () => { + void 
queryClient.invalidateQueries({ queryKey: ['seeder', 'status'] }) + void queryClient.invalidateQueries({ queryKey: ['analytics'] }) + void queryClient.invalidateQueries({ queryKey: ['kpis'] }) + }, + }) +} + +// Mutation: Verify data integrity +export function useVerifyData() { + return useMutation({ + mutationFn: () => api('/seeder/verify', { method: 'POST' }), + }) +} diff --git a/frontend/src/pages/admin.tsx b/frontend/src/pages/admin.tsx index 7252f672..c01e6519 100644 --- a/frontend/src/pages/admin.tsx +++ b/frontend/src/pages/admin.tsx @@ -1,8 +1,31 @@ import { useState } from 'react' import { format } from 'date-fns' -import { Trash2, Plus, Database, Tag, Loader2 } from 'lucide-react' +import { + Trash2, + Plus, + Database, + Tag, + Loader2, + Flame, + CheckCircle, + RefreshCw, + Store, + Package, + Calendar, + TrendingUp, + Warehouse, + History, + Percent, +} from 'lucide-react' import { useRagSources, useDeleteRagSource, useIndexDocument } from '@/hooks/use-rag-sources' import { useAliases, useDeleteAlias, useCreateAlias } from '@/hooks/use-runs' +import { + useSeederStatus, + useSeederScenarios, + useGenerateData, + useDeleteData, + useVerifyData, +} from '@/hooks/use-seeder' import { ErrorDisplay } from '@/components/common/error-display' import { LoadingState } from '@/components/common/loading-state' import { Button } from '@/components/ui/button' @@ -36,6 +59,10 @@ import { DialogTrigger, } from '@/components/ui/dialog' import { Tabs, TabsContent, TabsList, TabsTrigger } from '@/components/ui/tabs' +import { Badge } from '@/components/ui/badge' +import { Skeleton } from '@/components/ui/skeleton' +import { toast } from 'sonner' +import type { ScenarioInfo, VerifyCheck, VerifyCheckStatus } from '@/types/api' export default function AdminPage() { return ( @@ -52,6 +79,10 @@ export default function AdminPage() { Deployment Aliases + + + Data Seeder + @@ -61,6 +92,10 @@ export default function AdminPage() { + + + + ) @@ -345,3 +380,259 @@ function 
AliasesPanel() { ) } + +function SeederPanel() { + const { data: status, isLoading, error, refetch } = useSeederStatus() + const { data: scenarios } = useSeederScenarios() + const generateMutation = useGenerateData() + const deleteMutation = useDeleteData() + const verifyMutation = useVerifyData() + + const [selectedScenario, setSelectedScenario] = useState('retail_standard') + const [deleteDialogOpen, setDeleteDialogOpen] = useState(false) + const [verifyResult, setVerifyResult] = useState<{ + passed: boolean + checks: VerifyCheck[] + passed_count: number + warning_count: number + failed_count: number + } | null>(null) + + const handleGenerate = async () => { + try { + const result = await generateMutation.mutateAsync({ + scenario: selectedScenario, + }) + toast.success( + `Generated ${result.records_created.sales?.toLocaleString() ?? 0} sales records in ${result.duration_seconds.toFixed(1)}s` + ) + } catch (err) { + toast.error(err instanceof Error ? err.message : 'Generation failed') + } + } + + const handleDelete = async () => { + try { + const result = await deleteMutation.mutateAsync({ scope: 'all' }) + setDeleteDialogOpen(false) + toast.success(result.message) + } catch (err) { + toast.error(err instanceof Error ? err.message : 'Delete failed') + } + } + + const handleVerify = async () => { + try { + const result = await verifyMutation.mutateAsync() + setVerifyResult(result) + if (result.passed) { + toast.success('All integrity checks passed') + } else { + toast.warning(`${result.failed_count} checks failed`) + } + } catch (err) { + toast.error(err instanceof Error ? err.message : 'Verification failed') + } + } + + if (error) { + return + } + + return ( +
+ {/* Status Card */} + + +
+ Current Data Summary + + {status?.date_range_start && status?.date_range_end + ? `${status.date_range_start} → ${status.date_range_end}` + : 'No data yet'} + +
+ +
+ + {isLoading ? ( +
+ {Array.from({ length: 7 }).map((_, i) => ( + + ))} +
+ ) : ( +
+ + + + + + + +
+ )} +
+
+ + {/* Actions Card */} + + + Quick Actions + Generate, delete, or verify synthetic data + + +
+ + + + + + + + + Delete All Data? + + This will permanently delete all{' '} + {status?.sales?.toLocaleString() ?? 0} sales records,{' '} + {status?.stores ?? 0} stores, and {status?.products ?? 0} products. This + action cannot be undone. + + + + Cancel + Delete All Data + + + + + +
+ +
+ + +
+
+
+ + {/* Verification Results */} + {verifyResult && ( + + + + Verification Results + + {verifyResult.passed ? 'Passed' : 'Failed'} + + + + {verifyResult.passed_count} passed • {verifyResult.warning_count} warnings •{' '} + {verifyResult.failed_count} failed + + + +
+ {verifyResult.checks.map((check: VerifyCheck, idx: number) => ( +
+
+

{check.name}

+

{check.message}

+
+ {check.status} +
+ ))} +
+
+
+ )} +
+ ) +} + +// Helper component for stat cards +function StatCard({ + icon: Icon, + label, + value, +}: { + icon: React.ComponentType<{ className?: string }> + label: string + value: number +}) { + return ( +
+ +

{value.toLocaleString()}

+

{label}

+
+ ) +} + +// Helper for scenario names +function formatScenarioLabel(name: string): string { + return name + .split('_') + .map((word) => word.charAt(0).toUpperCase() + word.slice(1)) + .join(' ') +} + +// Helper for badge variants +function getCheckBadgeVariant(status: VerifyCheckStatus): 'default' | 'secondary' | 'destructive' { + switch (status) { + case 'passed': + return 'default' + case 'warning': + return 'secondary' + case 'failed': + return 'destructive' + } +} diff --git a/frontend/src/types/api.ts b/frontend/src/types/api.ts index 1096e245..33b0a0b2 100644 --- a/frontend/src/types/api.ts +++ b/frontend/src/types/api.ts @@ -242,3 +242,81 @@ export interface ProblemDetail { code?: string request_id?: string } + +// === Seeder === +export interface SeederStatus { + stores: number + products: number + calendar: number + sales: number + inventory: number + price_history: number + promotions: number + date_range_start: string | null // ISO date "2024-01-01" + date_range_end: string | null + last_updated: string | null // ISO datetime +} + +export interface ScenarioInfo { + name: string + description: string + stores: number + products: number + start_date: string // ISO date + end_date: string +} + +export interface GenerateParams { + scenario?: string // default: "retail_standard" + seed?: number // default: 42 + stores?: number // 1-100, default: 10 + products?: number // 1-500, default: 50 + start_date?: string // ISO date + end_date?: string + sparsity?: number // 0.0-1.0 + dry_run?: boolean +} + +export interface AppendParams { + start_date: string // Required + end_date: string // Required + seed?: number +} + +export interface DeleteParams { + scope?: 'all' | 'facts' | 'dimensions' // default: "all" + dry_run?: boolean +} + +export interface GenerateResult { + success: boolean + records_created: Record + duration_seconds: number + message: string + seed: number +} + +export interface DeleteResult { + success: boolean + records_deleted: Record + message: 
string + dry_run: boolean +} + +export type VerifyCheckStatus = 'passed' | 'warning' | 'failed' + +export interface VerifyCheck { + name: string + status: VerifyCheckStatus + message: string + details: string[] | null +} + +export interface VerifyResult { + passed: boolean + checks: VerifyCheck[] + total_checks: number + passed_count: number + warning_count: number + failed_count: number +}