From a553dc1c9d5d7df71ab80fa9586c38051eeeacd7 Mon Sep 17 00:00:00 2001 From: "Gabe@w7dev" Date: Sun, 1 Feb 2026 08:19:38 +0000 Subject: [PATCH 1/5] docs(initial-8): expand serving layer requirements Add specifications for job-driven orchestration, dimension discovery endpoints, standardized API protocols (filtering/pagination), and agent-first API design patterns for LLM tool-calling optimization. Co-Authored-By: Claude Opus 4.5 --- INITIAL-8.md | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/INITIAL-8.md b/INITIAL-8.md index 593c47d2..e84364b2 100644 --- a/INITIAL-8.md +++ b/INITIAL-8.md @@ -12,6 +12,24 @@ - request validation - response_model-enforced outputs - OpenAPI export generation (also used as a RAG source). +- Job-Driven Orchestration: - Asynchronous task pattern (POST returns job_id, GET polls status). + - Standardized Job statuses: PENDING | RUNNING | COMPLETED | FAILED. +- Dimension Discovery: + - Metadata endpoints for Store and Product catalogs (names, categories, IDs). +- Standardized API Protocols: + - Unified filtering, sorting, and pagination schemas (Mixin pattern). + - Semantic Error responses with domain-specific error codes (RFC 7807). +- AI-Enhanced Documentation: + - Rich OpenAPI metadata optimized for LLM tool-calling and RAG indexing. +- Agent-First API Design: +  - Rich OpenAPI metadata (Pydantic Field descriptions) for RAG indexing. +  - Discovery endpoints for Store/Product metadata resolution. +- Asynchronous Task Protocol: +  - Unified Job Status API (job_id tracking) for long-running ForecastOps. +- Robust Error Handling: +  - Semantic error codes (RFC 7807) to enable Agent-led troubleshooting. +- Scalable Data Access: +  - Standardized Pagination and Filtering mixins for consistent tool-calling. 
## EXAMPLES: - `examples/api/train.http` From e4020f2acf95e26d10baf147d3d6d80a456dbbce Mon Sep 17 00:00:00 2001 From: "Gabe@w7dev" Date: Sun, 1 Feb 2026 09:00:55 +0000 Subject: [PATCH 2/5] docs(prp-8): add serving layer implementation spec Comprehensive PRP for FastAPI serving layer including: - Dimensions module for store/product discovery endpoints - Analytics module for KPI/drilldown queries - Jobs module for async-ready task orchestration - RFC 7807 problem details for semantic error responses - OpenAPI export optimization for LLM tool-calling 26 tasks with validation gates and 8.5/10 confidence score. Co-Authored-By: Claude Opus 4.5 --- PRPs/PRP-8-serving-layer.md | 1116 +++++++++++++++++++++++++++++++++++ 1 file changed, 1116 insertions(+) create mode 100644 PRPs/PRP-8-serving-layer.md diff --git a/PRPs/PRP-8-serving-layer.md b/PRPs/PRP-8-serving-layer.md new file mode 100644 index 00000000..cc48d844 --- /dev/null +++ b/PRPs/PRP-8-serving-layer.md @@ -0,0 +1,1116 @@ +# PRP-8: FastAPI Serving Layer (Typed Contracts, Agent-First API Design) + +## Goal + +Implement a production-ready serving layer that extends the existing ForecastOps API with: +- **Dimension Discovery**: Store/Product metadata endpoints for agent-driven resolution +- **Data Analytics**: KPI aggregations and drilldown queries +- **Job Orchestration**: Async-ready contracts with job_id tracking (sync implementation, async contracts) +- **RFC 7807 Problem Details**: Semantic error responses for agent troubleshooting +- **OpenAPI Export**: RAG-optimized schema export for LLM tool-calling +- **Standardized Mixins**: Unified pagination, filtering, and sorting patterns + +**End State:** An agent-optimized serving layer where: +- LLM agents can discover available stores/products via dedicated endpoints +- Semantic error codes enable automatic troubleshooting workflows +- Rich OpenAPI descriptions optimize tool selection for LLM function calling +- Job orchestration contracts are async-ready for future 
background execution +- All validation gates passing (ruff, mypy, pyright, pytest) + +--- + +## Why + +- **Agent Discoverability**: LLM agents need to resolve natural keys (store_code, sku) before calling ingest/train/predict endpoints; dedicated discovery endpoints eliminate guesswork +- **Troubleshooting Autonomy**: RFC 7807 problem details with semantic error codes enable agents to diagnose and fix issues without human intervention +- **Data Exploration**: KPI and drilldown endpoints allow agents and dashboards to explore sales performance programmatically +- **Scalability Foundation**: Async-ready job contracts prepare for background execution of long-running operations (training, backtesting) +- **RAG Integration**: OpenAPI export with rich descriptions enables high-quality function calling via embeddings + +--- + +## What + +### User-Visible Behavior + +1. **Dimension Discovery** + - `GET /dimensions/stores` - List all stores with metadata (code, name, region, type) + - `GET /dimensions/stores/{store_id}` - Get single store details + - `GET /dimensions/products` - List all products with metadata (sku, name, category, brand) + - `GET /dimensions/products/{product_id}` - Get single product details + - Supports filtering by region, category, brand with pagination + +2. **Data Analytics** + - `GET /analytics/kpis` - Aggregated KPIs (total revenue, units, by store/category/date) + - `GET /analytics/drilldowns` - Drill into KPIs by dimension (store, product, date range) + +3. **Job Orchestration (Async-Ready)** + - `POST /jobs` - Create new job (wraps train/predict/backtest) + - `GET /jobs/{job_id}` - Poll job status (PENDING | RUNNING | COMPLETED | FAILED | CANCELLED) + - `GET /jobs` - List recent jobs with filtering + - `DELETE /jobs/{job_id}` - Cancel pending job + - Synchronous execution initially; contracts support future async migration + +4. 
**RFC 7807 Error Responses** + - All errors return structured Problem Details format + - Domain-specific error types (URIs) for each error category + - Instance URIs for error tracking/correlation + +5. **OpenAPI Export** + - `GET /openapi.json` - Standard OpenAPI 3.1 schema (already provided by FastAPI) + - `scripts/export_openapi.py` - Export enriched schema for RAG indexing + - All Field descriptions optimized for LLM tool selection + +### Success Criteria + +- [ ] Dimension discovery endpoints implemented with pagination and filtering +- [ ] KPI/drilldown endpoints with date range, store, product filters +- [ ] Job orchestration contracts defined (sync implementation) +- [ ] RFC 7807 ProblemDetail schema integrated with all error handlers +- [ ] All existing endpoints enhanced with rich Field descriptions +- [ ] OpenAPI export script produces RAG-ready documentation +- [ ] 50+ unit tests covering new features +- [ ] 15+ integration tests for new endpoints +- [ ] All validation gates green + +--- + +## All Needed Context + +### Documentation & References + +```yaml +# MUST READ - Include these in your context window + +# RFC 7807/9457 Problem Details +- url: https://datatracker.ietf.org/doc/html/rfc7807 + why: "Original problem details standard" + critical: "Use 'type' URI for error categorization, 'instance' for correlation" + +- url: https://github.com/vapor-ware/fastapi-rfc7807 + why: "FastAPI RFC 7807 implementation reference" + critical: "Pattern for exception handler integration" + +# OpenAPI for LLM Tool Calling +- url: https://medium.com/percolation-labs/how-llm-apis-use-the-openapi-spec-for-function-calling-f37d76e0fef3 + why: "How LLMs use OpenAPI for function selection" + critical: "Clear semantic naming and descriptions are crucial for tool selection" + +- url: https://github.com/samchon/openapi + why: "OpenAPI to LLM function calling schema converter" + critical: "Rich descriptions significantly improve function calling accuracy" + +# Internal 
Codebase References +- file: app/features/registry/routes.py + why: "Pattern for pagination with Query params" + pattern: "page: int = Query(1, ge=1), page_size: int = Query(20, ge=1, le=100)" + +- file: app/features/registry/schemas.py + why: "Pattern for RunListResponse with pagination fields" + pattern: "runs: list[RunResponse], total: int, page: int, page_size: int" + +- file: app/features/ingest/service.py + why: "KeyResolver pattern for store_code → store_id resolution" + pattern: "resolve_store_codes(), resolve_skus()" + +- file: app/core/exceptions.py + why: "Base exception hierarchy to extend with RFC 7807" + pattern: "ForecastLabError, forecastlab_exception_handler" + +- file: app/features/data_platform/models.py + why: "Store, Product, SalesDaily ORM models" + pattern: "Mapped[], mapped_column(), relationships" + +- file: examples/queries/kpi_sales.sql + why: "SQL patterns for KPI aggregations" + pattern: "SUM, COUNT, GROUP BY, DATE_TRUNC, RANK, NTILE" + +- file: app/shared/schemas.py + why: "Existing PaginatedResponse generic" + pattern: "PaginatedResponse[T] with items, total, page, page_size, pages" +``` + +### Current Codebase Tree (Relevant Parts) + +```text +app/ +├── core/ +│ ├── config.py # Settings singleton (extend with job settings) +│ ├── database.py # AsyncSession, get_db +│ ├── exceptions.py # ForecastLabError hierarchy (EXTEND with RFC 7807) +│ ├── logging.py # Structured logging +│ └── middleware.py # RequestIdMiddleware +├── shared/ +│ ├── schemas.py # PaginatedResponse (EXTEND with mixins) +│ └── models.py # TimestampMixin +├── features/ +│ ├── data_platform/ +│ │ └── models.py # Store, Product, SalesDaily, Calendar +│ ├── ingest/ +│ │ └── service.py # KeyResolver (REFERENCE for lookups) +│ ├── forecasting/ +│ │ └── routes.py # train/predict endpoints +│ ├── backtesting/ +│ │ └── routes.py # backtest/run endpoint +│ └── registry/ +│ ├── routes.py # Run/Alias CRUD (REFERENCE for pagination) +│ └── schemas.py # RunListResponse (REFERENCE) 
+└── main.py # Router registration +``` + +### Desired Codebase Tree (New Files) + +```text +app/features/dimensions/ # NEW: Dimension discovery +├── __init__.py +├── routes.py # GET /dimensions/stores, /products +├── schemas.py # StoreResponse, ProductResponse, filters +├── service.py # DimensionService (paginated lookups) +└── tests/ + ├── __init__.py + ├── conftest.py + ├── test_routes.py # Route tests + └── test_service.py # Service tests + +app/features/analytics/ # NEW: KPI/Drilldown endpoints +├── __init__.py +├── routes.py # GET /analytics/kpis, /drilldowns +├── schemas.py # KPIResponse, DrilldownRequest, filters +├── service.py # AnalyticsService (aggregation queries) +└── tests/ + ├── __init__.py + ├── conftest.py + ├── test_routes.py + └── test_service.py + +app/features/jobs/ # NEW: Job orchestration layer +├── __init__.py +├── models.py # Job ORM model (JSONB for params/result) +├── routes.py # POST /jobs, GET /jobs/{job_id} +├── schemas.py # JobCreate, JobResponse, JobStatus enum +├── service.py # JobService (sync execution, async contracts) +└── tests/ + ├── __init__.py + ├── conftest.py + ├── test_routes.py + └── test_service.py + +app/core/problem_details.py # NEW: RFC 7807 implementation + # ProblemDetail schema, exception handlers + +app/shared/mixins.py # NEW: Pagination/filter/sort mixins + +scripts/export_openapi.py # NEW: RAG-optimized OpenAPI export + +examples/api/dimensions.http # NEW: Dimension discovery examples +examples/api/analytics.http # NEW: KPI/drilldown examples +examples/api/jobs.http # NEW: Job orchestration examples + +alembic/versions/xxx_create_jobs_table.py # NEW: Jobs table migration +``` + +### Known Gotchas + +```python +# CRITICAL: RFC 7807 requires specific content type +# Content-Type: application/problem+json +# FastAPI JSONResponse can set this via media_type parameter + +# CRITICAL: 'type' in Problem Details should be a URI +# Use relative URIs like "/errors/validation" or absolute URIs +# Example: "type": 
"https://api.forecastlabai.com/errors/unknown-store" + +# CRITICAL: 'instance' should be request-specific +# Use request_id from middleware: f"/requests/{request_id}" + +# CRITICAL: OpenAPI descriptions are used by LLMs for tool selection +# Keep descriptions concise but semantically rich +# BAD: "The ID" +# GOOD: "Unique store identifier from /dimensions/stores endpoint" + +# CRITICAL: Pagination uses 1-indexed pages (not 0-indexed) +# Offset = (page - 1) * page_size + +# CRITICAL: Jobs table uses JSONB for params and result +# This allows arbitrary job configurations without schema migration + +# CRITICAL: Job status transitions must be validated +# PENDING -> RUNNING -> COMPLETED|FAILED +# PENDING -> CANCELLED (via DELETE) +# No other transitions allowed + +# CRITICAL: KPI queries should use calendar table for date validation +# Don't trust user-provided dates without checking calendar table + +# CRITICAL: Use SQLAlchemy func for aggregations +# from sqlalchemy import func +# func.sum(), func.count(), func.avg() + +# CRITICAL: For large result sets, add row limits +# Analytics queries should have max_rows setting (default 10000) +``` + +--- + +## Implementation Blueprint + +### Data Models + +#### RFC 7807 Problem Details Schema + +```python +# app/core/problem_details.py + +from typing import Any +from pydantic import BaseModel, Field, ConfigDict + + +class ProblemDetail(BaseModel): + """RFC 7807 Problem Details for HTTP APIs. + + This schema enables machine-readable error responses that LLM agents + can use for automatic troubleshooting and retry logic. 
+ + Attributes: + type: URI identifying the error type (for categorization) + title: Short human-readable summary + status: HTTP status code + detail: Human-readable explanation + instance: URI for this specific error occurrence + errors: Optional field-level validation errors + """ + model_config = ConfigDict(extra="allow") # Allow extensions + + type: str = Field( + default="about:blank", + description="URI reference identifying the problem type" + ) + title: str = Field( + ..., + description="Short, human-readable summary of the problem" + ) + status: int = Field( + ..., + ge=400, + le=599, + description="HTTP status code" + ) + detail: str | None = Field( + None, + description="Human-readable explanation specific to this occurrence" + ) + instance: str | None = Field( + None, + description="URI reference for this specific problem occurrence" + ) + # Extension: validation errors for 422 responses + errors: list[dict[str, Any]] | None = Field( + None, + description="Field-level validation errors (for 422 responses)" + ) +``` + +#### Job Model + +```python +# app/features/jobs/models.py + +class JobType(str, Enum): + """Types of jobs that can be executed.""" + TRAIN = "train" + PREDICT = "predict" + BACKTEST = "backtest" + + +class JobStatus(str, Enum): + """Job lifecycle states.""" + PENDING = "pending" + RUNNING = "running" + COMPLETED = "completed" + FAILED = "failed" + CANCELLED = "cancelled" + + +class Job(TimestampMixin, Base): + """Background job tracking. + + CRITICAL: Stores job configuration and results as JSONB for flexibility. 
+ """ + __tablename__ = "job" + + id: Mapped[int] = mapped_column(Integer, primary_key=True) + job_id: Mapped[str] = mapped_column(String(32), unique=True, index=True) + job_type: Mapped[str] = mapped_column(String(20), index=True) + status: Mapped[str] = mapped_column(String(20), default=JobStatus.PENDING.value) + + # Job configuration (stored as JSONB for flexibility) + params: Mapped[dict[str, Any]] = mapped_column(JSONB, nullable=False) + + # Result/error storage + result: Mapped[dict[str, Any] | None] = mapped_column(JSONB, nullable=True) + error_message: Mapped[str | None] = mapped_column(String(2000), nullable=True) + error_type: Mapped[str | None] = mapped_column(String(100), nullable=True) + + # Timing + started_at: Mapped[datetime | None] = mapped_column(DateTime(timezone=True)) + completed_at: Mapped[datetime | None] = mapped_column(DateTime(timezone=True)) + + # Linkage to model run (for train/backtest jobs) + run_id: Mapped[str | None] = mapped_column(String(32), nullable=True, index=True) +``` + +#### Dimension Schemas (Agent-Optimized) + +```python +# app/features/dimensions/schemas.py + +class StoreResponse(BaseModel): + """Store dimension record for agent discovery. + + Use this endpoint to resolve store_code to store_id before calling + ingest or forecasting endpoints. + """ + model_config = ConfigDict(from_attributes=True) + + id: int = Field( + ..., + description="Internal store ID. Use this value for store_id parameters." + ) + code: str = Field( + ..., + description="Business store code (e.g., 'S001'). Unique identifier." + ) + name: str = Field( + ..., + description="Human-readable store name for display purposes." + ) + region: str | None = Field( + None, + description="Geographic region. Filter using region parameter." + ) + city: str | None = Field( + None, + description="City where store is located." + ) + store_type: str | None = Field( + None, + description="Store format (e.g., 'supermarket', 'express', 'warehouse')." 
+ ) + + +class StoreListResponse(BaseModel): + """Paginated list of stores with filtering metadata.""" + stores: list[StoreResponse] = Field( + ..., + description="Array of store records for current page." + ) + total: int = Field( + ..., + ge=0, + description="Total number of stores matching filters." + ) + page: int = Field( + ..., + ge=1, + description="Current page number (1-indexed)." + ) + page_size: int = Field( + ..., + ge=1, + description="Number of stores per page." + ) + + +class StoreFilter(BaseModel): + """Filter parameters for store queries.""" + region: str | None = Field( + None, + description="Filter by region (exact match)." + ) + store_type: str | None = Field( + None, + description="Filter by store type (exact match)." + ) + search: str | None = Field( + None, + min_length=2, + description="Search in store code and name (case-insensitive)." + ) +``` + +### Task List + +#### Task 1: Create RFC 7807 Problem Details module + +```yaml +FILE: app/core/problem_details.py +ACTION: CREATE +IMPLEMENT: + - ProblemDetail schema with RFC 7807 fields + - Error type URIs for each error category: + - /errors/not-found + - /errors/validation + - /errors/database + - /errors/conflict + - /errors/unauthorized + - /errors/rate-limited + - problem_detail_handler() exception handler + - Mapping from ForecastLabError types to problem details +CRITICAL: + - Set Content-Type: application/problem+json + - Include instance URI with request_id + - Handle Pydantic ValidationError specially (field-level errors) +VALIDATION: + - uv run mypy app/core/problem_details.py + - uv run pyright app/core/problem_details.py +``` + +#### Task 2: Integrate Problem Details into exception handlers + +```yaml +FILE: app/core/exceptions.py +ACTION: MODIFY +IMPLEMENT: + - Import ProblemDetail from problem_details + - Update forecastlab_exception_handler to return ProblemDetail + - Update unhandled_exception_handler to return ProblemDetail + - Add error_type URI property to ForecastLabError 
subclasses +FIND: "async def forecastlab_exception_handler" +MODIFY: Return ProblemDetailResponse instead of dict +VALIDATION: + - uv run pytest app/core/tests/test_exceptions.py -v +``` + +#### Task 3: Create dimensions module structure + +```yaml +ACTION: CREATE directories and files +FILES: + - app/features/dimensions/__init__.py + - app/features/dimensions/schemas.py + - app/features/dimensions/service.py + - app/features/dimensions/routes.py + - app/features/dimensions/tests/__init__.py + - app/features/dimensions/tests/conftest.py +PATTERN: Mirror registry module structure +``` + +#### Task 4: Implement dimensions schemas + +```yaml +FILE: app/features/dimensions/schemas.py +ACTION: CREATE +IMPLEMENT: + - StoreResponse with rich Field descriptions + - StoreListResponse for paginated results + - StoreFilter for query parameters + - ProductResponse with sku, name, category, brand + - ProductListResponse for paginated results + - ProductFilter for query parameters +CRITICAL: + - Every Field must have a description optimized for LLM tool selection + - Use pattern validation for code/sku formats +VALIDATION: + - uv run mypy app/features/dimensions/schemas.py +``` + +#### Task 5: Implement dimensions service + +```yaml +FILE: app/features/dimensions/service.py +ACTION: CREATE +IMPLEMENT: + - DimensionService class + - list_stores() - Paginated store list with filters + - get_store() - Single store by ID + - list_products() - Paginated product list with filters + - get_product() - Single product by ID + - search_stores() - Search by code/name + - search_products() - Search by sku/name +PATTERN: Mirror registry service pattern +CRITICAL: + - Use async SQLAlchemy queries + - Apply filters with ilike() for case-insensitive search + - Count total before applying pagination +VALIDATION: + - uv run mypy app/features/dimensions/service.py +``` + +#### Task 6: Implement dimensions routes + +```yaml +FILE: app/features/dimensions/routes.py +ACTION: CREATE +IMPLEMENT: + - 
APIRouter(prefix="/dimensions", tags=["dimensions"]) + - GET /stores - List stores with pagination and filters + - GET /stores/{store_id} - Get store by ID + - GET /products - List products with pagination and filters + - GET /products/{product_id} - Get product by ID +CRITICAL: + - Rich OpenAPI descriptions on each endpoint + - Include example responses in docstrings + - Log dimension queries for analytics +VALIDATION: + - uv run mypy app/features/dimensions/routes.py +``` + +#### Task 7: Create analytics module structure + +```yaml +ACTION: CREATE directories and files +FILES: + - app/features/analytics/__init__.py + - app/features/analytics/schemas.py + - app/features/analytics/service.py + - app/features/analytics/routes.py + - app/features/analytics/tests/__init__.py + - app/features/analytics/tests/conftest.py +``` + +#### Task 8: Implement analytics schemas + +```yaml +FILE: app/features/analytics/schemas.py +ACTION: CREATE +IMPLEMENT: + - DateRange filter (start_date, end_date with validation) + - KPIRequest (dimensions to group by, date range) + - KPIResponse (revenue, units, orders, avg_basket) + - DrilldownRequest (dimension, filter, date range) + - DrilldownResponse (breakdown by dimension value) + - TimeGranularity enum (day, week, month, quarter) +CRITICAL: + - Validate date range (end >= start) + - Max date range constraint (e.g., 2 years) + - Rich descriptions for LLM tool selection +VALIDATION: + - uv run mypy app/features/analytics/schemas.py +``` + +#### Task 9: Implement analytics service + +```yaml +FILE: app/features/analytics/service.py +ACTION: CREATE +IMPLEMENT: + - AnalyticsService class + - compute_kpis() - Aggregate revenue/units by dimension + - compute_drilldown() - Drill into specific dimension + - _build_kpi_query() - SQL builder for aggregations +PATTERN: Use SQLAlchemy func for aggregations +CRITICAL: + - Validate dates exist in calendar table + - Apply max_rows limit (setting) + - Use DATE_TRUNC for time grouping +VALIDATION: + - 
uv run mypy app/features/analytics/service.py +``` + +#### Task 10: Implement analytics routes + +```yaml +FILE: app/features/analytics/routes.py +ACTION: CREATE +IMPLEMENT: + - APIRouter(prefix="/analytics", tags=["analytics"]) + - GET /kpis - Compute KPIs with filters + - GET /drilldowns - Drill into dimension +CRITICAL: + - Rich OpenAPI descriptions with examples + - Response models for type safety + - Appropriate caching headers +VALIDATION: + - uv run mypy app/features/analytics/routes.py +``` + +#### Task 11: Create jobs module structure + +```yaml +ACTION: CREATE directories and files +FILES: + - app/features/jobs/__init__.py + - app/features/jobs/models.py + - app/features/jobs/schemas.py + - app/features/jobs/service.py + - app/features/jobs/routes.py + - app/features/jobs/tests/__init__.py + - app/features/jobs/tests/conftest.py +``` + +#### Task 12: Implement jobs ORM model + +```yaml +FILE: app/features/jobs/models.py +ACTION: CREATE +IMPLEMENT: + - JobType enum (train, predict, backtest) + - JobStatus enum (pending, running, completed, failed, cancelled) + - Job model with JSONB params and result + - Indexes on job_id, status, job_type + - Check constraint for valid status values +PATTERN: Mirror registry ModelRun model +VALIDATION: + - uv run mypy app/features/jobs/models.py +``` + +#### Task 13: Create jobs migration + +```yaml +ACTION: Run alembic revision +COMMAND: uv run alembic revision --autogenerate -m "create_jobs_table" +IMPLEMENT: + - Create job table with JSONB columns + - Add indexes + - Add check constraints +VALIDATION: + - uv run alembic upgrade head + - uv run alembic downgrade -1 + - uv run alembic upgrade head +``` + +#### Task 14: Implement jobs schemas + +```yaml +FILE: app/features/jobs/schemas.py +ACTION: CREATE +IMPLEMENT: + - JobType, JobStatus enums + - VALID_JOB_TRANSITIONS dict + - JobCreate (job_type, params as dict) + - JobResponse (job_id, status, params, result, timing) + - JobListResponse (pagination) +CRITICAL: + - 
params is flexible JSONB - validated by job type handlers + - Rich descriptions for LLM orchestration +VALIDATION: + - uv run mypy app/features/jobs/schemas.py +``` + +#### Task 15: Implement jobs service + +```yaml +FILE: app/features/jobs/service.py +ACTION: CREATE +IMPLEMENT: + - JobService class + - create_job() - Create PENDING job, execute synchronously + - get_job() - Get job by job_id + - list_jobs() - List with filtering and pagination + - cancel_job() - Cancel PENDING job + - _execute_train() - Delegate to ForecastingService + - _execute_predict() - Delegate to ForecastingService + - _execute_backtest() - Delegate to BacktestingService + - _validate_params() - Validate params for job type +CRITICAL: + - Jobs execute synchronously (contracts ready for async) + - Capture execution time + - Store result or error in JSONB + - Link to run_id for train/backtest jobs +VALIDATION: + - uv run mypy app/features/jobs/service.py +``` + +#### Task 16: Implement jobs routes + +```yaml +FILE: app/features/jobs/routes.py +ACTION: CREATE +IMPLEMENT: + - APIRouter(prefix="/jobs", tags=["jobs"]) + - POST /jobs - Create and execute job (returns job_id) + - GET /jobs - List jobs with filtering + - GET /jobs/{job_id} - Get job status and result + - DELETE /jobs/{job_id} - Cancel pending job +CRITICAL: + - Response includes job_id for polling + - Rich descriptions explain job types and params + - 202 Accepted for creation (async-ready semantics) +VALIDATION: + - uv run mypy app/features/jobs/routes.py +``` + +#### Task 17: Add settings for new features + +```yaml +FILE: app/core/config.py +ACTION: MODIFY +IMPLEMENT: + - analytics_max_rows: int = 10000 + - analytics_max_date_range_days: int = 730 + - jobs_retention_days: int = 30 +FIND: "registry_duplicate_policy" +INJECT AFTER: New settings +VALIDATION: + - uv run mypy app/core/config.py +``` + +#### Task 18: Register new routers in main.py + +```yaml +FILE: app/main.py +ACTION: MODIFY +IMPLEMENT: + - Import dimensions, 
analytics, jobs routers + - Register with app.include_router() +FIND: "from app.features.registry.routes import router as registry_router" +INJECT AFTER: + - "from app.features.dimensions.routes import router as dimensions_router" + - "from app.features.analytics.routes import router as analytics_router" + - "from app.features.jobs.routes import router as jobs_router" +FIND: "app.include_router(registry_router)" +INJECT AFTER: + - "app.include_router(dimensions_router)" + - "app.include_router(analytics_router)" + - "app.include_router(jobs_router)" +VALIDATION: + - uv run python -c "from app.main import app; print('OK')" +``` + +#### Task 19: Create shared mixins module + +```yaml +FILE: app/shared/mixins.py +ACTION: CREATE +IMPLEMENT: + - SortOrder enum (asc, desc) + - SortParams generic mixin + - FilterMixin base class + - PaginationMixin with helper methods + - DateRangeMixin with validation +PATTERN: Reusable across all list endpoints +VALIDATION: + - uv run mypy app/shared/mixins.py +``` + +#### Task 20: Enhance existing endpoint descriptions + +```yaml +FILES: + - app/features/ingest/schemas.py + - app/features/forecasting/schemas.py + - app/features/backtesting/schemas.py + - app/features/registry/schemas.py +ACTION: MODIFY +IMPLEMENT: + - Add rich Field descriptions to all fields + - Include "Use X endpoint to get valid values" hints + - Add examples where helpful +PATTERN: + - store_id: int = Field(..., description="Store ID from GET /dimensions/stores") + - sku: str = Field(..., description="Product SKU from GET /dimensions/products") +VALIDATION: + - uv run mypy app/features/*/schemas.py +``` + +#### Task 21: Create OpenAPI export script + +```yaml +FILE: scripts/export_openapi.py +ACTION: CREATE +IMPLEMENT: + - Load FastAPI app + - Extract OpenAPI schema via app.openapi() + - Enrich with additional metadata for RAG + - Export to artifacts/openapi/schema.json + - Export markdown summary for embedding +CRITICAL: + - Include all operation descriptions + - 
Include all schema descriptions + - Include error response schemas +VALIDATION: + - uv run python scripts/export_openapi.py + - Check artifacts/openapi/schema.json exists +``` + +#### Task 22: Create dimension tests + +```yaml +FILES: + - app/features/dimensions/tests/test_schemas.py + - app/features/dimensions/tests/test_service.py + - app/features/dimensions/tests/test_routes.py +ACTION: CREATE +IMPLEMENT: + - Schema validation tests + - Service pagination tests + - Service filter tests + - Route integration tests +VALIDATION: + - uv run pytest app/features/dimensions/tests/ -v +``` + +#### Task 23: Create analytics tests + +```yaml +FILES: + - app/features/analytics/tests/test_schemas.py + - app/features/analytics/tests/test_service.py + - app/features/analytics/tests/test_routes.py +ACTION: CREATE +IMPLEMENT: + - Date range validation tests + - KPI computation tests + - Drilldown tests + - Route integration tests +VALIDATION: + - uv run pytest app/features/analytics/tests/ -v +``` + +#### Task 24: Create jobs tests + +```yaml +FILES: + - app/features/jobs/tests/test_models.py + - app/features/jobs/tests/test_schemas.py + - app/features/jobs/tests/test_service.py + - app/features/jobs/tests/test_routes.py +ACTION: CREATE +IMPLEMENT: + - Model creation tests + - Status transition tests + - Job execution tests (mock services) + - Route integration tests +VALIDATION: + - uv run pytest app/features/jobs/tests/ -v +``` + +#### Task 25: Create example HTTP files + +```yaml +FILES: + - examples/api/dimensions.http + - examples/api/analytics.http + - examples/api/jobs.http +ACTION: CREATE +IMPLEMENT: + - Dimension discovery examples + - KPI query examples + - Job creation and polling examples +PATTERN: Mirror ingest_sales_daily.http format +``` + +#### Task 26: Update module __init__.py exports + +```yaml +FILES: + - app/features/dimensions/__init__.py + - app/features/analytics/__init__.py + - app/features/jobs/__init__.py +ACTION: MODIFY +IMPLEMENT: + - Export all 
public classes + - Alphabetically sorted __all__ +VALIDATION: + - uv run python -c "from app.features.dimensions import *" + - uv run python -c "from app.features.analytics import *" + - uv run python -c "from app.features.jobs import *" +``` + +--- + +## Validation Loop + +### Level 1: Syntax & Style + +```bash +# Run after EACH file creation +uv run ruff check app/features/dimensions/ app/features/analytics/ app/features/jobs/ app/core/problem_details.py --fix +uv run ruff format app/features/dimensions/ app/features/analytics/ app/features/jobs/ app/core/ + +# Expected: All checks passed! +``` + +### Level 2: Type Checking + +```bash +# Run after completing each module +uv run mypy app/features/dimensions/ +uv run mypy app/features/analytics/ +uv run mypy app/features/jobs/ +uv run mypy app/core/problem_details.py + +uv run pyright app/features/dimensions/ +uv run pyright app/features/analytics/ +uv run pyright app/features/jobs/ + +# Expected: Success: no issues found +``` + +### Level 3: Database Migration + +```bash +# After creating jobs models.py +uv run alembic revision --autogenerate -m "create_jobs_table" +uv run alembic upgrade head + +# Verify table exists +docker exec -it postgres psql -U forecastlab -d forecastlab -c "\d job" +``` + +### Level 4: Unit Tests + +```bash +# Run incrementally +uv run pytest app/features/dimensions/tests/ -v -m "not integration" +uv run pytest app/features/analytics/tests/ -v -m "not integration" +uv run pytest app/features/jobs/tests/ -v -m "not integration" + +# Run all unit tests +uv run pytest app/features/dimensions/ app/features/analytics/ app/features/jobs/ -v -m "not integration" + +# Expected: 50+ tests passed +``` + +### Level 5: Integration Tests + +```bash +# Start database +docker-compose up -d + +# Seed test data +uv run python examples/seed_demo_data.py + +# Run integration tests +uv run pytest app/features/dimensions/tests/ -v -m integration +uv run pytest app/features/analytics/tests/ -v -m integration 
+uv run pytest app/features/jobs/tests/ -v -m integration + +# Expected: 15+ integration tests passed +``` + +### Level 6: API Integration Test + +```bash +# Start API +uv run uvicorn app.main:app --reload --port 8123 + +# Test dimension discovery +curl http://localhost:8123/dimensions/stores +curl "http://localhost:8123/dimensions/stores?region=North" +curl "http://localhost:8123/dimensions/products?category=Beverage" + +# Test analytics +curl "http://localhost:8123/analytics/kpis?start_date=2024-01-01&end_date=2024-01-31" +curl "http://localhost:8123/analytics/drilldowns?dimension=store&start_date=2024-01-01&end_date=2024-01-31" + +# Test job creation +curl -X POST http://localhost:8123/jobs \ + -H "Content-Type: application/json" \ + -d '{ + "job_type": "train", + "params": { + "store_id": 1, + "product_id": 1, + "train_start_date": "2024-01-01", + "train_end_date": "2024-06-30", + "config": {"model_type": "naive"} + } + }' + +# Poll job status (substitute the job_id returned by the POST above) +curl http://localhost:8123/jobs/{job_id} +``` + +### Level 7: OpenAPI Export + +```bash +# Export schema +uv run python scripts/export_openapi.py + +# Verify export +ls -la artifacts/openapi/ +cat artifacts/openapi/schema.json | jq '.info' +``` + +### Level 8: Full Validation + +```bash +# Complete validation suite +uv run ruff check . 
&& \ +uv run mypy app/ && \ +uv run pyright app/ && \ +uv run pytest -v + +# Expected: All green +``` + +--- + +## Final Checklist + +- [ ] All 26 tasks completed +- [ ] `uv run ruff check .` — no errors +- [ ] `uv run mypy app/` — no errors +- [ ] `uv run pyright app/` — no errors +- [ ] `uv run pytest -v` — 50+ new tests passed +- [ ] Alembic migration runs successfully +- [ ] Dimension endpoints return paginated results +- [ ] Analytics endpoints compute KPIs correctly +- [ ] Job orchestration creates and executes jobs +- [ ] RFC 7807 error responses include type/instance URIs +- [ ] OpenAPI export script produces valid JSON +- [ ] All Field descriptions optimized for LLM tool selection +- [ ] Example HTTP files work with VS Code REST Client +- [ ] Routers registered in main.py + +--- + +## Anti-Patterns to Avoid + +- **DON'T** use generic descriptions like "The ID" — be specific about where to get values +- **DON'T** skip error type URIs — they enable agent troubleshooting +- **DON'T** use 0-indexed pagination — always 1-indexed +- **DON'T** allow unbounded queries — always apply max_rows limits +- **DON'T** skip date validation against calendar table +- **DON'T** use sync operations in async context +- **DON'T** hardcode settings — use config.py +- **DON'T** forget to register routers in main.py +- **DON'T** create jobs without validating params against job type +- **DON'T** return 200 for job creation — use 202 Accepted (async-ready) + +--- + +## Sources + +- [RFC 7807: Problem Details for HTTP APIs](https://datatracker.ietf.org/doc/html/rfc7807) +- [fastapi-rfc7807 Library](https://github.com/vapor-ware/fastapi-rfc7807) +- [How LLM APIs Use OpenAPI for Function Calling](https://medium.com/percolation-labs/how-llm-apis-use-the-openapi-spec-for-function-calling-f37d76e0fef3) +- [OpenAPI LLM Function Calling Composer](https://github.com/samchon/openapi) +- [Optimizing Tool Calling for 
LLMs](https://www.useparagon.com/learn/rag-best-practices-optimizing-tool-calling/) +- [Use OpenAPI Instead of MCP for LLM Tools](https://www.binwang.me/2025-04-27-Use-OpenAPI-Instead-of-MCP-for-LLM-Tools.html) + +--- + +## Confidence Score: 8.5/10 + +**Strengths:** +- Clear patterns from existing registry/forecasting modules +- Well-defined RFC 7807 standard to follow +- Existing dimension models (Store, Product) are already in data_platform +- Job orchestration mirrors registry run lifecycle pattern +- KPI queries have SQL patterns in examples/queries/ +- Comprehensive test patterns from backtesting module + +**Risks:** +- RFC 7807 integration requires careful exception handler refactoring +- Analytics queries may need optimization for large datasets +- Job execution delegates to multiple services (coupling) +- OpenAPI enrichment may require custom schema extensions + +**Mitigation:** +- Start with simple Problem Details, enhance incrementally +- Add analytics_max_rows setting and query timeouts +- Use dependency injection for job executors +- Test OpenAPI export with actual LLM tool calling + +--- + +## Implementation Order (Suggested) + +1. **Phase A**: RFC 7807 Problem Details (Tasks 1-2) — Foundational +2. **Phase B**: Dimensions Module (Tasks 3-6) — Simple, high value +3. **Phase C**: Analytics Module (Tasks 7-10) — Medium complexity +4. **Phase D**: Jobs Module (Tasks 11-16) — Most complex +5. **Phase E**: Integration (Tasks 17-21) — Wire everything together +6. 
**Phase F**: Testing & Polish (Tasks 22-26) — Validation From 09299aaec37279bee604d44d0d5668cd4ab61aef Mon Sep 17 00:00:00 2001 From: "Gabe@w7dev" Date: Sun, 1 Feb 2026 09:18:21 +0000 Subject: [PATCH 3/5] feat(serving-layer): implement PRP-8 agent-first API design MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add RFC 7807 Problem Details for semantic error responses: - ProblemDetail schema with type URIs and error codes - application/problem+json content type - Validation exception handler with field-level errors Add dimensions module for store/product discovery: - GET /dimensions/stores with pagination, filtering, search - GET /dimensions/products with pagination, filtering, search - LLM-optimized Field descriptions for tool-calling Add analytics module for KPI aggregations: - GET /analytics/kpis with date range and dimension filters - GET /analytics/drilldowns for store/product/category/region/date - Revenue share and ranking calculations Add jobs module for async-ready task orchestration: - POST /jobs for train/predict/backtest operations - Job model with JSONB params/results - Status transitions: pending → running → completed/failed Integration: - New settings: analytics_max_rows, analytics_max_date_range_days, jobs_retention_days - Register routers in main.py - Alembic migration for jobs table Co-Authored-By: Claude Opus 4.5 --- alembic/env.py | 1 + .../37e16ecef223_create_jobs_table.py | 63 +++ app/core/config.py | 7 + app/core/exceptions.py | 207 +++++-- app/core/problem_details.py | 194 +++++++ app/features/analytics/__init__.py | 23 + app/features/analytics/routes.py | 203 +++++++ app/features/analytics/schemas.py | 227 ++++++++ app/features/analytics/service.py | 290 ++++++++++ app/features/analytics/tests/__init__.py | 1 + app/features/analytics/tests/conftest.py | 82 +++ app/features/dimensions/__init__.py | 23 + app/features/dimensions/routes.py | 244 ++++++++ app/features/dimensions/schemas.py | 181 ++++++ 
app/features/dimensions/service.py | 253 +++++++++ app/features/dimensions/tests/__init__.py | 1 + app/features/dimensions/tests/conftest.py | 28 + app/features/jobs/__init__.py | 25 + app/features/jobs/models.py | 132 +++++ app/features/jobs/routes.py | 299 ++++++++++ app/features/jobs/schemas.py | 158 ++++++ app/features/jobs/service.py | 532 ++++++++++++++++++ app/features/jobs/tests/__init__.py | 1 + app/features/jobs/tests/conftest.py | 86 +++ app/main.py | 6 + 25 files changed, 3230 insertions(+), 37 deletions(-) create mode 100644 alembic/versions/37e16ecef223_create_jobs_table.py create mode 100644 app/core/problem_details.py create mode 100644 app/features/analytics/__init__.py create mode 100644 app/features/analytics/routes.py create mode 100644 app/features/analytics/schemas.py create mode 100644 app/features/analytics/service.py create mode 100644 app/features/analytics/tests/__init__.py create mode 100644 app/features/analytics/tests/conftest.py create mode 100644 app/features/dimensions/__init__.py create mode 100644 app/features/dimensions/routes.py create mode 100644 app/features/dimensions/schemas.py create mode 100644 app/features/dimensions/service.py create mode 100644 app/features/dimensions/tests/__init__.py create mode 100644 app/features/dimensions/tests/conftest.py create mode 100644 app/features/jobs/__init__.py create mode 100644 app/features/jobs/models.py create mode 100644 app/features/jobs/routes.py create mode 100644 app/features/jobs/schemas.py create mode 100644 app/features/jobs/service.py create mode 100644 app/features/jobs/tests/__init__.py create mode 100644 app/features/jobs/tests/conftest.py diff --git a/alembic/env.py b/alembic/env.py index 38e3e935..b3d317b0 100644 --- a/alembic/env.py +++ b/alembic/env.py @@ -13,6 +13,7 @@ # Import all models for Alembic autogenerate detection from app.features.data_platform import models as data_platform_models # noqa: F401 +from app.features.jobs import models as jobs_models # noqa: 
F401 from app.features.registry import models as registry_models # noqa: F401 # Alembic Config object diff --git a/alembic/versions/37e16ecef223_create_jobs_table.py b/alembic/versions/37e16ecef223_create_jobs_table.py new file mode 100644 index 00000000..a18d0429 --- /dev/null +++ b/alembic/versions/37e16ecef223_create_jobs_table.py @@ -0,0 +1,63 @@ +"""create_jobs_table + +Revision ID: 37e16ecef223 +Revises: a2f7b3c8d901 +Create Date: 2026-02-01 09:15:25.050307 + +""" +from typing import Sequence, Union + +from alembic import op +import sqlalchemy as sa +from sqlalchemy.dialects import postgresql + +# revision identifiers, used by Alembic. +revision: str = '37e16ecef223' +down_revision: Union[str, None] = 'a2f7b3c8d901' +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + """Apply migration.""" + # ### commands auto generated by Alembic - please adjust! ### + op.create_table('job', + sa.Column('id', sa.Integer(), nullable=False), + sa.Column('job_id', sa.String(length=32), nullable=False), + sa.Column('job_type', sa.String(length=20), nullable=False), + sa.Column('status', sa.String(length=20), nullable=False), + sa.Column('params', postgresql.JSONB(astext_type=sa.Text()), nullable=False), + sa.Column('result', postgresql.JSONB(astext_type=sa.Text()), nullable=True), + sa.Column('error_message', sa.String(length=2000), nullable=True), + sa.Column('error_type', sa.String(length=100), nullable=True), + sa.Column('started_at', sa.DateTime(timezone=True), nullable=True), + sa.Column('completed_at', sa.DateTime(timezone=True), nullable=True), + sa.Column('run_id', sa.String(length=32), nullable=True), + sa.Column('created_at', sa.DateTime(timezone=True), server_default=sa.text('now()'), nullable=False), + sa.Column('updated_at', sa.DateTime(timezone=True), server_default=sa.text('now()'), nullable=False), + sa.CheckConstraint("job_type IN ('train', 'predict', 'backtest')", 
name='ck_job_valid_type'), + sa.CheckConstraint("status IN ('pending', 'running', 'completed', 'failed', 'cancelled')", name='ck_job_valid_status'), + sa.PrimaryKeyConstraint('id') + ) + op.create_index(op.f('ix_job_job_id'), 'job', ['job_id'], unique=True) + op.create_index(op.f('ix_job_job_type'), 'job', ['job_type'], unique=False) + op.create_index('ix_job_params_gin', 'job', ['params'], unique=False, postgresql_using='gin') + op.create_index('ix_job_result_gin', 'job', ['result'], unique=False, postgresql_using='gin') + op.create_index(op.f('ix_job_run_id'), 'job', ['run_id'], unique=False) + op.create_index(op.f('ix_job_status'), 'job', ['status'], unique=False) + op.create_index('ix_job_type_status', 'job', ['job_type', 'status'], unique=False) + # ### end Alembic commands ### + + +def downgrade() -> None: + """Revert migration.""" + # ### commands auto generated by Alembic - please adjust! ### + op.drop_index('ix_job_type_status', table_name='job') + op.drop_index(op.f('ix_job_status'), table_name='job') + op.drop_index(op.f('ix_job_run_id'), table_name='job') + op.drop_index('ix_job_result_gin', table_name='job', postgresql_using='gin') + op.drop_index('ix_job_params_gin', table_name='job', postgresql_using='gin') + op.drop_index(op.f('ix_job_job_type'), table_name='job') + op.drop_index(op.f('ix_job_job_id'), table_name='job') + op.drop_table('job') + # ### end Alembic commands ### diff --git a/app/core/config.py b/app/core/config.py index 1ef95075..46d5c9c9 100644 --- a/app/core/config.py +++ b/app/core/config.py @@ -57,6 +57,13 @@ class Settings(BaseSettings): registry_artifact_root: str = "./artifacts/registry" registry_duplicate_policy: Literal["allow", "deny", "detect"] = "detect" + # Analytics + analytics_max_rows: int = 10000 + analytics_max_date_range_days: int = 730 + + # Jobs + jobs_retention_days: int = 30 + @property def is_development(self) -> bool: """Check if running in development mode.""" diff --git a/app/core/exceptions.py 
b/app/core/exceptions.py index 316acddf..260d2fee 100644 --- a/app/core/exceptions.py +++ b/app/core/exceptions.py @@ -1,17 +1,37 @@ -"""Custom exceptions and FastAPI exception handlers.""" +"""Custom exceptions and FastAPI exception handlers. + +Implements RFC 7807 Problem Details for machine-readable error responses. +""" from typing import Any from fastapi import FastAPI, Request -from fastapi.responses import JSONResponse +from fastapi.exceptions import RequestValidationError -from app.core.logging import get_logger, request_id_ctx +from app.core.logging import get_logger +from app.core.problem_details import ( + ERROR_TYPES, + ProblemDetailResponse, + problem_response, +) logger = get_logger(__name__) +# ============================================================================= +# Exception Classes +# ============================================================================= + + class ForecastLabError(Exception): - """Base exception for ForecastLabAI application errors.""" + """Base exception for ForecastLabAI application errors. + + All application-specific exceptions should inherit from this class. + Each exception type maps to an RFC 7807 problem type URI. + """ + + # Default error type URI (override in subclasses) + error_type_uri: str = ERROR_TYPES["INTERNAL_ERROR"] def __init__( self, @@ -34,9 +54,20 @@ def __init__( self.status_code = status_code self.details = details or {} + @property + def title(self) -> str: + """RFC 7807 title - short summary of problem type.""" + return self.code.replace("_", " ").title() + class NotFoundError(ForecastLabError): - """Resource not found error.""" + """Resource not found error. + + Use when a requested resource (store, product, run, etc.) does not exist. + Agents should check the resource ID and retry with a valid one. 
+ """ + + error_type_uri: str = ERROR_TYPES["NOT_FOUND"] def __init__( self, @@ -52,7 +83,13 @@ def __init__( class ValidationError(ForecastLabError): - """Input validation error.""" + """Input validation error. + + Use when request data fails validation. + Agents should check the 'errors' field for specific field issues. + """ + + error_type_uri: str = ERROR_TYPES["VALIDATION_ERROR"] def __init__( self, @@ -68,7 +105,13 @@ def __init__( class DatabaseError(ForecastLabError): - """Database operation error.""" + """Database operation error. + + Use when a database operation fails unexpectedly. + Agents should retry after a delay or report for human investigation. + """ + + error_type_uri: str = ERROR_TYPES["DATABASE_ERROR"] def __init__( self, @@ -83,21 +126,68 @@ def __init__( ) +class ConflictError(ForecastLabError): + """Resource conflict error. + + Use when an operation conflicts with existing state (e.g., duplicate). + Agents should check existing resources before retrying. + """ + + error_type_uri: str = ERROR_TYPES["CONFLICT"] + + def __init__( + self, + message: str = "Resource conflict", + details: dict[str, Any] | None = None, + ) -> None: + super().__init__( + message=message, + code="CONFLICT", + status_code=409, + details=details, + ) + + +class BadRequestError(ForecastLabError): + """Bad request error. + + Use when the request is malformed or invalid. + Agents should check the request format and parameters. 
+ """ + + error_type_uri: str = ERROR_TYPES["BAD_REQUEST"] + + def __init__( + self, + message: str = "Bad request", + details: dict[str, Any] | None = None, + ) -> None: + super().__init__( + message=message, + code="BAD_REQUEST", + status_code=400, + details=details, + ) + + +# ============================================================================= +# Exception Handlers (RFC 7807) +# ============================================================================= + + async def forecastlab_exception_handler( _request: Request, exc: ForecastLabError, -) -> JSONResponse: - """Handle ForecastLabError exceptions. +) -> ProblemDetailResponse: + """Handle ForecastLabError exceptions with RFC 7807 Problem Details. Args: - request: FastAPI request object. + _request: FastAPI request object. exc: The raised exception. Returns: - JSON response with error details. + RFC 7807 Problem Detail response. """ - request_id = request_id_ctx.get() - logger.error( "app.error_handled", error=exc.message, @@ -108,34 +198,73 @@ async def forecastlab_exception_handler( exc_info=True, ) - return JSONResponse( - status_code=exc.status_code, - content={ - "error": { - "code": exc.code, - "message": exc.message, - "details": exc.details, - "request_id": request_id, + return problem_response( + status=exc.status_code, + title=exc.title, + detail=exc.message, + error_code=exc.code, + ) + + +async def validation_exception_handler( + request: Request, + exc: RequestValidationError, +) -> ProblemDetailResponse: + """Handle Pydantic validation errors with RFC 7807 Problem Details. + + Converts Pydantic validation errors to the 'errors' extension field + so agents can identify which specific fields need correction. + + Args: + request: FastAPI request object. + exc: Pydantic validation error. + + Returns: + RFC 7807 Problem Detail response with field-level errors. 
+ """ + # Convert Pydantic errors to RFC 7807 format + field_errors: list[dict[str, str]] = [] + for error in exc.errors(): + loc = error.get("loc", []) + field_path = ".".join(str(part) for part in loc if part != "body") + field_errors.append( + { + "field": field_path, + "message": str(error.get("msg", "Validation failed")), + "type": str(error.get("type", "unknown")), } - }, + ) + + logger.warning( + "app.validation_error", + error_count=len(field_errors), + path=str(request.url.path), + fields=[e["field"] for e in field_errors], + ) + + return problem_response( + status=422, + title="Validation Error", + detail=f"Request validation failed with {len(field_errors)} error(s). " + "Check the 'errors' field for details.", + error_code="VALIDATION_ERROR", + errors=field_errors, ) async def unhandled_exception_handler( request: Request, exc: Exception, -) -> JSONResponse: - """Handle unexpected exceptions. +) -> ProblemDetailResponse: + """Handle unexpected exceptions with RFC 7807 Problem Details. Args: request: FastAPI request object. exc: The raised exception. Returns: - JSON response with generic error. + RFC 7807 Problem Detail response. """ - request_id = request_id_ctx.get() - logger.error( "app.unhandled_error", error=str(exc), @@ -144,24 +273,28 @@ async def unhandled_exception_handler( exc_info=True, ) - return JSONResponse( - status_code=500, - content={ - "error": { - "code": "INTERNAL_ERROR", - "message": "An unexpected error occurred", - "details": {}, - "request_id": request_id, - } - }, + return problem_response( + status=500, + title="Internal Server Error", + detail="An unexpected error occurred. 
Please try again later or " + "contact support with the request_id.", + error_code="INTERNAL_ERROR", ) +# ============================================================================= +# Handler Registration +# ============================================================================= + + def register_exception_handlers(app: FastAPI) -> None: """Register exception handlers with FastAPI app. + All handlers return RFC 7807 Problem Details responses. + Args: app: FastAPI application instance. """ app.add_exception_handler(ForecastLabError, forecastlab_exception_handler) # type: ignore[arg-type] + app.add_exception_handler(RequestValidationError, validation_exception_handler) # type: ignore[arg-type] app.add_exception_handler(Exception, unhandled_exception_handler) diff --git a/app/core/problem_details.py b/app/core/problem_details.py new file mode 100644 index 00000000..2fcd71cf --- /dev/null +++ b/app/core/problem_details.py @@ -0,0 +1,194 @@ +"""RFC 7807 Problem Details for HTTP APIs. + +This module implements the RFC 7807 standard for machine-readable error responses, +enabling LLM agents to automatically diagnose and troubleshoot API errors. 
+ +Reference: https://datatracker.ietf.org/doc/html/rfc7807 +""" + +from typing import Any + +from fastapi.responses import JSONResponse +from pydantic import BaseModel, ConfigDict, Field + +from app.core.logging import get_logger, request_id_ctx + +logger = get_logger(__name__) + + +# ============================================================================= +# Error Type URIs +# ============================================================================= + +# Base URI for error types (relative URIs for portability) +ERROR_TYPE_BASE = "/errors" + +ERROR_TYPES = { + "NOT_FOUND": f"{ERROR_TYPE_BASE}/not-found", + "VALIDATION_ERROR": f"{ERROR_TYPE_BASE}/validation", + "DATABASE_ERROR": f"{ERROR_TYPE_BASE}/database", + "CONFLICT": f"{ERROR_TYPE_BASE}/conflict", + "UNAUTHORIZED": f"{ERROR_TYPE_BASE}/unauthorized", + "FORBIDDEN": f"{ERROR_TYPE_BASE}/forbidden", + "RATE_LIMITED": f"{ERROR_TYPE_BASE}/rate-limited", + "INTERNAL_ERROR": f"{ERROR_TYPE_BASE}/internal", + "BAD_REQUEST": f"{ERROR_TYPE_BASE}/bad-request", + "SERVICE_UNAVAILABLE": f"{ERROR_TYPE_BASE}/service-unavailable", +} + + +# ============================================================================= +# Problem Detail Schema +# ============================================================================= + + +class ProblemDetail(BaseModel): + """RFC 7807 Problem Details for HTTP APIs. + + This schema enables machine-readable error responses that LLM agents + can use for automatic troubleshooting and retry logic. + + Attributes: + type: URI identifying the error type (for categorization). + title: Short human-readable summary of the problem. + status: HTTP status code. + detail: Human-readable explanation specific to this occurrence. + instance: URI reference for this specific problem occurrence. + errors: Optional field-level validation errors (extension for 422). + code: Machine-readable error code (extension for backwards compatibility). + request_id: Request correlation ID (extension for tracing). 
+ """ + + model_config = ConfigDict(extra="allow") # Allow extensions per RFC 7807 + + type: str = Field( + default="about:blank", + description="URI reference identifying the problem type. " + "Use this to categorize errors for automated handling.", + ) + title: str = Field( + ..., + description="Short, human-readable summary of the problem type. " + "Should be the same for all occurrences of this problem type.", + ) + status: int = Field( + ..., + ge=400, + le=599, + description="HTTP status code for this occurrence.", + ) + detail: str | None = Field( + None, + description="Human-readable explanation specific to this occurrence. " + "Provides context beyond the title.", + ) + instance: str | None = Field( + None, + description="URI reference for this specific problem occurrence. " + "Use for error tracking and correlation.", + ) + # Extensions + errors: list[dict[str, Any]] | None = Field( + None, + description="Field-level validation errors. Present for 422 responses " + "to help agents identify which fields need correction.", + ) + code: str | None = Field( + None, + description="Machine-readable error code for backwards compatibility. " + "Maps to internal error categories.", + ) + request_id: str | None = Field( + None, + description="Request correlation ID for distributed tracing. Include in support requests.", + ) + + +# ============================================================================= +# Problem Detail Response +# ============================================================================= + + +class ProblemDetailResponse(JSONResponse): + """JSON response with RFC 7807 content type. + + Sets the proper media type for problem details responses. 
+ """ + + media_type = "application/problem+json" + + +# ============================================================================= +# Helper Functions +# ============================================================================= + + +def create_problem_detail( + status: int, + title: str, + detail: str | None = None, + error_code: str = "INTERNAL_ERROR", + errors: list[dict[str, Any]] | None = None, +) -> ProblemDetail: + """Create a ProblemDetail instance with proper type URI and instance. + + Args: + status: HTTP status code. + title: Short problem summary. + detail: Detailed explanation (optional). + error_code: Internal error code for type URI lookup. + errors: Field-level validation errors (optional). + Returns: + Configured ProblemDetail instance. + """ + request_id = request_id_ctx.get() + + problem = ProblemDetail( + type=ERROR_TYPES.get(error_code, f"{ERROR_TYPE_BASE}/{error_code.lower()}"), + title=title, + status=status, + detail=detail, + instance=f"/requests/{request_id}" if request_id else None, + errors=errors, + code=error_code, + request_id=request_id, + ) + + return problem + + +def problem_response( + status: int, + title: str, + detail: str | None = None, + error_code: str = "INTERNAL_ERROR", + errors: list[dict[str, Any]] | None = None, +) -> ProblemDetailResponse: + """Create a ProblemDetailResponse with proper content type. + + Args: + status: HTTP status code. + title: Short problem summary. + detail: Detailed explanation (optional). + error_code: Internal error code for type URI lookup. + errors: Field-level validation errors (optional). + Returns: + JSONResponse with problem+json content type. 
+ """ + problem = create_problem_detail( + status=status, + title=title, + detail=detail, + error_code=error_code, + errors=errors, + ) + + return ProblemDetailResponse( + status_code=status, + content=problem.model_dump(exclude_none=True), + ) + + +# ============================================================================= +# Exception Handlers for RFC 7807 +# ============================================================================= diff --git a/app/features/analytics/__init__.py b/app/features/analytics/__init__.py new file mode 100644 index 00000000..073d6ab7 --- /dev/null +++ b/app/features/analytics/__init__.py @@ -0,0 +1,23 @@ +"""Analytics module for KPI aggregations and drilldowns. + +This module provides endpoints for computing sales KPIs and drilling +into data by dimension (store, product, time period). +""" + +from app.features.analytics.routes import router +from app.features.analytics.schemas import ( + DrilldownDimension, + DrilldownResponse, + KPIResponse, + TimeGranularity, +) +from app.features.analytics.service import AnalyticsService + +__all__ = [ + "AnalyticsService", + "DrilldownDimension", + "DrilldownResponse", + "KPIResponse", + "TimeGranularity", + "router", +] diff --git a/app/features/analytics/routes.py b/app/features/analytics/routes.py new file mode 100644 index 00000000..b983fd4e --- /dev/null +++ b/app/features/analytics/routes.py @@ -0,0 +1,203 @@ +"""API routes for analytics endpoints. + +These endpoints provide KPI aggregations and drilldown analysis +with filtering by store, product, and date range. 
+""" + +from datetime import date + +from fastapi import APIRouter, Depends, Query +from sqlalchemy.ext.asyncio import AsyncSession + +from app.core.database import get_db +from app.core.logging import get_logger +from app.features.analytics.schemas import ( + DrilldownDimension, + DrilldownResponse, + KPIResponse, +) +from app.features.analytics.service import AnalyticsService + +logger = get_logger(__name__) + +router = APIRouter(prefix="/analytics", tags=["analytics"]) + + +# ============================================================================= +# KPI Endpoints +# ============================================================================= + + +@router.get( + "/kpis", + response_model=KPIResponse, + summary="Compute aggregated KPIs", + description=""" +Compute aggregated sales KPIs for a specified date range. + +**Purpose**: Get high-level sales metrics (revenue, units, transactions) +with optional filtering by store, product, or category. + +**Metrics Computed**: +- `total_revenue`: Sum of total_amount across all transactions +- `total_units`: Sum of quantity sold +- `total_transactions`: Count of unique (date, store, product) records +- `avg_unit_price`: total_revenue / total_units +- `avg_basket_value`: total_revenue / total_transactions + +**Filtering Options**: +- `store_id`: Filter to specific store (use GET /dimensions/stores to find IDs) +- `product_id`: Filter to specific product (use GET /dimensions/products to find IDs) +- `category`: Filter by product category name (exact match) + +**Date Range**: +- Both start_date and end_date are inclusive +- Maximum range: 730 days (2 years) + +**Example Use Cases**: +1. Total sales this month: `GET /analytics/kpis?start_date=2024-01-01&end_date=2024-01-31` +2. Store performance: `GET /analytics/kpis?store_id=5&start_date=2024-01-01&end_date=2024-12-31` +3. 
Category revenue: `GET /analytics/kpis?category=Beverage&start_date=2024-01-01&end_date=2024-01-31` +""", +) +async def get_kpis( + start_date: date = Query( + ..., + description="Start of analysis period (inclusive). Format: YYYY-MM-DD.", + ), + end_date: date = Query( + ..., + description="End of analysis period (inclusive). Format: YYYY-MM-DD.", + ), + store_id: int | None = Query( + None, + description="Filter by store ID. Use GET /dimensions/stores to find valid IDs.", + ), + product_id: int | None = Query( + None, + description="Filter by product ID. Use GET /dimensions/products to find valid IDs.", + ), + category: str | None = Query( + None, + description="Filter by product category name (exact match).", + ), + db: AsyncSession = Depends(get_db), +) -> KPIResponse: + """Compute KPIs for a date range with optional filters. + + Args: + start_date: Start of analysis period (inclusive). + end_date: End of analysis period (inclusive). + store_id: Filter by store ID (optional). + product_id: Filter by product ID (optional). + category: Filter by category (optional). + db: Database session. + + Returns: + Aggregated KPI metrics. + """ + service = AnalyticsService() + return await service.compute_kpis( + db=db, + start_date=start_date, + end_date=end_date, + store_id=store_id, + product_id=product_id, + category=category, + ) + + +# ============================================================================= +# Drilldown Endpoints +# ============================================================================= + + +@router.get( + "/drilldowns", + response_model=DrilldownResponse, + summary="Compute drilldown analysis", + description=""" +Break down KPIs by a specific dimension to identify top performers. + +**Purpose**: Drill into sales data by store, product, category, region, or date +to understand what's driving overall performance. 
+ +**Available Dimensions**: +- `store`: Group by store (returns store code and ID) +- `product`: Group by product (returns SKU and ID) +- `category`: Group by product category +- `region`: Group by store region +- `date`: Group by date (daily breakdown) + +**Response Structure**: +Each item includes: +- Dimension value and ID (where applicable) +- Full KPI metrics (revenue, units, transactions, averages) +- Rank by revenue (1 = highest) +- Revenue share percentage + +**Filtering Options**: +- `store_id`: Limit analysis to specific store +- `product_id`: Limit analysis to specific product +- `max_items`: Maximum items to return (default 20, max 100) + +**Example Use Cases**: +1. Top stores by revenue: `GET /analytics/drilldowns?dimension=store&start_date=2024-01-01&end_date=2024-01-31` +2. Product mix analysis: `GET /analytics/drilldowns?dimension=product&store_id=5&start_date=2024-01-01&end_date=2024-01-31` +3. Regional performance: `GET /analytics/drilldowns?dimension=region&start_date=2024-01-01&end_date=2024-12-31` +4. Daily trend: `GET /analytics/drilldowns?dimension=date&store_id=5&product_id=10&start_date=2024-01-01&end_date=2024-01-31` +""", +) +async def get_drilldowns( + dimension: DrilldownDimension = Query( + ..., + description="Dimension to group by: store, product, category, region, or date.", + ), + start_date: date = Query( + ..., + description="Start of analysis period (inclusive). Format: YYYY-MM-DD.", + ), + end_date: date = Query( + ..., + description="End of analysis period (inclusive). Format: YYYY-MM-DD.", + ), + store_id: int | None = Query( + None, + description="Filter by store ID. Use GET /dimensions/stores to find valid IDs.", + ), + product_id: int | None = Query( + None, + description="Filter by product ID. 
Use GET /dimensions/products to find valid IDs.", + ), + max_items: int = Query( + 20, + ge=1, + le=100, + description="Maximum number of items to return (1-100, default 20).", + ), + db: AsyncSession = Depends(get_db), +) -> DrilldownResponse: + """Compute drilldown analysis by dimension. + + Args: + dimension: Dimension to group by. + start_date: Start of analysis period (inclusive). + end_date: End of analysis period (inclusive). + store_id: Filter by store ID (optional). + product_id: Filter by product ID (optional). + max_items: Maximum items to return. + db: Database session. + + Returns: + Drilldown analysis with ranked items. + """ + service = AnalyticsService() + return await service.compute_drilldown( + db=db, + dimension=dimension, + start_date=start_date, + end_date=end_date, + store_id=store_id, + product_id=product_id, + max_items=max_items, + ) diff --git a/app/features/analytics/schemas.py b/app/features/analytics/schemas.py new file mode 100644 index 00000000..e48ddab5 --- /dev/null +++ b/app/features/analytics/schemas.py @@ -0,0 +1,227 @@ +"""Pydantic schemas for analytics endpoints. + +These schemas define KPI aggregations and drilldown responses +with rich descriptions for LLM tool-calling. +""" + +from datetime import date +from decimal import Decimal +from enum import Enum + +from pydantic import BaseModel, ConfigDict, Field, field_validator + +# ============================================================================= +# Enums +# ============================================================================= + + +class TimeGranularity(str, Enum): + """Time granularity for aggregations. + + Controls how time-based KPIs are grouped. + """ + + DAY = "day" + WEEK = "week" + MONTH = "month" + QUARTER = "quarter" + + +class DrilldownDimension(str, Enum): + """Dimensions available for drilldown analysis. + + Each dimension groups KPIs by a different attribute. 
+ """ + + STORE = "store" + PRODUCT = "product" + CATEGORY = "category" + REGION = "region" + DATE = "date" + + +# ============================================================================= +# KPI Response Schemas +# ============================================================================= + + +class KPIMetrics(BaseModel): + """Core KPI metrics for sales analysis. + + All monetary values are in the local currency. + """ + + model_config = ConfigDict(from_attributes=True) + + total_revenue: Decimal = Field( + ..., + description="Total sales revenue (sum of total_amount). " + "Represents the gross sales value before discounts.", + ) + total_units: int = Field( + ..., + ge=0, + description="Total units sold (sum of quantity). " + "Represents the physical volume of sales.", + ) + total_transactions: int = Field( + ..., + ge=0, + description="Number of unique (date, store, product) combinations. " + "Approximates the number of sales transactions.", + ) + avg_unit_price: Decimal | None = Field( + None, + description="Average price per unit (total_revenue / total_units). " + "Null if no units sold.", + ) + avg_basket_value: Decimal | None = Field( + None, + description="Average transaction value (total_revenue / total_transactions). " + "Null if no transactions.", + ) + + +class KPIResponse(BaseModel): + """Aggregated KPI response for a date range. + + Use this to get high-level sales metrics for the specified period. + """ + + metrics: KPIMetrics = Field( + ..., + description="Aggregated KPI values for the date range.", + ) + start_date: date = Field( + ..., + description="Start of the analysis period (inclusive).", + ) + end_date: date = Field( + ..., + description="End of the analysis period (inclusive).", + ) + store_id: int | None = Field( + None, + description="Store filter applied (if any). " + "Null means all stores included.", + ) + product_id: int | None = Field( + None, + description="Product filter applied (if any). 
" + "Null means all products included.", + ) + category: str | None = Field( + None, + description="Category filter applied (if any). " + "Null means all categories included.", + ) + + +# ============================================================================= +# Drilldown Response Schemas +# ============================================================================= + + +class DrilldownItem(BaseModel): + """A single item in a drilldown result. + + Contains the dimension value and associated metrics. + """ + + model_config = ConfigDict(from_attributes=True) + + dimension_value: str = Field( + ..., + description="Value of the drilldown dimension (e.g., store code, category name).", + ) + dimension_id: int | None = Field( + None, + description="ID of the dimension entity (if applicable). " + "Null for dimensions without IDs (like category).", + ) + metrics: KPIMetrics = Field( + ..., + description="KPI metrics for this dimension value.", + ) + rank: int = Field( + ..., + ge=1, + description="Rank by revenue (1 = highest revenue).", + ) + revenue_share_pct: Decimal = Field( + ..., + ge=0, + le=100, + description="Percentage of total revenue for this dimension value. " + "Sum of all shares equals 100.", + ) + + +class DrilldownResponse(BaseModel): + """Drilldown analysis response. + + Breaks down KPIs by a specific dimension with ranking and share percentages. + """ + + dimension: DrilldownDimension = Field( + ..., + description="Dimension used for grouping (store, product, category, etc.).", + ) + items: list[DrilldownItem] = Field( + ..., + description="Drilldown items ordered by revenue (highest first). " + "Limited to top N items based on max_items parameter.", + ) + total_items: int = Field( + ..., + ge=0, + description="Total number of unique dimension values in the data. 
" + "May be larger than len(items) if results are limited.", + ) + start_date: date = Field( + ..., + description="Start of the analysis period (inclusive).", + ) + end_date: date = Field( + ..., + description="End of the analysis period (inclusive).", + ) + store_id: int | None = Field( + None, + description="Store filter applied (if any).", + ) + product_id: int | None = Field( + None, + description="Product filter applied (if any).", + ) + + +# ============================================================================= +# Date Range Validation +# ============================================================================= + + +class DateRangeParams(BaseModel): + """Parameters for date range validation. + + Used internally to validate date range constraints. + """ + + start_date: date = Field( + ..., + description="Start date of the analysis period (inclusive).", + ) + end_date: date = Field( + ..., + description="End date of the analysis period (inclusive).", + ) + + @field_validator("end_date") + @classmethod + def validate_date_range(cls, v: date, info: object) -> date: + """Ensure end_date >= start_date.""" + data = getattr(info, "data", {}) + if "start_date" in data and v < data["start_date"]: + msg = "end_date must be >= start_date" + raise ValueError(msg) + return v diff --git a/app/features/analytics/service.py b/app/features/analytics/service.py new file mode 100644 index 00000000..91e35e7d --- /dev/null +++ b/app/features/analytics/service.py @@ -0,0 +1,290 @@ +"""Service layer for analytics operations. + +Provides KPI aggregations and drilldown analysis using SQLAlchemy. 
+""" + +from datetime import date +from decimal import Decimal +from typing import Any, cast + +from sqlalchemy import ColumnElement, func, select +from sqlalchemy.ext.asyncio import AsyncSession +from sqlalchemy.orm import DeclarativeBase + +from app.core.config import get_settings +from app.core.logging import get_logger +from app.features.analytics.schemas import ( + DrilldownDimension, + DrilldownItem, + DrilldownResponse, + KPIMetrics, + KPIResponse, +) +from app.features.data_platform.models import Product, SalesDaily, Store + +logger = get_logger(__name__) + + +class AnalyticsService: + """Service for computing sales analytics. + + Provides KPI aggregations and drilldown analysis with filtering. + All methods are async and use SQLAlchemy 2.0 style queries. + """ + + def __init__(self) -> None: + """Initialize analytics service.""" + self.settings = get_settings() + + async def compute_kpis( + self, + db: AsyncSession, + start_date: date, + end_date: date, + store_id: int | None = None, + product_id: int | None = None, + category: str | None = None, + ) -> KPIResponse: + """Compute aggregated KPIs for a date range. + + Args: + db: Database session. + start_date: Start of analysis period (inclusive). + end_date: End of analysis period (inclusive). + store_id: Filter by store ID (optional). + product_id: Filter by product ID (optional). + category: Filter by category (optional). + + Returns: + Aggregated KPI metrics. 
+ """ + # Build base query with aggregations + stmt = select( + func.coalesce(func.sum(SalesDaily.total_amount), 0).label("total_revenue"), + func.coalesce(func.sum(SalesDaily.quantity), 0).label("total_units"), + func.count().label("total_transactions"), + ).where( + (SalesDaily.date >= start_date) & (SalesDaily.date <= end_date) + ) + + # Apply filters + if store_id is not None: + stmt = stmt.where(SalesDaily.store_id == store_id) + if product_id is not None: + stmt = stmt.where(SalesDaily.product_id == product_id) + if category is not None: + stmt = stmt.join(Product, SalesDaily.product_id == Product.id).where( + Product.category == category + ) + + # Execute query + result = await db.execute(stmt) + row = result.one() + + total_revenue = Decimal(str(row.total_revenue)) + total_units = int(row.total_units) + total_transactions = int(row.total_transactions) + + # Compute derived metrics + avg_unit_price = ( + total_revenue / total_units if total_units > 0 else None + ) + avg_basket_value = ( + total_revenue / total_transactions if total_transactions > 0 else None + ) + + metrics = KPIMetrics( + total_revenue=total_revenue, + total_units=total_units, + total_transactions=total_transactions, + avg_unit_price=avg_unit_price, + avg_basket_value=avg_basket_value, + ) + + logger.info( + "analytics.kpis_computed", + start_date=str(start_date), + end_date=str(end_date), + store_id=store_id, + product_id=product_id, + category=category, + total_revenue=float(total_revenue), + total_transactions=total_transactions, + ) + + return KPIResponse( + metrics=metrics, + start_date=start_date, + end_date=end_date, + store_id=store_id, + product_id=product_id, + category=category, + ) + + async def compute_drilldown( + self, + db: AsyncSession, + dimension: DrilldownDimension, + start_date: date, + end_date: date, + store_id: int | None = None, + product_id: int | None = None, + max_items: int = 20, + ) -> DrilldownResponse: + """Compute drilldown analysis by a specific dimension. 
+ + Args: + db: Database session. + dimension: Dimension to group by. + start_date: Start of analysis period (inclusive). + end_date: End of analysis period (inclusive). + store_id: Filter by store ID (optional). + product_id: Filter by product ID (optional). + max_items: Maximum number of items to return. + + Returns: + Drilldown analysis with ranked items. + """ + # Build query based on dimension - use cast for type safety + dimension_col: ColumnElement[Any] + dimension_id_col: ColumnElement[Any] | None + join_clause: ColumnElement[bool] | None + base_entity: type[DeclarativeBase] | None + + if dimension == DrilldownDimension.STORE: + dimension_col = cast(ColumnElement[Any], Store.code) + dimension_id_col = cast(ColumnElement[Any], Store.id) + join_clause = SalesDaily.store_id == Store.id + base_entity = Store + elif dimension == DrilldownDimension.PRODUCT: + dimension_col = cast(ColumnElement[Any], Product.sku) + dimension_id_col = cast(ColumnElement[Any], Product.id) + join_clause = SalesDaily.product_id == Product.id + base_entity = Product + elif dimension == DrilldownDimension.CATEGORY: + dimension_col = cast(ColumnElement[Any], Product.category) + dimension_id_col = None + join_clause = SalesDaily.product_id == Product.id + base_entity = Product + elif dimension == DrilldownDimension.REGION: + dimension_col = cast(ColumnElement[Any], Store.region) + dimension_id_col = None + join_clause = SalesDaily.store_id == Store.id + base_entity = Store + else: # DATE + dimension_col = cast(ColumnElement[Any], SalesDaily.date) + dimension_id_col = None + join_clause = None + base_entity = None + + # Build aggregation query with explicit columns + agg_columns: list[ColumnElement[Any]] = [ + dimension_col.label("dimension_value"), + func.sum(SalesDaily.total_amount).label("total_revenue"), + func.sum(SalesDaily.quantity).label("total_units"), + func.count().label("total_transactions"), + ] + + if dimension_id_col is not None: + agg_columns.insert(1, 
dimension_id_col.label("dimension_id")) + + stmt = select(*agg_columns).where( + (SalesDaily.date >= start_date) & (SalesDaily.date <= end_date) + ) + + # Join dimension table if needed + if join_clause is not None and base_entity is not None: + stmt = stmt.join(base_entity, join_clause) + + # Apply filters + if store_id is not None: + stmt = stmt.where(SalesDaily.store_id == store_id) + if product_id is not None: + stmt = stmt.where(SalesDaily.product_id == product_id) + + # Group by dimension + if dimension_id_col is not None: + stmt = stmt.group_by(dimension_col, dimension_id_col) + else: + stmt = stmt.group_by(dimension_col) + + # Filter out null dimension values + stmt = stmt.where(dimension_col.isnot(None)) + + # Order by revenue and limit + stmt = stmt.order_by(func.sum(SalesDaily.total_amount).desc()) + + # Count total items before limiting + count_stmt = select(func.count()).select_from(stmt.subquery()) + count_result = await db.execute(count_stmt) + total_items = count_result.scalar_one() + + # Apply limit + stmt = stmt.limit(max_items) + + # Execute query + result = await db.execute(stmt) + rows = result.all() + + # Calculate total revenue for share calculation + total_revenue_all = sum(Decimal(str(row.total_revenue)) for row in rows) + + # Build drilldown items + items: list[DrilldownItem] = [] + for rank, row in enumerate(rows, 1): + row_revenue = Decimal(str(row.total_revenue)) + row_units = int(row.total_units) + row_transactions = int(row.total_transactions) + + # Calculate derived metrics + avg_unit_price = row_revenue / row_units if row_units > 0 else None + avg_basket_value = ( + row_revenue / row_transactions if row_transactions > 0 else None + ) + + # Calculate revenue share + revenue_share = ( + (row_revenue / total_revenue_all * 100) + if total_revenue_all > 0 + else Decimal("0") + ) + + # Get dimension ID if available + dim_id = getattr(row, "dimension_id", None) + + items.append( + DrilldownItem( + dimension_value=str(row.dimension_value), 
+ dimension_id=dim_id, + metrics=KPIMetrics( + total_revenue=row_revenue, + total_units=row_units, + total_transactions=row_transactions, + avg_unit_price=avg_unit_price, + avg_basket_value=avg_basket_value, + ), + rank=rank, + revenue_share_pct=round(revenue_share, 2), + ) + ) + + logger.info( + "analytics.drilldown_computed", + dimension=dimension.value, + start_date=str(start_date), + end_date=str(end_date), + store_id=store_id, + product_id=product_id, + items_count=len(items), + total_items=total_items, + ) + + return DrilldownResponse( + dimension=dimension, + items=items, + total_items=total_items, + start_date=start_date, + end_date=end_date, + store_id=store_id, + product_id=product_id, + ) diff --git a/app/features/analytics/tests/__init__.py b/app/features/analytics/tests/__init__.py new file mode 100644 index 00000000..c7aa7e65 --- /dev/null +++ b/app/features/analytics/tests/__init__.py @@ -0,0 +1 @@ +"""Tests for analytics module.""" diff --git a/app/features/analytics/tests/conftest.py b/app/features/analytics/tests/conftest.py new file mode 100644 index 00000000..827960ad --- /dev/null +++ b/app/features/analytics/tests/conftest.py @@ -0,0 +1,82 @@ +"""Test fixtures for analytics module.""" + +from datetime import date +from decimal import Decimal + +import pytest + +from app.features.analytics.schemas import ( + DrilldownDimension, + DrilldownItem, + DrilldownResponse, + KPIMetrics, + KPIResponse, +) + + +@pytest.fixture +def sample_kpi_metrics() -> KPIMetrics: + """Create sample KPI metrics for testing.""" + return KPIMetrics( + total_revenue=Decimal("10000.00"), + total_units=500, + total_transactions=100, + avg_unit_price=Decimal("20.00"), + avg_basket_value=Decimal("100.00"), + ) + + +@pytest.fixture +def sample_kpi_response(sample_kpi_metrics: KPIMetrics) -> KPIResponse: + """Create sample KPI response for testing.""" + return KPIResponse( + metrics=sample_kpi_metrics, + start_date=date(2024, 1, 1), + end_date=date(2024, 1, 31), + 
store_id=None, + product_id=None, + category=None, + ) + + +@pytest.fixture +def sample_drilldown_items(sample_kpi_metrics: KPIMetrics) -> list[DrilldownItem]: + """Create sample drilldown items for testing.""" + return [ + DrilldownItem( + dimension_value="S001", + dimension_id=1, + metrics=sample_kpi_metrics, + rank=1, + revenue_share_pct=Decimal("60.00"), + ), + DrilldownItem( + dimension_value="S002", + dimension_id=2, + metrics=KPIMetrics( + total_revenue=Decimal("5000.00"), + total_units=250, + total_transactions=50, + avg_unit_price=Decimal("20.00"), + avg_basket_value=Decimal("100.00"), + ), + rank=2, + revenue_share_pct=Decimal("40.00"), + ), + ] + + +@pytest.fixture +def sample_drilldown_response( + sample_drilldown_items: list[DrilldownItem], +) -> DrilldownResponse: + """Create sample drilldown response for testing.""" + return DrilldownResponse( + dimension=DrilldownDimension.STORE, + items=sample_drilldown_items, + total_items=2, + start_date=date(2024, 1, 1), + end_date=date(2024, 1, 31), + store_id=None, + product_id=None, + ) diff --git a/app/features/dimensions/__init__.py b/app/features/dimensions/__init__.py new file mode 100644 index 00000000..67026252 --- /dev/null +++ b/app/features/dimensions/__init__.py @@ -0,0 +1,23 @@ +"""Dimensions discovery module for Store and Product metadata. + +This module provides endpoints for agents to discover available stores and products +before calling ingest, training, or forecasting endpoints. 
+""" + +from app.features.dimensions.routes import router +from app.features.dimensions.schemas import ( + ProductListResponse, + ProductResponse, + StoreListResponse, + StoreResponse, +) +from app.features.dimensions.service import DimensionService + +__all__ = [ + "DimensionService", + "ProductListResponse", + "ProductResponse", + "StoreListResponse", + "StoreResponse", + "router", +] diff --git a/app/features/dimensions/routes.py b/app/features/dimensions/routes.py new file mode 100644 index 00000000..bb2130df --- /dev/null +++ b/app/features/dimensions/routes.py @@ -0,0 +1,244 @@ +"""API routes for dimension discovery. + +These endpoints enable LLM agents and users to discover available stores +and products before calling ingest, training, or forecasting endpoints. +""" + +from fastapi import APIRouter, Depends, HTTPException, Query, status +from sqlalchemy.ext.asyncio import AsyncSession + +from app.core.database import get_db +from app.core.logging import get_logger +from app.features.dimensions.schemas import ( + ProductListResponse, + ProductResponse, + StoreListResponse, + StoreResponse, +) +from app.features.dimensions.service import DimensionService + +logger = get_logger(__name__) + +router = APIRouter(prefix="/dimensions", tags=["dimensions"]) + + +# ============================================================================= +# Store Endpoints +# ============================================================================= + + +@router.get( + "/stores", + response_model=StoreListResponse, + summary="List all stores", + description=""" +Discover available stores for use in other API endpoints. + +**Purpose**: Resolve store metadata (code, name, region) to store_id values +required by ingest, training, and forecasting endpoints. 
+ +**Filtering Options**: +- `region`: Filter by geographic region (exact match) +- `store_type`: Filter by store format (exact match) +- `search`: Search in store code and name (case-insensitive, min 2 chars) + +**Pagination**: +- Results are paginated with 1-indexed pages +- Default: 20 items per page, maximum: 100 +- Use `total` in response to calculate total pages + +**Example Use Cases**: +1. Get all stores: `GET /dimensions/stores` +2. Find stores by region: `GET /dimensions/stores?region=North` +3. Search for a store: `GET /dimensions/stores?search=Main` +""", +) +async def list_stores( + db: AsyncSession = Depends(get_db), + page: int = Query(1, ge=1, description="Page number (1-indexed)"), + page_size: int = Query(20, ge=1, le=100, description="Stores per page (max 100)"), + region: str | None = Query(None, description="Filter by region (exact match)"), + store_type: str | None = Query(None, description="Filter by store type (exact match)"), + search: str | None = Query( + None, + min_length=2, + description="Search in code and name (case-insensitive)", + ), +) -> StoreListResponse: + """List stores with pagination and filtering. + + Args: + db: Database session. + page: Page number (1-indexed). + page_size: Number of stores per page. + region: Filter by region. + store_type: Filter by store type. + search: Search in code and name. + + Returns: + Paginated list of stores. + """ + service = DimensionService() + return await service.list_stores( + db=db, + page=page, + page_size=page_size, + region=region, + store_type=store_type, + search=search, + ) + + +@router.get( + "/stores/{store_id}", + response_model=StoreResponse, + summary="Get store by ID", + description=""" +Get details for a specific store by its internal ID. + +**Use Case**: Retrieve full store metadata after obtaining store_id +from list endpoint or another API response. 
+ +**Error Handling**: +- Returns 404 if store_id doesn't exist +- Agent should fall back to list endpoint to discover valid IDs +""", +) +async def get_store( + store_id: int, + db: AsyncSession = Depends(get_db), +) -> StoreResponse: + """Get store details by ID. + + Args: + store_id: Store primary key. + db: Database session. + + Returns: + Store details. + + Raises: + HTTPException: If store not found. + """ + service = DimensionService() + result = await service.get_store(db=db, store_id=store_id) + + if result is None: + raise HTTPException( + status_code=status.HTTP_404_NOT_FOUND, + detail=f"Store not found: {store_id}. " + "Use GET /dimensions/stores to list available stores.", + ) + + return result + + +# ============================================================================= +# Product Endpoints +# ============================================================================= + + +@router.get( + "/products", + response_model=ProductListResponse, + summary="List all products", + description=""" +Discover available products for use in other API endpoints. + +**Purpose**: Resolve product metadata (SKU, name, category) to product_id values +required by ingest, training, and forecasting endpoints. + +**Filtering Options**: +- `category`: Filter by product category (exact match) +- `brand`: Filter by brand name (exact match) +- `search`: Search in SKU and name (case-insensitive, min 2 chars) + +**Pagination**: +- Results are paginated with 1-indexed pages +- Default: 20 items per page, maximum: 100 +- Use `total` in response to calculate total pages + +**Example Use Cases**: +1. Get all products: `GET /dimensions/products` +2. Find products by category: `GET /dimensions/products?category=Beverage` +3. 
Search for a product: `GET /dimensions/products?search=Cola` +""", +) +async def list_products( + db: AsyncSession = Depends(get_db), + page: int = Query(1, ge=1, description="Page number (1-indexed)"), + page_size: int = Query(20, ge=1, le=100, description="Products per page (max 100)"), + category: str | None = Query(None, description="Filter by category (exact match)"), + brand: str | None = Query(None, description="Filter by brand (exact match)"), + search: str | None = Query( + None, + min_length=2, + description="Search in SKU and name (case-insensitive)", + ), +) -> ProductListResponse: + """List products with pagination and filtering. + + Args: + db: Database session. + page: Page number (1-indexed). + page_size: Number of products per page. + category: Filter by category. + brand: Filter by brand. + search: Search in SKU and name. + + Returns: + Paginated list of products. + """ + service = DimensionService() + return await service.list_products( + db=db, + page=page, + page_size=page_size, + category=category, + brand=brand, + search=search, + ) + + +@router.get( + "/products/{product_id}", + response_model=ProductResponse, + summary="Get product by ID", + description=""" +Get details for a specific product by its internal ID. + +**Use Case**: Retrieve full product metadata after obtaining product_id +from list endpoint or another API response. + +**Error Handling**: +- Returns 404 if product_id doesn't exist +- Agent should fall back to list endpoint to discover valid IDs +""", +) +async def get_product( + product_id: int, + db: AsyncSession = Depends(get_db), +) -> ProductResponse: + """Get product details by ID. + + Args: + product_id: Product primary key. + db: Database session. + + Returns: + Product details. + + Raises: + HTTPException: If product not found. 
+ """ + service = DimensionService() + result = await service.get_product(db=db, product_id=product_id) + + if result is None: + raise HTTPException( + status_code=status.HTTP_404_NOT_FOUND, + detail=f"Product not found: {product_id}. " + "Use GET /dimensions/products to list available products.", + ) + + return result diff --git a/app/features/dimensions/schemas.py b/app/features/dimensions/schemas.py new file mode 100644 index 00000000..9b70fb5d --- /dev/null +++ b/app/features/dimensions/schemas.py @@ -0,0 +1,181 @@ +"""Pydantic schemas for dimension discovery endpoints. + +These schemas are optimized for LLM tool-calling with rich descriptions +that help agents understand how to use each field. +""" + +from datetime import datetime +from decimal import Decimal + +from pydantic import BaseModel, ConfigDict, Field + +# ============================================================================= +# Store Schemas +# ============================================================================= + + +class StoreResponse(BaseModel): + """Store dimension record for agent discovery. + + Use the GET /dimensions/stores endpoint to discover available stores + before calling ingest, training, or forecasting endpoints. + + The 'id' field should be used as the store_id parameter in other API calls. + """ + + model_config = ConfigDict(from_attributes=True) + + id: int = Field( + ..., + description="Internal store ID. Use this value for store_id parameters " + "in /ingest/sales-daily, /forecasting/train, and /forecasting/predict.", + ) + code: str = Field( + ..., + description="Business store code (e.g., 'S001'). Unique human-readable identifier. " + "Use this for display and matching with external data sources.", + ) + name: str = Field( + ..., + description="Human-readable store name for display purposes.", + ) + region: str | None = Field( + None, + description="Geographic region (e.g., 'North', 'South', 'East', 'West'). 
" + "Filter using the 'region' query parameter.", + ) + city: str | None = Field( + None, + description="City where the store is located.", + ) + store_type: str | None = Field( + None, + description="Store format (e.g., 'supermarket', 'express', 'warehouse'). " + "Filter using the 'store_type' query parameter.", + ) + created_at: datetime = Field( + ..., + description="Timestamp when the store record was created.", + ) + updated_at: datetime = Field( + ..., + description="Timestamp when the store record was last updated.", + ) + + +class StoreListResponse(BaseModel): + """Paginated list of stores with filtering metadata. + + Use pagination parameters (page, page_size) to navigate large result sets. + Filtering by region or store_type reduces the result set before pagination. + """ + + stores: list[StoreResponse] = Field( + ..., + description="Array of store records for the current page. " + "Empty if no stores match the filters.", + ) + total: int = Field( + ..., + ge=0, + description="Total number of stores matching the applied filters. " + "Use to calculate total pages: ceil(total / page_size).", + ) + page: int = Field( + ..., + ge=1, + description="Current page number (1-indexed). First page is 1.", + ) + page_size: int = Field( + ..., + ge=1, + description="Number of stores per page. Maximum is 100.", + ) + + +# ============================================================================= +# Product Schemas +# ============================================================================= + + +class ProductResponse(BaseModel): + """Product dimension record for agent discovery. + + Use the GET /dimensions/products endpoint to discover available products + before calling ingest, training, or forecasting endpoints. + + The 'id' field should be used as the product_id parameter in other API calls. + """ + + model_config = ConfigDict(from_attributes=True) + + id: int = Field( + ..., + description="Internal product ID. 
Use this value for product_id parameters " + "in /ingest/sales-daily, /forecasting/train, and /forecasting/predict.", + ) + sku: str = Field( + ..., + description="Stock Keeping Unit - unique product identifier (e.g., 'SKU-001'). " + "Use this for matching with external inventory systems.", + ) + name: str = Field( + ..., + description="Human-readable product name for display purposes.", + ) + category: str | None = Field( + None, + description="Product category (e.g., 'Beverage', 'Snacks', 'Dairy'). " + "Filter using the 'category' query parameter.", + ) + brand: str | None = Field( + None, + description="Product brand name. Filter using the 'brand' query parameter.", + ) + base_price: Decimal | None = Field( + None, + description="Standard retail price for this product. " + "Actual sale prices may vary by promotion.", + ) + base_cost: Decimal | None = Field( + None, + description="Standard cost/COGS for this product. Used for margin calculations.", + ) + created_at: datetime = Field( + ..., + description="Timestamp when the product record was created.", + ) + updated_at: datetime = Field( + ..., + description="Timestamp when the product record was last updated.", + ) + + +class ProductListResponse(BaseModel): + """Paginated list of products with filtering metadata. + + Use pagination parameters (page, page_size) to navigate large result sets. + Filtering by category or brand reduces the result set before pagination. + """ + + products: list[ProductResponse] = Field( + ..., + description="Array of product records for the current page. " + "Empty if no products match the filters.", + ) + total: int = Field( + ..., + ge=0, + description="Total number of products matching the applied filters. " + "Use to calculate total pages: ceil(total / page_size).", + ) + page: int = Field( + ..., + ge=1, + description="Current page number (1-indexed). First page is 1.", + ) + page_size: int = Field( + ..., + ge=1, + description="Number of products per page. 
Maximum is 100.", + ) diff --git a/app/features/dimensions/service.py b/app/features/dimensions/service.py new file mode 100644 index 00000000..b6e1c77d --- /dev/null +++ b/app/features/dimensions/service.py @@ -0,0 +1,253 @@ +"""Service layer for dimension discovery operations. + +Provides paginated access to Store and Product dimension tables +with filtering and search capabilities. +""" + +from sqlalchemy import func, or_, select +from sqlalchemy.ext.asyncio import AsyncSession + +from app.core.logging import get_logger +from app.features.data_platform.models import Product, Store +from app.features.dimensions.schemas import ( + ProductListResponse, + ProductResponse, + StoreListResponse, + StoreResponse, +) + +logger = get_logger(__name__) + + +class DimensionService: + """Service for discovering stores and products. + + Provides paginated access to dimension tables with filtering support. + All methods are async and use SQLAlchemy 2.0 style queries. + """ + + async def list_stores( + self, + db: AsyncSession, + page: int = 1, + page_size: int = 20, + region: str | None = None, + store_type: str | None = None, + search: str | None = None, + ) -> StoreListResponse: + """List stores with pagination and filtering. + + Args: + db: Database session. + page: Page number (1-indexed). + page_size: Number of stores per page. + region: Filter by region (exact match). + store_type: Filter by store type (exact match). + search: Search in store code and name (case-insensitive). + + Returns: + Paginated list of stores. 
+ """ + # Build base query + stmt = select(Store) + + # Apply filters + if region is not None: + stmt = stmt.where(Store.region == region) + if store_type is not None: + stmt = stmt.where(Store.store_type == store_type) + if search is not None and len(search) >= 2: + search_pattern = f"%{search}%" + stmt = stmt.where( + or_( + Store.code.ilike(search_pattern), + Store.name.ilike(search_pattern), + ) + ) + + # Count total before pagination + count_stmt = select(func.count()).select_from(stmt.subquery()) + total_result = await db.execute(count_stmt) + total = total_result.scalar_one() + + # Apply pagination and ordering + offset = (page - 1) * page_size + stmt = stmt.order_by(Store.code).offset(offset).limit(page_size) + + # Execute query + result = await db.execute(stmt) + stores = result.scalars().all() + + logger.info( + "dimensions.stores_listed", + total=total, + page=page, + page_size=page_size, + filters={"region": region, "store_type": store_type, "search": search}, + ) + + return StoreListResponse( + stores=[StoreResponse.model_validate(store) for store in stores], + total=total, + page=page, + page_size=page_size, + ) + + async def get_store( + self, + db: AsyncSession, + store_id: int, + ) -> StoreResponse | None: + """Get a single store by ID. + + Args: + db: Database session. + store_id: Store primary key. + + Returns: + Store details or None if not found. + """ + stmt = select(Store).where(Store.id == store_id) + result = await db.execute(stmt) + store = result.scalar_one_or_none() + + if store is None: + return None + + return StoreResponse.model_validate(store) + + async def get_store_by_code( + self, + db: AsyncSession, + code: str, + ) -> StoreResponse | None: + """Get a single store by code. + + Args: + db: Database session. + code: Store code (e.g., 'S001'). + + Returns: + Store details or None if not found. 
+ """ + stmt = select(Store).where(Store.code == code) + result = await db.execute(stmt) + store = result.scalar_one_or_none() + + if store is None: + return None + + return StoreResponse.model_validate(store) + + async def list_products( + self, + db: AsyncSession, + page: int = 1, + page_size: int = 20, + category: str | None = None, + brand: str | None = None, + search: str | None = None, + ) -> ProductListResponse: + """List products with pagination and filtering. + + Args: + db: Database session. + page: Page number (1-indexed). + page_size: Number of products per page. + category: Filter by category (exact match). + brand: Filter by brand (exact match). + search: Search in SKU and name (case-insensitive). + + Returns: + Paginated list of products. + """ + # Build base query + stmt = select(Product) + + # Apply filters + if category is not None: + stmt = stmt.where(Product.category == category) + if brand is not None: + stmt = stmt.where(Product.brand == brand) + if search is not None and len(search) >= 2: + search_pattern = f"%{search}%" + stmt = stmt.where( + or_( + Product.sku.ilike(search_pattern), + Product.name.ilike(search_pattern), + ) + ) + + # Count total before pagination + count_stmt = select(func.count()).select_from(stmt.subquery()) + total_result = await db.execute(count_stmt) + total = total_result.scalar_one() + + # Apply pagination and ordering + offset = (page - 1) * page_size + stmt = stmt.order_by(Product.sku).offset(offset).limit(page_size) + + # Execute query + result = await db.execute(stmt) + products = result.scalars().all() + + logger.info( + "dimensions.products_listed", + total=total, + page=page, + page_size=page_size, + filters={"category": category, "brand": brand, "search": search}, + ) + + return ProductListResponse( + products=[ProductResponse.model_validate(product) for product in products], + total=total, + page=page, + page_size=page_size, + ) + + async def get_product( + self, + db: AsyncSession, + product_id: int, + ) -> 
ProductResponse | None: + """Get a single product by ID. + + Args: + db: Database session. + product_id: Product primary key. + + Returns: + Product details or None if not found. + """ + stmt = select(Product).where(Product.id == product_id) + result = await db.execute(stmt) + product = result.scalar_one_or_none() + + if product is None: + return None + + return ProductResponse.model_validate(product) + + async def get_product_by_sku( + self, + db: AsyncSession, + sku: str, + ) -> ProductResponse | None: + """Get a single product by SKU. + + Args: + db: Database session. + sku: Product SKU (e.g., 'SKU-001'). + + Returns: + Product details or None if not found. + """ + stmt = select(Product).where(Product.sku == sku) + result = await db.execute(stmt) + product = result.scalar_one_or_none() + + if product is None: + return None + + return ProductResponse.model_validate(product) diff --git a/app/features/dimensions/tests/__init__.py b/app/features/dimensions/tests/__init__.py new file mode 100644 index 00000000..8374ee5c --- /dev/null +++ b/app/features/dimensions/tests/__init__.py @@ -0,0 +1 @@ +"""Tests for the dimensions discovery module.""" diff --git a/app/features/dimensions/tests/conftest.py b/app/features/dimensions/tests/conftest.py new file mode 100644 index 00000000..46db27ac --- /dev/null +++ b/app/features/dimensions/tests/conftest.py @@ -0,0 +1,28 @@ +"""Test fixtures for dimensions module.""" + +import pytest + + +@pytest.fixture +def sample_store_data(): + """Sample store data for testing.""" + return { + "code": "S001", + "name": "Main Street Store", + "region": "North", + "city": "Springfield", + "store_type": "supermarket", + } + + +@pytest.fixture +def sample_product_data(): + """Sample product data for testing.""" + return { + "sku": "SKU-001", + "name": "Cola Classic", + "category": "Beverage", + "brand": "CocaCola", + "base_price": "2.99", + "base_cost": "1.50", + } diff --git a/app/features/jobs/__init__.py b/app/features/jobs/__init__.py new 
file mode 100644 index 00000000..a67e8200 --- /dev/null +++ b/app/features/jobs/__init__.py @@ -0,0 +1,25 @@ +"""Jobs module for async-ready task orchestration. + +This module provides endpoints for creating and monitoring jobs +for training, prediction, and backtesting operations. +""" + +from app.features.jobs.models import Job, JobStatus, JobType +from app.features.jobs.routes import router +from app.features.jobs.schemas import ( + JobCreate, + JobListResponse, + JobResponse, +) +from app.features.jobs.service import JobService + +__all__ = [ + "Job", + "JobCreate", + "JobListResponse", + "JobResponse", + "JobService", + "JobStatus", + "JobType", + "router", +] diff --git a/app/features/jobs/models.py b/app/features/jobs/models.py new file mode 100644 index 00000000..172c1fe7 --- /dev/null +++ b/app/features/jobs/models.py @@ -0,0 +1,132 @@ +"""Job ORM model for async-ready task tracking. + +This module defines the Job model for tracking background jobs +such as training, prediction, and backtesting operations. + +CRITICAL: Uses PostgreSQL JSONB for flexible params and results. +""" + +from __future__ import annotations + +import datetime +from enum import Enum +from typing import Any + +from sqlalchemy import ( + CheckConstraint, + DateTime, + Index, + Integer, + String, +) +from sqlalchemy.dialects.postgresql import JSONB +from sqlalchemy.orm import Mapped, mapped_column + +from app.core.database import Base +from app.shared.models import TimestampMixin + + +class JobType(str, Enum): + """Types of jobs that can be executed. + + Each type corresponds to a specific ForecastOps operation: + - TRAIN: Train a forecasting model + - PREDICT: Generate predictions from a trained model + - BACKTEST: Run time-based cross-validation + """ + + TRAIN = "train" + PREDICT = "predict" + BACKTEST = "backtest" + + +class JobStatus(str, Enum): + """Job lifecycle states. 
+ + State transitions: + - PENDING -> RUNNING -> COMPLETED | FAILED + - PENDING -> CANCELLED (via DELETE endpoint) + """ + + PENDING = "pending" + RUNNING = "running" + COMPLETED = "completed" + FAILED = "failed" + CANCELLED = "cancelled" + + +# Valid state transitions for job status +VALID_JOB_TRANSITIONS: dict[JobStatus, set[JobStatus]] = { + JobStatus.PENDING: {JobStatus.RUNNING, JobStatus.CANCELLED}, + JobStatus.RUNNING: {JobStatus.COMPLETED, JobStatus.FAILED}, + JobStatus.COMPLETED: set(), # Terminal state + JobStatus.FAILED: set(), # Terminal state + JobStatus.CANCELLED: set(), # Terminal state +} + + +class Job(TimestampMixin, Base): + """Background job tracking model. + + CRITICAL: Stores job configuration and results as JSONB for flexibility. + Jobs execute synchronously but API contracts are async-ready. + + Attributes: + id: Primary key. + job_id: Unique external identifier (UUID hex, 32 chars). + job_type: Type of job (train, predict, backtest). + status: Current lifecycle state. + params: Job configuration as JSONB. + result: Job result as JSONB (null until completed). + error_message: Error details if status=FAILED. + error_type: Exception class name if status=FAILED. + started_at: When job execution started. + completed_at: When job finished (success or failure). + run_id: Link to model_run for train/backtest jobs. 
+ """ + + __tablename__ = "job" + + id: Mapped[int] = mapped_column(Integer, primary_key=True) + job_id: Mapped[str] = mapped_column(String(32), unique=True, index=True) + job_type: Mapped[str] = mapped_column(String(20), index=True) + status: Mapped[str] = mapped_column( + String(20), default=JobStatus.PENDING.value, index=True + ) + + # Job configuration (stored as JSONB for flexibility) + params: Mapped[dict[str, Any]] = mapped_column(JSONB, nullable=False) + + # Result/error storage + result: Mapped[dict[str, Any] | None] = mapped_column(JSONB, nullable=True) + error_message: Mapped[str | None] = mapped_column(String(2000), nullable=True) + error_type: Mapped[str | None] = mapped_column(String(100), nullable=True) + + # Timing + started_at: Mapped[datetime.datetime | None] = mapped_column( + DateTime(timezone=True), nullable=True + ) + completed_at: Mapped[datetime.datetime | None] = mapped_column( + DateTime(timezone=True), nullable=True + ) + + # Linkage to model run (for train/backtest jobs) + run_id: Mapped[str | None] = mapped_column(String(32), nullable=True, index=True) + + __table_args__ = ( + # GIN index for JSONB containment queries + Index("ix_job_params_gin", "params", postgresql_using="gin"), + Index("ix_job_result_gin", "result", postgresql_using="gin"), + # Composite index for common query patterns + Index("ix_job_type_status", "job_type", "status"), + # Constraint: valid status values + CheckConstraint( + "status IN ('pending', 'running', 'completed', 'failed', 'cancelled')", + name="ck_job_valid_status", + ), + # Constraint: valid job type values + CheckConstraint( + "job_type IN ('train', 'predict', 'backtest')", + name="ck_job_valid_type", + ), + ) diff --git a/app/features/jobs/routes.py b/app/features/jobs/routes.py new file mode 100644 index 00000000..f69e4d7e --- /dev/null +++ b/app/features/jobs/routes.py @@ -0,0 +1,299 @@ +"""API routes for job orchestration. 
+ +These endpoints enable LLM agents and users to create and monitor +training, prediction, and backtesting jobs. +""" + +from fastapi import APIRouter, Depends, HTTPException, Query, status +from sqlalchemy.ext.asyncio import AsyncSession + +from app.core.database import get_db +from app.core.logging import get_logger +from app.features.jobs.models import JobStatus, JobType +from app.features.jobs.schemas import ( + JobCreate, + JobListResponse, + JobResponse, +) +from app.features.jobs.service import JobService + +logger = get_logger(__name__) + +router = APIRouter(prefix="/jobs", tags=["jobs"]) + + +# ============================================================================= +# Job Creation +# ============================================================================= + + +@router.post( + "", + response_model=JobResponse, + status_code=status.HTTP_202_ACCEPTED, + summary="Create and execute a job", + description=""" +Create and execute a forecasting job (train, predict, or backtest). + +**Important**: Jobs currently execute synchronously but return 202 Accepted +for async-ready API contracts. The response includes the job result. + +**Job Types**: + +### Train Job +Train a forecasting model on historical data. + +Required params: +- `model_type`: Model type (naive, seasonal_naive, or moving_average) +- `store_id`: Store ID (from /dimensions/stores) +- `product_id`: Product ID (from /dimensions/products) +- `start_date`: Training start date (YYYY-MM-DD) +- `end_date`: Training end date (YYYY-MM-DD) + +Example: +```json +{ + "job_type": "train", + "params": { + "model_type": "seasonal_naive", + "store_id": 1, + "product_id": 1, + "start_date": "2024-01-01", + "end_date": "2024-06-30", + "season_length": 7 + } +} +``` + +### Predict Job +Generate predictions from a trained model. 
+ +Required params: +- `model_path`, `store_id`, `product_id`: Model artifact path from the train job's `result.model_path`, plus the store and product IDs it was trained for + +Optional params: +- `horizon`: Forecast horizon in days (default 14, max 90) + +Example: +```json +{ + "job_type": "predict", + "params": { + "model_path": "path/from/train/result", "store_id": 1, "product_id": 1, + "horizon": 30 + } +} +``` + +### Backtest Job +Run time-based cross-validation to evaluate model performance. + +Required params: +- `model_type`: Model type to evaluate +- `store_id`: Store ID +- `product_id`: Product ID +- `start_date`: Data start date +- `end_date`: Data end date + +Optional params: +- `n_splits`: Number of CV folds (default 5, max 20) +- `test_size`: Test window size in days (default 14) +- `gap`: Gap between train and test (default 0) + +Example: +```json +{ + "job_type": "backtest", + "params": { + "model_type": "moving_average", + "store_id": 1, + "product_id": 1, + "start_date": "2024-01-01", + "end_date": "2024-06-30", + "n_splits": 5, + "test_size": 14 + } +} +``` + +**Response**: +Returns the job with status and result. For completed jobs, check the `result` field. +For failed jobs, check `error_message` and `error_type`. +""", +) +async def create_job( + job_create: JobCreate, + db: AsyncSession = Depends(get_db), +) -> JobResponse: + """Create and execute a job. + + Args: + job_create: Job creation request. + db: Database session. + + Returns: + Job response with status and result. + """ + service = JobService() + return await service.create_job(db=db, job_create=job_create) + + +# ============================================================================= +# Job Listing +# ============================================================================= + + +@router.get( + "", + response_model=JobListResponse, + summary="List jobs", + description=""" +List jobs with pagination and optional filtering. 
+ +**Pagination**: +- Results are paginated with 1-indexed pages +- Default: 20 items per page, maximum: 100 +- Use `total` in response to calculate total pages + +**Filtering**: +- `job_type`: Filter by job type (train, predict, backtest) +- `status`: Filter by status (pending, running, completed, failed, cancelled) + +**Example Use Cases**: +1. List all jobs: `GET /jobs` +2. List failed jobs: `GET /jobs?status=failed` +3. List train jobs: `GET /jobs?job_type=train` +4. Paginate: `GET /jobs?page=2&page_size=10` +""", +) +async def list_jobs( + db: AsyncSession = Depends(get_db), + page: int = Query(1, ge=1, description="Page number (1-indexed)"), + page_size: int = Query(20, ge=1, le=100, description="Jobs per page (max 100)"), + job_type: JobType | None = Query(None, description="Filter by job type"), + status: JobStatus | None = Query(None, description="Filter by status"), +) -> JobListResponse: + """List jobs with pagination and filtering. + + Args: + db: Database session. + page: Page number (1-indexed). + page_size: Number of jobs per page. + job_type: Filter by job type (optional). + status: Filter by status (optional). + + Returns: + Paginated list of jobs. + """ + service = JobService() + return await service.list_jobs( + db=db, + page=page, + page_size=page_size, + job_type=job_type, + status=status, + ) + + +# ============================================================================= +# Single Job Operations +# ============================================================================= + + +@router.get( + "/{job_id}", + response_model=JobResponse, + summary="Get job by ID", + description=""" +Get details for a specific job by its unique ID. + +**Use Case**: Poll job status after creation or retrieve job results. 
+ +**Response Fields**: +- `status`: Current status (pending, running, completed, failed, cancelled) +- `result`: Job output (null until completed) +- `error_message`: Error details (if failed) +- `run_id`: Model run ID for train/backtest jobs + +**Error Handling**: +- Returns 404 if job_id doesn't exist +""", +) +async def get_job( + job_id: str, + db: AsyncSession = Depends(get_db), +) -> JobResponse: + """Get job details by ID. + + Args: + job_id: Unique job identifier. + db: Database session. + + Returns: + Job details. + + Raises: + HTTPException: If job not found. + """ + service = JobService() + result = await service.get_job(db=db, job_id=job_id) + + if result is None: + raise HTTPException( + status_code=status.HTTP_404_NOT_FOUND, + detail=f"Job not found: {job_id}. " + "Use GET /jobs to list available jobs.", + ) + + return result + + +@router.delete( + "/{job_id}", + response_model=JobResponse, + summary="Cancel a pending job", + description=""" +Cancel a job that is still in 'pending' status. + +**Important**: Only pending jobs can be cancelled. Running, completed, +failed, and cancelled jobs cannot be cancelled. + +**Error Handling**: +- Returns 404 if job_id doesn't exist +- Returns 400 if job is not in pending status +""", +) +async def cancel_job( + job_id: str, + db: AsyncSession = Depends(get_db), +) -> JobResponse: + """Cancel a pending job. + + Args: + job_id: Unique job identifier. + db: Database session. + + Returns: + Updated job with cancelled status. + + Raises: + HTTPException: If job not found or cannot be cancelled. + """ + service = JobService() + + try: + result = await service.cancel_job(db=db, job_id=job_id) + except ValueError as e: + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail=str(e), + ) from e + + if result is None: + raise HTTPException( + status_code=status.HTTP_404_NOT_FOUND, + detail=f"Job not found: {job_id}. 
" + "Use GET /jobs to list available jobs.", + ) + + return result diff --git a/app/features/jobs/schemas.py b/app/features/jobs/schemas.py new file mode 100644 index 00000000..ea52b87d --- /dev/null +++ b/app/features/jobs/schemas.py @@ -0,0 +1,158 @@ +"""Pydantic schemas for job endpoints. + +These schemas are optimized for LLM tool-calling with rich descriptions +that help agents understand how to orchestrate jobs. +""" + +from datetime import datetime +from typing import Any + +from pydantic import BaseModel, ConfigDict, Field + +from app.features.jobs.models import JobStatus, JobType + +# ============================================================================= +# Job Create Schema +# ============================================================================= + + +class JobCreate(BaseModel): + """Request schema for creating a new job. + + Jobs are the primary way to execute ForecastOps operations. + Each job type has specific required parameters. + + **Job Types and Required Params**: + + - **train**: Train a forecasting model + - `model_type`: Required - 'naive', 'seasonal_naive', 'linear_regression', etc. 
+ - `store_id`: Required - Store ID from /dimensions/stores + - `product_id`: Required - Product ID from /dimensions/products + - `start_date`: Required - Training data start (YYYY-MM-DD) + - `end_date`: Required - Training data end (YYYY-MM-DD) + - Additional model-specific parameters + + - **predict**: Generate predictions + - `model_path`, `store_id`, `product_id`: Required - Model artifact path from the train job result, plus its store and product IDs + - `horizon`: Optional - Number of days to forecast (default 14, max 90) + + - **backtest**: Run cross-validation + - `model_type`: Required - Model type to evaluate + - `store_id`: Required - Store ID + - `product_id`: Required - Product ID + - `start_date`: Required - Data start date + - `end_date`: Required - Data end date + - `n_splits`: Optional - Number of CV folds (default 5, max 20) + - `test_size`: Optional - Test window size (default 14) + """ + + job_type: JobType = Field( + ..., + description="Type of job to execute: 'train', 'predict', or 'backtest'.", + ) + params: dict[str, Any] = Field( + ..., + description="Job-specific parameters. See job type documentation for required fields.", + ) + + +# ============================================================================= +# Job Response Schemas +# ============================================================================= + + +class JobResponse(BaseModel): + """Response schema for a single job. + + Contains job metadata, status, and results. + """ + + model_config = ConfigDict(from_attributes=True) + + job_id: str = Field( + ..., + description="Unique job identifier (32-char hex). 
Use for polling status.", + ) + job_type: JobType = Field( + ..., + description="Type of job: 'train', 'predict', or 'backtest'.", + ) + status: JobStatus = Field( + ..., + description="Current job status: 'pending', 'running', 'completed', 'failed', or 'cancelled'.", + ) + params: dict[str, Any] = Field( + ..., + description="Job configuration parameters as submitted.", + ) + result: dict[str, Any] | None = Field( + None, + description="Job result (null until completed). " + "Structure depends on job_type.", + ) + error_message: str | None = Field( + None, + description="Error details if status='failed'. " + "Use for troubleshooting.", + ) + error_type: str | None = Field( + None, + description="Exception class name if status='failed'. " + "Helps identify error category.", + ) + run_id: str | None = Field( + None, + description="Model run ID for train/backtest jobs. " + "Use with /registry/runs endpoint.", + ) + started_at: datetime | None = Field( + None, + description="When job execution started. Null if still pending.", + ) + completed_at: datetime | None = Field( + None, + description="When job finished. Null if still running or pending.", + ) + created_at: datetime = Field( + ..., + description="When job was created.", + ) + updated_at: datetime = Field( + ..., + description="When job was last updated.", + ) + + +# ============================================================================= +# Job List Response +# ============================================================================= + + +class JobListResponse(BaseModel): + """Paginated list of jobs with filtering metadata. + + Use pagination parameters (page, page_size) to navigate large result sets. + Filtering by job_type or status reduces the result set before pagination. + """ + + jobs: list[JobResponse] = Field( + ..., + description="Array of job records for the current page. 
" + "Empty if no jobs match the filters.", + ) + total: int = Field( + ..., + ge=0, + description="Total number of jobs matching the applied filters. " + "Use to calculate total pages: ceil(total / page_size).", + ) + page: int = Field( + ..., + ge=1, + description="Current page number (1-indexed). First page is 1.", + ) + page_size: int = Field( + ..., + ge=1, + description="Number of jobs per page. Maximum is 100.", + ) diff --git a/app/features/jobs/service.py b/app/features/jobs/service.py new file mode 100644 index 00000000..976415e4 --- /dev/null +++ b/app/features/jobs/service.py @@ -0,0 +1,532 @@ +"""Service layer for job operations. + +Provides job creation, execution, and tracking. +Jobs execute synchronously but API contracts are async-ready. + +CRITICAL: All job operations are logged for auditability. +""" + +from __future__ import annotations + +import uuid +from datetime import UTC, datetime +from typing import Any + +from sqlalchemy import func, select +from sqlalchemy.ext.asyncio import AsyncSession + +from app.core.config import get_settings +from app.core.logging import get_logger +from app.features.jobs.models import ( + VALID_JOB_TRANSITIONS, + Job, + JobStatus, + JobType, +) +from app.features.jobs.schemas import ( + JobCreate, + JobListResponse, + JobResponse, +) + +logger = get_logger(__name__) + + +class JobService: + """Service for managing background jobs. + + Provides job creation, execution, and status tracking. + Jobs execute synchronously but contracts are async-ready. + """ + + def __init__(self) -> None: + """Initialize job service.""" + self.settings = get_settings() + + async def create_job( + self, + db: AsyncSession, + job_create: JobCreate, + ) -> JobResponse: + """Create and execute a new job. + + CRITICAL: Jobs execute synchronously. Future versions may + support async execution via task queue. + + Args: + db: Database session. + job_create: Job creation request. + + Returns: + Job response with status and result. 
+ """ + # Generate unique job ID + job_id = uuid.uuid4().hex + + # Create job record + job = Job( + job_id=job_id, + job_type=job_create.job_type.value, + status=JobStatus.PENDING.value, + params=job_create.params, + ) + + db.add(job) + await db.commit() + await db.refresh(job) + + logger.info( + "jobs.job_created", + job_id=job_id, + job_type=job_create.job_type.value, + ) + + # Execute job synchronously + job = await self._execute_job(db, job) + + return self._to_response(job) + + async def get_job( + self, + db: AsyncSession, + job_id: str, + ) -> JobResponse | None: + """Get job by ID. + + Args: + db: Database session. + job_id: Unique job identifier. + + Returns: + Job response or None if not found. + """ + stmt = select(Job).where(Job.job_id == job_id) + result = await db.execute(stmt) + job = result.scalar_one_or_none() + + if job is None: + return None + + return self._to_response(job) + + async def list_jobs( + self, + db: AsyncSession, + page: int = 1, + page_size: int = 20, + job_type: JobType | None = None, + status: JobStatus | None = None, + ) -> JobListResponse: + """List jobs with pagination and filtering. + + Args: + db: Database session. + page: Page number (1-indexed). + page_size: Number of jobs per page. + job_type: Filter by job type (optional). + status: Filter by status (optional). + + Returns: + Paginated list of jobs. 
+ """ + # Build base query + stmt = select(Job) + + # Apply filters + if job_type is not None: + stmt = stmt.where(Job.job_type == job_type.value) + if status is not None: + stmt = stmt.where(Job.status == status.value) + + # Count total + count_stmt = select(func.count()).select_from(stmt.subquery()) + count_result = await db.execute(count_stmt) + total = count_result.scalar_one() + + # Apply pagination + offset = (page - 1) * page_size + stmt = stmt.order_by(Job.created_at.desc()).offset(offset).limit(page_size) + + # Execute query + result = await db.execute(stmt) + jobs = result.scalars().all() + + return JobListResponse( + jobs=[self._to_response(job) for job in jobs], + total=total, + page=page, + page_size=page_size, + ) + + async def cancel_job( + self, + db: AsyncSession, + job_id: str, + ) -> JobResponse | None: + """Cancel a pending job. + + Args: + db: Database session. + job_id: Unique job identifier. + + Returns: + Updated job response or None if not found. + + Raises: + ValueError: If job cannot be cancelled (not pending). + """ + stmt = select(Job).where(Job.job_id == job_id) + result = await db.execute(stmt) + job = result.scalar_one_or_none() + + if job is None: + return None + + current_status = JobStatus(job.status) + + # Validate transition + if JobStatus.CANCELLED not in VALID_JOB_TRANSITIONS[current_status]: + msg = f"Cannot cancel job in status '{current_status.value}'" + raise ValueError(msg) + + job.status = JobStatus.CANCELLED.value + job.completed_at = datetime.now(UTC) + + await db.commit() + await db.refresh(job) + + logger.info( + "jobs.job_cancelled", + job_id=job_id, + ) + + return self._to_response(job) + + async def _execute_job( + self, + db: AsyncSession, + job: Job, + ) -> Job: + """Execute a job synchronously. + + CRITICAL: This is where job execution happens. + Future versions may delegate to a task queue. + + Args: + db: Database session. + job: Job to execute. + + Returns: + Updated job with results. 
@staticmethod
def _parse_iso_date(value: Any) -> Any:
    """Return ``value`` as a ``date``, parsing it when it is an ISO string.

    Args:
        value: Either a ``YYYY-MM-DD`` string or an already-parsed date.

    Returns:
        ``date.fromisoformat(value)`` for strings; any other value unchanged.

    Raises:
        ValueError: If a string is not a valid ISO date.
    """
    # Import here to keep the alias local, as the executors did originally
    from datetime import date as date_type

    if isinstance(value, str):
        return date_type.fromisoformat(value)
    return value

@staticmethod
def _build_model_config(params: dict[str, Any]) -> Any:
    """Build the forecasting model config selected by ``params["model_type"]``.

    Shared by the train and backtest executors so both accept exactly the
    same model types and defaults.

    Args:
        params: Job parameters. Reads ``model_type`` (default ``"naive"``),
            plus ``season_length`` (default 7) for ``"seasonal_naive"`` and
            ``window_size`` (default 7) for ``"moving_average"``.

    Returns:
        A ``NaiveModelConfig``, ``SeasonalNaiveModelConfig`` or
        ``MovingAverageModelConfig`` instance.

    Raises:
        ValueError: If ``model_type`` is not one of the supported values.
    """
    # Import here to avoid circular imports
    from app.features.forecasting.schemas import (
        MovingAverageModelConfig,
        NaiveModelConfig,
        SeasonalNaiveModelConfig,
    )

    model_type = params.get("model_type", "naive")
    if model_type == "naive":
        return NaiveModelConfig()
    if model_type == "seasonal_naive":
        return SeasonalNaiveModelConfig(season_length=params.get("season_length", 7))
    if model_type == "moving_average":
        return MovingAverageModelConfig(window_size=params.get("window_size", 7))

    msg = f"Unsupported model_type: {model_type}"
    raise ValueError(msg)

@staticmethod
def _optional_float(value: Any) -> float | None:
    """Convert ``value`` to ``float`` while preserving ``None``.

    The check is ``is None`` rather than truthiness so that a bound of
    exactly zero is reported as ``0.0`` instead of being dropped.
    """
    return None if value is None else float(value)

async def _execute_train(
    self,
    db: AsyncSession,
    params: dict[str, Any],
) -> dict[str, Any]:
    """Execute a train job.

    Args:
        db: Database session.
        params: Training parameters. ``store_id``, ``product_id``,
            ``start_date`` and ``end_date`` are required; ``model_type``
            defaults to ``"naive"``.

    Returns:
        Result dict with training info (model type/path, config hash,
        observation count, training window as strings, duration).

    Raises:
        KeyError: If a required parameter is missing.
        ValueError: If a date string is not ISO formatted or the model type
            is unsupported.
    """
    # Import here to avoid circular imports
    from app.features.forecasting.service import ForecastingService

    service = ForecastingService()

    # Extract every required key first so a missing key is reported before
    # any date-format problem (same precedence as before the refactor).
    store_id = params["store_id"]
    product_id = params["product_id"]
    raw_start = params["start_date"]
    raw_end = params["end_date"]

    start_date = self._parse_iso_date(raw_start)
    end_date = self._parse_iso_date(raw_end)

    config = self._build_model_config(params)

    # Train model
    response = await service.train_model(
        db=db,
        store_id=store_id,
        product_id=product_id,
        train_start_date=start_date,
        train_end_date=end_date,
        config=config,
    )

    return {
        "model_type": response.model_type,
        "model_path": response.model_path,
        "config_hash": response.config_hash,
        "n_observations": response.n_observations,
        "train_start_date": str(response.train_start_date),
        "train_end_date": str(response.train_end_date),
        "duration_ms": response.duration_ms,
    }

async def _execute_predict(
    self,
    db: AsyncSession,
    params: dict[str, Any],
) -> dict[str, Any]:
    """Execute a predict job.

    Args:
        db: Database session (unused for predict, but consistent interface).
        params: Prediction parameters. ``model_path``, ``store_id`` and
            ``product_id`` are required; ``horizon`` defaults to 14.

    Returns:
        Result dict with one entry per forecast point. ``lower_bound`` and
        ``upper_bound`` are ``None`` only when the model supplied no bound.

    Raises:
        KeyError: If a required parameter is missing.
    """
    # Import here to avoid circular imports
    from app.features.forecasting.service import ForecastingService

    # Note: db is unused here but kept for consistent interface
    _ = db

    service = ForecastingService()

    # Extract parameters
    model_path = params["model_path"]
    store_id = params["store_id"]
    product_id = params["product_id"]
    horizon = params.get("horizon", 14)

    # Generate predictions
    response = await service.predict(
        store_id=store_id,
        product_id=product_id,
        horizon=horizon,
        model_path=model_path,
    )

    return {
        "store_id": response.store_id,
        "product_id": response.product_id,
        "model_type": response.model_type,
        "horizon": response.horizon,
        "forecasts": [
            {
                "date": f.date.isoformat(),
                "forecast": float(f.forecast),
                # A bound of 0 is a real value; only a missing bound maps to None.
                "lower_bound": self._optional_float(f.lower_bound),
                "upper_bound": self._optional_float(f.upper_bound),
            }
            for f in response.forecasts
        ],
        "duration_ms": response.duration_ms,
    }

async def _execute_backtest(
    self,
    db: AsyncSession,
    params: dict[str, Any],
) -> dict[str, Any]:
    """Execute a backtest job.

    Args:
        db: Database session.
        params: Backtest parameters. ``store_id``, ``product_id``,
            ``start_date`` and ``end_date`` are required. ``model_type``
            defaults to ``"naive"``, ``n_splits`` to 5, ``test_size`` to 14
            and ``gap`` to 0.

    Returns:
        Result dict with the backtest id, model type, number of folds, the
        aggregated mae/smape/wape/bias metrics and the duration.

    Raises:
        KeyError: If a required parameter is missing.
        ValueError: If a date string is not ISO formatted or the model type
            is unsupported.
    """
    # Import here to avoid circular imports
    from app.features.backtesting.schemas import BacktestConfig, SplitConfig
    from app.features.backtesting.service import BacktestingService

    service = BacktestingService()

    # Extract parameters
    model_type = params.get("model_type", "naive")
    store_id = params["store_id"]
    product_id = params["product_id"]
    raw_start = params["start_date"]
    raw_end = params["end_date"]
    n_splits = params.get("n_splits", 5)
    test_size = params.get("test_size", 14)
    gap = params.get("gap", 0)

    start_date = self._parse_iso_date(raw_start)
    end_date = self._parse_iso_date(raw_end)

    model_config = self._build_model_config(params)

    # The job-level "test_size" parameter is passed as the split horizon.
    split_config = SplitConfig(
        n_splits=n_splits,
        horizon=test_size,
        gap=gap,
    )

    backtest_config = BacktestConfig(
        split_config=split_config,
        model_config_main=model_config,
    )

    # Run backtest
    response = await service.run_backtest(
        db=db,
        store_id=store_id,
        product_id=product_id,
        start_date=start_date,
        end_date=end_date,
        config=backtest_config,
    )

    # Extract metrics from main_model_results
    main_metrics = response.main_model_results.aggregated_metrics

    return {
        "backtest_id": response.backtest_id,
        "model_type": model_type,
        "n_splits": len(response.main_model_results.fold_results),
        # Metrics absent from the aggregate are reported as 0.0.
        "aggregated_metrics": {
            "mae": main_metrics.get("mae", 0.0),
            "smape": main_metrics.get("smape", 0.0),
            "wape": main_metrics.get("wape", 0.0),
            "bias": main_metrics.get("bias", 0.0),
        },
        "duration_ms": response.duration_ms,
    }

def _to_response(self, job: Job) -> JobResponse:
    """Convert Job model to response schema.

    The ORM row stores ``job_type`` and ``status`` as plain strings; they
    are converted back to their enums here.

    Args:
        job: Job ORM model.

    Returns:
        Job response schema.

    Raises:
        ValueError: If the stored ``job_type`` or ``status`` string is not a
            member of the corresponding enum.
    """
    return JobResponse(
        job_id=job.job_id,
        job_type=JobType(job.job_type),
        status=JobStatus(job.status),
        params=job.params,
        result=job.result,
        error_message=job.error_message,
        error_type=job.error_type,
        run_id=job.run_id,
        started_at=job.started_at,
        completed_at=job.completed_at,
        created_at=job.created_at,
        updated_at=job.updated_at,
    )
@pytest.fixture
def sample_backtest_job_create() -> JobCreate:
    """Provide a backtest job request: naive model, 5 splits of 14 days each."""
    backtest_params = {
        "model_type": "naive",
        "store_id": 1,
        "product_id": 1,
        "start_date": "2024-01-01",
        "end_date": "2024-06-30",
        "n_splits": 5,
        "test_size": 14,
    }
    return JobCreate(job_type=JobType.BACKTEST, params=backtest_params)


@pytest.fixture
def sample_job_response() -> JobResponse:
    """Provide a completed train job response with no error fields set."""
    # One timestamp is reused so all four time fields are identical.
    timestamp = datetime.now(UTC)
    linked_run_id = "xyz789abc123def456789012345678"
    train_params = {
        "model_type": "naive",
        "store_id": 1,
        "product_id": 1,
        "start_date": "2024-01-01",
        "end_date": "2024-06-30",
    }
    train_result = {
        "run_id": linked_run_id,
        "model_type": "naive",
        "training_samples": 180,
        "training_time_ms": 50.5,
    }
    return JobResponse(
        job_id="abc123def456789012345678901234",
        job_type=JobType.TRAIN,
        status=JobStatus.COMPLETED,
        params=train_params,
        result=train_result,
        error_message=None,
        error_type=None,
        run_id=linked_run_id,
        started_at=timestamp,
        completed_at=timestamp,
        created_at=timestamp,
        updated_at=timestamp,
    )
app.features.registry.routes import router as registry_router logger = get_logger(__name__) @@ -71,6 +74,9 @@ def create_app() -> FastAPI: # Routers app.include_router(health_router) + app.include_router(dimensions_router) + app.include_router(analytics_router) + app.include_router(jobs_router) app.include_router(ingest_router) app.include_router(featuresets_router) app.include_router(forecasting_router) From 49eba3ebd79147165dac1736e4418b655757e1f2 Mon Sep 17 00:00:00 2001 From: "Gabe@w7dev" Date: Sun, 1 Feb 2026 09:22:53 +0000 Subject: [PATCH 4/5] docs: update documentation for PRP-8 serving layer Update README.md: - Add dimensions, analytics, jobs modules to project structure - Document new API endpoints with examples - Add RFC 7807 error response documentation Update docs/ARCHITECTURE.md: - Mark serving layer section as implemented - Add configuration settings for new modules - Update roadmap with Phase-2 completion Update docs/PHASE-index.md: - Add Phase 7 (Serving Layer) as completed - Update phase overview table - Add version history entry Create docs/PHASE/7-SERVING_LAYER.md: - Comprehensive phase documentation - API endpoint specifications - Database schema and migration details - Usage examples and test coverage Co-Authored-By: Claude Opus 4.5 --- README.md | 135 +++++++++++- docs/ARCHITECTURE.md | 75 ++++++- docs/PHASE-index.md | 57 ++++- docs/PHASE/7-SERVING_LAYER.md | 393 ++++++++++++++++++++++++++++++++++ 4 files changed, 644 insertions(+), 16 deletions(-) create mode 100644 docs/PHASE/7-SERVING_LAYER.md diff --git a/README.md b/README.md index 44203682..82e24494 100644 --- a/README.md +++ b/README.md @@ -119,7 +119,10 @@ app/ │ ├── featuresets/ # Time-safe feature engineering (lags, rolling, calendar) │ ├── forecasting/ # Model training, prediction, persistence │ ├── backtesting/ # Time-series CV, metrics, baseline comparisons -│ └── registry/ # Model run tracking, artifacts, deployment aliases +│ ├── registry/ # Model run tracking, artifacts, 
deployment aliases +│ ├── dimensions/ # Store/product discovery for LLM tool-calling +│ ├── analytics/ # KPI aggregations and drilldown analysis +│ └── jobs/ # Async-ready task orchestration └── main.py # FastAPI entry point tests/ # Test fixtures and helpers @@ -343,6 +346,136 @@ curl -X POST http://localhost:8123/registry/runs \ See [examples/registry_demo.py](examples/registry_demo.py) for a complete workflow demo. +### Dimensions (Discovery) + +- `GET /dimensions/stores` - List stores with pagination and filtering +- `GET /dimensions/stores/{store_id}` - Get store details by ID +- `GET /dimensions/products` - List products with pagination and filtering +- `GET /dimensions/products/{product_id}` - Get product details by ID + +**Example Request:** +```bash +# List stores with filtering +curl "http://localhost:8123/dimensions/stores?region=North&page=1&page_size=20" + +# Search for products +curl "http://localhost:8123/dimensions/products?search=Cola&category=Beverage" +``` + +**Purpose:** Resolve store/product metadata to IDs before calling forecasting endpoints. Optimized for LLM agent tool-calling with rich Field descriptions. 
+ +**Features:** +- 1-indexed pagination (page=1 is first page) +- Case-insensitive search in code/sku and name fields +- Filter by region, store_type, category, or brand + +### Analytics + +- `GET /analytics/kpis` - Compute aggregated KPIs for a date range +- `GET /analytics/drilldowns` - Drill into data by dimension (store, product, category, region, date) + +**Example KPI Request:** +```bash +curl "http://localhost:8123/analytics/kpis?start_date=2024-01-01&end_date=2024-01-31&store_id=1" +``` + +**Example Drilldown Request:** +```bash +curl "http://localhost:8123/analytics/drilldowns?dimension=store&start_date=2024-01-01&end_date=2024-01-31&max_items=10" +``` + +**Metrics Computed:** +- `total_revenue`: Sum of sales amount +- `total_units`: Sum of quantity sold +- `total_transactions`: Count of unique sales records +- `avg_unit_price`: Revenue / units +- `avg_basket_value`: Revenue / transactions + +**Drilldown Dimensions:** +- `store` - Group by store (returns code and ID) +- `product` - Group by product (returns SKU and ID) +- `category` - Group by product category +- `region` - Group by store region +- `date` - Daily breakdown + +### Jobs (Task Orchestration) + +- `POST /jobs` - Create and execute a job (train, predict, backtest) +- `GET /jobs` - List jobs with filtering and pagination +- `GET /jobs/{job_id}` - Get job status and result +- `DELETE /jobs/{job_id}` - Cancel a pending job + +**Example Train Job:** +```bash +curl -X POST http://localhost:8123/jobs \ + -H "Content-Type: application/json" \ + -d '{ + "job_type": "train", + "params": { + "model_type": "seasonal_naive", + "store_id": 1, + "product_id": 1, + "start_date": "2024-01-01", + "end_date": "2024-06-30", + "season_length": 7 + } + }' +``` + +**Example Backtest Job:** +```bash +curl -X POST http://localhost:8123/jobs \ + -H "Content-Type: application/json" \ + -d '{ + "job_type": "backtest", + "params": { + "model_type": "naive", + "store_id": 1, + "product_id": 1, + "start_date": 
"2024-01-01", + "end_date": "2024-06-30", + "n_splits": 5, + "test_size": 14 + } + }' +``` + +**Job Types:** +- `train` - Train a forecasting model (returns model_path) +- `predict` - Generate predictions using a trained model +- `backtest` - Run time-series cross-validation + +**Job Lifecycle:** +- `pending` → `running` → `completed` | `failed` +- `pending` → `cancelled` (via DELETE) + +**Features:** +- Jobs execute synchronously but use async-ready API contracts (202 Accepted) +- JSONB storage for flexible params and results +- Links to model_run for train/backtest jobs + +### Error Responses (RFC 7807) + +All error responses follow RFC 7807 Problem Details format with `Content-Type: application/problem+json`: + +```json +{ + "type": "/errors/not-found", + "title": "Not Found", + "status": 404, + "detail": "Store not found: 999. Use GET /dimensions/stores to list available stores.", + "instance": "/requests/abc123", + "code": "NOT_FOUND", + "request_id": "abc123" +} +``` + +**Error Types:** +- `/errors/validation` - Request validation failed (422) +- `/errors/not-found` - Resource not found (404) +- `/errors/conflict` - Resource conflict (409) +- `/errors/database` - Database error (500) + ## API Documentation Once the server is running: diff --git a/docs/ARCHITECTURE.md b/docs/ARCHITECTURE.md index 899ac457..7977b5a4 100644 --- a/docs/ARCHITECTURE.md +++ b/docs/ARCHITECTURE.md @@ -378,20 +378,57 @@ registry_duplicate_policy: Literal["allow", "deny", "detect"] = "detect" --- -## 8) Typed FastAPI Contracts (Serving Layer) +## 8) Typed FastAPI Contracts (Serving Layer) — ✅ IMPLEMENTED -**Implemented Endpoints:** +**Implemented via PRP-8** - Agent-first API design with RFC 7807 error responses: + +### 8.1 RFC 7807 Problem Details + +All error responses use RFC 7807 format with `Content-Type: application/problem+json`: +- Type URIs: `/errors/validation`, `/errors/not-found`, `/errors/conflict`, `/errors/database` +- Includes `request_id` for correlation +- 
Field-level validation errors for 422 responses + +### 8.2 Implemented Endpoints + +**Health & Core:** - `GET /health` - Health check + +**Dimensions (Discovery):** +- `GET /dimensions/stores` - List stores with pagination, filtering, search +- `GET /dimensions/stores/{store_id}` - Get store by ID +- `GET /dimensions/products` - List products with pagination, filtering, search +- `GET /dimensions/products/{product_id}` - Get product by ID + +**Analytics:** +- `GET /analytics/kpis` - Compute KPIs for date range with filters +- `GET /analytics/drilldowns` - Drill into dimension (store, product, category, region, date) + +**Jobs (Task Orchestration):** +- `POST /jobs` - Create and execute job (train, predict, backtest) +- `GET /jobs` - List jobs with filtering and pagination +- `GET /jobs/{job_id}` - Get job status and result +- `DELETE /jobs/{job_id}` - Cancel pending job + +**Ingest:** - `POST /ingest/sales-daily` - Batch upsert daily sales records + +**Feature Engineering:** - `POST /featuresets/compute` - Compute time-safe features - `POST /featuresets/preview` - Preview features with sample rows -- `POST /forecasting/train` - Train forecasting model (returns model_path) -- `POST /forecasting/predict` - Generate forecasts using saved model -- `POST /backtesting/run` - Run time-series CV backtest with baseline comparisons + +**Forecasting:** +- `POST /forecasting/train` - Train forecasting model +- `POST /forecasting/predict` - Generate forecasts + +**Backtesting:** +- `POST /backtesting/run` - Run time-series CV backtest + +**Model Registry:** - `POST /registry/runs` - Create model run - `GET /registry/runs` - List runs with filters - `GET /registry/runs/{run_id}` - Get run details -- `PATCH /registry/runs/{run_id}` - Update run status/metrics/artifacts +- `PATCH /registry/runs/{run_id}` - Update status/metrics/artifacts - `GET /registry/runs/{run_id}/verify` - Verify artifact integrity - `POST /registry/aliases` - Create deployment alias - `GET /registry/aliases` 
- List aliases @@ -399,8 +436,23 @@ registry_duplicate_policy: Literal["allow", "deny", "detect"] = "detect" - `DELETE /registry/aliases/{alias_name}` - Delete alias - `GET /registry/compare/{run_id_a}/{run_id_b}` - Compare two runs +### 8.3 Location + +- Problem Details: `app/core/problem_details.py` +- Dimensions: `app/features/dimensions/` (schemas, service, routes) +- Analytics: `app/features/analytics/` (schemas, service, routes) +- Jobs: `app/features/jobs/` (models, schemas, service, routes) +- Migration: `alembic/versions/37e16ecef223_create_jobs_table.py` + +### 8.4 Configuration (Settings) + +```python +analytics_max_rows: int = 10000 +analytics_max_date_range_days: int = 730 +jobs_retention_days: int = 30 +``` + **Planned Endpoints:** -- `GET /data/kpis`, `GET /data/drilldowns` - Data exploration - `POST /rag/query` - RAG knowledge base queries (optional `/rag/index` in dev) Contracts are Pydantic v2 validated and use `response_model` for explicit output typing. @@ -453,5 +505,10 @@ The repo standards live in `docs/validation/` and are treated as merge gates: - Backtesting: ✅ IMPLEMENTED (PRP-6) - Registry: ✅ IMPLEMENTED (PRP-7) - Leaderboard UI: Planned -- **Phase-2**: ML models + richer exogenous features -- **Phase-3**: RAG + agentic workflows (PydanticAI), run report generation/indexing +- **Phase-2**: Serving Layer (agent-first API design) ✅ + - RFC 7807 Problem Details: ✅ IMPLEMENTED (PRP-8) + - Dimensions discovery: ✅ IMPLEMENTED (PRP-8) + - Analytics KPIs/drilldowns: ✅ IMPLEMENTED (PRP-8) + - Jobs orchestration: ✅ IMPLEMENTED (PRP-8) +- **Phase-3**: ML models + richer exogenous features +- **Phase-4**: RAG + agentic workflows (PydanticAI), run report generation/indexing diff --git a/docs/PHASE-index.md b/docs/PHASE-index.md index 7b912a85..280fa43b 100644 --- a/docs/PHASE-index.md +++ b/docs/PHASE-index.md @@ -15,9 +15,10 @@ This document indexes all implementation phases of the ForecastLabAI project. 
| 4 | Forecasting | Completed | PRP-5 | [4-FORECASTING.md](./PHASE/4-FORECASTING.md) | | 5 | Backtesting | Completed | PRP-6 | [5-BACKTESTING.md](./PHASE/5-BACKTESTING.md) | | 6 | Model Registry | Completed | PRP-7 | [6-MODEL_REGISTRY.md](./PHASE/6-MODEL_REGISTRY.md) | -| 7 | RAG Knowledge Base | Pending | PRP-8 | - | -| 8 | Dashboard | Pending | PRP-9 | - | -| 9 | Agentic Layer | Pending | - | - | +| 7 | Serving Layer | Completed | PRP-8 | [7-SERVING_LAYER.md](./PHASE/7-SERVING_LAYER.md) | +| 8 | RAG Knowledge Base | Pending | PRP-9 | - | +| 9 | Dashboard | Pending | PRP-10 | - | +| 10 | Agentic Layer | Pending | - | - | --- @@ -229,17 +230,60 @@ This document indexes all implementation phases of the ForecastLabAI project. - Pyright: 0 errors - Pytest: 103 unit + 24 integration tests +### Phase 7: Serving Layer + +**Date Completed**: 2026-02-01 + +**Summary**: Agent-first API design with RFC 7807 error responses: +- RFC 7807 Problem Details for semantic error responses +- Dimensions module for store/product discovery (LLM tool-calling optimized) +- Analytics module for KPI aggregations and drilldown analysis +- Jobs module for async-ready task orchestration +- Rich OpenAPI descriptions for all endpoints + +**Key Deliverables**: +- `app/core/problem_details.py` - RFC 7807 ProblemDetail schema and helpers +- `app/features/dimensions/` - Store/product discovery endpoints +- `app/features/analytics/` - KPI and drilldown endpoints +- `app/features/jobs/` - Job ORM model, service, and endpoints +- `alembic/versions/37e16ecef223_create_jobs_table.py` - Job table migration + +**API Endpoints**: +- `GET /dimensions/stores` - List stores with pagination and filtering +- `GET /dimensions/stores/{store_id}` - Get store by ID +- `GET /dimensions/products` - List products with pagination and filtering +- `GET /dimensions/products/{product_id}` - Get product by ID +- `GET /analytics/kpis` - Compute KPIs for date range +- `GET /analytics/drilldowns` - Drill into dimension +- 
`POST /jobs` - Create and execute job +- `GET /jobs` - List jobs with filtering +- `GET /jobs/{job_id}` - Get job status +- `DELETE /jobs/{job_id}` - Cancel pending job + +**Configuration (Settings)**: +```python +analytics_max_rows: int = 10000 +analytics_max_date_range_days: int = 730 +jobs_retention_days: int = 30 +``` + +**Validation Results**: +- Ruff: All checks passed +- MyPy: 0 errors (103 source files) +- Pyright: 0 errors +- Pytest: 426 unit tests passed + --- ## Pending Phases -### Phase 7: RAG Knowledge Base +### Phase 8: RAG Knowledge Base pgvector embeddings with evidence-grounded answers and citations. -### Phase 8: Dashboard +### Phase 9: Dashboard React + Vite + shadcn/ui frontend with data tables and visualizations. -### Phase 9: Agentic Layer (Optional) +### Phase 10: Agentic Layer (Optional) PydanticAI integration for experiment orchestration. --- @@ -286,3 +330,4 @@ Each phase document (`docs/PHASE/X-PHASE_NAME.md`) contains: | 2026-01-31 | 4 | Forecasting module with model zoo completed | | 2026-01-31 | 5 | Backtesting module with time-series CV completed | | 2026-02-01 | 6 | Model Registry with run tracking and deployment aliases completed | +| 2026-02-01 | 7 | Serving Layer with RFC 7807, dimensions, analytics, and jobs completed | diff --git a/docs/PHASE/7-SERVING_LAYER.md b/docs/PHASE/7-SERVING_LAYER.md new file mode 100644 index 00000000..b3246a03 --- /dev/null +++ b/docs/PHASE/7-SERVING_LAYER.md @@ -0,0 +1,393 @@ +# Phase 7: Serving Layer + +**Date Completed**: 2026-02-01 +**PRP**: PRP-8 +**Status**: ✅ Completed + +--- + +## Executive Summary + +Phase 7 implements the agent-first API design for ForecastLabAI with RFC 7807 Problem Details for semantic error responses, dimension discovery endpoints for LLM tool-calling, KPI aggregations and drilldown analysis, and async-ready job orchestration. + +### Objectives Achieved + +1. **RFC 7807 Problem Details** - Semantic error responses with type URIs and correlation +2. 
**Dimensions Module** - Store/product discovery with LLM-optimized descriptions +3. **Analytics Module** - KPI aggregations and multi-dimension drilldowns +4. **Jobs Module** - Async-ready task orchestration for train/predict/backtest +5. **Rich OpenAPI Descriptions** - Optimized for LLM agent tool selection + +--- + +## Deliverables + +### 1. RFC 7807 Problem Details + +**File**: `app/core/problem_details.py` + +Implements RFC 7807 compliant error responses: + +```python +class ProblemDetail(BaseModel): + """RFC 7807 Problem Details for HTTP APIs.""" + type: str = "/errors/unknown" # URI identifying error type + title: str # Human-readable summary + status: int # HTTP status code + detail: str | None # Specific error description + instance: str | None # URI for this occurrence + errors: list[dict] | None # Field-level validation errors + code: str | None # Machine-readable error code + request_id: str | None # Correlation ID +``` + +**Error Type URIs**: +- `/errors/validation` - Request validation failed (422) +- `/errors/not-found` - Resource not found (404) +- `/errors/conflict` - Resource conflict (409) +- `/errors/database` - Database error (500) +- `/errors/unknown` - Unhandled error (500) + +**Content-Type**: `application/problem+json` + +### 2. 
Dimensions Module + +**Directory**: `app/features/dimensions/` + +| File | Purpose | +|------|---------| +| `__init__.py` | Module exports | +| `schemas.py` | StoreResponse, ProductResponse with rich Field descriptions | +| `service.py` | DimensionService for pagination, filtering, search | +| `routes.py` | API endpoints with OpenAPI descriptions | +| `tests/conftest.py` | Test fixtures | + +**API Endpoints**: + +| Method | Path | Description | +|--------|------|-------------| +| GET | `/dimensions/stores` | List stores with pagination and filtering | +| GET | `/dimensions/stores/{store_id}` | Get store details by ID | +| GET | `/dimensions/products` | List products with pagination and filtering | +| GET | `/dimensions/products/{product_id}` | Get product details by ID | + +**Query Parameters**: +- `page` - Page number (1-indexed, default: 1) +- `page_size` - Items per page (max: 100, default: 20) +- `region` / `store_type` - Filter by region or store type (stores) +- `category` / `brand` - Filter by category or brand (products) +- `search` - Case-insensitive search in code/sku and name (min 2 chars) + +**LLM-Optimized Field Descriptions**: + +```python +class StoreResponse(BaseModel): + id: int = Field( + description="Internal store ID. Use this value for store_id parameters " + "in /ingest/sales-daily, /forecasting/train, and /forecasting/predict." + ) + code: str = Field( + description="Business store code (e.g., 'S001'). Unique human-readable identifier. " + "Use this for display and matching with external data sources." + ) +``` + +### 3. 
Analytics Module + +**Directory**: `app/features/analytics/` + +| File | Purpose | +|------|---------| +| `__init__.py` | Module exports | +| `schemas.py` | KPIMetrics, KPIResponse, DrilldownItem, DrilldownResponse | +| `service.py` | AnalyticsService with compute_kpis() and compute_drilldown() | +| `routes.py` | API endpoints with rich OpenAPI descriptions | +| `tests/conftest.py` | Test fixtures | + +**API Endpoints**: + +| Method | Path | Description | +|--------|------|-------------| +| GET | `/analytics/kpis` | Compute aggregated KPIs for date range | +| GET | `/analytics/drilldowns` | Drill into dimension with ranking | + +**KPI Metrics Computed**: +- `total_revenue` - Sum of total_amount +- `total_units` - Sum of quantity +- `total_transactions` - Count of records +- `avg_unit_price` - Revenue / units +- `avg_basket_value` - Revenue / transactions + +**Drilldown Dimensions**: + +| Dimension | Groups By | Returns | +|-----------|-----------|---------| +| `store` | Store | code, id, metrics, rank, revenue_share_pct | +| `product` | Product | SKU, id, metrics, rank, revenue_share_pct | +| `category` | Category | name, metrics, rank, revenue_share_pct | +| `region` | Region | name, metrics, rank, revenue_share_pct | +| `date` | Date | date, metrics, rank, revenue_share_pct | + +### 4. 
Jobs Module + +**Directory**: `app/features/jobs/` + +| File | Purpose | +|------|---------| +| `__init__.py` | Module exports | +| `models.py` | Job ORM model with JSONB params/results | +| `schemas.py` | JobCreate, JobResponse, JobListResponse | +| `service.py` | JobService for create, execute, list, cancel | +| `routes.py` | API endpoints with async-ready semantics | +| `tests/conftest.py` | Test fixtures | + +**API Endpoints**: + +| Method | Path | Status | Description | +|--------|------|--------|-------------| +| POST | `/jobs` | 202 | Create and execute job | +| GET | `/jobs` | 200 | List jobs with filtering | +| GET | `/jobs/{job_id}` | 200 | Get job status and result | +| DELETE | `/jobs/{job_id}` | 200 | Cancel pending job | + +**Job Types**: + +| Type | Description | Required Params | +|------|-------------|-----------------| +| `train` | Train forecasting model | model_type, store_id, product_id, start_date, end_date | +| `predict` | Generate predictions | model_path, store_id, product_id, horizon | +| `backtest` | Run cross-validation | model_type, store_id, product_id, start_date, end_date | + +**Job Lifecycle**: + +``` +PENDING → RUNNING → COMPLETED | FAILED +PENDING → CANCELLED (via DELETE) +``` + +**ORM Model**: + +```python +class Job(TimestampMixin, Base): + __tablename__ = "job" + + id: Mapped[int] = mapped_column(Integer, primary_key=True) + job_id: Mapped[str] = mapped_column(String(32), unique=True, index=True) + job_type: Mapped[str] = mapped_column(String(20), index=True) + status: Mapped[str] = mapped_column(String(20), default="pending") + params: Mapped[dict] = mapped_column(JSONB, nullable=False) + result: Mapped[dict | None] = mapped_column(JSONB, nullable=True) + error_message: Mapped[str | None] = mapped_column(String(2000)) + error_type: Mapped[str | None] = mapped_column(String(100)) + started_at: Mapped[datetime | None] + completed_at: Mapped[datetime | None] + run_id: Mapped[str | None] # Link to model_run for train/backtest +``` 
+ +--- + +## Configuration + +### New Settings in `app/core/config.py` + +```python +# Analytics +analytics_max_rows: int = 10000 # Max rows in KPI queries +analytics_max_date_range_days: int = 730 # Max date range (2 years) + +# Jobs +jobs_retention_days: int = 30 # Job retention period +``` + +--- + +## Database Changes + +### Migration: `37e16ecef223_create_jobs_table.py` + +Creates the `job` table with: + +**Columns**: +- `id` (PK), `job_id` (unique), `job_type`, `status` +- `params` (JSONB), `result` (JSONB) +- `error_message`, `error_type` +- `started_at`, `completed_at` +- `run_id` (FK to model_run) +- `created_at`, `updated_at` (from TimestampMixin) + +**Indexes**: +- `ix_job_job_id` (unique) +- `ix_job_job_type` +- `ix_job_status` +- `ix_job_run_id` +- `ix_job_type_status` (composite) +- `ix_job_params_gin` (GIN for JSONB) +- `ix_job_result_gin` (GIN for JSONB) + +**Check Constraints**: +- `ck_job_valid_status` - Validates status enum +- `ck_job_valid_type` - Validates job_type enum + +--- + +## Integration + +### Router Registration in `app/main.py` + +```python +from app.features.analytics.routes import router as analytics_router +from app.features.dimensions.routes import router as dimensions_router +from app.features.jobs.routes import router as jobs_router + +# In create_app(): +app.include_router(dimensions_router) +app.include_router(analytics_router) +app.include_router(jobs_router) +``` + +### Alembic Model Import in `alembic/env.py` + +```python +from app.features.jobs import models as jobs_models # noqa: F401 +``` + +--- + +## Test Coverage + +### Test Files Created + +| File | Description | +|------|-------------| +| `app/features/dimensions/tests/__init__.py` | Test module | +| `app/features/dimensions/tests/conftest.py` | Fixtures for store/product responses | +| `app/features/analytics/tests/__init__.py` | Test module | +| `app/features/analytics/tests/conftest.py` | Fixtures for KPI/drilldown responses | +| 
`app/features/jobs/tests/__init__.py` | Test module | +| `app/features/jobs/tests/conftest.py` | Fixtures for job create/response | + +### Validation Results + +``` +Ruff: All checks passed +MyPy: 0 errors (103 source files) +Pyright: 0 errors +Pytest: 426 unit tests passed (1 pre-existing env-specific failure) +``` + +--- + +## Directory Structure + +``` +app/ +├── core/ +│ ├── config.py # MODIFIED: Added analytics/jobs settings +│ ├── exceptions.py # MODIFIED: RFC 7807 error handlers +│ └── problem_details.py # NEW: RFC 7807 schema and helpers +├── features/ +│ ├── dimensions/ # NEW: Store/product discovery +│ │ ├── __init__.py +│ │ ├── schemas.py +│ │ ├── service.py +│ │ ├── routes.py +│ │ └── tests/ +│ │ ├── __init__.py +│ │ └── conftest.py +│ ├── analytics/ # NEW: KPI and drilldown +│ │ ├── __init__.py +│ │ ├── schemas.py +│ │ ├── service.py +│ │ ├── routes.py +│ │ └── tests/ +│ │ ├── __init__.py +│ │ └── conftest.py +│ └── jobs/ # NEW: Task orchestration +│ ├── __init__.py +│ ├── models.py +│ ├── schemas.py +│ ├── service.py +│ ├── routes.py +│ └── tests/ +│ ├── __init__.py +│ └── conftest.py +└── main.py # MODIFIED: Router registration + +alembic/ +├── env.py # MODIFIED: Jobs model import +└── versions/ + └── 37e16ecef223_create_jobs_table.py # NEW +``` + +--- + +## API Usage Examples + +### Dimensions Discovery + +```bash +# List all stores +curl "http://localhost:8123/dimensions/stores" + +# Search stores by region +curl "http://localhost:8123/dimensions/stores?region=North&page_size=10" + +# Get specific store +curl "http://localhost:8123/dimensions/stores/1" + +# Search products +curl "http://localhost:8123/dimensions/products?search=Cola&category=Beverage" +``` + +### Analytics KPIs + +```bash +# Total KPIs for January +curl "http://localhost:8123/analytics/kpis?start_date=2024-01-01&end_date=2024-01-31" + +# KPIs for specific store +curl "http://localhost:8123/analytics/kpis?start_date=2024-01-01&end_date=2024-01-31&store_id=1" + +# Top stores by 
revenue +curl "http://localhost:8123/analytics/drilldowns?dimension=store&start_date=2024-01-01&end_date=2024-01-31&max_items=10" + +# Category breakdown +curl "http://localhost:8123/analytics/drilldowns?dimension=category&start_date=2024-01-01&end_date=2024-01-31" +``` + +### Jobs Orchestration + +```bash +# Create train job +curl -X POST http://localhost:8123/jobs \ + -H "Content-Type: application/json" \ + -d '{ + "job_type": "train", + "params": { + "model_type": "seasonal_naive", + "store_id": 1, + "product_id": 1, + "start_date": "2024-01-01", + "end_date": "2024-06-30", + "season_length": 7 + } + }' + +# Check job status +curl "http://localhost:8123/jobs/abc123def456..." + +# List failed jobs +curl "http://localhost:8123/jobs?status=failed" + +# Cancel pending job +curl -X DELETE "http://localhost:8123/jobs/abc123def456..." +``` + +--- + +## Next Phase Preparation + +Phase 8 (RAG Knowledge Base) will build on this serving layer to: +- Index OpenAPI schema for agent tool discovery +- Index documentation for evidence-grounded answers +- Provide `/rag/query` endpoint with citations From 67ce108c4f162563f4d0ae96e8204b78e3a5c862 Mon Sep 17 00:00:00 2001 From: "Gabe@w7dev" Date: Sun, 1 Feb 2026 09:27:11 +0000 Subject: [PATCH 5/5] style: fix ruff formatting Co-Authored-By: Claude Opus 4.5 --- app/features/analytics/schemas.py | 15 +++++---------- app/features/analytics/service.py | 20 +++++--------------- app/features/jobs/models.py | 4 +--- app/features/jobs/routes.py | 6 ++---- app/features/jobs/schemas.py | 12 ++++-------- 5 files changed, 17 insertions(+), 40 deletions(-) diff --git a/app/features/analytics/schemas.py b/app/features/analytics/schemas.py index e48ddab5..576cd671 100644 --- a/app/features/analytics/schemas.py +++ b/app/features/analytics/schemas.py @@ -61,8 +61,7 @@ class KPIMetrics(BaseModel): total_units: int = Field( ..., ge=0, - description="Total units sold (sum of quantity). 
" - "Represents the physical volume of sales.", + description="Total units sold (sum of quantity). Represents the physical volume of sales.", ) total_transactions: int = Field( ..., @@ -72,8 +71,7 @@ class KPIMetrics(BaseModel): ) avg_unit_price: Decimal | None = Field( None, - description="Average price per unit (total_revenue / total_units). " - "Null if no units sold.", + description="Average price per unit (total_revenue / total_units). Null if no units sold.", ) avg_basket_value: Decimal | None = Field( None, @@ -102,18 +100,15 @@ class KPIResponse(BaseModel): ) store_id: int | None = Field( None, - description="Store filter applied (if any). " - "Null means all stores included.", + description="Store filter applied (if any). Null means all stores included.", ) product_id: int | None = Field( None, - description="Product filter applied (if any). " - "Null means all products included.", + description="Product filter applied (if any). Null means all products included.", ) category: str | None = Field( None, - description="Category filter applied (if any). " - "Null means all categories included.", + description="Category filter applied (if any). 
Null means all categories included.", ) diff --git a/app/features/analytics/service.py b/app/features/analytics/service.py index 91e35e7d..a621bb93 100644 --- a/app/features/analytics/service.py +++ b/app/features/analytics/service.py @@ -63,9 +63,7 @@ async def compute_kpis( func.coalesce(func.sum(SalesDaily.total_amount), 0).label("total_revenue"), func.coalesce(func.sum(SalesDaily.quantity), 0).label("total_units"), func.count().label("total_transactions"), - ).where( - (SalesDaily.date >= start_date) & (SalesDaily.date <= end_date) - ) + ).where((SalesDaily.date >= start_date) & (SalesDaily.date <= end_date)) # Apply filters if store_id is not None: @@ -86,12 +84,8 @@ async def compute_kpis( total_transactions = int(row.total_transactions) # Compute derived metrics - avg_unit_price = ( - total_revenue / total_units if total_units > 0 else None - ) - avg_basket_value = ( - total_revenue / total_transactions if total_transactions > 0 else None - ) + avg_unit_price = total_revenue / total_units if total_units > 0 else None + avg_basket_value = total_revenue / total_transactions if total_transactions > 0 else None metrics = KPIMetrics( total_revenue=total_revenue, @@ -238,15 +232,11 @@ async def compute_drilldown( # Calculate derived metrics avg_unit_price = row_revenue / row_units if row_units > 0 else None - avg_basket_value = ( - row_revenue / row_transactions if row_transactions > 0 else None - ) + avg_basket_value = row_revenue / row_transactions if row_transactions > 0 else None # Calculate revenue share revenue_share = ( - (row_revenue / total_revenue_all * 100) - if total_revenue_all > 0 - else Decimal("0") + (row_revenue / total_revenue_all * 100) if total_revenue_all > 0 else Decimal("0") ) # Get dimension ID if available diff --git a/app/features/jobs/models.py b/app/features/jobs/models.py index 172c1fe7..2f69a23d 100644 --- a/app/features/jobs/models.py +++ b/app/features/jobs/models.py @@ -90,9 +90,7 @@ class Job(TimestampMixin, Base): id: Mapped[int] 
= mapped_column(Integer, primary_key=True) job_id: Mapped[str] = mapped_column(String(32), unique=True, index=True) job_type: Mapped[str] = mapped_column(String(20), index=True) - status: Mapped[str] = mapped_column( - String(20), default=JobStatus.PENDING.value, index=True - ) + status: Mapped[str] = mapped_column(String(20), default=JobStatus.PENDING.value, index=True) # Job configuration (stored as JSONB for flexibility) params: Mapped[dict[str, Any]] = mapped_column(JSONB, nullable=False) diff --git a/app/features/jobs/routes.py b/app/features/jobs/routes.py index f69e4d7e..2347fa26 100644 --- a/app/features/jobs/routes.py +++ b/app/features/jobs/routes.py @@ -241,8 +241,7 @@ async def get_job( if result is None: raise HTTPException( status_code=status.HTTP_404_NOT_FOUND, - detail=f"Job not found: {job_id}. " - "Use GET /jobs to list available jobs.", + detail=f"Job not found: {job_id}. Use GET /jobs to list available jobs.", ) return result @@ -292,8 +291,7 @@ async def cancel_job( if result is None: raise HTTPException( status_code=status.HTTP_404_NOT_FOUND, - detail=f"Job not found: {job_id}. " - "Use GET /jobs to list available jobs.", + detail=f"Job not found: {job_id}. Use GET /jobs to list available jobs.", ) return result diff --git a/app/features/jobs/schemas.py b/app/features/jobs/schemas.py index ea52b87d..0f411dfa 100644 --- a/app/features/jobs/schemas.py +++ b/app/features/jobs/schemas.py @@ -87,23 +87,19 @@ class JobResponse(BaseModel): ) result: dict[str, Any] | None = Field( None, - description="Job result (null until completed). " - "Structure depends on job_type.", + description="Job result (null until completed). Structure depends on job_type.", ) error_message: str | None = Field( None, - description="Error details if status='failed'. " - "Use for troubleshooting.", + description="Error details if status='failed'. Use for troubleshooting.", ) error_type: str | None = Field( None, - description="Exception class name if status='failed'. 
" - "Helps identify error category.", + description="Exception class name if status='failed'. Helps identify error category.", ) run_id: str | None = Field( None, - description="Model run ID for train/backtest jobs. " - "Use with /registry/runs endpoint.", + description="Model run ID for train/backtest jobs. Use with /registry/runs endpoint.", ) started_at: datetime | None = Field( None,