Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1,002 changes: 1,002 additions & 0 deletions PRPs/PRP-20-explorer-interactivity.md

Large diffs are not rendered by default.

3 changes: 3 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ Portfolio-grade end-to-end retail demand forecasting system.
- **Serving Layer**: Typed FastAPI endpoints (Pydantic v2 validation)
- **Model Registry**: Run configs, metrics, artifacts, and data windows for reproducibility
- **Dashboard**: React 19 + Vite + Tailwind CSS 4 + shadcn/ui for data exploration and model management
- **Explorer**: Click-through store & product detail pages with date-scoped KPIs and revenue charts; sortable, CSV-exportable tables with column-visibility toggles; revenue bar/line charts and URL-shareable cross-filtering on the Sales page
- **RAG Knowledge Base**: Postgres pgvector embeddings + evidence-grounded answers with citations
- **Agentic Layer**: PydanticAI agents for autonomous experimentation and evidence-grounded Q&A with human-in-the-loop approval
- **Data Seeder (The Forge)**: Reproducible synthetic data generator with realistic time-series patterns, scenario presets, and retail effects
Expand Down Expand Up @@ -451,11 +452,13 @@ curl "http://localhost:8123/dimensions/products?search=Cola&category=Beverage"
- 1-indexed pagination (page=1 is first page)
- Case-insensitive search in code/sku and name fields
- Filter by region, store_type, category, or brand
- Optional `sort_by` / `sort_order` on the store and product lists (allow-listed columns; unknown values fall back to the default order)

### Analytics

- `GET /analytics/kpis` - Compute aggregated KPIs for a date range
- `GET /analytics/drilldowns` - Drill into data by dimension (store, product, category, region, date)
- `GET /analytics/timeseries` - Period-bucketed sales series (day/week/month/quarter) for revenue-over-time charts

**Example KPI Request:**
```bash
Expand Down
95 changes: 95 additions & 0 deletions app/features/analytics/routes.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@
DrilldownDimension,
DrilldownResponse,
KPIResponse,
TimeGranularity,
TimeSeriesResponse,
)
from app.features.analytics.service import AnalyticsService

Expand Down Expand Up @@ -247,3 +249,96 @@ async def get_drilldowns(
product_id=product_id,
max_items=max_items,
)


# =============================================================================
# Time Series Endpoints
# =============================================================================


@router.get(
    "/timeseries",
    response_model=TimeSeriesResponse,
    summary="Compute a period-bucketed sales time series",
    description="""
Aggregate sales into a time series bucketed by day, week, month, or quarter.

**Purpose**: Drive revenue-over-time charts. Unlike `/drilldowns?dimension=date`,
this endpoint orders points by period (not revenue), supports week/month/quarter
bucketing, and is not capped at 100 items.

**Metrics per period**: same `KPIMetrics` shape as `/analytics/kpis` —
`total_revenue`, `total_units`, `total_transactions`, `avg_unit_price`,
`avg_basket_value`.

**Filtering Options**:
- `store_id`: scope the series to a single store
- `product_id`: scope the series to a single product
- `category`: scope the series to a product category (exact match)

**Date Range**:
- Both `start_date` and `end_date` are inclusive
- Maximum range: 730 days (2 years)

**Example Use Cases**:
1. Daily revenue trend: `GET /analytics/timeseries?start_date=2024-01-01&end_date=2024-03-31&granularity=day`
2. Weekly trend for a store: `GET /analytics/timeseries?store_id=5&start_date=2024-01-01&end_date=2024-12-31&granularity=week`
""",
)
async def get_timeseries(
    start_date: date = Query(
        ...,
        description="Start of analysis period (inclusive). Format: YYYY-MM-DD.",
    ),
    end_date: date = Query(
        ...,
        description="End of analysis period (inclusive). Format: YYYY-MM-DD.",
    ),
    granularity: TimeGranularity = Query(
        TimeGranularity.DAY,
        description="Bucket size: day, week, month, or quarter.",
    ),
    store_id: int | None = Query(
        None,
        description="Filter by store ID. Use GET /dimensions/stores to find valid IDs.",
    ),
    product_id: int | None = Query(
        None,
        description="Filter by product ID. Use GET /dimensions/products to find valid IDs.",
    ),
    category: str | None = Query(
        None,
        description="Filter by product category name (exact match).",
    ),
    db: AsyncSession = Depends(get_db),
) -> TimeSeriesResponse:
    """Serve the period-bucketed sales series for revenue-over-time charts.

    The handler is a thin adapter: it checks the requested date window and
    then hands every query parameter straight to
    ``AnalyticsService.compute_timeseries``.

    Args:
        start_date: First day of the analysis window (inclusive).
        end_date: Last day of the analysis window (inclusive).
        granularity: Bucket size for aggregation (day, week, month, quarter).
        store_id: Restrict the series to one store, if given.
        product_id: Restrict the series to one product, if given.
        category: Restrict the series to one product category, if given.
        db: Async database session supplied by the ``get_db`` dependency.

    Returns:
        The service's ``TimeSeriesResponse``, with points in ascending
        period order.

    Raises:
        HTTPException: If ``validate_date_range`` rejects the window.
    """
    # Fail fast on a bad window so no query is issued for it.
    validate_date_range(start_date, end_date)

    return await AnalyticsService().compute_timeseries(
        db=db,
        start_date=start_date,
        end_date=end_date,
        granularity=granularity,
        store_id=store_id,
        product_id=product_id,
        category=category,
    )
63 changes: 63 additions & 0 deletions app/features/analytics/schemas.py
Original file line number Diff line number Diff line change
Expand Up @@ -191,6 +191,69 @@ class DrilldownResponse(BaseModel):
)


# =============================================================================
# Time Series Response Schemas
# =============================================================================


class TimeSeriesPoint(BaseModel):
    """A single bucket of the sales time series: its start date plus KPIs."""

    # Allow validation from attribute-bearing objects as well as dicts.
    model_config = ConfigDict(from_attributes=True)

    # Required: the date that identifies the bucket.
    period: date = Field(
        description=(
            "Bucket start date (the day itself, or the first day "
            "of the week/month/quarter bucket)."
        ),
    )
    # Required: the aggregated figures for that bucket.
    metrics: KPIMetrics = Field(
        description="Aggregated KPI metrics for this period.",
    )


class TimeSeriesResponse(BaseModel):
    """Envelope for a period-bucketed sales time series.

    Carries the ordered points together with an echo of the request scope
    (granularity, date window, and any store/product/category filter) so a
    chart can label itself without re-reading the query string.
    """

    # --- Series payload (all required) ---------------------------------
    granularity: TimeGranularity = Field(
        description="Bucket size used for aggregation (day, week, month, quarter).",
    )
    points: list[TimeSeriesPoint] = Field(
        description="Time series points in ascending period order.",
    )
    total_points: int = Field(
        ge=0,
        description="Number of points returned (equals len(points)).",
    )

    # --- Echo of the analysis window (required) ------------------------
    start_date: date = Field(
        description="Start of the analysis period (inclusive).",
    )
    end_date: date = Field(
        description="End of the analysis period (inclusive).",
    )

    # --- Echo of the optional filters (None = unfiltered) --------------
    store_id: int | None = Field(
        default=None,
        description="Store filter applied (if any). Null means all stores included.",
    )
    product_id: int | None = Field(
        default=None,
        description="Product filter applied (if any). Null means all products included.",
    )
    category: str | None = Field(
        default=None,
        description="Category filter applied (if any). Null means all categories included.",
    )


# =============================================================================
# Date Range Validation
# =============================================================================
Expand Down
106 changes: 105 additions & 1 deletion app/features/analytics/service.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,8 @@
from decimal import Decimal
from typing import Any, cast

from sqlalchemy import ColumnElement, func, select
from sqlalchemy import ColumnElement, Date, func, select
from sqlalchemy import cast as sa_cast
from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy.orm import DeclarativeBase

Expand All @@ -19,6 +20,9 @@
DrilldownResponse,
KPIMetrics,
KPIResponse,
TimeGranularity,
TimeSeriesPoint,
TimeSeriesResponse,
)
from app.features.data_platform.models import Product, SalesDaily, Store

Expand Down Expand Up @@ -282,3 +286,103 @@ async def compute_drilldown(
store_id=store_id,
product_id=product_id,
)

async def compute_timeseries(
    self,
    db: AsyncSession,
    start_date: date,
    end_date: date,
    granularity: TimeGranularity = TimeGranularity.DAY,
    store_id: int | None = None,
    product_id: int | None = None,
    category: str | None = None,
) -> TimeSeriesResponse:
    """Aggregate daily sales rows into an ascending, period-bucketed series.

    Rows of ``SalesDaily`` inside the inclusive date window are grouped by
    bucket and summed; only buckets that have at least one matching row
    appear in the result (no gap-filling is done here).

    Args:
        db: Database session used to run the aggregate query.
        start_date: First day of the analysis window (inclusive).
        end_date: Last day of the analysis window (inclusive).
        granularity: Bucket size (day, week, month, quarter).
        store_id: Restrict to one store when not None.
        product_id: Restrict to one product when not None.
        category: Restrict to one product category (exact match) when
            not None; this joins ``Product`` into the query.

    Returns:
        A ``TimeSeriesResponse`` echoing the request scope, whose points
        are ordered by bucket ascending.
    """
    # Daily buckets are just the date column. Anything coarser goes through
    # date_trunc and is cast back to DATE so each period comes out as a
    # plain `datetime.date`.
    period_expr = cast(
        ColumnElement[Any],
        SalesDaily.date
        if granularity == TimeGranularity.DAY
        else sa_cast(func.date_trunc(granularity.value, SalesDaily.date), Date),
    )

    revenue_sum = func.coalesce(func.sum(SalesDaily.total_amount), 0)
    units_sum = func.coalesce(func.sum(SalesDaily.quantity), 0)

    # One aggregate row per bucket, limited to the inclusive date window.
    query = select(
        period_expr.label("period"),
        revenue_sum.label("total_revenue"),
        units_sum.label("total_units"),
        func.count().label("total_transactions"),
    ).where(SalesDaily.date >= start_date, SalesDaily.date <= end_date)

    # Optional scoping filters, applied only when the caller supplied them.
    if store_id is not None:
        query = query.where(SalesDaily.store_id == store_id)
    if product_id is not None:
        query = query.where(SalesDaily.product_id == product_id)
    if category is not None:
        # Category lives on Product, so it needs the join.
        query = query.join(Product, SalesDaily.product_id == Product.id)
        query = query.where(Product.category == category)

    query = query.group_by(period_expr).order_by(period_expr)

    def _as_point(record: Any) -> TimeSeriesPoint:
        """Turn one aggregate row into a point with derived averages."""
        revenue = Decimal(str(record.total_revenue))
        units = int(record.total_units)
        transactions = int(record.total_transactions)

        # Averages are left undefined (None) when their denominator is zero.
        if units > 0:
            unit_price = revenue / units
        else:
            unit_price = None
        if transactions > 0:
            basket_value = revenue / transactions
        else:
            basket_value = None

        return TimeSeriesPoint(
            period=record.period,
            metrics=KPIMetrics(
                total_revenue=revenue,
                total_units=units,
                total_transactions=transactions,
                avg_unit_price=unit_price,
                avg_basket_value=basket_value,
            ),
        )

    outcome = await db.execute(query)
    points: list[TimeSeriesPoint] = [_as_point(record) for record in outcome.all()]
    point_count = len(points)

    logger.info(
        "analytics.timeseries_computed",
        granularity=granularity.value,
        start_date=str(start_date),
        end_date=str(end_date),
        store_id=store_id,
        product_id=product_id,
        category=category,
        points=point_count,
    )

    return TimeSeriesResponse(
        granularity=granularity,
        points=points,
        total_points=point_count,
        start_date=start_date,
        end_date=end_date,
        store_id=store_id,
        product_id=product_id,
        category=category,
    )
Loading