Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
838 changes: 838 additions & 0 deletions PRPs/PRP-29-feature-aware-forecasting-foundation.md

Large diffs are not rendered by default.

34 changes: 34 additions & 0 deletions app/features/backtesting/tests/test_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,40 @@ def test_run_model_backtest_without_fold_details(
# But metrics should still be present
assert fold.metrics is not None

def test_feature_aware_model_fails_loud_in_backtest(
    self,
    sample_dates_120: list[date],
    sample_values_120: np.ndarray,
    sample_split_config_expanding: SplitConfig,
) -> None:
    """Backtesting a feature-aware model must raise, not silently succeed.

    The backtest fold loop fits target-only (``model.fit(y_train)``). A
    ``RegressionForecaster`` declares ``requires_features=True`` and is
    expected to raise ``ValueError`` in that situation because no exogenous
    ``X`` is supplied. Until feature-aware backtesting is wired in
    (PRP-MLZOO-B), this loud, non-leaky failure is the contract
    (PRP-29 DECISIONS LOCKED #7).
    """
    # Imported locally, as in the rest of this test, to keep the module-level
    # imports of the test file unchanged.
    from app.features.backtesting.splitter import TimeSeriesSplitter
    from app.features.forecasting.schemas import RegressionModelConfig

    backtesting_service = BacktestingService()
    fold_splitter = TimeSeriesSplitter(sample_split_config_expanding)
    backtest_kwargs = {
        "series_data": SeriesData(
            dates=sample_dates_120,
            values=sample_values_120,
            store_id=1,
            product_id=1,
        ),
        "splitter": fold_splitter,
        "model_config": RegressionModelConfig(),
        "store_fold_details": True,
    }

    # The error message is matched so an unrelated ValueError cannot satisfy
    # the test by accident.
    with pytest.raises(ValueError, match="requires exogenous features X"):
        backtesting_service._run_model_backtest(**backtest_kwargs)


class TestBacktestingServiceBaselineComparisons:
"""Tests for baseline comparison functionality."""
Expand Down
15 changes: 14 additions & 1 deletion app/features/forecasting/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
from abc import ABC, abstractmethod
from dataclasses import dataclass, field
from datetime import date as date_type
from typing import TYPE_CHECKING, Any, Literal
from typing import TYPE_CHECKING, Any, ClassVar, Literal

import numpy as np
from sklearn.ensemble import ( # type: ignore[import-untyped]
Expand Down Expand Up @@ -57,6 +57,16 @@ class BaseForecaster(ABC):

Attributes:
random_state: Random seed for reproducibility.
requires_features: True when ``fit``/``predict`` require a non-None
``X`` feature frame; baseline (target-only) models leave it False.
"""

requires_features: ClassVar[bool] = False
"""True when ``fit()``/``predict()`` REQUIRE a non-None ``X`` feature frame.

Baseline (target-only) models leave this ``False``; feature-aware models
override it to ``True``. ``ForecastingService`` branches on this flag
rather than an ``isinstance`` check or a ``model_type`` string comparison.
"""

def __init__(self, random_state: int = 42) -> None:
Expand Down Expand Up @@ -445,6 +455,9 @@ class RegressionForecaster(BaseForecaster):
max_depth: Maximum depth of each tree.
"""

requires_features: ClassVar[bool] = True
"""A feature-aware model — ``fit``/``predict`` REQUIRE a non-None ``X``."""

def __init__(
self,
*,
Expand Down
174 changes: 107 additions & 67 deletions app/features/forecasting/service.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,13 @@
PredictResponse,
TrainResponse,
)
from app.shared.feature_frames import (
CALENDAR_COLUMNS,
EXOGENOUS_LAGS,
HISTORY_TAIL_DAYS,
build_calendar_columns,
canonical_feature_columns,
)

if TYPE_CHECKING:
pass
Expand Down Expand Up @@ -72,31 +79,12 @@ def __post_init__(self) -> None:
# Minimum observed rows required to train a regression model — enough to
# resolve the lag features and still leave training signal (PRP-27 GOTCHA #14).
_MIN_REGRESSION_TRAIN_ROWS = 30
# Observed-target tail persisted in the bundle so the scenario future-frame
# generator can resolve long lags (PRP-27 DECISIONS LOCKED #11 — 90 days).
_REGRESSION_HISTORY_TAIL_DAYS = 90
# Target lag offsets — PRP-27 DECISIONS LOCKED #10 (EXOGENOUS_LAGS).
_REGRESSION_LAGS: tuple[int, ...] = (1, 7, 14, 28)
# Canonical regression feature columns — a PAIRED CONTRACT with
# ``app/features/scenarios/feature_frame.canonical_feature_columns()``. The
# scenarios slice owns the future-frame generator; this slice owns training.
# A cross-slice import is forbidden (AGENTS.md § Architecture, PRP-27
# DECISIONS LOCKED #3), so the column names and order are replicated here and
# kept in lock-step by the scenarios integration test (a column mismatch
# surfaces as a non-zero delta on an empty-assumption simulation).
_REGRESSION_FEATURE_COLUMNS: list[str] = [
*(f"lag_{lag}" for lag in _REGRESSION_LAGS),
"dow_sin",
"dow_cos",
"month_sin",
"month_cos",
"is_weekend",
"is_month_end",
"price_factor",
"promo_active",
"is_holiday",
"days_since_launch",
]
# The regression feature-frame contract — the lag offsets (``EXOGENOUS_LAGS``),
# the observed-target tail length (``HISTORY_TAIL_DAYS``), and the canonical
# column set and order (``canonical_feature_columns()``) — is the single source
# of truth in ``app/shared/feature_frames`` (MLZOO-A). This slice imports it
# rather than re-typing it, so a column-order mismatch with the scenarios
# slice's future-frame generator is structurally impossible.


@dataclass
Expand All @@ -107,8 +95,8 @@ class RegressionFeatureMatrix:
X: Feature matrix, shape ``[n_observations, n_features]`` (NaN allowed).
y: Target values, shape ``[n_observations]``.
feature_columns: Column order — persisted so the future frame matches.
history_tail: The last ``_REGRESSION_HISTORY_TAIL_DAYS`` observed
targets, ending at the forecast origin ``T``.
history_tail: The last ``HISTORY_TAIL_DAYS`` observed targets, ending
at the forecast origin ``T``.
history_tail_dates: ISO dates aligned with ``history_tail``.
launch_date_iso: The product launch date (ISO) or ``None``.
n_observations: Number of training rows.
Expand All @@ -123,6 +111,65 @@ class RegressionFeatureMatrix:
n_observations: int


def _assemble_regression_rows(
    *,
    dates: list[date_type],
    quantities: list[float],
    prices: list[float],
    baseline_price: float,
    promo_dates: set[date_type],
    holiday_dates: set[date_type],
    launch_date: date_type | None,
) -> list[list[float]]:
    """Build the historical regression feature rows without touching a database.

    Each output row corresponds to one entry of ``dates`` and is laid out in
    three segments, in this order:

    1. Target lags — one value per offset in ``EXOGENOUS_LAGS``; the value at
       row ``i`` is ``quantities[i - lag]``, i.e. a strictly earlier
       observation, or ``NaN`` when ``i < lag``.
    2. Calendar columns — one value per name in ``CALENDAR_COLUMNS``, looked
       up in the mapping returned by ``build_calendar_columns(dates)``.
    3. Exogenous columns — ``price_factor`` (same-day price divided by
       ``baseline_price``), ``promo_active``, ``is_holiday`` and
       ``days_since_launch``.

    No row reads an observation from a later day, which is the leakage
    invariant this function was extracted from
    :meth:`ForecastingService._build_regression_features` to make unit-testable
    (``test_regression_features_leakage.py``). The layout is intended to match
    ``canonical_feature_columns()``.

    Args:
        dates: Observed days in chronological order.
        quantities: Observed target values aligned with ``dates``.
        prices: Observed unit prices aligned with ``dates``.
        baseline_price: The typical price; ``price_factor`` is the ratio to it.
        promo_dates: Days a promotion covered.
        holiday_dates: Calendar holiday days.
        launch_date: The product's launch date, or ``None``.

    Returns:
        Row-major feature matrix ``[n_observations][n_features]``. ``NaN``
        marks a lag whose source day precedes the series, and
        ``days_since_launch`` when ``launch_date`` is ``None``.
    """
    calendar_by_name = build_calendar_columns(dates)
    matrix: list[list[float]] = []
    for position, current_day in enumerate(dates):
        # Segment 1: lagged targets; only indices below ``position`` are read.
        lag_values = [
            quantities[position - offset] if position >= offset else math.nan
            for offset in EXOGENOUS_LAGS
        ]
        # Segment 2: calendar features from the shared builder, in the shared order.
        calendar_values = [calendar_by_name[column][position] for column in CALENDAR_COLUMNS]
        # Segment 3: same-day exogenous attributes.
        exogenous_values = [
            prices[position] / baseline_price,
            1.0 if current_day in promo_dates else 0.0,
            1.0 if current_day in holiday_dates else 0.0,
            math.nan if launch_date is None else float((current_day - launch_date).days),
        ]
        matrix.append([*lag_values, *calendar_values, *exogenous_values])
    return matrix


class ForecastingService:
"""Service for training and predicting with forecasting models.

Expand Down Expand Up @@ -176,19 +223,20 @@ async def train_model(
config_hash=config.config_hash(),
)

# Build the model + bundle metadata. The regression path consumes a
# historical feature matrix; every other model trains on the raw
# target series exactly as before.
# Build the model first (cheap — no fit), then branch on its capability
# rather than on a ``model_type`` string. A feature-aware model
# (``requires_features``) consumes a historical feature matrix; every
# target-only model trains on the raw target series exactly as before.
model = model_factory(config, random_state=self.settings.forecast_random_seed)
extra_metadata: dict[str, object] = {}
if config.model_type == "regression":
if model.requires_features:
features = await self._build_regression_features(
db=db,
store_id=store_id,
product_id=product_id,
start_date=train_start_date,
end_date=train_end_date,
)
model = model_factory(config, random_state=self.settings.forecast_random_seed)
model.fit(features.y, features.X)
n_observations = features.n_observations
extra_metadata = {
Expand All @@ -210,7 +258,6 @@ async def train_model(
f"No training data found for store={store_id}, product={product_id} "
f"between {train_start_date} and {train_end_date}"
)
model = model_factory(config, random_state=self.settings.forecast_random_seed)
model.fit(training_data.y)
n_observations = training_data.n_observations

Expand Down Expand Up @@ -342,14 +389,16 @@ async def predict(
f"but prediction requested for product={product_id}"
)

# Regression models need an exogenous feature frame to forecast — that
# is built (from scenario assumptions) by POST /scenarios/simulate. The
# plain predict endpoint cannot supply one, so it rejects them cleanly.
if bundle.config.model_type == "regression":
# Feature-aware models need an exogenous feature frame to forecast —
# that is built (from scenario assumptions) by POST /scenarios/simulate.
# The plain predict endpoint cannot supply one, so it rejects them
# cleanly. Branching on ``requires_features`` (not a ``model_type``
# string) keeps this future-proof as the model zoo grows.
if bundle.model.requires_features:
raise ValueError(
"Regression models forecast through POST /scenarios/simulate, "
"Feature-aware models forecast through POST /scenarios/simulate, "
"which supplies the exogenous feature frame. POST /forecasting/"
"predict does not support model_type='regression'."
"predict does not support them."
)

# Generate forecasts
Expand Down Expand Up @@ -467,8 +516,11 @@ async def _build_regression_features(
``promo_active`` / ``is_holiday`` / ``days_since_launch`` read the
same-day exogenous attributes. No row reads a future observation.

The column set is the paired contract with the scenarios slice's
future-frame generator (see ``_REGRESSION_FEATURE_COLUMNS``).
The column set and order are ``canonical_feature_columns()`` from
``app/shared/feature_frames`` — the single source of truth shared with
the scenarios slice's future-frame generator. The pure row assembly is
factored into :func:`_assemble_regression_rows` (unit-tested for
leakage without a database).

Args:
db: Database session.
Expand Down Expand Up @@ -550,44 +602,32 @@ async def _build_regression_features(
select(Product.launch_date).where(Product.id == product_id)
)

feature_rows: list[list[float]] = []
for index, day in enumerate(dates):
row_values: list[float] = []
# Target long-lag columns — read only strictly-earlier observations.
for lag in _REGRESSION_LAGS:
row_values.append(quantities[index - lag] if index >= lag else math.nan)
# Calendar columns — pure functions of the date.
dow = day.weekday()
row_values.append(math.sin(2.0 * math.pi * dow / 7.0))
row_values.append(math.cos(2.0 * math.pi * dow / 7.0))
row_values.append(math.sin(2.0 * math.pi * day.month / 12.0))
row_values.append(math.cos(2.0 * math.pi * day.month / 12.0))
row_values.append(1.0 if dow >= 5 else 0.0)
row_values.append(1.0 if (day + timedelta(days=1)).month != day.month else 0.0)
# Exogenous columns — same-day attributes.
row_values.append(prices[index] / baseline_price)
row_values.append(1.0 if day in promo_dates else 0.0)
row_values.append(1.0 if day in holiday_dates else 0.0)
row_values.append(
float((day - launch_date).days) if launch_date is not None else math.nan
)
feature_rows.append(row_values)
feature_columns = canonical_feature_columns()
feature_rows = _assemble_regression_rows(
dates=dates,
quantities=quantities,
prices=prices,
baseline_price=baseline_price,
promo_dates=promo_dates,
holiday_dates=holiday_dates,
launch_date=launch_date,
)

tail = quantities[-_REGRESSION_HISTORY_TAIL_DAYS:]
tail_dates = [day.isoformat() for day in dates[-_REGRESSION_HISTORY_TAIL_DAYS:]]
tail = quantities[-HISTORY_TAIL_DAYS:]
tail_dates = [day.isoformat() for day in dates[-HISTORY_TAIL_DAYS:]]

logger.info(
"forecasting.regression_features_built",
store_id=store_id,
product_id=product_id,
n_observations=len(dates),
n_features=len(_REGRESSION_FEATURE_COLUMNS),
n_features=len(feature_columns),
)

return RegressionFeatureMatrix(
X=np.array(feature_rows, dtype=np.float64),
y=np.array(quantities, dtype=np.float64),
feature_columns=list(_REGRESSION_FEATURE_COLUMNS),
feature_columns=feature_columns,
history_tail=[float(value) for value in tail],
history_tail_dates=tail_dates,
launch_date_iso=launch_date.isoformat() if launch_date is not None else None,
Expand Down
Loading