From b116489d4c2fc279ae34e123ab47a60576328d38 Mon Sep 17 00:00:00 2001 From: Gabor Szabo Date: Tue, 19 May 2026 16:02:53 +0200 Subject: [PATCH] =?UTF-8?q?feat(forecast):=20feature-aware=20forecasting?= =?UTF-8?q?=20foundation=20=E2=80=94=20shared=20feature-frame=20contract?= =?UTF-8?q?=20(#238)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- ...29-feature-aware-forecasting-foundation.md | 838 ++++++++++++++++++ .../backtesting/tests/test_service.py | 34 + app/features/forecasting/models.py | 15 +- app/features/forecasting/service.py | 174 ++-- .../tests/test_regression_features_leakage.py | 94 ++ .../forecasting/tests/test_service.py | 38 + app/features/scenarios/feature_frame.py | 204 +---- app/features/scenarios/tests/conftest.py | 2 +- .../scenarios/tests/test_feature_frame.py | 16 +- .../tests/test_future_frame_leakage.py | 88 +- app/shared/feature_frames/__init__.py | 39 + app/shared/feature_frames/contract.py | 240 +++++ app/shared/feature_frames/tests/__init__.py | 1 + .../feature_frames/tests/test_contract.py | 142 +++ .../feature_frames/tests/test_leakage.py | 116 +++ examples/models/feature_frame_contract.md | 115 +++ examples/models/model_interface.md | 36 + 17 files changed, 1883 insertions(+), 309 deletions(-) create mode 100644 PRPs/PRP-29-feature-aware-forecasting-foundation.md create mode 100644 app/features/forecasting/tests/test_regression_features_leakage.py create mode 100644 app/shared/feature_frames/__init__.py create mode 100644 app/shared/feature_frames/contract.py create mode 100644 app/shared/feature_frames/tests/__init__.py create mode 100644 app/shared/feature_frames/tests/test_contract.py create mode 100644 app/shared/feature_frames/tests/test_leakage.py create mode 100644 examples/models/feature_frame_contract.md diff --git a/PRPs/PRP-29-feature-aware-forecasting-foundation.md b/PRPs/PRP-29-feature-aware-forecasting-foundation.md new file mode 100644 index 00000000..f60937df --- /dev/null +++ b/PRPs/PRP-29-feature-aware-forecasting-foundation.md @@ -0,0 +1,838 @@ +name: "PRP-29 — Feature-Aware Forecasting Foundation (MLZOO-A)" +description: | + +## Purpose + +The first PRP of the **Advanced ML Model Zoo** sequence (`PRPs/INITIAL/INITIAL-MLZOO-index.md`). +It builds the *foundation* a later LightGBM / XGBoost / Prophet-like model will stand on: +a single, leakage-safe, **shared** feature-frame contract — so a future advanced model can be +added without re-deriving the frame machinery and without breaking the baseline forecasters. + +This PRP implements **contracts, consolidation, and leakage tests only**. It adds **no** +advanced model, **no** new dependency, **no** migration, **no** frontend, **no** agent tool, +and **no** API behaviour change. If you find yourself implementing LightGBM, stop — that is +PRP-MLZOO-B. + +## What this PRP already inherits (DO NOT re-build) + +The feature-aware *machinery* already exists — it is just **fragmented and duplicated**: + +- `BaseForecaster` (`app/features/forecasting/models.py:47`) already has the feature-aware + signature: `fit(y, X=None)` / `predict(horizon, X=None)`. The three baselines ignore `X` + (every `fit`/`predict` carries `# noqa: ARG002`); `RegressionForecaster` (`models.py:428`) + *consumes* it and is the first feature-aware model. +- The **historical training frame** is built by `ForecastingService._build_regression_features` + (`app/features/forecasting/service.py:454-595`). +- The **future prediction frame** is built by `app/features/scenarios/feature_frame.py` + (the leakage-safe `build_*_columns` + `assemble_future_frame` + `build_future_frame`). +- The leakage-safe rule and the long-lag-vs-recursion decision are documented in + `PRPs/ai_docs/exogenous-regressor-forecasting.md` (§2, §5). + +The **problem this PRP fixes**: the regression feature-column contract is **physically +duplicated** across two slices — `_REGRESSION_FEATURE_COLUMNS` (`forecasting/service.py:87-99`) +and `canonical_feature_columns()` / `CALENDAR_COLUMNS` / `EXOGENOUS_COLUMNS` +(`scenarios/feature_frame.py:74-127`) — because a cross-slice import is forbidden +(AGENTS.md § Architecture, PRP-27 DECISIONS LOCKED #3). They are kept in lock-step only by a +fragile integration-test side-effect ("an empty-assumption simulation must yield a zero +delta"). A future model cannot safely build on a contract that lives in two places. + +## DEPENDS ON — read before starting + +- `PRPs/INITIAL/INITIAL-MLZOO-A-foundation-feature-frames.md` — this PRP's brief. +- `PRPs/INITIAL/INITIAL-MLZOO-index.md` — the MLZOO roadmap (A → B → C → D). +- `docs/optional-features/05-advanced-ml-model-zoo.md` — the full model-zoo vision and risks. +- `PRPs/ai_docs/exogenous-regressor-forecasting.md` — the exogenous-regressor + future-frame + leakage reference (§1 contract, §2 leakage rule, §5 de-risking recommendations). + +--- + +## Goal + +Move the regression feature-frame **contract** and its **leakage-safe pure builders** into a +single cross-cutting module, `app/shared/feature_frames/`, that both the forecasting slice +(training frame) and the scenarios slice (future frame) import — eliminating the duplicated +`_REGRESSION_FEATURE_COLUMNS` ↔ `canonical_feature_columns()` pair. Formalise the +feature-aware model contract with a `requires_features` class attribute, document the +historical/future frame requirements and the safe / conditionally-safe / unsafe feature-class +taxonomy, and add load-bearing leakage tests for the shared builders and the historical +training builder. + +**End state:** there is exactly **one** definition of the regression feature-column set, +imported (not re-typed) by both slices; a future advanced model in PRP-MLZOO-B sets +`requires_features = True` and reuses the shared frame builders with zero new contract code. + +## Why + +- **Foundation for the model zoo.** PRP-MLZOO-B (LightGBM / sklearn fallback) needs a tested, + single-source frame contract. Today it would have to choose *which* of two duplicated column + lists to extend — a guaranteed drift bug. +- **Eliminates a latent correctness hazard.** A silent mismatch between the two column lists + corrupts the `model_exogenous` re-forecast (the model is fed columns in the wrong order). + Today only an integration-test side-effect catches it; after this PRP a mismatch is + structurally impossible (one shared list). +- **Codifies the leakage rules.** `docs/optional-features/05-advanced-ml-model-zoo.md:158-163` + names "Future feature generation is easy to get wrong" and "Backtests must prevent leakage + for every generated feature" as the top risks. This PRP turns the implicit rules into a + documented taxonomy + a load-bearing test file. +- **No behaviour change, no risk to baselines.** Pure consolidation + tests + docs. The + baseline forecasters, the registry, persisted model bundles, and every HTTP/WS contract are + untouched. + +## What + +A refactor-and-document PRP. User-visible behaviour is **identical** before and after; the +value is entirely structural (one contract, tested rules, a foundation doc). + +### Technical requirements + +1. New cross-cutting package `app/shared/feature_frames/` owning: the pinned constants + (`EXOGENOUS_LAGS`, `HISTORY_TAIL_DAYS`), the column-name tuples (`CALENDAR_COLUMNS`, + `EXOGENOUS_COLUMNS`), `canonical_feature_columns()`, the `FutureFeatureFrame` dataclass, + the leakage-safe pure builders (`build_calendar_columns`, `build_long_lag_columns`), and a + `FeatureSafety` taxonomy (`FEATURE_CLASS` map). +2. `app/features/scenarios/feature_frame.py` imports the above from the shared package and + **re-exports** them (back-compat for existing importers); it keeps only the + assumption-driven, DB-touching parts (`build_exogenous_columns`, `assemble_future_frame`, + `build_future_frame`, `MAX_COMPARE_SCENARIOS`). +3. `app/features/forecasting/service.py::_build_regression_features` imports the shared + contract; the local `_REGRESSION_FEATURE_COLUMNS` / `_REGRESSION_LAGS` / + `_REGRESSION_HISTORY_TAIL_DAYS` constants are deleted. +4. `BaseForecaster` gains a `requires_features: ClassVar[bool] = False`; `RegressionForecaster` + overrides it to `True`. `ForecastingService.train_model` / `predict` branch on + `model.requires_features` instead of the `config.model_type == "regression"` string check. +5. Load-bearing leakage tests: `app/shared/feature_frames/tests/test_leakage.py` (shared + builders) and `app/features/forecasting/tests/test_regression_features_leakage.py` + (historical training builder). +6. The curated contract doc `examples/models/feature_frame_contract.md` (historical vs future + frame shape, required columns, the safe/conditional/unsafe taxonomy); an additive update to + `examples/models/model_interface.md`. + +### Success Criteria + +- [ ] The regression feature-column set is defined **exactly once** (`canonical_feature_columns()` + in `app/shared/feature_frames/`); `grep -rn "_REGRESSION_FEATURE_COLUMNS" app/` returns nothing. +- [ ] `app/shared/feature_frames/` imports nothing from `app/features/**` (verified by a test). +- [ ] `BaseForecaster.requires_features` exists; `NaiveForecaster/SeasonalNaiveForecaster/MovingAverageForecaster` → `False`, `RegressionForecaster` → `True`. +- [ ] All existing tests pass unchanged: baseline forecasters, `test_regression_forecaster.py`, + every scenarios test (including the empty-assumption zero-delta integration test). +- [ ] New leakage tests prove no shared builder and no historical training row ever reads a + target value at or after the forecast origin / cutoff. +- [ ] `examples/models/feature_frame_contract.md` exists and documents both frame shapes + the taxonomy. +- [ ] `uv run ruff check . && uv run ruff format --check . && uv run mypy app/ && uv run pyright app/ && uv run pytest -v -m "not integration"` all green. +- [ ] No new dependency in `pyproject.toml`; no Alembic migration; no change to any route, schema, or WebSocket contract. + +--- + +## All Needed Context + +### Documentation & References + +```yaml +- file: app/features/scenarios/feature_frame.py + why: The future-frame builder. Its lines 62-127 (constants, CALENDAR_COLUMNS, + EXOGENOUS_COLUMNS, canonical_feature_columns) and 93-216 (FutureFeatureFrame, + _is_month_end, build_calendar_columns, build_long_lag_columns) MOVE VERBATIM to the + shared package. Lines 219-407 (build_exogenous_columns, assemble_future_frame, + build_future_frame) STAY — they depend on ScenarioAssumptions / the calendar table. + critical: build_long_lag_columns is the leakage-critical helper; its `idx = (j-1)-k`, + `idx < 0` guard is the spec. Move it byte-for-byte; do not "improve" it. + +- file: app/features/scenarios/tests/test_future_frame_leakage.py + why: The load-bearing leakage spec to mirror. The calendar/long-lag tests (the builders that + MOVE) become app/shared/feature_frames/tests/test_leakage.py; the exogenous/assemble + tests STAY here. Mirror its disjoint value-pool idiom (lines 50-56). + critical: AGENTS.md § Safety — a *_leakage.py file may never be weakened. Splitting it + across the move is allowed; deleting an assertion is not. + +- file: app/features/forecasting/service.py + why: Lines 74-99 are the duplicated constants to DELETE; lines 454-595 + (_build_regression_features) import the shared contract instead; lines 182-216 + (train_model branch) and 348-353 (predict branch) switch to `requires_features`. + +- file: app/features/forecasting/models.py + why: BaseForecaster (line 47) gets the `requires_features` ClassVar; RegressionForecaster + (line 428) overrides it. The `# noqa: ARG002` on the baseline fit/predict is the marker + that "this model ignores X" — `requires_features=False` is the formal version of it. + +- file: app/features/featuresets/tests/test_leakage.py + why: The canonical sequential-value leakage idiom for the *historical* builder test. + Mirror its two-tier assertion (direction check THEN exact-equality check) and the + "LEAKAGE DETECTED at row {i}" message convention. + +- file: app/features/forecasting/persistence.py + why: ModelBundle stores `feature_columns` in `metadata` as a plain list[str]. Moving the + *function* that produces those strings does not change the strings — persisted bundles + stay loadable. Do NOT change ModelBundle. + +- file: app/shared/seeder/ + why: The precedent for a package under app/shared/ (with its own tests/ subdir). Mirror its + layout: __init__.py re-exporting the public surface, tests/ alongside. + +- docfile: PRPs/ai_docs/exogenous-regressor-forecasting.md + why: §2 states the future-frame leakage rule verbatim and the feature-family table; §5 is + the "long-lag + calendar + exogenous, no recursion" decision. The taxonomy in this PRP + is the executable form of that table. Reference it from feature_frame_contract.md. + +- doc: https://scikit-learn.org/stable/developers/develop.html + section: Estimator interface conventions (get_params / set_params / fit returns self) + critical: BaseForecaster already follows this. requires_features is an ADDITIVE class + attribute — it does not break the sklearn-style contract. +``` + +### Current Codebase tree (relevant slices — all already exist) + +```bash +app/ +├── shared/ +│ ├── __init__.py +│ ├── models.py +│ ├── schemas.py +│ ├── utils.py +│ └── seeder/ # precedent: a package under app/shared/ with tests/ +├── features/ +│ ├── forecasting/ +│ │ ├── models.py # BaseForecaster + 4 forecasters + model_factory +│ │ ├── schemas.py # ModelConfig union, TrainRequest/PredictRequest +│ │ ├── service.py # _build_regression_features + _REGRESSION_* constants +│ │ ├── persistence.py # ModelBundle (UNTOUCHED) +│ │ └── tests/ +│ │ ├── test_regression_forecaster.py +│ │ └── test_service.py +│ ├── scenarios/ +│ │ ├── feature_frame.py # future-frame builder + duplicated contract +│ │ ├── service.py # imports build_future_frame +│ │ ├── schemas.py +│ │ └── tests/ +│ │ ├── conftest.py # imports canonical_feature_columns +│ │ ├── test_feature_frame.py +│ │ └── test_future_frame_leakage.py # load-bearing spec +│ └── backtesting/ +│ ├── service.py # _run_model_backtest fold loop (target-only) +│ └── tests/test_service.py +examples/models/ +├── baseline_naive.py / baseline_seasonal.py / baseline_mavg.py +└── model_interface.md # stale: no regression/lightgbm config rows +PRPs/ai_docs/exogenous-regressor-forecasting.md +``` + +### Desired Codebase tree — files to ADD + +```bash +app/shared/feature_frames/ +├── __init__.py # public re-exports of contract.py +├── contract.py # constants + taxonomy + columns + FutureFeatureFrame +│ # + build_calendar_columns + build_long_lag_columns +└── tests/ + ├── __init__.py + ├── test_contract.py # column order, taxonomy, dataclass shape, determinism + └── test_leakage.py # LOAD-BEARING: calendar + long-lag leakage spec + +app/features/forecasting/tests/ +└── test_regression_features_leakage.py # LOAD-BEARING: historical training-frame leakage + +examples/models/ +└── feature_frame_contract.md # the curated contract doc (INITIAL-A asks for this) +``` + +### Files to MODIFY (all additive or behaviour-preserving) + +```bash +app/features/scenarios/feature_frame.py # import from shared + re-export; delete moved defs +app/features/scenarios/tests/test_feature_frame.py # update imports (re-export keeps it passing) +app/features/scenarios/tests/test_future_frame_leakage.py # trim moved tests; keep exogenous/assemble +app/features/forecasting/models.py # + requires_features ClassVar +app/features/forecasting/service.py # import shared contract; requires_features branching +app/features/forecasting/tests/test_service.py # + requires_features assertions +app/features/backtesting/tests/test_service.py # + 1 guard test (no production-code change) +examples/models/model_interface.md # additive: requires_features + regression row +``` + +### DECISIONS LOCKED (resolved during planning — do NOT re-litigate) + +1. **Contract home = `app/shared/feature_frames/`.** A cross-cutting package (not a new + vertical slice, not document-only). Both `forecasting` and `scenarios` import it. This is + sanctioned by AGENTS.md § Architecture ("cross-cutting code goes through `app/core/` or + `app/shared/`"). It is **not** a vertical slice — it has no `models.py`/`routes.py`/router; + it is a pure library, like `app/shared/utils.py`. + +2. **No `FeatureAwareForecaster` subclass.** `BaseForecaster` already carries the feature-aware + signature. Formalise it with a `requires_features: ClassVar[bool]` attribute instead of a + new base class — zero churn to the class hierarchy, zero change to persisted bundle types, + and the service branches on `model.requires_features` with no `isinstance` check. A future + `LightGBMForecaster` just sets `requires_features = True`. + +3. **`POST /forecasting/predict` is NOT changed.** It still rejects feature-aware models + (today: regression). Wiring an assumptions-free future frame into the predict path is + PRP-MLZOO-B scope. The rejection branch is only *generalised* from a `model_type` string + check to `model.requires_features` — same behaviour, future-proof condition. + +4. **No `TrainingFrame` dataclass.** The historical training frame's requirements are *defined* + by `canonical_feature_columns()` (the executable column contract, now shared) plus + `examples/models/feature_frame_contract.md` (the prose spec) plus the new historical + leakage test. `ForecastingService` keeps its existing internal `RegressionFeatureMatrix` + carrier (it is not persisted; only its `.feature_columns` list is copied into bundle + metadata). Introducing a second frame dataclass that nothing returns would be dead code + (product-vision.md: do not add abstractions speculatively). + +5. **Builders move; the contract is shared by IMPORT, not by re-typing.** `build_calendar_columns` + and `build_long_lag_columns` are pure (no slice imports) → they move to the shared package. + `build_exogenous_columns` takes a `ScenarioAssumptions` and `build_future_frame` reads the + `calendar` table → they STAY in `scenarios/feature_frame.py` (the shared package may not + import `app/features/**`). The forecasting historical builder keeps its own *value + derivation* (DB-observed price/promo) but consumes the shared *column list and calendar + builder*. + +6. **NaN means "unknown", never a fabricated default.** A builder emits `math.nan` for a cell + whose source is genuinely unknowable at origin `T` (a long-lag whose source day is in the + horizon; `days_since_launch` when the product has no launch date). `HistGradientBoostingRegressor` + tolerates NaN natively. The contract forbids silently substituting `0.0`. A future model + that is *not* NaN-tolerant must impute explicitly in its own `fit`/`predict` — the frame + builder must not. + +7. **Backtesting is not wired for feature-aware models in this PRP.** The fold loop + (`backtesting/service.py` `_run_model_backtest`) calls `model.fit(y_train)` target-only; a + `RegressionForecaster` there raises `ValueError("RegressionForecaster requires exogenous + features X")` — a *loud, non-leaky* failure. We add one regression test pinning that + loud-failure behaviour and document it as a known limitation. Wiring feature-aware + backtesting is PRP-MLZOO-B. + +### Known Gotchas of our codebase & Library Quirks + +```python +# CRITICAL: app/shared/** may NEVER import from app/features/** (AGENTS.md § Architecture). +# The shared package is leaf-level. build_calendar_columns / build_long_lag_columns are pure +# (stdlib `math`, `datetime`, `dataclasses` only) so this holds. A test asserts it (see Task 3). + +# CRITICAL: build_long_lag_columns must move BYTE-FOR-BYTE. The leakage guard is the line +# `idx = (j - 1) - lag` then `if idx < 0 and -tail_len <= idx:`. Any "tidy-up" risks +# re-introducing the exact bug the load-bearing test exists to catch. + +# GOTCHA: 6+ files import names from `app.features.scenarios.feature_frame` +# (service.py, tests/conftest.py, tests/test_feature_frame.py, tests/test_future_frame_leakage.py). +# After the move, feature_frame.py MUST re-export the moved names +# (`from app.shared.feature_frames import (...) # noqa: F401`) so those imports keep +# resolving. Verified import sites — re-export ALL of: EXOGENOUS_LAGS, HISTORY_TAIL_DAYS, +# CALENDAR_COLUMNS, EXOGENOUS_COLUMNS, canonical_feature_columns, FutureFeatureFrame, +# build_calendar_columns, build_long_lag_columns. + +# GOTCHA: MAX_COMPARE_SCENARIOS stays in scenarios/feature_frame.py — it is a Phase-C scenario +# comparison cap, NOT a feature-frame concept. scenarios/schemas.py:413-414 references it by +# comment. Do not move it to the shared package. + +# CRITICAL: ConfigDict(strict=True) on request bodies — N/A here. This PRP adds no request +# schema. The forecasting ModelConfig union is untouched. + +# GOTCHA: `requires_features` is a ClassVar — annotate it `ClassVar[bool]` from `typing`. +# mypy --strict / pyright --strict both gate merge; a bare `requires_features = False` +# without the ClassVar annotation reads as an instance attribute and will type-error on the +# subclass override pattern. + +# GOTCHA: model bundles are joblib-pickled. `requires_features` is a *class* attribute, not an +# instance attribute — it is NOT pickled into the bundle, so old bundles loaded after this +# PRP transparently gain the attribute from the (new) class definition. No bundle migration. + +# GOTCHA: the scenarios "empty-assumption simulation → zero delta" integration test is the +# OLD drift detector for the duplicated contract. After consolidation the two lists ARE one +# import, so that test still passes — and now for a structural reason, not a coincidence. +# It must stay green; do not delete it. + +# GOTCHA: line endings — this repo has mixed CRLF/LF files. Run `git diff --stat` before +# committing; if a moved file shows a whole-file diff, normalise to the original file's +# ending so the review shows only the real change. +``` + +--- + +## Implementation Blueprint + +### Data models and structure + +No ORM models, no Pydantic schemas, no migration. The only new structured types: + +```python +# app/shared/feature_frames/contract.py + +from enum import Enum + +class FeatureSafety(Enum): + """Leakage classification of a feature column in a FUTURE prediction frame.""" + SAFE = "safe" # pure function of the date (calendar) — never a leak + CONDITIONALLY_SAFE = "cond" # target long-lag: safe iff source day <= origin T, else NaN + UNSAFE_UNLESS_SUPPLIED = "unsafe" # future price/promo/inventory — knowable ONLY if the + # caller posits it (scenario assumption); never inferred + +# FutureFeatureFrame — MOVED VERBATIM from scenarios/feature_frame.py:93-107 (unchanged). +@dataclass +class FutureFeatureFrame: + dates: list[date] + feature_columns: list[str] + matrix: list[list[float]] # [horizon][n_features]; NaN allowed and expected + +# FEATURE_CLASS — the executable taxonomy: every canonical column → its FeatureSafety. +FEATURE_CLASS: dict[str, FeatureSafety] = { + # lag_1 .. lag_28 -> CONDITIONALLY_SAFE + # dow_sin/dow_cos/month_sin/month_cos/is_weekend/is_month_end -> SAFE + # price_factor/promo_active -> UNSAFE_UNLESS_SUPPLIED + # is_holiday -> SAFE (calendar table is a timeless attribute) + # days_since_launch -> SAFE (pure function of date once launch_date is known) +} +``` + +### list of tasks (dependency-ordered) + +```yaml +# ════════ STEP 1 — Shared feature-frame package ════════ + +Task 1 — CREATE app/shared/feature_frames/contract.py: + - PURPOSE: the single source of truth for the regression feature-frame contract. + - MOVE VERBATIM from app/features/scenarios/feature_frame.py: + * EXOGENOUS_LAGS (line 65), HISTORY_TAIL_DAYS (line 68) # NOT MAX_COMPARE_SCENARIOS + * CALENDAR_COLUMNS (lines 74-81), EXOGENOUS_COLUMNS (lines 85-90) + * FutureFeatureFrame dataclass (lines 93-107) + * canonical_feature_columns() (lines 110-127) + * _is_month_end() (lines 141-143) + * build_calendar_columns() (lines 146-170) + * build_long_lag_columns() (lines 173-216) + - ADD: `FeatureSafety` Enum + `FEATURE_CLASS` dict (see Data models above). + - ADD: `feature_safety(column: str) -> FeatureSafety` — looks up FEATURE_CLASS; for a + `lag_*` column not literally in the map (custom lag offsets), returns CONDITIONALLY_SAFE; + raises KeyError for a genuinely unknown column (callers must classify every column). + - IMPORTS: stdlib only — `math`, `dataclasses`, `datetime`, `enum`, `typing`. NOTHING from + `app.features.*`. May import `app.core.logging.get_logger` (app/core is allowed). + - PRESERVE: every docstring on the moved functions verbatim (they carry the leakage proof). + - VALIDATE: uv run ruff check app/shared/feature_frames/ && uv run mypy app/shared/feature_frames/contract.py && uv run pyright app/shared/feature_frames/ + +Task 2 — CREATE app/shared/feature_frames/__init__.py: + - RE-EXPORT the public surface from contract.py: + EXOGENOUS_LAGS, HISTORY_TAIL_DAYS, CALENDAR_COLUMNS, EXOGENOUS_COLUMNS, + FutureFeatureFrame, FeatureSafety, FEATURE_CLASS, feature_safety, + canonical_feature_columns, build_calendar_columns, build_long_lag_columns. + - PATTERN: mirror app/shared/seeder/__init__.py — explicit `from .contract import (...)` + plus an `__all__` tuple. + - VALIDATE: uv run python -c "from app.shared.feature_frames import canonical_feature_columns; print(canonical_feature_columns())" + +Task 3 — CREATE app/shared/feature_frames/tests/__init__.py + test_contract.py: + - test_contract.py covers: + * test_canonical_feature_columns_order — 4 lags, then CALENDAR_COLUMNS, then + EXOGENOUS_COLUMNS; total length == sum. (MIRROR scenarios/tests/test_feature_frame.py:48-54.) + * test_pinned_constants — EXOGENOUS_LAGS == (1,7,14,28), HISTORY_TAIL_DAYS == 90. + * test_feature_class_covers_every_canonical_column — every column from + canonical_feature_columns() has a FEATURE_CLASS entry (or feature_safety() resolves it). + * test_calendar_columns_are_all_SAFE / test_lag_columns_are_CONDITIONALLY_SAFE. + * test_shared_package_imports_nothing_from_features — IMPORTANT architectural test: + walk app/shared/feature_frames/*.py source, assert no line matches + `import app.features` or `from app.features`. (AST-walk or a simple text scan; + mirror app/core/tests/test_strict_mode_policy.py's AST-walker style.) + * test_build_calendar_columns_is_deterministic — same dates → identical output. + - CONVENTION: module-level `def test_*` functions (no class), inline constants — mirror + scenarios/tests/test_feature_frame.py. No conftest. No @pytest.mark.integration. + - VALIDATE: uv run pytest -v -m "not integration" app/shared/feature_frames/tests/test_contract.py + +Task 4 — CREATE app/shared/feature_frames/tests/test_leakage.py: + - THIS IS A LOAD-BEARING SPEC (module docstring must say so, mirroring + scenarios/tests/test_future_frame_leakage.py:1-6 — "this file IS the spec, never weaken + it, AGENTS.md § Safety"). + - MOVE the calendar + long-lag leakage tests OUT of + scenarios/tests/test_future_frame_leakage.py INTO this file (they now test shared code): + * test_long_lag_columns_never_emit_a_future_target + * test_long_lag_source_index_is_never_at_or_after_the_horizon + * test_calendar_columns_ignore_the_target_series + (the assemble/exogenous tests STAY in scenarios — see Task 7.) + - MIRROR the disjoint value-pool idiom verbatim (scenarios/tests/test_future_frame_leakage.py:50-56): + _HISTORY_TAIL = [1000.0 + i for i in range(90)] # observed pool + _FUTURE_TARGETS = {9000.0 + i for i in range(_HORIZON)} # disjoint sentinel pool + → any _FUTURE_TARGETS value in a cell == proven leak. + - IMPORT the builders from app.shared.feature_frames (the new home). + - VALIDATE: uv run pytest -v -m "not integration" app/shared/feature_frames/tests/test_leakage.py + +# ════════ STEP 2 — Rewire the scenarios slice onto the shared contract ════════ + +Task 5 — MODIFY app/features/scenarios/feature_frame.py: + - DELETE the moved definitions: EXOGENOUS_LAGS, HISTORY_TAIL_DAYS, CALENDAR_COLUMNS, + EXOGENOUS_COLUMNS, FutureFeatureFrame, canonical_feature_columns, _is_month_end, + build_calendar_columns, build_long_lag_columns. + - KEEP: MAX_COMPARE_SCENARIOS, _in_window, build_exogenous_columns, assemble_future_frame, + build_future_frame. + - ADD at the top: `from app.shared.feature_frames import (EXOGENOUS_LAGS, HISTORY_TAIL_DAYS, + CALENDAR_COLUMNS, EXOGENOUS_COLUMNS, FutureFeatureFrame, canonical_feature_columns, + build_calendar_columns, build_long_lag_columns)` — and a `# noqa: F401` because they are + RE-EXPORTED for back-compat (assemble_future_frame still calls build_*; the names also + stay importable by existing call sites). + - UPDATE the module docstring: the "feature-column contract" paragraph now points at + `app/shared/feature_frames` as the single source of truth. + - GOTCHA: assemble_future_frame calls build_long_lag_columns / build_calendar_columns — + after the import they resolve to the shared functions. No logic change. + - VALIDATE: uv run mypy app/features/scenarios/ && uv run pyright app/features/scenarios/ + +Task 6 — VERIFY scenarios import sites still resolve: + - These files import from app.features.scenarios.feature_frame and rely on the re-export: + tests/conftest.py (canonical_feature_columns), service.py (build_future_frame), + tests/test_feature_frame.py, tests/test_future_frame_leakage.py. + - PREFERRED: update tests/conftest.py and tests/test_feature_frame.py to import the MOVED + names directly from `app.shared.feature_frames` (the stays-in-scenarios names — + build_exogenous_columns, assemble_future_frame — still come from feature_frame.py). + Keep service.py importing build_future_frame from feature_frame.py (it stays there). + - VALIDATE: uv run pytest -v -m "not integration" app/features/scenarios/tests/test_feature_frame.py + +Task 7 — MODIFY app/features/scenarios/tests/test_future_frame_leakage.py: + - REMOVE the three calendar/long-lag tests moved to the shared test_leakage.py (Task 4). + - KEEP every test that exercises build_exogenous_columns, assemble_future_frame, or the + end-to-end assembled frame (those builders stay in scenarios). + - The module docstring still declares it a load-bearing spec — its remaining scope is the + assumption-driven exogenous columns + the assembled frame. + - VALIDATE: uv run pytest -v -m "not integration" app/features/scenarios/tests/test_future_frame_leakage.py + +# ════════ STEP 3 — Formalise the feature-aware model contract ════════ + +Task 9 — MODIFY app/features/forecasting/models.py: + - ADD to BaseForecaster (class body, near `random_state`): + `requires_features: ClassVar[bool] = False` + with a docstring: "True when fit()/predict() REQUIRE a non-None X feature frame. + Baseline (target-only) models leave this False; feature-aware models override to True." + - ADD `from typing import ClassVar` to the imports if not present. + - OVERRIDE in RegressionForecaster: `requires_features: ClassVar[bool] = True`. + - The three baselines inherit False — no edit needed. + - GOTCHA: ClassVar, not a plain assignment — see Known Gotchas. + - VALIDATE: uv run mypy app/features/forecasting/models.py && uv run pyright app/features/forecasting/ + +Task 10 — MODIFY app/features/forecasting/service.py: + - DELETE the local constants _REGRESSION_LAGS (line 79), _REGRESSION_HISTORY_TAIL_DAYS + (line 77), _REGRESSION_FEATURE_COLUMNS (lines 87-99). KEEP _MIN_REGRESSION_TRAIN_ROWS + (line 74 — a training-data threshold, not a frame-contract constant). + - ADD: `from app.shared.feature_frames import (canonical_feature_columns, EXOGENOUS_LAGS, + HISTORY_TAIL_DAYS, build_calendar_columns)`. + - In _build_regression_features: replace `_REGRESSION_FEATURE_COLUMNS` with + `canonical_feature_columns()`, `_REGRESSION_LAGS` with `EXOGENOUS_LAGS`, + `_REGRESSION_HISTORY_TAIL_DAYS` with `HISTORY_TAIL_DAYS`. The per-row inline calendar + math (lines 561-566) is replaced by the shared build_calendar_columns — see pseudocode. + - In train_model: replace `if config.model_type == "regression":` with + `model = model_factory(config, ...)` FIRST, then `if model.requires_features:`. + - In predict(): replace `if bundle.config.model_type == "regression":` (line 348) with + `if bundle.model.requires_features:`. Generalise the error string: "Regression models" + → "Feature-aware models". + - GOTCHA: canonical_feature_columns() returns the SAME 14 strings in the SAME order as the + deleted _REGRESSION_FEATURE_COLUMNS — verify by eye against forecasting/service.py:87-99. + A column-list test (Task 12) pins it. + - VALIDATE: uv run mypy app/ && uv run pyright app/ && uv run pytest -v -m "not integration" app/features/forecasting/tests/ + +Task 11 — CREATE app/features/forecasting/tests/test_regression_features_leakage.py: + - LOAD-BEARING spec for the HISTORICAL training builder (_build_regression_features). + - MIRROR featuresets/tests/test_leakage.py's sequential-value idiom: seed SalesDaily-shaped + input where quantity is sequential, assert lag_k at row i equals quantity[i-k] exactly + and is strictly < quantity[i] ("LEAKAGE DETECTED at row {i}" message convention). + - Assert the SQL window guard: no feature row has a date > end_date (the cutoff/origin). + - This is a service-level test → it needs the async DB session fixture; mark + @pytest.mark.integration if it hits Postgres, OR factor the pure row-assembly into a + testable helper. PREFERRED: add a small pure helper `_assemble_regression_rows(dates, + quantities, prices, ...)` inside service.py and unit-test THAT (no DB, no marker) — + mirrors how scenarios split pure `assemble_future_frame` from async `build_future_frame`. + - VALIDATE: uv run pytest -v -m "not integration" app/features/forecasting/tests/test_regression_features_leakage.py + +Task 12 — MODIFY app/features/forecasting/tests/test_service.py: + - ADD: test_requires_features_flag — model_factory(NaiveModelConfig()).requires_features is + False; same for seasonal_naive, moving_average; model_factory(RegressionModelConfig()) + .requires_features is True. + - ADD: test_canonical_columns_match_regression_contract — assert + canonical_feature_columns() equals the exact 14-name list the regression bundle expects + (pins the contract after the constant deletion). + - VALIDATE: uv run pytest -v -m "not integration" app/features/forecasting/tests/test_service.py + +# ════════ STEP 4 — Backtesting guard + docs ════════ + +Task 13 — MODIFY app/features/backtesting/tests/test_service.py: + - ADD ONE test (no production-code change): test_feature_aware_model_fails_loud_in_backtest + — a backtest of a feature-aware model must raise a clear ValueError (the fold loop calls + model.fit(y_train) target-only → RegressionForecaster.fit raises), NEVER silently run. + This pins DECISIONS LOCKED #7 — feature-aware backtesting is loud-fail until PRP-MLZOO-B. + - VALIDATE: uv run pytest -v -m "not integration" app/features/backtesting/tests/test_service.py + +Task 14 — CREATE examples/models/feature_frame_contract.md: + - SECTIONS: + * Historical training frame — shape [n_observations, n_features], rows = observed days + in [train_start, train_end], the `date <= end_date` SQL filter IS the cutoff guard, + lag_k reads quantity[i-k] (i >= k else NaN). + * Future prediction frame — shape [horizon, n_features], rows = T+1..T+horizon, lag_k + reads history_tail[(j-1)-k] only when (j-1)-k < 0 else NaN, NO recursion in v1. + * The canonical column set — the 14 columns, the order, where they come from (cite + app/shared/feature_frames). + * Feature-class taxonomy — the SAFE / CONDITIONALLY_SAFE / UNSAFE_UNLESS_SUPPLIED table, + one row per column, "how to populate / leakage trap" (mirror the table in + PRPs/ai_docs/exogenous-regressor-forecasting.md §2). + * The NaN-as-unknown rule (DECISIONS LOCKED #6) — builders never fabricate defaults. + * How a future advanced model plugs in — set requires_features=True, reuse the shared + builders; backtesting is loud-fail until PRP-MLZOO-B. + - VALIDATE: test -f examples/models/feature_frame_contract.md + +Task 15 — MODIFY examples/models/model_interface.md: + - ADDITIVE only: document the `requires_features` class attribute on the BaseForecaster + interface; add a `regression` row to the Model Configurations / Model Formulas sections; + add a one-line pointer to examples/models/feature_frame_contract.md. + - Do NOT rewrite the file; do NOT "fix" the ModelBundle drift noted in research (out of scope). + - VALIDATE: uv run ruff check . && uv run ruff format --check . +``` + +### Per-task pseudocode (critical details only) + +```python +# ── Task 1 — contract.py: the moved long-lag builder is the leakage core ── +# MOVE VERBATIM. Shown here ONLY so you can confirm it arrived unchanged. +def build_long_lag_columns(history_tail, horizon, lags=EXOGENOUS_LAGS): + tail_len = len(history_tail) + columns = {} + for lag in lags: + column = [] + for j in range(1, horizon + 1): + idx = (j - 1) - lag # <-- the leakage guard + if idx < 0 and -tail_len <= idx: # idx<0 => source day <= origin T + column.append(float(history_tail[idx])) + else: + column.append(math.nan) # future target => NaN, never recursion + columns[f"lag_{lag}"] = column + return columns + +# ── Task 1 — the new taxonomy ── +FEATURE_CLASS = { + **{f"lag_{k}": FeatureSafety.CONDITIONALLY_SAFE for k in EXOGENOUS_LAGS}, + "dow_sin": FeatureSafety.SAFE, "dow_cos": FeatureSafety.SAFE, + "month_sin": FeatureSafety.SAFE, "month_cos": FeatureSafety.SAFE, + "is_weekend": FeatureSafety.SAFE, "is_month_end": FeatureSafety.SAFE, + "is_holiday": FeatureSafety.SAFE, # calendar table = timeless attribute + "days_since_launch": FeatureSafety.SAFE, # pure fn of date once launch_date known + "price_factor": FeatureSafety.UNSAFE_UNLESS_SUPPLIED, + "promo_active": FeatureSafety.UNSAFE_UNLESS_SUPPLIED, +} + +def feature_safety(column: str) -> FeatureSafety: + if column in FEATURE_CLASS: + return FEATURE_CLASS[column] + if column.startswith("lag_"): # custom lag offset + return FeatureSafety.CONDITIONALLY_SAFE + raise KeyError(f"Unclassified feature column: {column!r}") + +# ── Task 3 — the architectural-invariant test ── +def test_shared_package_imports_nothing_from_features(): + """app/shared/** is leaf-level — it may never import a vertical slice.""" + pkg_dir = Path(__file__).resolve().parents[1] # app/shared/feature_frames/ + for py_file in pkg_dir.rglob("*.py"): + source = py_file.read_text(encoding="utf-8") + for node in ast.walk(ast.parse(source)): + if isinstance(node, ast.ImportFrom) and node.module: + assert not node.module.startswith("app.features"), ( + f"ARCHITECTURE BREACH: {py_file} imports {node.module}" + ) + if isinstance(node, ast.Import): + for alias in node.names: + assert not alias.name.startswith("app.features"), ... + +# ── Task 10 — train_model: branch on the model, not on a string ── +async def train_model(self, db, store_id, product_id, train_start, train_end, config): + # PATTERN: build the model first (cheap, no fit), then branch on its capability. + model = model_factory(config, random_state=self.settings.forecast_random_seed) + extra_metadata: dict[str, object] = {} + if model.requires_features: # was: config.model_type == "regression" + features = await self._build_regression_features(db, store_id, product_id, + train_start, train_end) + model.fit(features.y, features.X) + n_observations = features.n_observations + extra_metadata = {"feature_columns": features.feature_columns, + "history_tail": features.history_tail, + "history_tail_dates": features.history_tail_dates, + "launch_date": features.launch_date_iso} + else: + training_data = await self._load_training_data(db, store_id, product_id, + train_start, train_end) + if training_data.n_observations == 0: + raise ValueError(f"No training data found for store={store_id} ...") + model.fit(training_data.y) + n_observations = training_data.n_observations + # ... bundle creation, save, TrainResponse — UNCHANGED below this line. + +# ── Task 10 — predict(): generalise the rejection condition ── + bundle = load_model_bundle(resolved_path) + # ... store/product validation unchanged ... + if bundle.model.requires_features: # was: bundle.config.model_type == "regression" + raise ValueError( + "Feature-aware models forecast through POST /scenarios/simulate, which supplies " + "the exogenous feature frame. POST /forecasting/predict does not support them." + ) + +# ── Task 10 — _build_regression_features: consume the shared contract ── +# The DB reads (sales, calendar holidays, promotions, launch_date) are UNCHANGED. +# Replace the per-row inline calendar math with the shared column builder, and the +# local constants with the shared ones: + feature_columns = canonical_feature_columns() # was list(_REGRESSION_FEATURE_COLUMNS) + calendar_cols = build_calendar_columns(dates) # shared; replaces lines 561-566 + rows = [] + for index, day in enumerate(dates): + row = [] + for lag in EXOGENOUS_LAGS: # was _REGRESSION_LAGS + row.append(quantities[index - lag] if index >= lag else math.nan) + for name in CALENDAR_COLUMNS: # imported from shared + row.append(calendar_cols[name][index]) + row.append(prices[index] / baseline_price) # price_factor — DB-observed + row.append(1.0 if day in promo_dates else 0.0) + row.append(1.0 if day in holiday_dates else 0.0) + row.append(float((day - launch_date).days) if launch_date else math.nan) + rows.append(row) + tail = quantities[-HISTORY_TAIL_DAYS:] # was _REGRESSION_HISTORY_TAIL_DAYS + # CRITICAL: the column ORDER above (lags, calendar, price_factor, promo_active, + # is_holiday, days_since_launch) MUST equal canonical_feature_columns() — Task 12 pins it. +``` + +### Integration Points + +```yaml +PACKAGE WIRING: + - app/shared/feature_frames/ is a pure library — NO router, NO app/main.py change. + - It is imported by: app/features/scenarios/feature_frame.py and + app/features/forecasting/service.py (both directions are app/features -> app/shared, which + is the allowed direction). + +CONFIG: + - No new settings. `forecast_random_seed` (config.py, default 42) is still the determinism + source. EXOGENOUS_LAGS / HISTORY_TAIL_DAYS are code constants, not settings (matches the + PRP-27 precedent — they were code constants in feature_frame.py). + +PERSISTENCE: + - ModelBundle is UNTOUCHED. `feature_columns` in bundle metadata is still a list[str]; the + strings are identical (canonical_feature_columns() == the deleted _REGRESSION_FEATURE_COLUMNS). + - `requires_features` is a class attribute — not pickled — so bundles trained before this PRP + load cleanly and gain the attribute from the new class definition. + +NO MIGRATION: this PRP touches no SQLAlchemy model and no Alembic version. +``` + +--- + +## Validation Loop + +### Level 1: Syntax & Style + +```bash +uv run ruff check . --fix && uv run ruff format --check . +# Expected: no errors. Fix everything before Level 2. +``` + +### Level 2: Type Checks + +```bash +uv run mypy app/ # --strict; gates merge +uv run pyright app/ # --strict; gates merge +# Expected: clean. Watch for: ClassVar annotation on requires_features; the F401 re-export +# in scenarios/feature_frame.py needs a `# noqa: F401` (ruff), not a type ignore. +``` + +### Level 3: Unit Tests + +```bash +# New + moved tests +uv run pytest -v -m "not integration" app/shared/feature_frames/tests/ +uv run pytest -v -m "not integration" app/features/forecasting/tests/test_regression_features_leakage.py + +# Regression — the slices this PRP rewired MUST stay green unchanged +uv run pytest -v -m "not integration" app/features/forecasting/tests/ +uv run pytest -v -m "not integration" app/features/scenarios/tests/ +uv run pytest -v -m "not integration" app/features/backtesting/tests/ + +# Whole fast suite +uv run pytest -v -m "not integration" +# Expected: all green. The baseline-forecaster tests and test_regression_forecaster.py must +# pass with ZERO edits — if one fails, the consolidation changed behaviour (it must not). +``` + +### Level 4: Integration Tests + Contract Drift + +```bash +docker compose up -d && uv run alembic upgrade head +uv run pytest -v -m integration app/features/scenarios/ app/features/forecasting/ +# CRITICAL: the scenarios "empty-assumption model_exogenous simulation -> zero delta" test is +# the old drift detector for the duplicated contract. It MUST stay green — now structurally, +# because both slices import the one shared column list. +# No migration in this PRP -> no `alembic downgrade` round-trip needed. +``` + +### Level 5: Manual Validation (dogfood — REQUIRED) + +```bash +# 1. Shared package wires up +uv run python -c "from app.shared.feature_frames import canonical_feature_columns, FeatureSafety, feature_safety; \ +cols = canonical_feature_columns(); assert len(cols) == 14; \ +assert all(feature_safety(c) for c in cols); print('contract OK:', cols)" + +# 2. The duplicated constant is GONE +grep -rn "_REGRESSION_FEATURE_COLUMNS\|_REGRESSION_LAGS\|_REGRESSION_HISTORY_TAIL_DAYS" app/ \ + && echo "FAIL: duplicate still present" || echo "OK: single source of truth" + +# 3. requires_features is correct on every forecaster +uv run python -c " +from app.features.forecasting.models import (NaiveForecaster, SeasonalNaiveForecaster, \ +MovingAverageForecaster, RegressionForecaster); +assert NaiveForecaster.requires_features is False; +assert SeasonalNaiveForecaster.requires_features is False; +assert MovingAverageForecaster.requires_features is False; +assert RegressionForecaster.requires_features is True; +print('requires_features OK')" + +# 4. End-to-end behaviour unchanged — train a regression model and run a model_exogenous +# scenario; confirm it still produces a comparison (start backend first): +# uv run uvicorn app.main:app --port 8123 & +# curl -sX POST localhost:8123/forecasting/train -H 'Content-Type: application/json' \ +# -d '{"store_id":1,"product_id":1,"train_start_date":"2024-01-01", +# "train_end_date":"2024-06-01","config":{"model_type":"regression"}}' +# -> 200; then POST /scenarios/simulate with that run_id -> method "model_exogenous". +``` + +--- + +## Final Validation Checklist + +- [ ] `uv run ruff check .` and `uv run ruff format --check .` clean. +- [ ] `uv run mypy app/` and `uv run pyright app/` clean (both --strict). +- [ ] `uv run pytest -v -m "not integration"` fully green. +- [ ] `uv run pytest -v -m integration app/features/scenarios/ app/features/forecasting/` green — including the empty-assumption zero-delta test. +- [ ] `grep -rn "_REGRESSION_FEATURE_COLUMNS" app/` returns nothing. +- [ ] `app/shared/feature_frames/tests/test_contract.py::test_shared_package_imports_nothing_from_features` passes. +- [ ] `app/shared/feature_frames/tests/test_leakage.py` and `app/features/forecasting/tests/test_regression_features_leakage.py` exist, carry the load-bearing-spec docstring, and pass. +- [ ] Baseline-forecaster tests and `test_regression_forecaster.py` pass with **no edits**. +- [ ] `examples/models/feature_frame_contract.md` exists and documents both frame shapes + the taxonomy; `examples/models/model_interface.md` updated additively. +- [ ] `git diff --stat` shows only the intended files — no whole-file CRLF/LF noise diffs. +- [ ] No new dependency in `pyproject.toml`; no Alembic migration; no route/schema/WebSocket change. +- [ ] An OPEN GitHub issue exists for this work (`gh issue view --json state` → `OPEN`); commit `feat(forecast): feature-aware forecasting foundation — shared feature-frame contract (#)`; branch `feat/feature-aware-forecasting-foundation` off `dev`. + +--- + +## Anti-Patterns to Avoid + +- ❌ Don't implement LightGBM, XGBoost, Prophet, or any new model — that is PRP-MLZOO-B+ (INITIAL-MLZOO-index.md). This PRP is contracts + tests + docs only. +- ❌ Don't add a `FeatureAwareForecaster` base class — DECISIONS LOCKED #2 chose the `requires_features` attribute. +- ❌ Don't introduce a `TrainingFrame` dataclass nothing returns — DECISIONS LOCKED #4 (dead code; product-vision.md forbids speculative abstraction). +- ❌ Don't change `POST /forecasting/predict` behaviour — it still rejects feature-aware models (DECISIONS LOCKED #3). +- ❌ Don't "tidy up" `build_long_lag_columns` while moving it — move it byte-for-byte; the `idx = (j-1)-k` guard is the leakage spec. +- ❌ Don't weaken or delete an assertion in any `*_leakage.py` file — AGENTS.md § Safety. Splitting tests across the module move is fine; dropping coverage is not. +- ❌ Don't let `app/shared/feature_frames/` import from `app/features/**` — it is leaf-level; a test enforces it. +- ❌ Don't silently zero-fill an unknown feature cell — emit `math.nan` (DECISIONS LOCKED #6). +- ❌ Don't add an Alembic migration or touch `ModelBundle` — persistence is untouched. +- ❌ Don't wire feature-aware backtesting — DECISIONS LOCKED #7; loud-fail + a guard test is the deliverable here. + +## Open Questions — ALL RESOLVED + +The three "Required decisions" in INITIAL-MLZOO-A were resolved during planning and are +recorded as DECISIONS LOCKED #1 (contract home → `app/shared/feature_frames/`), #2 (no +`FeatureAwareForecaster` class → `requires_features` attribute), and #3 (`/forecasting/predict` +unchanged). #4–#7 record the derived decisions (no `TrainingFrame`, builders move by import, +NaN-as-unknown, backtesting loud-fail). Nothing is left to litigate at implementation time. + +## Confidence Score + +**8 / 10** for one-pass implementation success. + +Rationale: this is a consolidation-and-test PRP with no new dependency, no migration, no API +change, and no new algorithm — the highest-confidence PRP class. The feature-frame machinery +already exists and is well-tested; the work is moving pure functions into `app/shared/`, +swapping a string check for a class attribute, and adding leakage tests that mirror an +existing, proven idiom. The −2 risk is entirely in **import-update completeness**: 6+ sites +import from `scenarios/feature_frame.py`, and the `_build_regression_features` calendar +refactor (row-major inline math → shared column builder) must reproduce the exact 14-column +order. Both risks are caught fast — by `mypy`/`pyright` (unresolved imports) and by the Task 12 +column-order test plus the scenarios zero-delta integration test. Following the per-task +pseudocode and the Level 3 "baselines must pass unedited" gate makes a regression hard to miss. diff --git a/app/features/backtesting/tests/test_service.py b/app/features/backtesting/tests/test_service.py index 61dbb220..87d17cb1 100644 --- a/app/features/backtesting/tests/test_service.py +++ b/app/features/backtesting/tests/test_service.py @@ -117,6 +117,40 @@ def test_run_model_backtest_without_fold_details( # But metrics should still be present assert fold.metrics is not None + def test_feature_aware_model_fails_loud_in_backtest( + self, + sample_dates_120: list[date], + sample_values_120: np.ndarray, + sample_split_config_expanding: SplitConfig, + ) -> None: + """A feature-aware model must fail LOUD in a backtest, never run silently. + + The fold loop calls ``model.fit(y_train)`` target-only; a + ``RegressionForecaster`` (``requires_features=True``) raises ``ValueError`` + there because no exogenous ``X`` was supplied. Feature-aware backtesting + is wired in PRP-MLZOO-B — until then this loud, non-leaky failure is the + contract (PRP-29 DECISIONS LOCKED #7). + """ + from app.features.backtesting.splitter import TimeSeriesSplitter + from app.features.forecasting.schemas import RegressionModelConfig + + service = BacktestingService() + series_data = SeriesData( + dates=sample_dates_120, + values=sample_values_120, + store_id=1, + product_id=1, + ) + splitter = TimeSeriesSplitter(sample_split_config_expanding) + + with pytest.raises(ValueError, match="requires exogenous features X"): + service._run_model_backtest( + series_data=series_data, + splitter=splitter, + model_config=RegressionModelConfig(), + store_fold_details=True, + ) + class TestBacktestingServiceBaselineComparisons: """Tests for baseline comparison functionality.""" diff --git a/app/features/forecasting/models.py b/app/features/forecasting/models.py index ecb510b8..bbe99483 100644 --- a/app/features/forecasting/models.py +++ b/app/features/forecasting/models.py @@ -14,7 +14,7 @@ from abc import ABC, abstractmethod from dataclasses import dataclass, field from datetime import date as date_type -from typing import TYPE_CHECKING, Any, Literal +from typing import TYPE_CHECKING, Any, ClassVar, Literal import numpy as np from sklearn.ensemble import ( # type: ignore[import-untyped] @@ -57,6 +57,16 @@ class BaseForecaster(ABC): Attributes: random_state: Random seed for reproducibility. + requires_features: True when ``fit``/``predict`` require a non-None + ``X`` feature frame; baseline (target-only) models leave it False. + """ + + requires_features: ClassVar[bool] = False + """True when ``fit()``/``predict()`` REQUIRE a non-None ``X`` feature frame. + + Baseline (target-only) models leave this ``False``; feature-aware models + override it to ``True``. ``ForecastingService`` branches on this flag + rather than an ``isinstance`` check or a ``model_type`` string comparison. """ def __init__(self, random_state: int = 42) -> None: @@ -445,6 +455,9 @@ class RegressionForecaster(BaseForecaster): max_depth: Maximum depth of each tree. """ + requires_features: ClassVar[bool] = True + """A feature-aware model — ``fit``/``predict`` REQUIRE a non-None ``X``.""" + def __init__( self, *, diff --git a/app/features/forecasting/service.py b/app/features/forecasting/service.py index 839f1475..f2affc75 100644 --- a/app/features/forecasting/service.py +++ b/app/features/forecasting/service.py @@ -39,6 +39,13 @@ PredictResponse, TrainResponse, ) +from app.shared.feature_frames import ( + CALENDAR_COLUMNS, + EXOGENOUS_LAGS, + HISTORY_TAIL_DAYS, + build_calendar_columns, + canonical_feature_columns, +) if TYPE_CHECKING: pass @@ -72,31 +79,12 @@ def __post_init__(self) -> None: # Minimum observed rows required to train a regression model — enough to # resolve the lag features and still leave training signal (PRP-27 GOTCHA #14). _MIN_REGRESSION_TRAIN_ROWS = 30 -# Observed-target tail persisted in the bundle so the scenario future-frame -# generator can resolve long lags (PRP-27 DECISIONS LOCKED #11 — 90 days). -_REGRESSION_HISTORY_TAIL_DAYS = 90 -# Target lag offsets — PRP-27 DECISIONS LOCKED #10 (EXOGENOUS_LAGS). -_REGRESSION_LAGS: tuple[int, ...] = (1, 7, 14, 28) -# Canonical regression feature columns — a PAIRED CONTRACT with -# ``app/features/scenarios/feature_frame.canonical_feature_columns()``. The -# scenarios slice owns the future-frame generator; this slice owns training. -# A cross-slice import is forbidden (AGENTS.md § Architecture, PRP-27 -# DECISIONS LOCKED #3), so the column names and order are replicated here and -# kept in lock-step by the scenarios integration test (a column mismatch -# surfaces as a non-zero delta on an empty-assumption simulation). -_REGRESSION_FEATURE_COLUMNS: list[str] = [ - *(f"lag_{lag}" for lag in _REGRESSION_LAGS), - "dow_sin", - "dow_cos", - "month_sin", - "month_cos", - "is_weekend", - "is_month_end", - "price_factor", - "promo_active", - "is_holiday", - "days_since_launch", -] +# The regression feature-frame contract — the lag offsets (``EXOGENOUS_LAGS``), +# the observed-target tail length (``HISTORY_TAIL_DAYS``), and the canonical +# column set and order (``canonical_feature_columns()``) — is the single source +# of truth in ``app/shared/feature_frames`` (MLZOO-A). This slice imports it +# rather than re-typing it, so a column-order mismatch with the scenarios +# slice's future-frame generator is structurally impossible. @dataclass @@ -107,8 +95,8 @@ class RegressionFeatureMatrix: X: Feature matrix, shape ``[n_observations, n_features]`` (NaN allowed). y: Target values, shape ``[n_observations]``. feature_columns: Column order — persisted so the future frame matches. - history_tail: The last ``_REGRESSION_HISTORY_TAIL_DAYS`` observed - targets, ending at the forecast origin ``T``. + history_tail: The last ``HISTORY_TAIL_DAYS`` observed targets, ending + at the forecast origin ``T``. history_tail_dates: ISO dates aligned with ``history_tail``. launch_date_iso: The product launch date (ISO) or ``None``. n_observations: Number of training rows. @@ -123,6 +111,65 @@ class RegressionFeatureMatrix: n_observations: int +def _assemble_regression_rows( + *, + dates: list[date_type], + quantities: list[float], + prices: list[float], + baseline_price: float, + promo_dates: set[date_type], + holiday_dates: set[date_type], + launch_date: date_type | None, +) -> list[list[float]]: + """Assemble the historical regression feature matrix — pure, leakage-safe. + + Time-safe by construction: every lag column at row ``i`` reads only the + observed target at ``i - lag`` (a strictly earlier day); calendar columns + are pure functions of the date; ``price_factor`` / ``promo_active`` / + ``is_holiday`` / ``days_since_launch`` read the same-day exogenous + attributes. No row reads a future observation. + + Column order is ``canonical_feature_columns()`` exactly: the target lags, + then the calendar columns, then ``price_factor``, ``promo_active``, + ``is_holiday``, ``days_since_launch``. + + Extracted from :meth:`ForecastingService._build_regression_features` so the + leakage invariant can be unit-tested without a database + (``test_regression_features_leakage.py``). + + Args: + dates: Observed days in chronological order. + quantities: Observed target values aligned with ``dates``. + prices: Observed unit prices aligned with ``dates``. + baseline_price: The typical price; ``price_factor`` is the ratio to it. + promo_dates: Days a promotion covered. + holiday_dates: Calendar holiday days. + launch_date: The product's launch date, or ``None``. + + Returns: + Row-major feature matrix ``[n_observations][n_features]``; ``NaN`` marks + a lag whose source day precedes the series, and ``days_since_launch`` + when the product has no launch date. + """ + calendar_columns = build_calendar_columns(dates) + rows: list[list[float]] = [] + for index, day in enumerate(dates): + row: list[float] = [] + # Target long-lag columns — read only strictly-earlier observations. + for lag in EXOGENOUS_LAGS: + row.append(quantities[index - lag] if index >= lag else math.nan) + # Calendar columns — pure functions of the date (shared builder). + for name in CALENDAR_COLUMNS: + row.append(calendar_columns[name][index]) + # Exogenous columns — same-day observed attributes. + row.append(prices[index] / baseline_price) + row.append(1.0 if day in promo_dates else 0.0) + row.append(1.0 if day in holiday_dates else 0.0) + row.append(float((day - launch_date).days) if launch_date is not None else math.nan) + rows.append(row) + return rows + + class ForecastingService: """Service for training and predicting with forecasting models. @@ -176,11 +223,13 @@ async def train_model( config_hash=config.config_hash(), ) - # Build the model + bundle metadata. The regression path consumes a - # historical feature matrix; every other model trains on the raw - # target series exactly as before. + # Build the model first (cheap — no fit), then branch on its capability + # rather than on a ``model_type`` string. A feature-aware model + # (``requires_features``) consumes a historical feature matrix; every + # target-only model trains on the raw target series exactly as before. + model = model_factory(config, random_state=self.settings.forecast_random_seed) extra_metadata: dict[str, object] = {} - if config.model_type == "regression": + if model.requires_features: features = await self._build_regression_features( db=db, store_id=store_id, @@ -188,7 +237,6 @@ async def train_model( start_date=train_start_date, end_date=train_end_date, ) - model = model_factory(config, random_state=self.settings.forecast_random_seed) model.fit(features.y, features.X) n_observations = features.n_observations extra_metadata = { @@ -210,7 +258,6 @@ async def train_model( f"No training data found for store={store_id}, product={product_id} " f"between {train_start_date} and {train_end_date}" ) - model = model_factory(config, random_state=self.settings.forecast_random_seed) model.fit(training_data.y) n_observations = training_data.n_observations @@ -342,14 +389,16 @@ async def predict( f"but prediction requested for product={product_id}" ) - # Regression models need an exogenous feature frame to forecast — that - # is built (from scenario assumptions) by POST /scenarios/simulate. The - # plain predict endpoint cannot supply one, so it rejects them cleanly. - if bundle.config.model_type == "regression": + # Feature-aware models need an exogenous feature frame to forecast — + # that is built (from scenario assumptions) by POST /scenarios/simulate. + # The plain predict endpoint cannot supply one, so it rejects them + # cleanly. Branching on ``requires_features`` (not a ``model_type`` + # string) keeps this future-proof as the model zoo grows. + if bundle.model.requires_features: raise ValueError( - "Regression models forecast through POST /scenarios/simulate, " + "Feature-aware models forecast through POST /scenarios/simulate, " "which supplies the exogenous feature frame. POST /forecasting/" - "predict does not support model_type='regression'." + "predict does not support them." ) # Generate forecasts @@ -467,8 +516,11 @@ async def _build_regression_features( ``promo_active`` / ``is_holiday`` / ``days_since_launch`` read the same-day exogenous attributes. No row reads a future observation. - The column set is the paired contract with the scenarios slice's - future-frame generator (see ``_REGRESSION_FEATURE_COLUMNS``). + The column set and order are ``canonical_feature_columns()`` from + ``app/shared/feature_frames`` — the single source of truth shared with + the scenarios slice's future-frame generator. The pure row assembly is + factored into :func:`_assemble_regression_rows` (unit-tested for + leakage without a database). Args: db: Database session. @@ -550,44 +602,32 @@ async def _build_regression_features( select(Product.launch_date).where(Product.id == product_id) ) - feature_rows: list[list[float]] = [] - for index, day in enumerate(dates): - row_values: list[float] = [] - # Target long-lag columns — read only strictly-earlier observations. - for lag in _REGRESSION_LAGS: - row_values.append(quantities[index - lag] if index >= lag else math.nan) - # Calendar columns — pure functions of the date. - dow = day.weekday() - row_values.append(math.sin(2.0 * math.pi * dow / 7.0)) - row_values.append(math.cos(2.0 * math.pi * dow / 7.0)) - row_values.append(math.sin(2.0 * math.pi * day.month / 12.0)) - row_values.append(math.cos(2.0 * math.pi * day.month / 12.0)) - row_values.append(1.0 if dow >= 5 else 0.0) - row_values.append(1.0 if (day + timedelta(days=1)).month != day.month else 0.0) - # Exogenous columns — same-day attributes. - row_values.append(prices[index] / baseline_price) - row_values.append(1.0 if day in promo_dates else 0.0) - row_values.append(1.0 if day in holiday_dates else 0.0) - row_values.append( - float((day - launch_date).days) if launch_date is not None else math.nan - ) - feature_rows.append(row_values) + feature_columns = canonical_feature_columns() + feature_rows = _assemble_regression_rows( + dates=dates, + quantities=quantities, + prices=prices, + baseline_price=baseline_price, + promo_dates=promo_dates, + holiday_dates=holiday_dates, + launch_date=launch_date, + ) - tail = quantities[-_REGRESSION_HISTORY_TAIL_DAYS:] - tail_dates = [day.isoformat() for day in dates[-_REGRESSION_HISTORY_TAIL_DAYS:]] + tail = quantities[-HISTORY_TAIL_DAYS:] + tail_dates = [day.isoformat() for day in dates[-HISTORY_TAIL_DAYS:]] logger.info( "forecasting.regression_features_built", store_id=store_id, product_id=product_id, n_observations=len(dates), - n_features=len(_REGRESSION_FEATURE_COLUMNS), + n_features=len(feature_columns), ) return RegressionFeatureMatrix( X=np.array(feature_rows, dtype=np.float64), y=np.array(quantities, dtype=np.float64), - feature_columns=list(_REGRESSION_FEATURE_COLUMNS), + feature_columns=feature_columns, history_tail=[float(value) for value in tail], history_tail_dates=tail_dates, launch_date_iso=launch_date.isoformat() if launch_date is not None else None, diff --git a/app/features/forecasting/tests/test_regression_features_leakage.py b/app/features/forecasting/tests/test_regression_features_leakage.py new file mode 100644 index 00000000..8bffca19 --- /dev/null +++ b/app/features/forecasting/tests/test_regression_features_leakage.py @@ -0,0 +1,94 @@ +"""Leakage spec for the historical regression feature builder — LOAD-BEARING. + +This file IS the spec, mirroring ``app/features/featuresets/tests/test_leakage.py`` +and ``app/shared/feature_frames/tests/test_leakage.py``: it must NEVER be +weakened to make a feature pass (AGENTS.md § Safety). + +It pins the time-safety of :func:`_assemble_regression_rows` — the pure row +assembler behind ``ForecastingService._build_regression_features``. Sequential +target values (1, 2, 3, …) are used so any leakage is mathematically +detectable: with that input the lag-``k`` cell at row ``i`` MUST equal +``quantity[i-k]`` and MUST be strictly less than ``quantity[i]``. A lag cell +equal to or greater than the current row's target proves the builder read +current-or-future data. + +The cutoff itself (``date <= end_date``) is enforced upstream by the SQL window +in ``_build_regression_features``; the row assembler only ever sees the +already-bounded ``dates`` list, and emits exactly one row per supplied date. +""" + +from __future__ import annotations + +import math +from datetime import date, timedelta + +from app.features.forecasting.service import _assemble_regression_rows +from app.shared.feature_frames import EXOGENOUS_LAGS, canonical_feature_columns + +_N = 60 +_DATES = [date(2026, 1, 1) + timedelta(days=offset) for offset in range(_N)] +# Sequential targets 1.0 … 60.0 — quantity[i] == i + 1, so leakage is detectable. +_QUANTITIES = [float(offset + 1) for offset in range(_N)] +_PRICES = [10.0] * _N +_BASELINE_PRICE = 10.0 + + +def _build_rows() -> list[list[float]]: + """Assemble the regression feature matrix from the sequential fixture.""" + return _assemble_regression_rows( + dates=_DATES, + quantities=_QUANTITIES, + prices=_PRICES, + baseline_price=_BASELINE_PRICE, + promo_dates=set(), + holiday_dates=set(), + launch_date=None, + ) + + +def test_lag_columns_read_only_strictly_earlier_observations() -> None: + """CRITICAL: every lag cell reads a strictly-earlier observation, or NaN. + + With sequential targets the lag-``k`` cell at row ``i`` (for ``i >= k``) + must equal ``quantity[i-k]`` exactly and be strictly below the current + row's target ``quantity[i]``. Any cell ``>= quantity[i]`` is future + leakage. Rows before the lag offset have no source day → ``NaN``. + """ + columns = canonical_feature_columns() + rows = _build_rows() + + for lag in EXOGENOUS_LAGS: + col_index = columns.index(f"lag_{lag}") + for i in range(_N): + cell = rows[i][col_index] + if i < lag: + assert math.isnan(cell), ( + f"row {i}: lag_{lag} has no source day yet — expected NaN, got {cell}" + ) + continue + expected = _QUANTITIES[i - lag] + assert cell == expected, ( + f"LEAKAGE DETECTED at row {i}: lag_{lag}={cell} != expected={expected}. " + "Lag feature is not correctly shifted." + ) + assert cell < _QUANTITIES[i], ( + f"LEAKAGE DETECTED at row {i}: lag_{lag}={cell} >= current=" + f"{_QUANTITIES[i]}. Lag feature is using current or future data!" + ) + + +def test_assembled_matrix_shape_matches_canonical_columns() -> None: + """One row per supplied date; every row matches the canonical column width. + + The assembler emits exactly ``len(dates)`` rows and never invents a row + beyond the (cutoff-bounded) ``dates`` it was given. + """ + columns = canonical_feature_columns() + rows = _build_rows() + assert len(rows) == _N + assert all(len(row) == len(columns) for row in rows) + + +def test_assemble_regression_rows_is_deterministic() -> None: + """Identical inputs produce an identical feature matrix — no hidden state.""" + assert _build_rows() == _build_rows() diff --git a/app/features/forecasting/tests/test_service.py b/app/features/forecasting/tests/test_service.py index 6b476686..4d228574 100644 --- a/app/features/forecasting/tests/test_service.py +++ b/app/features/forecasting/tests/test_service.py @@ -344,3 +344,41 @@ async def test_train_returns_model_path(self): assert Path(response.model_path).exists() assert response.n_observations == 30 assert response.model_type == "naive" + + +class TestFeatureAwareContract: + """Tests for the feature-aware model contract (MLZOO-A / PRP-29).""" + + def test_requires_features_flag(self): + """Baseline forecasters require no features; regression requires them.""" + from app.features.forecasting.schemas import RegressionModelConfig + + assert model_factory(NaiveModelConfig()).requires_features is False + assert model_factory(SeasonalNaiveModelConfig()).requires_features is False + assert model_factory(MovingAverageModelConfig()).requires_features is False + assert model_factory(RegressionModelConfig()).requires_features is True + + def test_canonical_columns_match_regression_contract(self): + """The canonical column set is the exact 14-name regression contract. + + Pins the contract after the local duplicated column-list constant + was deleted in favour of the shared single source of truth. + """ + from app.shared.feature_frames import canonical_feature_columns + + assert canonical_feature_columns() == [ + "lag_1", + "lag_7", + "lag_14", + "lag_28", + "dow_sin", + "dow_cos", + "month_sin", + "month_cos", + "is_weekend", + "is_month_end", + "price_factor", + "promo_active", + "is_holiday", + "days_since_launch", + ] diff --git a/app/features/scenarios/feature_frame.py b/app/features/scenarios/feature_frame.py index 0c9c2635..f8307288 100644 --- a/app/features/scenarios/feature_frame.py +++ b/app/features/scenarios/feature_frame.py @@ -17,33 +17,35 @@ It may NEVER read an observed target at a horizon day. ``app/features/scenarios/tests/test_future_frame_leakage.py`` is the -load-bearing spec for that rule — it must never be weakened (AGENTS.md -§ Safety), mirroring ``app/features/featuresets/tests/test_leakage.py``. +load-bearing spec for the assumption-driven columns and the assembled frame; the +shared pure builders are spec'd by +``app/shared/feature_frames/tests/test_leakage.py``. Neither may be weakened +(AGENTS.md § Safety), mirroring ``app/features/featuresets/tests/test_leakage.py``. DECISIONS LOCKED (PRP-27): * #3 — no cross-slice ``service.py`` import. This module imports only the - ``data_platform`` ORM (a sanctioned read-only ORM import) and same-slice - schema value-objects; it replicates the small slice of leakage-safe - lag/calendar logic it needs rather than importing - ``FeatureEngineeringService``. + ``data_platform`` ORM (a sanctioned read-only ORM import), the shared + feature-frame contract (``app/shared/feature_frames`` — a leaf-level + package, the allowed ``app/features -> app/shared`` direction), and + same-slice schema value-objects. * #4 — long-lag + calendar + assumption-driven columns ONLY; no recursion. A target lag value for horizon day ``T+j`` is the observed ``y[T+j-k]``; when ``T+j-k > T`` (a future target) the cell is ``NaN`` — the model (``HistGradientBoostingRegressor``) handles ``NaN`` natively. No recursion ever fills those gaps in v1. -* #10/#11/#12 — the PINNED constants ``EXOGENOUS_LAGS``, - ``HISTORY_TAIL_DAYS`` and ``MAX_COMPARE_SCENARIOS`` live here. - -Feature-column contract: ``canonical_feature_columns()`` is the single source -of truth for the regression feature set and column order. The Phase B training -path persists exactly this list in the bundle metadata, and the future frame -reproduces it column-for-column, so a model trained today re-forecasts cleanly. +* #12 — ``MAX_COMPARE_SCENARIOS`` (the Phase-C comparison cap) lives here. + +Feature-column contract: ``app/shared/feature_frames`` is the single source of +truth for the regression feature set, its column order, the pinned constants +(``EXOGENOUS_LAGS``, ``HISTORY_TAIL_DAYS``), and the leakage-safe pure builders +(``build_calendar_columns``, ``build_long_lag_columns``). This module imports — +and re-exports, for back-compat — those names; it owns only the +assumption-driven, DB-touching parts of the future frame. """ from __future__ import annotations import math -from dataclasses import dataclass from datetime import date, timedelta from typing import TYPE_CHECKING @@ -51,6 +53,16 @@ from app.core.logging import get_logger from app.features.data_platform.models import Calendar +from app.shared.feature_frames import ( + CALENDAR_COLUMNS, + EXOGENOUS_COLUMNS, + EXOGENOUS_LAGS, + HISTORY_TAIL_DAYS, + FutureFeatureFrame, + build_calendar_columns, + build_long_lag_columns, + canonical_feature_columns, +) if TYPE_CHECKING: from sqlalchemy.ext.asyncio import AsyncSession @@ -59,73 +71,31 @@ logger = get_logger(__name__) -# ── PINNED modelling constants (PRP-27 DECISIONS LOCKED #10/#11/#12) ── -# Lag offsets (days) for the target long-lag columns: daily, weekly, -# fortnightly, and a four-week lag covering the dominant retail seasonality. -EXOGENOUS_LAGS: tuple[int, ...] = (1, 7, 14, 28) -# Observed-target tail (days, ending at the forecast origin T) fed to the -# generator — 90 comfortably exceeds the largest lag offset (28). -HISTORY_TAIL_DAYS: int = 90 +# Public surface of this module. The first block is the future-frame contract +# re-exported from ``app/shared/feature_frames`` so existing importers of this +# module keep resolving (back-compat); listing them in ``__all__`` marks the +# re-export as intentional for both ruff (F401) and pyright (reportUnusedImport). +__all__ = [ + "CALENDAR_COLUMNS", + "EXOGENOUS_COLUMNS", + "EXOGENOUS_LAGS", + "HISTORY_TAIL_DAYS", + "MAX_COMPARE_SCENARIOS", + "FutureFeatureFrame", + "assemble_future_frame", + "build_calendar_columns", + "build_exogenous_columns", + "build_future_frame", + "build_long_lag_columns", + "canonical_feature_columns", +] + # Upper bound on the multi-scenario comparison (Phase C) so the chart stays -# legible; defined here as the slice's single modelling-constants home. +# legible; defined here as the scenarios slice's single modelling-constants +# home (PRP-27 DECISIONS LOCKED #12). NOT a feature-frame concept, so it stays +# in this slice rather than moving to ``app/shared/feature_frames``. MAX_COMPARE_SCENARIOS: int = 5 -# Fixed calendar columns — each a pure function of the date, never a leak. -CALENDAR_COLUMNS: tuple[str, ...] = ( - "dow_sin", - "dow_cos", - "month_sin", - "month_cos", - "is_weekend", - "is_month_end", -) -# Fixed current-day exogenous columns — driven by the scenario assumptions -# (the planner's posited future inputs) and by timeless attributes (the -# calendar, the product launch date). Every value is knowable at origin T. -EXOGENOUS_COLUMNS: tuple[str, ...] = ( - "price_factor", - "promo_active", - "is_holiday", - "days_since_launch", -) - - -@dataclass -class FutureFeatureFrame: - """A horizon-length feature matrix for one ``(store, product)`` series. - - Attributes: - dates: The horizon days ``T+1 … T+horizon`` (chronological). - feature_columns: Column order — matches the trained bundle exactly. - matrix: Row-major ``[horizon][n_features]``; ``NaN`` is allowed and - expected (a long-lag cell whose source target lies in the future, - or ``days_since_launch`` when the product has no launch date). - """ - - dates: list[date] - feature_columns: list[str] - matrix: list[list[float]] - - -def canonical_feature_columns(lags: tuple[int, ...] = EXOGENOUS_LAGS) -> list[str]: - """Return the fixed, ordered regression feature-column list. - - This is the single source of truth for the regression feature set. The - Phase B training path persists exactly this list in the model bundle's - metadata; the future frame reproduces it column-for-column. The column - set is deliberately *fixed* (not horizon-dependent): for a long horizon - some target-lag columns are mostly ``NaN``, which the NaN-tolerant - estimator handles — far safer than a horizon-varying column set. - - Args: - lags: Target long-lag offsets (defaults to the pinned ``EXOGENOUS_LAGS``). - - Returns: - Ordered column names: target lags, then calendar, then exogenous. - """ - target_lags = [f"lag_{k}" for k in lags] - return [*target_lags, *CALENDAR_COLUMNS, *EXOGENOUS_COLUMNS] - def _in_window(point_date: date, start: date, end: date) -> bool: """True when ``point_date`` is inside the inclusive ``[start, end]`` window. @@ -138,84 +108,6 @@ def _in_window(point_date: date, start: date, end: date) -> bool: return lo <= point_date <= hi -def _is_month_end(point_date: date) -> bool: - """True when ``point_date`` is the last day of its month.""" - return (point_date + timedelta(days=1)).month != point_date.month - - -def build_calendar_columns(dates: list[date]) -> dict[str, list[float]]: - """Build the calendar feature columns — a pure function of each date. - - Calendar features carry zero leakage risk: they read only the date - itself, never the target series. Day-of-week and month use cyclical - (sin/cos) encoding so the estimator sees their periodic structure. - - Args: - dates: The horizon days. - - Returns: - A mapping of every name in :data:`CALENDAR_COLUMNS` to its per-day - values. - """ - columns: dict[str, list[float]] = {name: [] for name in CALENDAR_COLUMNS} - for point_date in dates: - dow = point_date.weekday() # 0 = Monday … 6 = Sunday - month = point_date.month - columns["dow_sin"].append(math.sin(2.0 * math.pi * dow / 7.0)) - columns["dow_cos"].append(math.cos(2.0 * math.pi * dow / 7.0)) - columns["month_sin"].append(math.sin(2.0 * math.pi * month / 12.0)) - columns["month_cos"].append(math.cos(2.0 * math.pi * month / 12.0)) - columns["is_weekend"].append(1.0 if dow >= 5 else 0.0) - columns["is_month_end"].append(1.0 if _is_month_end(point_date) else 0.0) - return columns - - -def build_long_lag_columns( - history_tail: list[float], - horizon: int, - lags: tuple[int, ...] = EXOGENOUS_LAGS, -) -> dict[str, list[float]]: - """Build the target long-lag columns — the leakage-critical helper. - - ``history_tail`` is the observed target series ending at the forecast - origin ``T``: ``history_tail[-1] == y[T]``, ``history_tail[-2] == y[T-1]``, - and so on. The lag-``k`` column at horizon day ``T+j`` (``j`` in - ``1 … horizon``) is the observed target ``y[T+j-k]``. - - SAFETY (PRP-27 DECISIONS LOCKED #4): the source index into - ``history_tail`` is ``idx = (j - 1) - k``. The cell is populated **only - when ``idx < 0``** — i.e. the source day ``T+j-k`` lies at or before the - origin ``T`` and therefore inside ``history_tail``. When ``idx >= 0`` the - source day is a *future* horizon day with no observed target, so the cell - is ``NaN`` — never a recursive prediction, never a fabricated value. This - function structurally **cannot** read a future target: its only data - input is ``history_tail`` (entirely ``<= T``). - - Args: - history_tail: Observed target values ending at the origin ``T``. - horizon: Number of horizon days. - lags: Lag offsets (defaults to the pinned ``EXOGENOUS_LAGS``). - - Returns: - A mapping ``"lag_{k}" -> [horizon values]``; out-of-range cells are - ``NaN``. - """ - tail_len = len(history_tail) - columns: dict[str, list[float]] = {} - for lag in lags: - column: list[float] = [] - for j in range(1, horizon + 1): - # Negative index from the end of history_tail. idx < 0 means the - # source day T+j-k is at/before the origin T — safe to read. - idx = (j - 1) - lag - if idx < 0 and -tail_len <= idx: - column.append(float(history_tail[idx])) - else: - column.append(math.nan) - columns[f"lag_{lag}"] = column - return columns - - def build_exogenous_columns( dates: list[date], assumptions: ScenarioAssumptions, diff --git a/app/features/scenarios/tests/conftest.py b/app/features/scenarios/tests/conftest.py index b1cc6eeb..c326246e 100644 --- a/app/features/scenarios/tests/conftest.py +++ b/app/features/scenarios/tests/conftest.py @@ -25,9 +25,9 @@ from app.features.forecasting.models import NaiveForecaster, RegressionForecaster from app.features.forecasting.persistence import ModelBundle, save_model_bundle from app.features.forecasting.schemas import NaiveModelConfig, RegressionModelConfig -from app.features.scenarios.feature_frame import canonical_feature_columns from app.features.scenarios.models import ScenarioPlan from app.main import app +from app.shared.feature_frames import canonical_feature_columns # Store / product the test bundle is trained for. High IDs that no seeder uses, # so the revenue calc deterministically hits the unit-price fallback. diff --git a/app/features/scenarios/tests/test_feature_frame.py b/app/features/scenarios/tests/test_feature_frame.py index 22306eb1..3f14b09b 100644 --- a/app/features/scenarios/tests/test_feature_frame.py +++ b/app/features/scenarios/tests/test_feature_frame.py @@ -12,16 +12,9 @@ from datetime import date, timedelta from app.features.scenarios.feature_frame import ( - CALENDAR_COLUMNS, - EXOGENOUS_COLUMNS, - EXOGENOUS_LAGS, - HISTORY_TAIL_DAYS, MAX_COMPARE_SCENARIOS, assemble_future_frame, - build_calendar_columns, build_exogenous_columns, - build_long_lag_columns, - canonical_feature_columns, ) from app.features.scenarios.schemas import ( HolidayAssumption, @@ -29,6 +22,15 @@ PromotionAssumption, ScenarioAssumptions, ) +from app.shared.feature_frames import ( + CALENDAR_COLUMNS, + EXOGENOUS_COLUMNS, + EXOGENOUS_LAGS, + HISTORY_TAIL_DAYS, + build_calendar_columns, + build_long_lag_columns, + canonical_feature_columns, +) _ORIGIN = date(2026, 6, 30) _HORIZON = 14 diff --git a/app/features/scenarios/tests/test_future_frame_leakage.py b/app/features/scenarios/tests/test_future_frame_leakage.py index 4a4659de..32e51afa 100644 --- a/app/features/scenarios/tests/test_future_frame_leakage.py +++ b/app/features/scenarios/tests/test_future_frame_leakage.py @@ -1,9 +1,16 @@ -"""Leakage spec for the future feature frame — LOAD-BEARING (PRP-27 Phase A). +"""Leakage spec for the scenarios future frame — LOAD-BEARING (PRP-27 Phase A). This file IS the spec, mirroring ``app/features/featuresets/tests/test_leakage.py`` and ``app/features/scenarios/tests/test_leakage.py``: it must NEVER be weakened to make a feature pass (AGENTS.md § Safety). +Its scope is the parts of the future frame the **scenarios slice** owns: the +assumption-driven exogenous columns (``build_exogenous_columns``) and the +end-to-end assembled frame (``assemble_future_frame``). The shared pure builders +(``build_calendar_columns``, ``build_long_lag_columns``) moved to +``app/shared/feature_frames`` in MLZOO-A and are spec'd by the load-bearing +``app/shared/feature_frames/tests/test_leakage.py``. + The model-driven scenario path re-forecasts demand through a feature-consuming regressor, which means it builds a *future feature frame*. A horizon day has no observed target, so the invariant is: @@ -16,15 +23,9 @@ Concretely this spec asserts: -1. ``build_long_lag_columns`` returns only values drawn from ``history_tail`` - (entirely ``<= T``) or ``NaN`` — never a value from the future target - series. -2. A lag cell whose source day lies at or after the first horizon day is - ``NaN`` — the generator never fabricates or recursively predicts it. -3. Calendar columns are independent of the target series entirely. -4. An assumption window that falls before the forecast origin contributes +1. An assumption window that falls before the forecast origin contributes nothing — every horizon day lies strictly after ``T``. -5. Every non-``NaN`` ``lag_*`` cell in an assembled frame is a member of +2. Every non-``NaN`` ``lag_*`` cell in an assembled frame is a member of ``history_tail``. """ @@ -34,14 +35,11 @@ from datetime import date, timedelta from app.features.scenarios.feature_frame import ( - EXOGENOUS_LAGS, assemble_future_frame, - build_calendar_columns, build_exogenous_columns, - build_long_lag_columns, - canonical_feature_columns, ) from app.features.scenarios.schemas import PriceAssumption, ScenarioAssumptions +from app.shared.feature_frames import EXOGENOUS_LAGS, canonical_feature_columns # The forecast origin T is the last observed day; the horizon runs T+1 … T+H. _ORIGIN = date(2026, 6, 30) @@ -56,70 +54,6 @@ _FUTURE_TARGETS = {9000.0 + float(i) for i in range(_HORIZON)} -def test_long_lag_columns_never_emit_a_future_target() -> None: - """Every non-NaN long-lag cell is drawn from the observed history. - - ``build_long_lag_columns`` takes ONLY ``history_tail`` as data input — it - is structurally incapable of reading the future target series. This spec - pins that: no value disjoint from ``history_tail`` may ever appear. - """ - history_values = set(_HISTORY_TAIL) - columns = build_long_lag_columns(_HISTORY_TAIL, _HORIZON) - - for name, values in columns.items(): - for cell in values: - if math.isnan(cell): - continue - assert cell in history_values, ( - f"{name} emitted {cell}, which is not an observed history value" - ) - assert cell not in _FUTURE_TARGETS, f"{name} leaked a future target value {cell}" - - -def test_long_lag_source_index_is_never_at_or_after_the_horizon() -> None: - """A lag cell is populated only when its source day lies at/before ``T``. - - For lag ``k`` and horizon day ``j`` the source index into ``history_tail`` - is ``(j-1)-k``. A non-NaN cell REQUIRES that index to be negative — i.e. - the source target lies at or before the origin ``T``. A non-negative index - would point at a future horizon day and MUST yield ``NaN``. - """ - columns = build_long_lag_columns(_HISTORY_TAIL, _HORIZON) - for lag in EXOGENOUS_LAGS: - column = columns[f"lag_{lag}"] - for j in range(1, _HORIZON + 1): - source_index = (j - 1) - lag - cell = column[j - 1] - if source_index >= 0: - assert math.isnan(cell), ( - f"lag_{lag} day {j}: source index {source_index} is in the " - "future but the cell is not NaN" - ) - else: - assert not math.isnan(cell), ( - f"lag_{lag} day {j}: source index {source_index} is in " - "history but the cell is NaN" - ) - - -def test_calendar_columns_are_independent_of_the_target_series() -> None: - """Calendar columns read only the dates — they cannot leak the target. - - ``build_calendar_columns`` does not accept the target series at all; this - spec pins that structural fact by asserting its output is identical no - matter what history precedes it. - """ - calendar_a = build_calendar_columns(_HORIZON_DATES) - calendar_b = build_calendar_columns(_HORIZON_DATES) - assert calendar_a == calendar_b - # No calendar value coincides with a history or future target value. - history_values = set(_HISTORY_TAIL) - for values in calendar_a.values(): - for cell in values: - assert cell not in history_values - assert cell not in _FUTURE_TARGETS - - def test_assumption_window_before_origin_has_no_effect() -> None: """A price window entirely before the forecast origin contributes nothing. diff --git a/app/shared/feature_frames/__init__.py b/app/shared/feature_frames/__init__.py new file mode 100644 index 00000000..b79d7d92 --- /dev/null +++ b/app/shared/feature_frames/__init__.py @@ -0,0 +1,39 @@ +"""Shared feature-frame contract for feature-aware forecasting (MLZOO-A). + +The single, cross-cutting home for the regression feature-frame contract — the +pinned constants, the canonical column set, the :class:`FutureFeatureFrame` +carrier, the leakage-safe pure builders, and the :class:`FeatureSafety` +taxonomy. Both the ``forecasting`` slice (historical training frame) and the +``scenarios`` slice (future prediction frame) import from here, so the contract +is defined exactly once. + +This package is leaf-level: it imports nothing from ``app/features/**``. +""" + +from app.shared.feature_frames.contract import ( + CALENDAR_COLUMNS, + EXOGENOUS_COLUMNS, + EXOGENOUS_LAGS, + FEATURE_CLASS, + HISTORY_TAIL_DAYS, + FeatureSafety, + FutureFeatureFrame, + build_calendar_columns, + build_long_lag_columns, + canonical_feature_columns, + feature_safety, +) + +__all__ = [ + "CALENDAR_COLUMNS", + "EXOGENOUS_COLUMNS", + "EXOGENOUS_LAGS", + "FEATURE_CLASS", + "HISTORY_TAIL_DAYS", + "FeatureSafety", + "FutureFeatureFrame", + "build_calendar_columns", + "build_long_lag_columns", + "canonical_feature_columns", + "feature_safety", +] diff --git a/app/shared/feature_frames/contract.py b/app/shared/feature_frames/contract.py new file mode 100644 index 00000000..5102a389 --- /dev/null +++ b/app/shared/feature_frames/contract.py @@ -0,0 +1,240 @@ +"""Shared feature-frame contract for feature-aware forecasting (MLZOO-A). + +This module is the **single source of truth** for the regression feature-frame +contract: the pinned modelling constants, the canonical feature-column set and +its order, the :class:`FutureFeatureFrame` carrier, the leakage-safe pure +column builders, and the :class:`FeatureSafety` leakage taxonomy. + +Before MLZOO-A the contract was duplicated across two vertical slices — the +``forecasting`` slice (the historical training frame) and the ``scenarios`` +slice (the future prediction frame) — because a cross-slice import is forbidden +(AGENTS.md § Architecture). A cross-cutting package under ``app/shared/`` is the +sanctioned home: both slices now *import* this one definition rather than +re-typing it, so a silent column-order mismatch is structurally impossible. + +LEAF-LEVEL: ``app/shared/**`` may NEVER import from ``app/features/**``. Every +builder here is pure (stdlib ``math`` / ``datetime`` / ``dataclasses`` only) so +that invariant holds; ``tests/test_contract.py`` enforces it with an AST walk. + +The leakage rule the future-frame builders obey (mirrors PRP-27 and the +load-bearing ``tests/test_leakage.py`` alongside this module): + + A future feature value for horizon day ``D`` may use ONLY information + knowable at the forecast origin ``T``: the observed history up to and + including ``T``, or the calendar (a pure function of the date). It may + NEVER read an observed target at a horizon day. +""" + +from __future__ import annotations + +import math +from dataclasses import dataclass +from datetime import date, timedelta +from enum import Enum + +# ── PINNED modelling constants (PRP-27 DECISIONS LOCKED #10/#11) ── +# Lag offsets (days) for the target long-lag columns: daily, weekly, +# fortnightly, and a four-week lag covering the dominant retail seasonality. +EXOGENOUS_LAGS: tuple[int, ...] = (1, 7, 14, 28) +# Observed-target tail (days, ending at the forecast origin T) fed to the +# generator — 90 comfortably exceeds the largest lag offset (28). +HISTORY_TAIL_DAYS: int = 90 + +# Fixed calendar columns — each a pure function of the date, never a leak. +CALENDAR_COLUMNS: tuple[str, ...] = ( + "dow_sin", + "dow_cos", + "month_sin", + "month_cos", + "is_weekend", + "is_month_end", +) +# Fixed current-day exogenous columns — driven by the scenario assumptions +# (the planner's posited future inputs) and by timeless attributes (the +# calendar, the product launch date). Every value is knowable at origin T. +EXOGENOUS_COLUMNS: tuple[str, ...] = ( + "price_factor", + "promo_active", + "is_holiday", + "days_since_launch", +) + + +@dataclass +class FutureFeatureFrame: + """A horizon-length feature matrix for one ``(store, product)`` series. + + Attributes: + dates: The horizon days ``T+1 … T+horizon`` (chronological). + feature_columns: Column order — matches the trained bundle exactly. + matrix: Row-major ``[horizon][n_features]``; ``NaN`` is allowed and + expected (a long-lag cell whose source target lies in the future, + or ``days_since_launch`` when the product has no launch date). + """ + + dates: list[date] + feature_columns: list[str] + matrix: list[list[float]] + + +def canonical_feature_columns(lags: tuple[int, ...] = EXOGENOUS_LAGS) -> list[str]: + """Return the fixed, ordered regression feature-column list. + + This is the single source of truth for the regression feature set. The + Phase B training path persists exactly this list in the model bundle's + metadata; the future frame reproduces it column-for-column. The column + set is deliberately *fixed* (not horizon-dependent): for a long horizon + some target-lag columns are mostly ``NaN``, which the NaN-tolerant + estimator handles — far safer than a horizon-varying column set. + + Args: + lags: Target long-lag offsets (defaults to the pinned ``EXOGENOUS_LAGS``). + + Returns: + Ordered column names: target lags, then calendar, then exogenous. + """ + target_lags = [f"lag_{k}" for k in lags] + return [*target_lags, *CALENDAR_COLUMNS, *EXOGENOUS_COLUMNS] + + +def _is_month_end(point_date: date) -> bool: + """True when ``point_date`` is the last day of its month.""" + return (point_date + timedelta(days=1)).month != point_date.month + + +def build_calendar_columns(dates: list[date]) -> dict[str, list[float]]: + """Build the calendar feature columns — a pure function of each date. + + Calendar features carry zero leakage risk: they read only the date + itself, never the target series. Day-of-week and month use cyclical + (sin/cos) encoding so the estimator sees their periodic structure. + + Args: + dates: The horizon days. + + Returns: + A mapping of every name in :data:`CALENDAR_COLUMNS` to its per-day + values. + """ + columns: dict[str, list[float]] = {name: [] for name in CALENDAR_COLUMNS} + for point_date in dates: + dow = point_date.weekday() # 0 = Monday … 6 = Sunday + month = point_date.month + columns["dow_sin"].append(math.sin(2.0 * math.pi * dow / 7.0)) + columns["dow_cos"].append(math.cos(2.0 * math.pi * dow / 7.0)) + columns["month_sin"].append(math.sin(2.0 * math.pi * month / 12.0)) + columns["month_cos"].append(math.cos(2.0 * math.pi * month / 12.0)) + columns["is_weekend"].append(1.0 if dow >= 5 else 0.0) + columns["is_month_end"].append(1.0 if _is_month_end(point_date) else 0.0) + return columns + + +def build_long_lag_columns( + history_tail: list[float], + horizon: int, + lags: tuple[int, ...] = EXOGENOUS_LAGS, +) -> dict[str, list[float]]: + """Build the target long-lag columns — the leakage-critical helper. + + ``history_tail`` is the observed target series ending at the forecast + origin ``T``: ``history_tail[-1] == y[T]``, ``history_tail[-2] == y[T-1]``, + and so on. The lag-``k`` column at horizon day ``T+j`` (``j`` in + ``1 … horizon``) is the observed target ``y[T+j-k]``. + + SAFETY (PRP-27 DECISIONS LOCKED #4): the source index into + ``history_tail`` is ``idx = (j - 1) - k``. The cell is populated **only + when ``idx < 0``** — i.e. the source day ``T+j-k`` lies at or before the + origin ``T`` and therefore inside ``history_tail``. When ``idx >= 0`` the + source day is a *future* horizon day with no observed target, so the cell + is ``NaN`` — never a recursive prediction, never a fabricated value. This + function structurally **cannot** read a future target: its only data + input is ``history_tail`` (entirely ``<= T``). + + Args: + history_tail: Observed target values ending at the origin ``T``. + horizon: Number of horizon days. + lags: Lag offsets (defaults to the pinned ``EXOGENOUS_LAGS``). + + Returns: + A mapping ``"lag_{k}" -> [horizon values]``; out-of-range cells are + ``NaN``. + """ + tail_len = len(history_tail) + columns: dict[str, list[float]] = {} + for lag in lags: + column: list[float] = [] + for j in range(1, horizon + 1): + # Negative index from the end of history_tail. idx < 0 means the + # source day T+j-k is at/before the origin T — safe to read. + idx = (j - 1) - lag + if idx < 0 and -tail_len <= idx: + column.append(float(history_tail[idx])) + else: + column.append(math.nan) + columns[f"lag_{lag}"] = column + return columns + + +class FeatureSafety(Enum): + """Leakage classification of a feature column in a FUTURE prediction frame. + + Every canonical feature column falls into exactly one class. The + classification governs how a future-frame builder may populate the column + for a horizon day ``D`` (which has no observed target): + + * ``SAFE`` — a pure function of the date (calendar features); reading it + can never leak a future target. + * ``CONDITIONALLY_SAFE`` — a target long-lag; safe only when its source + day lies at or before the forecast origin ``T``, otherwise the cell is + ``NaN``. + * ``UNSAFE_UNLESS_SUPPLIED`` — a future price / promotion input; knowable + at ``T`` ONLY because the caller posits it (a scenario assumption). It + is never inferred from observed data. + """ + + SAFE = "safe" + CONDITIONALLY_SAFE = "conditionally_safe" + UNSAFE_UNLESS_SUPPLIED = "unsafe_unless_supplied" + + +# The executable taxonomy: every canonical column → its FeatureSafety class. +# ``is_holiday`` and ``days_since_launch`` are SAFE — a calendar holiday row is +# a timeless attribute and ``days_since_launch`` is a pure function of the date +# once the launch date is known. ``price_factor`` / ``promo_active`` are +# UNSAFE_UNLESS_SUPPLIED — only a posited scenario assumption makes them +# knowable for a future day. +FEATURE_CLASS: dict[str, FeatureSafety] = { + **{f"lag_{k}": FeatureSafety.CONDITIONALLY_SAFE for k in EXOGENOUS_LAGS}, + "dow_sin": FeatureSafety.SAFE, + "dow_cos": FeatureSafety.SAFE, + "month_sin": FeatureSafety.SAFE, + "month_cos": FeatureSafety.SAFE, + "is_weekend": FeatureSafety.SAFE, + "is_month_end": FeatureSafety.SAFE, + "price_factor": FeatureSafety.UNSAFE_UNLESS_SUPPLIED, + "promo_active": FeatureSafety.UNSAFE_UNLESS_SUPPLIED, + "is_holiday": FeatureSafety.SAFE, + "days_since_launch": FeatureSafety.SAFE, +} + + +def feature_safety(column: str) -> FeatureSafety: + """Return the leakage classification of a feature column. + + Args: + column: A feature-column name (e.g. ``"lag_7"``, ``"dow_sin"``). + + Returns: + The column's :class:`FeatureSafety` class. A ``lag_*`` column with a + custom offset not literally in :data:`FEATURE_CLASS` resolves to + ``CONDITIONALLY_SAFE`` — every target lag is conditionally safe. + + Raises: + KeyError: When ``column`` is neither a known column nor a ``lag_*`` + column — callers must classify every column they emit. + """ + if column in FEATURE_CLASS: + return FEATURE_CLASS[column] + if column.startswith("lag_"): + return FeatureSafety.CONDITIONALLY_SAFE + raise KeyError(f"Unclassified feature column: {column!r}") diff --git a/app/shared/feature_frames/tests/__init__.py b/app/shared/feature_frames/tests/__init__.py new file mode 100644 index 00000000..d7a9bd24 --- /dev/null +++ b/app/shared/feature_frames/tests/__init__.py @@ -0,0 +1 @@ +"""Tests for the shared feature-frame contract package.""" diff --git a/app/shared/feature_frames/tests/test_contract.py b/app/shared/feature_frames/tests/test_contract.py new file mode 100644 index 00000000..0b68fd6f --- /dev/null +++ b/app/shared/feature_frames/tests/test_contract.py @@ -0,0 +1,142 @@ +"""Unit tests for the shared feature-frame contract (MLZOO-A). + +Covers the canonical column set + order, the pinned constants, the +:class:`FeatureSafety` taxonomy coverage, the :class:`FutureFeatureFrame` +shape, builder determinism, and the leaf-level architectural invariant +(``app/shared/**`` never imports ``app/features/**``). + +The leakage invariants live separately in ``test_leakage.py`` (load-bearing). +""" + +from __future__ import annotations + +import ast +from datetime import date, timedelta +from pathlib import Path + +from app.shared.feature_frames import ( + CALENDAR_COLUMNS, + EXOGENOUS_COLUMNS, + EXOGENOUS_LAGS, + HISTORY_TAIL_DAYS, + FeatureSafety, + FutureFeatureFrame, + build_calendar_columns, + canonical_feature_columns, + feature_safety, +) + +_ORIGIN = date(2026, 6, 30) +_HORIZON = 14 +_HORIZON_DATES = [_ORIGIN + timedelta(days=offset) for offset in range(1, _HORIZON + 1)] + + +# --- pinned constants --------------------------------------------------------- + + +def test_pinned_constants() -> None: + """The pinned modelling constants hold their decided values.""" + assert EXOGENOUS_LAGS == (1, 7, 14, 28) + assert HISTORY_TAIL_DAYS == 90 + + +def test_canonical_feature_columns_order() -> None: + """The canonical column list is target lags, then calendar, then exogenous.""" + columns = canonical_feature_columns() + assert columns[:4] == ["lag_1", "lag_7", "lag_14", "lag_28"] + assert columns[4 : 4 + len(CALENDAR_COLUMNS)] == list(CALENDAR_COLUMNS) + assert columns[-len(EXOGENOUS_COLUMNS) :] == list(EXOGENOUS_COLUMNS) + assert len(columns) == len(EXOGENOUS_LAGS) + len(CALENDAR_COLUMNS) + len(EXOGENOUS_COLUMNS) + + +# --- FeatureSafety taxonomy --------------------------------------------------- + + +def test_feature_class_covers_every_canonical_column() -> None: + """Every canonical column resolves to a FeatureSafety class — no KeyError.""" + for column in canonical_feature_columns(): + assert isinstance(feature_safety(column), FeatureSafety) + + +def test_calendar_columns_are_all_SAFE() -> None: + """Calendar columns are pure functions of the date — always SAFE.""" + for column in CALENDAR_COLUMNS: + assert feature_safety(column) is FeatureSafety.SAFE + + +def test_lag_columns_are_CONDITIONALLY_SAFE() -> None: + """Target long-lag columns — including custom offsets — are conditionally safe.""" + for lag in EXOGENOUS_LAGS: + assert feature_safety(f"lag_{lag}") is FeatureSafety.CONDITIONALLY_SAFE + # A custom lag offset not literally in FEATURE_CLASS still classifies. + assert feature_safety("lag_3") is FeatureSafety.CONDITIONALLY_SAFE + + +def test_exogenous_price_and_promo_are_unsafe_unless_supplied() -> None: + """Future price / promotion inputs are knowable only when posited.""" + assert feature_safety("price_factor") is FeatureSafety.UNSAFE_UNLESS_SUPPLIED + assert feature_safety("promo_active") is FeatureSafety.UNSAFE_UNLESS_SUPPLIED + + +def test_feature_safety_rejects_an_unclassified_column() -> None: + """A genuinely unknown column raises — callers must classify every column.""" + try: + feature_safety("mystery_feature") + except KeyError: + pass + else: + raise AssertionError("feature_safety must raise KeyError for an unknown column") + + +# --- FutureFeatureFrame dataclass --------------------------------------------- + + +def test_future_feature_frame_dataclass_shape() -> None: + """FutureFeatureFrame carries dates, feature_columns, and a row-major matrix.""" + columns = canonical_feature_columns() + frame = FutureFeatureFrame( + dates=list(_HORIZON_DATES), + feature_columns=columns, + matrix=[[0.0] * len(columns) for _ in _HORIZON_DATES], + ) + assert frame.dates == _HORIZON_DATES + assert frame.feature_columns == columns + assert len(frame.matrix) == _HORIZON + assert all(len(row) == len(columns) for row in frame.matrix) + + +# --- builder determinism ------------------------------------------------------ + + +def test_build_calendar_columns_is_deterministic() -> None: + """Calendar columns depend only on the dates — two calls match exactly.""" + first = build_calendar_columns(_HORIZON_DATES) + second = build_calendar_columns(list(_HORIZON_DATES)) + assert first == second + assert set(first) == set(CALENDAR_COLUMNS) + for values in first.values(): + assert len(values) == _HORIZON + + +# --- architectural invariant -------------------------------------------------- + + +def test_shared_package_imports_nothing_from_features() -> None: + """``app/shared/**`` is leaf-level — it may never import a vertical slice. + + Walks every ``.py`` file in the package and asserts no module imports a + name under ``app.features`` (AGENTS.md § Architecture). + """ + pkg_dir = Path(__file__).resolve().parents[1] # app/shared/feature_frames/ + for py_file in pkg_dir.rglob("*.py"): + source = py_file.read_text(encoding="utf-8") + for node in ast.walk(ast.parse(source)): + if isinstance(node, ast.ImportFrom) and node.module: + assert not node.module.startswith("app.features"), ( + f"ARCHITECTURE BREACH: {py_file} imports from {node.module}" + ) + if isinstance(node, ast.Import): + for alias in node.names: + assert not alias.name.startswith("app.features"), ( + f"ARCHITECTURE BREACH: {py_file} imports {alias.name}" + ) diff --git a/app/shared/feature_frames/tests/test_leakage.py b/app/shared/feature_frames/tests/test_leakage.py new file mode 100644 index 00000000..56ba62f4 --- /dev/null +++ b/app/shared/feature_frames/tests/test_leakage.py @@ -0,0 +1,116 @@ +"""Leakage spec for the shared feature-frame builders — LOAD-BEARING (MLZOO-A). + +This file IS the spec, mirroring ``app/features/featuresets/tests/test_leakage.py`` +and ``app/features/scenarios/tests/test_future_frame_leakage.py``: it must NEVER +be weakened to make a feature pass (AGENTS.md § Safety). + +A feature-aware model re-forecasts demand through a *future feature frame*. A +horizon day has no observed target, so the invariant the shared pure builders +(:func:`build_long_lag_columns`, :func:`build_calendar_columns`) obey is: + + A future feature value for horizon day ``D`` may use ONLY information + knowable at the forecast origin ``T``: the observed history up to and + including ``T``, or the calendar (a pure function of the date). It may + NEVER read an observed target at a horizon day ``D`` (which lies after + ``T``). + +Concretely this spec asserts: + +1. ``build_long_lag_columns`` returns only values drawn from ``history_tail`` + (entirely ``<= T``) or ``NaN`` — never a value from the future target + series. +2. A lag cell whose source day lies at or after the first horizon day is + ``NaN`` — the generator never fabricates or recursively predicts it. +3. Calendar columns are independent of the target series entirely. + +The assumption-driven exogenous columns and the assembled-frame end-to-end +checks stay in ``app/features/scenarios/tests/test_future_frame_leakage.py`` — +those builders live in the scenarios slice. +""" + +from __future__ import annotations + +import math +from datetime import date, timedelta + +from app.shared.feature_frames import ( + EXOGENOUS_LAGS, + build_calendar_columns, + build_long_lag_columns, +) + +# The forecast origin T is the last observed day; the horizon runs T+1 … T+H. +_ORIGIN = date(2026, 6, 30) +_HORIZON = 21 +_HORIZON_DATES = [_ORIGIN + timedelta(days=offset) for offset in range(1, _HORIZON + 1)] + +# Observed history (all <= T): 90 distinct values 1000.0 … 1089.0. +# history_tail[-1] == y[T], the origin observation. +_HISTORY_TAIL = [1000.0 + float(i) for i in range(90)] +# A DISJOINT "future target" series the generator must never be able to read. +# Any of these values appearing in a feature cell is a leak. +_FUTURE_TARGETS = {9000.0 + float(i) for i in range(_HORIZON)} + + +def test_long_lag_columns_never_emit_a_future_target() -> None: + """Every non-NaN long-lag cell is drawn from the observed history. + + ``build_long_lag_columns`` takes ONLY ``history_tail`` as data input — it + is structurally incapable of reading the future target series. This spec + pins that: no value disjoint from ``history_tail`` may ever appear. + """ + history_values = set(_HISTORY_TAIL) + columns = build_long_lag_columns(_HISTORY_TAIL, _HORIZON) + + for name, values in columns.items(): + for cell in values: + if math.isnan(cell): + continue + assert cell in history_values, ( + f"{name} emitted {cell}, which is not an observed history value" + ) + assert cell not in _FUTURE_TARGETS, f"{name} leaked a future target value {cell}" + + +def test_long_lag_source_index_is_never_at_or_after_the_horizon() -> None: + """A lag cell is populated only when its source day lies at/before ``T``. + + For lag ``k`` and horizon day ``j`` the source index into ``history_tail`` + is ``(j-1)-k``. A non-NaN cell REQUIRES that index to be negative — i.e. + the source target lies at or before the origin ``T``. A non-negative index + would point at a future horizon day and MUST yield ``NaN``. + """ + columns = build_long_lag_columns(_HISTORY_TAIL, _HORIZON) + for lag in EXOGENOUS_LAGS: + column = columns[f"lag_{lag}"] + for j in range(1, _HORIZON + 1): + source_index = (j - 1) - lag + cell = column[j - 1] + if source_index >= 0: + assert math.isnan(cell), ( + f"lag_{lag} day {j}: source index {source_index} is in the " + "future but the cell is not NaN" + ) + else: + assert not math.isnan(cell), ( + f"lag_{lag} day {j}: source index {source_index} is in " + "history but the cell is NaN" + ) + + +def test_calendar_columns_are_independent_of_the_target_series() -> None: + """Calendar columns read only the dates — they cannot leak the target. + + ``build_calendar_columns`` does not accept the target series at all; this + spec pins that structural fact by asserting its output is identical no + matter what history precedes it. + """ + calendar_a = build_calendar_columns(_HORIZON_DATES) + calendar_b = build_calendar_columns(_HORIZON_DATES) + assert calendar_a == calendar_b + # No calendar value coincides with a history or future target value. + history_values = set(_HISTORY_TAIL) + for values in calendar_a.values(): + for cell in values: + assert cell not in history_values + assert cell not in _FUTURE_TARGETS diff --git a/examples/models/feature_frame_contract.md b/examples/models/feature_frame_contract.md new file mode 100644 index 00000000..7e0073a5 --- /dev/null +++ b/examples/models/feature_frame_contract.md @@ -0,0 +1,115 @@ +# Feature-Frame Contract + +The contract a **feature-aware** forecasting model (the regression forecaster +today; LightGBM / XGBoost / Prophet-like models in the MLZOO sequence) stands +on. The single source of truth in code is +[`app/shared/feature_frames`](../../app/shared/feature_frames/) — the pinned +constants, the canonical column set and order, the `FutureFeatureFrame` +carrier, the leakage-safe pure builders, and the `FeatureSafety` taxonomy. + +A feature-aware model consumes **two** matrices with the *same columns in the +same order*: + +| Frame | Built by | Shape | Rows | +|-------|----------|-------|------| +| Historical training frame | `ForecastingService._build_regression_features` → `_assemble_regression_rows` | `[n_observations, n_features]` | observed days in `[train_start, train_end]` | +| Future prediction frame | `app/features/scenarios/feature_frame.build_future_frame` → `assemble_future_frame` | `[horizon, n_features]` | the horizon days `T+1 … T+horizon` | + +`T` is the **forecast origin** — the last training day (`train_end`). + +## Historical training frame + +- One row per observed day in the SQL window `WHERE date >= train_start AND + date <= train_end`. **That `date <= train_end` filter IS the cutoff guard** — + no row can be assembled for a day after the origin `T`. +- `lag_k` at row `i` reads `quantity[i - k]` — a strictly earlier observation — + or `NaN` when `i < k` (no source day exists yet). +- Calendar columns are pure functions of the row's date. +- `price_factor` / `promo_active` / `is_holiday` / `days_since_launch` read the + row's **same-day** observed attributes — never a future day. +- Spec: [`test_regression_features_leakage.py`](../../app/features/forecasting/tests/test_regression_features_leakage.py) + (load-bearing — sequential targets make any leakage mathematically detectable). + +## Future prediction frame + +- One row per horizon day `T+1 … T+horizon`. +- `lag_k` at horizon day `j` reads `history_tail[(j-1) - k]` **only when + `(j-1)-k < 0`** — i.e. the source day lies at or before the origin `T` and is + therefore inside the observed history tail. When `(j-1)-k >= 0` the source day + is itself a future horizon day with no observed target, so the cell is `NaN`. + **There is no recursion in v1** — a `NaN` lag is never back-filled with a + prediction. +- Calendar columns are pure functions of the horizon date. +- `price_factor` / `promo_active` are knowable for a future day **only because + the caller posits them** (a scenario assumption); `is_holiday` and + `days_since_launch` are timeless date attributes. +- Spec: [`app/shared/feature_frames/tests/test_leakage.py`](../../app/shared/feature_frames/tests/test_leakage.py) + (the shared pure builders) and + [`app/features/scenarios/tests/test_future_frame_leakage.py`](../../app/features/scenarios/tests/test_future_frame_leakage.py) + (the assumption-driven columns + the assembled frame). + +## The canonical column set + +`canonical_feature_columns()` returns these **14 columns, in this order**: + +``` +lag_1, lag_7, lag_14, lag_28, +dow_sin, dow_cos, month_sin, month_cos, is_weekend, is_month_end, +price_factor, promo_active, is_holiday, days_since_launch +``` + +The set is deliberately **fixed** (not horizon-dependent): for a long horizon +some lag columns are mostly `NaN`, which a NaN-tolerant estimator handles — far +safer than a column set that changes shape with the horizon. The trained model +bundle persists exactly this list in its metadata; the future frame reproduces +it column-for-column. + +Pinned constants: `EXOGENOUS_LAGS = (1, 7, 14, 28)`, `HISTORY_TAIL_DAYS = 90` +(the observed-target tail length persisted in the bundle so the future frame +can resolve the longest lag). + +## Feature-class taxonomy + +Every column carries a `FeatureSafety` class (see `FEATURE_CLASS` / +`feature_safety()` in `app/shared/feature_frames`). This is the executable form +of the feature-family table in +[`PRPs/ai_docs/exogenous-regressor-forecasting.md`](../../PRPs/ai_docs/exogenous-regressor-forecasting.md) §2. + +| Column | Class | How to populate a future day — and the leakage trap | +|--------|-------|------------------------------------------------------| +| `lag_1`, `lag_7`, `lag_14`, `lag_28` | `CONDITIONALLY_SAFE` | Read `history_tail` only when the source day `<= T`; otherwise `NaN`. **Trap:** filling a future-sourced lag with a prediction (recursion) or a fabricated value. | +| `dow_sin`, `dow_cos`, `month_sin`, `month_cos`, `is_weekend`, `is_month_end` | `SAFE` | Pure function of the date — compute directly. No trap: a calendar feature cannot leak the target. | +| `is_holiday` | `SAFE` | The `calendar` table is a timeless attribute; reading a horizon day's holiday flag is not leakage. | +| `days_since_launch` | `SAFE` | `(date - launch_date).days` — a pure function of the date once the launch date is known. `NaN` when the product has no launch date. | +| `price_factor` | `UNSAFE_UNLESS_SUPPLIED` | Knowable for a future day **only** if the caller posits it (a price assumption). Never inferred from observed data. Default `1.0` (no change). | +| `promo_active` | `UNSAFE_UNLESS_SUPPLIED` | Knowable for a future day **only** if the caller posits a promotion. Default `0.0` (no promotion). | + +## The NaN-as-unknown rule + +A builder emits `math.nan` for a cell whose source is **genuinely unknowable** +at origin `T` — a long-lag whose source day lies in the horizon, or +`days_since_launch` for a product with no launch date. `NaN` means *unknown*; it +is never silently replaced with a fabricated default such as `0.0`. + +`HistGradientBoostingRegressor` tolerates `NaN` natively. A future model that is +**not** NaN-tolerant must impute explicitly inside its own `fit`/`predict` — the +shared frame builders must not impute on its behalf. + +## How a future advanced model plugs in + +A new feature-aware model (PRP-MLZOO-B onward): + +1. Subclasses `BaseForecaster` and sets `requires_features: ClassVar[bool] = True`. + `ForecastingService.train_model` / `predict` branch on this flag — no + `isinstance` check, no `model_type` string comparison. +2. Reuses the **shared** frame builders and `canonical_feature_columns()` — it + writes **zero** new contract code, so it cannot drift from the regression + contract. +3. Consumes the historical frame for `fit(y, X)` and (via + `POST /scenarios/simulate`) the future frame for `predict(horizon, X)`. + +Known limitation: **backtesting is not wired for feature-aware models.** The +backtest fold loop calls `model.fit(y_train)` target-only; a feature-aware model +raises `ValueError` there — a loud, non-leaky failure, pinned by +`test_feature_aware_model_fails_loud_in_backtest`. Feature-aware backtesting is +PRP-MLZOO-B scope. diff --git a/examples/models/model_interface.md b/examples/models/model_interface.md index bf5d391a..fa1d6303 100644 --- a/examples/models/model_interface.md +++ b/examples/models/model_interface.md @@ -86,6 +86,14 @@ Check if the model has been fitted. **Returns:** - `True` if `fit()` has been called successfully +#### `requires_features: ClassVar[bool]` + +Class attribute — `True` when `fit()`/`predict()` REQUIRE a non-`None` `X` +feature frame. Baseline (target-only) models leave it `False`; feature-aware +models (e.g. the regression forecaster) override it to `True`. The forecasting +service branches on this flag instead of an `isinstance` check or a +`model_type` string comparison. + --- ## Model Configurations @@ -121,6 +129,24 @@ Each model type has a corresponding configuration schema: } ``` +### RegressionModelConfig + +```python +{ + "schema_version": "1.0", + "model_type": "regression", + "max_iter": 200, # 10-1000 (boosting iterations) + "learning_rate": 0.05, # 0.001-1.0 + "max_depth": 6 # 1-20 +} +``` + +A **feature-aware** model (`requires_features = True`): it wraps scikit-learn's +`HistGradientBoostingRegressor` and consumes a per-day exogenous feature frame. +The feature-frame contract — the canonical column set, the historical vs future +frame shapes, and the leakage taxonomy — is documented in +[`feature_frame_contract.md`](feature_frame_contract.md). + --- ## Model Formulas @@ -149,6 +175,16 @@ Predicts the value from the same position in the previous seasonal cycle. Predicts the average of the last `window_size` observations. +### Regression Forecaster + +``` +ŷ[t+h] = HistGradientBoostingRegressor.predict(X[t+h]) +``` + +Predicts each horizon day from its exogenous feature row `X[t+h]` (target +long-lags, calendar, and posited price/promotion inputs). Unlike the baselines +it REQUIRES a feature frame — see [`feature_frame_contract.md`](feature_frame_contract.md). + --- ## Persistence (ModelBundle)