diff --git a/app/features/demo/pipeline.py b/app/features/demo/pipeline.py index 4302565f..10669ce1 100644 --- a/app/features/demo/pipeline.py +++ b/app/features/demo/pipeline.py @@ -29,7 +29,7 @@ import time from collections.abc import AsyncIterator, Awaitable, Callable from dataclasses import dataclass, field -from datetime import date, timedelta +from datetime import UTC, date, datetime, timedelta from pathlib import Path from typing import Any @@ -55,8 +55,10 @@ DEMO_SCENARIO = "demo_minimal" DEMO_SEED_STORES = 3 DEMO_SEED_PRODUCTS = 10 -DEMO_SEED_START = date(2024, 10, 1) -DEMO_SEED_END = date(2024, 12, 31) +# Seed window is anchored to *today* so the showcase always demos +# current-looking data; it runs DEMO_SEED_SPAN_DAYS back from today (92 days +# inclusive). Must stay >= 72 for a non-NaN backtest WAPE (see step_backtest). +DEMO_SEED_SPAN_DAYS = 91 DEMO_MODEL_TYPES: tuple[str, ...] = ("naive", "seasonal_naive", "moving_average") @@ -272,6 +274,8 @@ async def step_seed(ctx: DemoContext, client: _Client) -> StepResult: """Seed the ``demo_minimal`` scenario (skipped when ``skip_seed`` is set).""" if ctx.skip_seed: return ("skip", "skip_seed=true (assuming a seeded database)", {}) + seed_end = datetime.now(UTC).date() + seed_start = seed_end - timedelta(days=DEMO_SEED_SPAN_DAYS) body = await client.request( "seed", "POST", @@ -281,8 +285,8 @@ async def step_seed(ctx: DemoContext, client: _Client) -> StepResult: "seed": ctx.seed, "stores": DEMO_SEED_STORES, "products": DEMO_SEED_PRODUCTS, - "start_date": DEMO_SEED_START.isoformat(), - "end_date": DEMO_SEED_END.isoformat(), + "start_date": seed_start.isoformat(), + "end_date": seed_end.isoformat(), "sparsity": 0.0, "dry_run": False, }, diff --git a/app/features/seeder/schemas.py b/app/features/seeder/schemas.py index 90331aad..7c86d5e2 100644 --- a/app/features/seeder/schemas.py +++ b/app/features/seeder/schemas.py @@ -6,6 +6,8 @@ from pydantic import BaseModel, Field, field_validator, model_validator +from app.shared.seeder.config import default_seed_end_date, default_seed_start_date + VALID_CHANNELS: frozenset[str] = frozenset({"in_store", "online", "click_collect", "wholesale"}) """Allow-list for ``sales_daily.channel`` — mirrors the SQL CHECK.""" @@ -98,12 +100,12 @@ class GenerateParams(BaseModel): description="Number of products to generate", ) start_date: date = Field( - default_factory=lambda: date(2024, 1, 1), - description="Start of date range", + default_factory=default_seed_start_date, + description="Start of date range (defaults to one year before today)", ) end_date: date = Field( - default_factory=lambda: date(2024, 12, 31), - description="End of date range", + default_factory=default_seed_end_date, + description="End of date range (defaults to today)", ) sparsity: float = Field( default=0.0, diff --git a/app/features/seeder/service.py b/app/features/seeder/service.py index d8b5dfb8..5dc0f63f 100644 --- a/app/features/seeder/service.py +++ b/app/features/seeder/service.py @@ -4,7 +4,7 @@ import time from dataclasses import replace -from datetime import date, datetime +from datetime import date, datetime, timedelta from sqlalchemy import func, select from sqlalchemy.ext.asyncio import AsyncSession @@ -26,6 +26,7 @@ from app.features.seeder import schemas from app.shared.seeder import DataSeeder, ScenarioPreset, SeederConfig from app.shared.seeder.config import ( + DEMO_MINIMAL_SPAN_DAYS, BundleConfig, ChangepointConfig, ChangepointEvent, @@ -39,6 +40,8 @@ ReturnsConfig, SparsityConfig, SubstitutionConfig, + default_seed_end_date, + default_seed_start_date, ) logger = get_logger(__name__) @@ -315,14 +318,21 @@ def list_scenarios() -> list[schemas.ScenarioInfo]: Returns: List of ScenarioInfo with preset details. """ + # Date ranges are anchored to *today* so the picker reflects the windows the + # seeder will actually produce. holiday_rush is the one exception: it is + # deliberately calendar-pinned to a 2024 Q4 holiday window. + today = default_seed_end_date() + year_ago = default_seed_start_date() + demo_start = today - timedelta(days=DEMO_MINIMAL_SPAN_DAYS) + scenarios = [ schemas.ScenarioInfo( name="retail_standard", description="Normal retail patterns with mild seasonality and linear trend", stores=10, products=50, - start_date=date(2024, 1, 1), - end_date=date(2024, 12, 31), + start_date=year_ago, + end_date=today, ), schemas.ScenarioInfo( name="holiday_rush", @@ -337,40 +347,40 @@ def list_scenarios() -> list[schemas.ScenarioInfo]: description="Noisy, unpredictable data with frequent anomalies for robustness testing", stores=10, products=50, - start_date=date(2024, 1, 1), - end_date=date(2024, 12, 31), + start_date=year_ago, + end_date=today, ), schemas.ScenarioInfo( name="stockout_heavy", description="Frequent stockouts (25% probability) for inventory modeling", stores=10, products=50, - start_date=date(2024, 1, 1), - end_date=date(2024, 12, 31), + start_date=year_ago, + end_date=today, ), schemas.ScenarioInfo( name="new_launches", description="100 products with gradual launch ramp patterns", stores=10, products=100, - start_date=date(2024, 1, 1), - end_date=date(2024, 12, 31), + start_date=year_ago, + end_date=today, ), schemas.ScenarioInfo( name="sparse", description="50% missing combinations and random date gaps for gap handling", stores=10, products=50, - start_date=date(2024, 1, 1), - end_date=date(2024, 12, 31), + start_date=year_ago, + end_date=today, ), schemas.ScenarioInfo( name="demo_minimal", description="Tiny preset for the make demo target (3 stores x 10 products x 92 days)", stores=3, products=10, - start_date=date(2024, 10, 1), - end_date=date(2024, 12, 31), + start_date=demo_start, + end_date=today, ), ] diff --git a/app/features/seeder/tests/test_service.py b/app/features/seeder/tests/test_service.py index b712db95..f21aa28b 100644 --- a/app/features/seeder/tests/test_service.py +++ b/app/features/seeder/tests/test_service.py @@ -1,11 +1,12 @@ """Unit tests for seeder service layer.""" -from datetime import date +from datetime import date, timedelta from unittest.mock import AsyncMock, MagicMock, patch import pytest from app.features.seeder import schemas, service +from app.shared.seeder.config import DEMO_MINIMAL_SPAN_DAYS, default_seed_end_date class TestListScenarios: @@ -27,14 +28,18 @@ def test_returns_all_scenarios(self): assert "demo_minimal" in names def test_demo_minimal_dimensions(self): - """Test that demo_minimal is the small preset for the make demo target.""" + """Test that demo_minimal is the small preset for the make demo target. + + The window is anchored to *today* so the scenario picker reflects what + the seeder will actually produce. + """ scenarios = service.list_scenarios() demo = next(s for s in scenarios if s.name == "demo_minimal") assert demo.stores == 3 assert demo.products == 10 - assert demo.start_date == date(2024, 10, 1) - assert demo.end_date == date(2024, 12, 31) + assert demo.end_date == default_seed_end_date() + assert demo.start_date == default_seed_end_date() - timedelta(days=DEMO_MINIMAL_SPAN_DAYS) def test_scenario_info_structure(self): """Test that scenarios have required fields.""" diff --git a/app/shared/seeder/config.py b/app/shared/seeder/config.py index cdcdc94a..67e21654 100644 --- a/app/shared/seeder/config.py +++ b/app/shared/seeder/config.py @@ -3,10 +3,30 @@ from __future__ import annotations from dataclasses import dataclass, field -from datetime import date +from datetime import UTC, date, datetime, timedelta from enum import Enum from typing import Literal +DEFAULT_SEED_SPAN_DAYS = 365 +"""Span of the default seeded window. Generated data ends *today* and runs +this many days backwards, so datasets, forecasts, and the demo showcase +always look current instead of being frozen in a hard-coded calendar year.""" + +DEMO_MINIMAL_SPAN_DAYS = 91 +"""Span of the ``demo_minimal`` window (92 calendar days inclusive). Must stay +>= 72 so an expanding backtest with n_splits=3 + horizon=14 + +min_train_size=30 produces a non-NaN WAPE.""" + + +def default_seed_end_date() -> date: + """End of the default seeded window — anchored to the current (UTC) date.""" + return datetime.now(UTC).date() + + +def default_seed_start_date() -> date: + """Start of the default seeded window — ``DEFAULT_SEED_SPAN_DAYS`` before today.""" + return datetime.now(UTC).date() - timedelta(days=DEFAULT_SEED_SPAN_DAYS) + class ScenarioPreset(str, Enum): """Pre-built scenario presets for common testing needs.""" @@ -472,8 +492,8 @@ class SeederConfig: """ seed: int = 42 - start_date: date = field(default_factory=lambda: date(2024, 1, 1)) - end_date: date = field(default_factory=lambda: date(2024, 12, 31)) + start_date: date = field(default_factory=default_seed_start_date) + end_date: date = field(default_factory=default_seed_end_date) dimensions: DimensionConfig = field(default_factory=DimensionConfig) time_series: TimeSeriesConfig = field(default_factory=TimeSeriesConfig) retail: RetailPatternConfig = field(default_factory=RetailPatternConfig) @@ -519,6 +539,10 @@ def from_scenario(cls, scenario: ScenarioPreset, seed: int = 42) -> SeederConfig ) if scenario == ScenarioPreset.HOLIDAY_RUSH: + # Deliberately calendar-pinned: the holiday dates and Q4 monthly + # seasonality below model a specific 2024 Black Friday / Christmas + # window, so this scenario is NOT re-anchored to today. Pass an + # explicit start_date/end_date to shift it. return cls( seed=seed, start_date=date(2024, 10, 1), @@ -606,14 +630,17 @@ def from_scenario(cls, scenario: ScenarioPreset, seed: int = 42) -> SeederConfig ) if scenario == ScenarioPreset.DEMO_MINIMAL: - # Tiny preset for the `make demo` target. Keeps wall-clock comfortable - # on a developer laptop while still producing a non-NaN backtest WAPE - # with strategy=expanding, n_splits=3, horizon=14, min_train_size=30 - # (needs >= 30 + 3*14 = 72 days; 92 days here leaves margin). + # Tiny preset for the `make demo` target. Anchored to *today* so the + # showcase always demos current-looking data; the window runs + # DEMO_MINIMAL_SPAN_DAYS back from today (92 days inclusive). Keeps + # wall-clock comfortable on a developer laptop while still producing + # a non-NaN backtest WAPE with strategy=expanding, n_splits=3, + # horizon=14, min_train_size=30 (needs >= 30 + 3*14 = 72 days). + demo_end = default_seed_end_date() return cls( seed=seed, - start_date=date(2024, 10, 1), - end_date=date(2024, 12, 31), + start_date=demo_end - timedelta(days=DEMO_MINIMAL_SPAN_DAYS), + end_date=demo_end, dimensions=DimensionConfig(stores=3, products=10), time_series=TimeSeriesConfig( base_demand=100, diff --git a/app/shared/seeder/tests/test_config.py b/app/shared/seeder/tests/test_config.py index b4875652..aaed5f9f 100644 --- a/app/shared/seeder/tests/test_config.py +++ b/app/shared/seeder/tests/test_config.py @@ -1,11 +1,14 @@ """Tests for seeder configuration.""" -from datetime import date +from datetime import date, timedelta from app.shared.seeder.config import ( + DEMO_MINIMAL_SPAN_DAYS, ScenarioPreset, SeederConfig, TimeSeriesConfig, + default_seed_end_date, + default_seed_start_date, ) @@ -39,12 +42,16 @@ class TestSeederConfig: """Tests for SeederConfig.""" def test_default_values(self): - """Test default configuration values.""" + """Test default configuration values. + + The default date window is anchored to *today* and runs + DEFAULT_SEED_SPAN_DAYS backwards, so seeded data always looks current. + """ config = SeederConfig() assert config.seed == 42 - assert config.start_date == date(2024, 1, 1) - assert config.end_date == date(2024, 12, 31) + assert config.end_date == default_seed_end_date() + assert config.start_date == default_seed_start_date() assert config.dimensions.stores == 10 assert config.dimensions.products == 50 assert config.batch_size == 1000 @@ -93,15 +100,16 @@ def test_from_scenario_sparse(self): def test_from_scenario_demo_minimal(self): """Test demo_minimal scenario preset. - This preset powers the `make demo` target; the date range MUST cover at - least 72 days so an expanding backtest with n_splits=3 + horizon=14 + - min_train_size=30 produces non-NaN WAPE. + This preset powers the `make demo` target; the window is anchored to + *today* and MUST cover at least 72 days so an expanding backtest with + n_splits=3 + horizon=14 + min_train_size=30 produces non-NaN WAPE. """ config = SeederConfig.from_scenario(ScenarioPreset.DEMO_MINIMAL, seed=42) assert config.seed == 42 - assert config.start_date == date(2024, 10, 1) - assert config.end_date == date(2024, 12, 31) + assert config.end_date == default_seed_end_date() + assert config.start_date == default_seed_end_date() - timedelta(days=DEMO_MINIMAL_SPAN_DAYS) + assert (config.end_date - config.start_date).days >= 72 assert config.dimensions.stores == 3 assert config.dimensions.products == 10 assert config.time_series.trend == "linear" diff --git a/app/shared/seeder/tests/test_core.py b/app/shared/seeder/tests/test_core.py index 944b69a0..61dedc12 100644 --- a/app/shared/seeder/tests/test_core.py +++ b/app/shared/seeder/tests/test_core.py @@ -5,7 +5,12 @@ import pytest -from app.shared.seeder.config import SeederConfig, SparsityConfig +from app.shared.seeder.config import ( + SeederConfig, + SparsityConfig, + default_seed_end_date, + default_seed_start_date, +) from app.shared.seeder.core import DataSeeder, SeederResult @@ -402,12 +407,12 @@ def test_default_dimensions(self): assert seeder.config.dimensions.products == 50 def test_default_date_range(self): - """Test default date range is full year 2024.""" + """Test the default date range is anchored to today and runs backwards.""" config = SeederConfig() seeder = DataSeeder(config) - assert seeder.config.start_date == date(2024, 1, 1) - assert seeder.config.end_date == date(2024, 12, 31) + assert seeder.config.start_date == default_seed_start_date() + assert seeder.config.end_date == default_seed_end_date() def test_custom_sparsity(self): """Test custom sparsity configuration.""" diff --git a/scripts/run_demo.py b/scripts/run_demo.py index 03d26913..8acfc255 100644 --- a/scripts/run_demo.py +++ b/scripts/run_demo.py @@ -43,7 +43,7 @@ import time from collections.abc import Awaitable, Callable from dataclasses import dataclass, field -from datetime import date, timedelta +from datetime import UTC, date, datetime, timedelta from pathlib import Path from typing import Any, Final @@ -75,8 +75,10 @@ DEMO_SCENARIO: Final[str] = "demo_minimal" DEMO_SEED_STORES: Final[int] = 3 DEMO_SEED_PRODUCTS: Final[int] = 10 -DEMO_SEED_START: Final[date] = date(2024, 10, 1) -DEMO_SEED_END: Final[date] = date(2024, 12, 31) +# Seed window is anchored to *today* so the demo always runs on +# current-looking data; it spans DEMO_SEED_SPAN_DAYS back from today (92 days +# inclusive). Must stay >= 72 for a non-NaN backtest WAPE. +DEMO_SEED_SPAN_DAYS: Final[int] = 91 DEMO_MODEL_TYPES: Final[tuple[str, ...]] = ("naive", "seasonal_naive", "moving_average") @@ -411,6 +413,8 @@ async def step_seed(ctx: DemoContext, client: HttpClient) -> StepOutcome: detail="--skip-seed set", duration_ms=(time.monotonic() - start) * 1000, ) + seed_end = datetime.now(UTC).date() + seed_start = seed_end - timedelta(days=DEMO_SEED_SPAN_DAYS) body = await client.request( "seed", "POST", @@ -420,8 +424,8 @@ async def step_seed(ctx: DemoContext, client: HttpClient) -> StepOutcome: "seed": ctx.seed, "stores": DEMO_SEED_STORES, "products": DEMO_SEED_PRODUCTS, - "start_date": DEMO_SEED_START.isoformat(), - "end_date": DEMO_SEED_END.isoformat(), + "start_date": seed_start.isoformat(), + "end_date": seed_end.isoformat(), "sparsity": 0.0, "dry_run": False, }, diff --git a/scripts/seed_random.py b/scripts/seed_random.py index 27263e96..c544ad43 100644 --- a/scripts/seed_random.py +++ b/scripts/seed_random.py @@ -40,6 +40,8 @@ RetailPatternConfig, SparsityConfig, TimeSeriesConfig, + default_seed_end_date, + default_seed_start_date, ) from app.shared.seeder.rag_scenario import run_rag_scenario @@ -112,8 +114,10 @@ def load_config_from_yaml(path: Path) -> SeederConfig: # Parse date range date_range = data.get("date_range", {}) - start_date = parse_date(date_range["start"]) if "start" in date_range else date(2024, 1, 1) - end_date = parse_date(date_range["end"]) if "end" in date_range else date(2024, 12, 31) + start_date = ( + parse_date(date_range["start"]) if "start" in date_range else default_seed_start_date() + ) + end_date = parse_date(date_range["end"]) if "end" in date_range else default_seed_end_date() # Parse time series config ts_data = data.get("time_series", {}) @@ -243,14 +247,14 @@ def create_parser() -> argparse.ArgumentParser: parser.add_argument( "--start-date", type=parse_date, - default=date(2024, 1, 1), - help="Start of date range (default: 2024-01-01)", + default=default_seed_start_date(), + help="Start of date range (default: one year before today)", ) parser.add_argument( "--end-date", type=parse_date, - default=date(2024, 12, 31), - help="End of date range (default: 2024-12-31)", + default=default_seed_end_date(), + help="End of date range (default: today)", ) parser.add_argument( "--sparsity", diff --git a/tests/test_demo_showcase_integration.py b/tests/test_demo_showcase_integration.py index a9538551..5c0c5b38 100644 --- a/tests/test_demo_showcase_integration.py +++ b/tests/test_demo_showcase_integration.py @@ -7,14 +7,21 @@ ``integration`` so it is excluded from the fast unit run. """ +from datetime import timedelta + import pytest +from app.shared.seeder.config import DEMO_MINIMAL_SPAN_DAYS, default_seed_end_date + pytestmark = pytest.mark.integration async def test_demo_run_pipeline_end_to_end(client): """Seed demo_minimal, run the demo pipeline, and verify the registered winner.""" # Precondition: seed the demo_minimal scenario so skip_seed=true has data. + # The window is anchored to today, mirroring the demo pipeline's own seed step. + seed_end = default_seed_end_date() + seed_start = seed_end - timedelta(days=DEMO_MINIMAL_SPAN_DAYS) seed_resp = await client.post( "/seeder/generate", json={ @@ -22,8 +29,8 @@ async def test_demo_run_pipeline_end_to_end(client): "seed": 42, "stores": 3, "products": 10, - "start_date": "2024-10-01", - "end_date": "2024-12-31", + "start_date": seed_start.isoformat(), + "end_date": seed_end.isoformat(), "sparsity": 0.0, "dry_run": False, }, diff --git a/tests/test_run_demo_unit.py b/tests/test_run_demo_unit.py index 4140d95a..0630b1d1 100644 --- a/tests/test_run_demo_unit.py +++ b/tests/test_run_demo_unit.py @@ -8,16 +8,19 @@ from __future__ import annotations import math +from datetime import timedelta from typing import Any from unittest.mock import AsyncMock import pytest +from app.shared.seeder.config import default_seed_end_date from scripts import run_demo from scripts.run_demo import ( DEMO_ALIAS, DEMO_HORIZON, DEMO_MODEL_TYPES, + DEMO_SEED_SPAN_DAYS, GLYPHS, DemoArgs, DemoContext, @@ -341,7 +344,11 @@ class TestStepPayloads: async def test_step_seed_sends_demo_minimal( self, ) -> None: - """Seed step posts demo_minimal scenario with correct dims + dates.""" + """Seed step posts demo_minimal scenario with correct dims + dates. + + The seed window is anchored to *today* and runs DEMO_SEED_SPAN_DAYS + backwards, so the demo always seeds current-looking data. + """ calls: list[dict[str, Any]] = [] class _RecordingClient: @@ -374,8 +381,9 @@ async def request( assert body["seed"] == 42 assert body["stores"] == 3 assert body["products"] == 10 - assert body["start_date"] == "2024-10-01" - assert body["end_date"] == "2024-12-31" + today = default_seed_end_date() + assert body["end_date"] == today.isoformat() + assert body["start_date"] == (today - timedelta(days=DEMO_SEED_SPAN_DAYS)).isoformat() @pytest.mark.asyncio async def test_step_seed_skipped(self) -> None: