Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 9 additions & 5 deletions app/features/demo/pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@
import time
from collections.abc import AsyncIterator, Awaitable, Callable
from dataclasses import dataclass, field
from datetime import date, timedelta
from datetime import UTC, date, datetime, timedelta
from pathlib import Path
from typing import Any

Expand All @@ -55,8 +55,10 @@
DEMO_SCENARIO = "demo_minimal"
DEMO_SEED_STORES = 3
DEMO_SEED_PRODUCTS = 10
DEMO_SEED_START = date(2024, 10, 1)
DEMO_SEED_END = date(2024, 12, 31)
# Seed window is anchored to *today* so the showcase always demos
# current-looking data; it runs DEMO_SEED_SPAN_DAYS back from today (92 days
# inclusive). Must stay >= 72 for a non-NaN backtest WAPE (see step_backtest).
DEMO_SEED_SPAN_DAYS = 91

DEMO_MODEL_TYPES: tuple[str, ...] = ("naive", "seasonal_naive", "moving_average")

Expand Down Expand Up @@ -272,6 +274,8 @@ async def step_seed(ctx: DemoContext, client: _Client) -> StepResult:
"""Seed the ``demo_minimal`` scenario (skipped when ``skip_seed`` is set)."""
if ctx.skip_seed:
return ("skip", "skip_seed=true (assuming a seeded database)", {})
seed_end = datetime.now(UTC).date()
seed_start = seed_end - timedelta(days=DEMO_SEED_SPAN_DAYS)
body = await client.request(
"seed",
"POST",
Expand All @@ -281,8 +285,8 @@ async def step_seed(ctx: DemoContext, client: _Client) -> StepResult:
"seed": ctx.seed,
"stores": DEMO_SEED_STORES,
"products": DEMO_SEED_PRODUCTS,
"start_date": DEMO_SEED_START.isoformat(),
"end_date": DEMO_SEED_END.isoformat(),
"start_date": seed_start.isoformat(),
"end_date": seed_end.isoformat(),
"sparsity": 0.0,
"dry_run": False,
},
Expand Down
10 changes: 6 additions & 4 deletions app/features/seeder/schemas.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@

from pydantic import BaseModel, Field, field_validator, model_validator

from app.shared.seeder.config import default_seed_end_date, default_seed_start_date

VALID_CHANNELS: frozenset[str] = frozenset({"in_store", "online", "click_collect", "wholesale"})
"""Allow-list for ``sales_daily.channel`` — mirrors the SQL CHECK."""

Expand Down Expand Up @@ -98,12 +100,12 @@ class GenerateParams(BaseModel):
description="Number of products to generate",
)
start_date: date = Field(
default_factory=lambda: date(2024, 1, 1),
description="Start of date range",
default_factory=default_seed_start_date,
description="Start of date range (defaults to one year before today)",
)
end_date: date = Field(
default_factory=lambda: date(2024, 12, 31),
description="End of date range",
default_factory=default_seed_end_date,
description="End of date range (defaults to today)",
)
sparsity: float = Field(
default=0.0,
Expand Down
36 changes: 23 additions & 13 deletions app/features/seeder/service.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

import time
from dataclasses import replace
from datetime import date, datetime
from datetime import date, datetime, timedelta

from sqlalchemy import func, select
from sqlalchemy.ext.asyncio import AsyncSession
Expand All @@ -26,6 +26,7 @@
from app.features.seeder import schemas
from app.shared.seeder import DataSeeder, ScenarioPreset, SeederConfig
from app.shared.seeder.config import (
DEMO_MINIMAL_SPAN_DAYS,
BundleConfig,
ChangepointConfig,
ChangepointEvent,
Expand All @@ -39,6 +40,8 @@
ReturnsConfig,
SparsityConfig,
SubstitutionConfig,
default_seed_end_date,
default_seed_start_date,
)

logger = get_logger(__name__)
Expand Down Expand Up @@ -315,14 +318,21 @@ def list_scenarios() -> list[schemas.ScenarioInfo]:
Returns:
List of ScenarioInfo with preset details.
"""
# Date ranges are anchored to *today* so the picker reflects the windows the
# seeder will actually produce. holiday_rush is the one exception: it is
# deliberately calendar-pinned to a 2024 Q4 holiday window.
today = default_seed_end_date()
year_ago = default_seed_start_date()
demo_start = today - timedelta(days=DEMO_MINIMAL_SPAN_DAYS)

scenarios = [
schemas.ScenarioInfo(
name="retail_standard",
description="Normal retail patterns with mild seasonality and linear trend",
stores=10,
products=50,
start_date=date(2024, 1, 1),
end_date=date(2024, 12, 31),
start_date=year_ago,
end_date=today,
),
schemas.ScenarioInfo(
name="holiday_rush",
Expand All @@ -337,40 +347,40 @@ def list_scenarios() -> list[schemas.ScenarioInfo]:
description="Noisy, unpredictable data with frequent anomalies for robustness testing",
stores=10,
products=50,
start_date=date(2024, 1, 1),
end_date=date(2024, 12, 31),
start_date=year_ago,
end_date=today,
),
schemas.ScenarioInfo(
name="stockout_heavy",
description="Frequent stockouts (25% probability) for inventory modeling",
stores=10,
products=50,
start_date=date(2024, 1, 1),
end_date=date(2024, 12, 31),
start_date=year_ago,
end_date=today,
),
schemas.ScenarioInfo(
name="new_launches",
description="100 products with gradual launch ramp patterns",
stores=10,
products=100,
start_date=date(2024, 1, 1),
end_date=date(2024, 12, 31),
start_date=year_ago,
end_date=today,
),
schemas.ScenarioInfo(
name="sparse",
description="50% missing combinations and random date gaps for gap handling",
stores=10,
products=50,
start_date=date(2024, 1, 1),
end_date=date(2024, 12, 31),
start_date=year_ago,
end_date=today,
),
schemas.ScenarioInfo(
name="demo_minimal",
description="Tiny preset for the make demo target (3 stores x 10 products x 92 days)",
stores=3,
products=10,
start_date=date(2024, 10, 1),
end_date=date(2024, 12, 31),
start_date=demo_start,
end_date=today,
),
]

Expand Down
13 changes: 9 additions & 4 deletions app/features/seeder/tests/test_service.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
"""Unit tests for seeder service layer."""

from datetime import date
from datetime import date, timedelta
from unittest.mock import AsyncMock, MagicMock, patch

import pytest

from app.features.seeder import schemas, service
from app.shared.seeder.config import DEMO_MINIMAL_SPAN_DAYS, default_seed_end_date


class TestListScenarios:
Expand All @@ -27,14 +28,18 @@ def test_returns_all_scenarios(self):
assert "demo_minimal" in names

def test_demo_minimal_dimensions(self):
"""Test that demo_minimal is the small preset for the make demo target."""
"""Test that demo_minimal is the small preset for the make demo target.

The window is anchored to *today* so the scenario picker reflects what
the seeder will actually produce.
"""
scenarios = service.list_scenarios()

demo = next(s for s in scenarios if s.name == "demo_minimal")
assert demo.stores == 3
assert demo.products == 10
assert demo.start_date == date(2024, 10, 1)
assert demo.end_date == date(2024, 12, 31)
assert demo.end_date == default_seed_end_date()
assert demo.start_date == default_seed_end_date() - timedelta(days=DEMO_MINIMAL_SPAN_DAYS)
Comment on lines 30 to +42
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

suggestion: Use a single, stable "today" reference when asserting demo_minimal scenario dates

This test calls service.list_scenarios() (which uses default_seed_end_date() internally) and then calls default_seed_end_date() twice more in assertions. If the test run straddles midnight UTC (the helper reads datetime.now(UTC)), these calls can return different dates and cause a flaky failure. To avoid this, either:

  • Patch default_seed_end_date (and DEMO_MINIMAL_SPAN_DAYS if needed) so both the service and assertions share a fixed value, or
  • Capture today = default_seed_end_date() once at the top (after any patching) and assert against today.

That preserves the “anchored to today” behavior without time-boundary flakes.

Suggested change
def test_demo_minimal_dimensions(self):
"""Test that demo_minimal is the small preset for the make demo target."""
"""Test that demo_minimal is the small preset for the make demo target.
The window is anchored to *today* so the scenario picker reflects what
the seeder will actually produce.
"""
scenarios = service.list_scenarios()
demo = next(s for s in scenarios if s.name == "demo_minimal")
assert demo.stores == 3
assert demo.products == 10
assert demo.start_date == date(2024, 10, 1)
assert demo.end_date == date(2024, 12, 31)
assert demo.end_date == default_seed_end_date()
assert demo.start_date == default_seed_end_date() - timedelta(days=DEMO_MINIMAL_SPAN_DAYS)
def test_demo_minimal_dimensions(self, monkeypatch):
    """Check the demo_minimal preset used by the make demo target.

    ``service.default_seed_end_date`` is replaced with a fixed date so the
    service call and the assertions below share one stable "today"; the
    preset's window is expected to end on that date and start
    DEMO_MINIMAL_SPAN_DAYS earlier.
    """
    frozen_today = date(2024, 10, 1)
    monkeypatch.setattr(service, "default_seed_end_date", lambda: frozen_today)

    demo = next(
        preset for preset in service.list_scenarios() if preset.name == "demo_minimal"
    )
    expected_start = frozen_today - timedelta(days=DEMO_MINIMAL_SPAN_DAYS)

    # Size of the preset.
    assert demo.stores == 3
    assert demo.products == 10
    # Today-anchored window.
    assert demo.end_date == frozen_today
    assert demo.start_date == expected_start


def test_scenario_info_structure(self):
"""Test that scenarios have required fields."""
Expand Down
45 changes: 36 additions & 9 deletions app/shared/seeder/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,30 @@
from __future__ import annotations

from dataclasses import dataclass, field
from datetime import date
from datetime import UTC, date, datetime, timedelta
from enum import Enum
from typing import Literal

DEFAULT_SEED_SPAN_DAYS = 365
"""Span of the default seeded window. Generated data ends *today* and runs
this many days backwards, so datasets, forecasts, and the demo showcase
always look current instead of being frozen in a hard-coded calendar year."""

DEMO_MINIMAL_SPAN_DAYS = 91
"""Span of the ``demo_minimal`` window (92 calendar days inclusive). Must stay
>= 72 so an expanding backtest with n_splits=3 + horizon=14 +
min_train_size=30 produces a non-NaN WAPE."""


def default_seed_end_date() -> date:
"""End of the default seeded window — anchored to the current (UTC) date."""
return datetime.now(UTC).date()


def default_seed_start_date() -> date:
"""Start of the default seeded window — ``DEFAULT_SEED_SPAN_DAYS`` before today."""
return datetime.now(UTC).date() - timedelta(days=DEFAULT_SEED_SPAN_DAYS)


class ScenarioPreset(str, Enum):
"""Pre-built scenario presets for common testing needs."""
Expand Down Expand Up @@ -472,8 +492,8 @@ class SeederConfig:
"""

seed: int = 42
start_date: date = field(default_factory=lambda: date(2024, 1, 1))
end_date: date = field(default_factory=lambda: date(2024, 12, 31))
start_date: date = field(default_factory=default_seed_start_date)
end_date: date = field(default_factory=default_seed_end_date)
dimensions: DimensionConfig = field(default_factory=DimensionConfig)
time_series: TimeSeriesConfig = field(default_factory=TimeSeriesConfig)
retail: RetailPatternConfig = field(default_factory=RetailPatternConfig)
Expand Down Expand Up @@ -519,6 +539,10 @@ def from_scenario(cls, scenario: ScenarioPreset, seed: int = 42) -> SeederConfig
)

if scenario == ScenarioPreset.HOLIDAY_RUSH:
# Deliberately calendar-pinned: the holiday dates and Q4 monthly
# seasonality below model a specific 2024 Black Friday / Christmas
# window, so this scenario is NOT re-anchored to today. Pass an
# explicit start_date/end_date to shift it.
return cls(
seed=seed,
start_date=date(2024, 10, 1),
Expand Down Expand Up @@ -606,14 +630,17 @@ def from_scenario(cls, scenario: ScenarioPreset, seed: int = 42) -> SeederConfig
)

if scenario == ScenarioPreset.DEMO_MINIMAL:
# Tiny preset for the `make demo` target. Keeps wall-clock comfortable
# on a developer laptop while still producing a non-NaN backtest WAPE
# with strategy=expanding, n_splits=3, horizon=14, min_train_size=30
# (needs >= 30 + 3*14 = 72 days; 92 days here leaves margin).
# Tiny preset for the `make demo` target. Anchored to *today* so the
# showcase always demos current-looking data; the window runs
# DEMO_MINIMAL_SPAN_DAYS back from today (92 days inclusive). Keeps
# wall-clock comfortable on a developer laptop while still producing
# a non-NaN backtest WAPE with strategy=expanding, n_splits=3,
# horizon=14, min_train_size=30 (needs >= 30 + 3*14 = 72 days).
demo_end = default_seed_end_date()
return cls(
seed=seed,
start_date=date(2024, 10, 1),
end_date=date(2024, 12, 31),
start_date=demo_end - timedelta(days=DEMO_MINIMAL_SPAN_DAYS),
end_date=demo_end,
dimensions=DimensionConfig(stores=3, products=10),
time_series=TimeSeriesConfig(
base_demand=100,
Expand Down
26 changes: 17 additions & 9 deletions app/shared/seeder/tests/test_config.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,14 @@
"""Tests for seeder configuration."""

from datetime import date
from datetime import date, timedelta

from app.shared.seeder.config import (
DEMO_MINIMAL_SPAN_DAYS,
ScenarioPreset,
SeederConfig,
TimeSeriesConfig,
default_seed_end_date,
default_seed_start_date,
)


Expand Down Expand Up @@ -39,12 +42,16 @@ class TestSeederConfig:
"""Tests for SeederConfig."""

def test_default_values(self):
"""Test default configuration values."""
"""Test default configuration values.

The default date window is anchored to *today* and runs
DEFAULT_SEED_SPAN_DAYS backwards, so seeded data always looks current.
"""
config = SeederConfig()

assert config.seed == 42
assert config.start_date == date(2024, 1, 1)
assert config.end_date == date(2024, 12, 31)
assert config.end_date == default_seed_end_date()
assert config.start_date == default_seed_start_date()
Comment on lines 50 to +54
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

suggestion (testing): Avoid time-dependent flakiness by stabilizing the "today" value used in default date range assertions

Because SeederConfig() computes its dates using default_seed_* and the assertions call default_seed_end_date() / default_seed_start_date() again, the test can fail if it crosses midnight between these calls. To avoid this flakiness, either:

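  • Patch the clock behind default_seed_end_date / default_seed_start_date so they return fixed values in this test (patch the `datetime` name in app.shared.seeder.config; replacing the helper attributes themselves does not affect SeederConfig(), which captured the original functions as default_factory callables when the class was defined), or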
  • Compute a single today = default_seed_end_date() (patched or real) and reuse it for both the config construction and the assertions.

Freezing time (via monkeypatch or a dedicated helper) would make this fully deterministic.

Suggested implementation:

    def test_default_values(self, monkeypatch):
        """Test default configuration values.

        The default date window is anchored to *today* and runs
        DEFAULT_SEED_SPAN_DAYS backwards, so seeded data always looks current.

        To avoid time-dependent flakiness the clock itself is frozen: the
        ``datetime`` name inside ``app.shared.seeder.config`` is swapped for a
        subclass whose ``now()`` returns a fixed instant. Patching the
        ``default_seed_*`` module attributes would not be enough, because
        ``SeederConfig`` captured those function objects as ``default_factory``
        callables when the class was defined, and this module imported them
        by name.
        """
        # Local imports keep this test self-contained.
        from datetime import UTC, datetime

        from app.shared.seeder import config as seeder_config

        fixed_now = datetime(2024, 1, 31, 12, 0, tzinfo=UTC)

        class _FrozenDatetime(datetime):
            """``datetime`` stand-in whose ``now()`` always yields ``fixed_now``."""

            @classmethod
            def now(cls, tz=None):
                if tz is None:
                    return fixed_now.replace(tzinfo=None)
                return fixed_now.astimezone(tz)

        # The helpers resolve ``datetime`` from the config module's globals at
        # call time, so this reaches the already-bound default factories too.
        monkeypatch.setattr(seeder_config, "datetime", _FrozenDatetime)
        fixed_today = fixed_now.date()

        config = SeederConfig()

        assert config.seed == 42
        assert config.end_date == fixed_today
        assert config.start_date == fixed_today - timedelta(
            days=seeder_config.DEFAULT_SEED_SPAN_DAYS
        )
        assert config.dimensions.stores == 10
        assert config.dimensions.products == 50
        assert config.batch_size == 1000

    def test_from_scenario_demo_minimal(self):
        """Test demo_minimal scenario preset.

To make this compile and behave correctly you may also need to:

  1. Ensure date and timedelta are imported, e.g.:
    • from datetime import date, timedelta
  2. Ensure DEFAULT_SEED_SPAN_DAYS is imported into this test module if it is not already, e.g.:
    • from app.shared.seeder.config import DEFAULT_SEED_SPAN_DAYS
  3. Confirm the monkeypatch target string "app.shared.seeder.config.default_seed_end_date" / "app.shared.seeder.config.default_seed_start_date" matches the actual module path where SeederConfig calls these helpers. If the helpers live in a different module, adjust the target accordingly.

assert config.dimensions.stores == 10
assert config.dimensions.products == 50
assert config.batch_size == 1000
Expand Down Expand Up @@ -93,15 +100,16 @@ def test_from_scenario_sparse(self):
def test_from_scenario_demo_minimal(self):
"""Test demo_minimal scenario preset.

This preset powers the `make demo` target; the date range MUST cover at
least 72 days so an expanding backtest with n_splits=3 + horizon=14 +
min_train_size=30 produces non-NaN WAPE.
This preset powers the `make demo` target; the window is anchored to
*today* and MUST cover at least 72 days so an expanding backtest with
n_splits=3 + horizon=14 + min_train_size=30 produces non-NaN WAPE.
"""
config = SeederConfig.from_scenario(ScenarioPreset.DEMO_MINIMAL, seed=42)

assert config.seed == 42
assert config.start_date == date(2024, 10, 1)
assert config.end_date == date(2024, 12, 31)
assert config.end_date == default_seed_end_date()
assert config.start_date == default_seed_end_date() - timedelta(days=DEMO_MINIMAL_SPAN_DAYS)
assert (config.end_date - config.start_date).days >= 72
assert config.dimensions.stores == 3
assert config.dimensions.products == 10
assert config.time_series.trend == "linear"
Expand Down
13 changes: 9 additions & 4 deletions app/shared/seeder/tests/test_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,12 @@

import pytest

from app.shared.seeder.config import SeederConfig, SparsityConfig
from app.shared.seeder.config import (
SeederConfig,
SparsityConfig,
default_seed_end_date,
default_seed_start_date,
)
from app.shared.seeder.core import DataSeeder, SeederResult


Expand Down Expand Up @@ -402,12 +407,12 @@ def test_default_dimensions(self):
assert seeder.config.dimensions.products == 50

def test_default_date_range(self):
"""Test default date range is full year 2024."""
"""Test the default date range is anchored to today and runs backwards."""
config = SeederConfig()
seeder = DataSeeder(config)

assert seeder.config.start_date == date(2024, 1, 1)
assert seeder.config.end_date == date(2024, 12, 31)
assert seeder.config.start_date == default_seed_start_date()
assert seeder.config.end_date == default_seed_end_date()

def test_custom_sparsity(self):
"""Test custom sparsity configuration."""
Expand Down
Loading