From 3eb3d4e5a5b39f84ab5489b5eb9f42e7be0c0b21 Mon Sep 17 00:00:00 2001 From: Gabor Szabo Date: Wed, 13 May 2026 05:15:10 +0200 Subject: [PATCH] fix(forecast,features): apply strict-mode JSON date policy to request bodies (#117) Audits ConfigDict(strict=True) request models across app/features/**/schemas.py and lands the repo-wide policy from issue #117. Without the override, FastAPI's _compat/v2.py:175 calls validate_python on the JSON-parsed dict and Pydantic strict-mode rejects every ISO date string -- every HTTP caller would 422. * TrainRequest.train_start_date / train_end_date now ship with field-level strict=False (mirrors PR #115 on ComputeFeaturesRequest / PreviewFeaturesRequest). * PredictRequest has no JSON-non-native fields; test pins this expectation so a future contributor adding a date field is forced to update the override. * New TestRequestJSONDateGotcha in featuresets/test_schemas.py and forecasting/test_schemas.py exercise Model.model_validate(dict_with_iso_string) -- the exact path FastAPI takes -- so regressions land in the fast unit pass. * New test_routes.py per slice POSTs JSON with ISO date strings and asserts no date_type 422 errors (integration-marked, runs against real Postgres in CI). * docs/_base/SECURITY.md grows a "Pydantic v2 strict mode on FastAPI request bodies" subsection under Input Validation documenting the policy and rationale. Closes #117. --- app/features/featuresets/tests/test_routes.py | 73 ++++++++++++++++++ .../featuresets/tests/test_schemas.py | 41 ++++++++++ app/features/forecasting/schemas.py | 7 ++ app/features/forecasting/tests/test_routes.py | 75 +++++++++++++++++++ .../forecasting/tests/test_schemas.py | 43 +++++++++++ docs/_base/SECURITY.md | 14 ++++ 6 files changed, 253 insertions(+) create mode 100644 app/features/featuresets/tests/test_routes.py create mode 100644 app/features/forecasting/tests/test_routes.py diff --git a/app/features/featuresets/tests/test_routes.py b/app/features/featuresets/tests/test_routes.py new file mode 100644 index 00000000..7805fc19 --- /dev/null +++ b/app/features/featuresets/tests/test_routes.py @@ -0,0 +1,73 @@ +"""HTTP-level smoke tests for featuresets routes. + +These tests POST JSON request bodies containing ISO date strings and assert +the request is NOT rejected with a 422 ``date_type`` error on a date field. +Downstream may return any non-validation status (404 no data, 500 db error, +200 success) -- only the strict-mode JSON-date gotcha is gated here. + +Regression for #117 (and the original discovery in PR #115). +See ``docs/_base/SECURITY.md`` -> "Pydantic v2 strict mode on FastAPI +request bodies". +""" + +from typing import Any + +import pytest +from httpx import ASGITransport, AsyncClient + +from app.main import app + + +@pytest.fixture +async def client() -> Any: + """ASGI test client against the live FastAPI app. + + No DB override -- downstream may fail with 404/500 if the service hits + the DB; we only care that Pydantic validation passes. + """ + transport = ASGITransport(app=app) + async with AsyncClient(transport=transport, base_url="http://test") as ac: + yield ac + + +def _assert_no_date_type_422(response: Any) -> None: + """Fail if the response is a 422 with a ``date_type`` error on any field. + + Any other status (200/4xx/5xx) means Pydantic accepted the request body, + which is the only invariant this test guards. + """ + if response.status_code != 422: + return + body = response.json() + errors = body.get("errors") or [] + date_type_errors = [err for err in errors if err.get("type") == "date_type"] + assert not date_type_errors, ( + "strict-mode JSON date regression: request body rejected with " + f"date_type errors: {date_type_errors}" + ) + + +@pytest.mark.integration +async def test_compute_features_accepts_iso_string_date(client: AsyncClient) -> None: + payload = { + "store_id": 1, + "product_id": 1, + "cutoff_date": "2024-01-31", + "lookback_days": 365, + "config": {"name": "test"}, + } + response = await client.post("/featuresets/compute", json=payload) + _assert_no_date_type_422(response) + + +@pytest.mark.integration +async def test_preview_features_accepts_iso_string_date(client: AsyncClient) -> None: + payload = { + "store_id": 1, + "product_id": 1, + "cutoff_date": "2024-01-31", + "sample_rows": 5, + "config": {"name": "test"}, + } + response = await client.post("/featuresets/preview", json=payload) + _assert_no_date_type_422(response) diff --git a/app/features/featuresets/tests/test_schemas.py b/app/features/featuresets/tests/test_schemas.py index 693b7db3..db01ccbf 100644 --- a/app/features/featuresets/tests/test_schemas.py +++ b/app/features/featuresets/tests/test_schemas.py @@ -13,6 +13,7 @@ ImputationConfig, LagConfig, LifecycleConfig, + PreviewFeaturesRequest, PromotionConfig, ReplenishmentConfig, RollingConfig, @@ -428,3 +429,43 @@ def test_default_lookback(self): config=FeatureSetConfig(name="test"), ) assert request.lookback_days == 365 + + +class TestRequestJSONDateGotcha: + """Regression for #117 / #115: strict-mode request bodies must accept ISO + date strings. + + FastAPI's ``_compat/v2.py:175`` calls ``TypeAdapter.validate_python`` on the + parsed JSON dict. With ``ConfigDict(strict=True)`` and no field-level + ``strict=False``, Pydantic refuses to coerce ISO date strings into + ``datetime.date`` -- every HTTP caller would 422. These tests exercise the + same ``model_validate`` path FastAPI takes, so a regression is caught at + unit-test time rather than at HTTP-boundary discovery time. + + See ``docs/_base/SECURITY.md`` -> "Pydantic v2 strict mode on FastAPI + request bodies". + """ + + def test_compute_features_request_accepts_iso_string_date(self): + request = ComputeFeaturesRequest.model_validate( + { + "store_id": 1, + "product_id": 1, + "cutoff_date": "2024-01-31", + "lookback_days": 365, + "config": {"name": "test"}, + } + ) + assert request.cutoff_date == date(2024, 1, 31) + + def test_preview_features_request_accepts_iso_string_date(self): + request = PreviewFeaturesRequest.model_validate( + { + "store_id": 1, + "product_id": 1, + "cutoff_date": "2024-01-31", + "sample_rows": 5, + "config": {"name": "test"}, + } + ) + assert request.cutoff_date == date(2024, 1, 31) diff --git a/app/features/forecasting/schemas.py b/app/features/forecasting/schemas.py index 1a6808fc..33d534f7 100644 --- a/app/features/forecasting/schemas.py +++ b/app/features/forecasting/schemas.py @@ -170,12 +170,19 @@ class TrainRequest(BaseModel): store_id: int = Field(..., ge=1, description="Store ID") product_id: int = Field(..., ge=1, description="Product ID") + # ``strict=False`` overrides the model-level ``strict=True`` so FastAPI's + # ``validate_python`` (called on the JSON-parsed dict at + # ``fastapi._compat.v2:175``) accepts ISO date strings from JSON request + # bodies. See ``docs/_base/SECURITY.md`` -> "Pydantic v2 strict mode on + # FastAPI request bodies" for the repo-wide policy (issue #117). train_start_date: date_type = Field( ..., + strict=False, description="Start date of training period", ) train_end_date: date_type = Field( ..., + strict=False, description="End date of training period (inclusive)", ) config: ModelConfig diff --git a/app/features/forecasting/tests/test_routes.py b/app/features/forecasting/tests/test_routes.py new file mode 100644 index 00000000..328d6f9a --- /dev/null +++ b/app/features/forecasting/tests/test_routes.py @@ -0,0 +1,75 @@ +"""HTTP-level smoke tests for forecasting routes. + +These tests POST JSON request bodies containing ISO date strings and assert +the request is NOT rejected with a 422 ``date_type`` error on a date field. +Downstream may return any non-validation status (404 no data, 500 db error, +200 success) -- only the strict-mode JSON-date gotcha is gated here. + +Regression for #117 (and the original discovery in PR #115). +See ``docs/_base/SECURITY.md`` -> "Pydantic v2 strict mode on FastAPI +request bodies". +""" + +from typing import Any + +import pytest +from httpx import ASGITransport, AsyncClient + +from app.main import app + + +@pytest.fixture +async def client() -> Any: + """ASGI test client against the live FastAPI app. + + No DB override -- downstream may fail with 404/500 if the service hits + the DB; we only care that Pydantic validation passes. + """ + transport = ASGITransport(app=app) + async with AsyncClient(transport=transport, base_url="http://test") as ac: + yield ac + + +def _assert_no_date_type_422(response: Any) -> None: + """Fail if the response is a 422 with a ``date_type`` error on any field. + + Any other status (200/4xx/5xx) means Pydantic accepted the request body, + which is the only invariant this test guards. + """ + if response.status_code != 422: + return + body = response.json() + errors = body.get("errors") or [] + date_type_errors = [err for err in errors if err.get("type") == "date_type"] + assert not date_type_errors, ( + "strict-mode JSON date regression: request body rejected with " + f"date_type errors: {date_type_errors}" + ) + + +@pytest.mark.integration +async def test_train_accepts_iso_string_dates(client: AsyncClient) -> None: + payload = { + "store_id": 1, + "product_id": 2, + "train_start_date": "2024-01-01", + "train_end_date": "2024-01-31", + "config": {"model_type": "naive"}, + } + response = await client.post("/forecasting/train", json=payload) + _assert_no_date_type_422(response) + + +@pytest.mark.integration +async def test_predict_accepts_request(client: AsyncClient) -> None: + # PredictRequest has no date fields; this is a smoke test for completeness + # so a future contributor who adds a date field is forced to confirm the + # strict-mode override survives a real HTTP roundtrip. + payload = { + "store_id": 1, + "product_id": 2, + "horizon": 14, + "model_path": "/artifacts/models/model_abc123.joblib", + } + response = await client.post("/forecasting/predict", json=payload) + _assert_no_date_type_422(response) diff --git a/app/features/forecasting/tests/test_schemas.py b/app/features/forecasting/tests/test_schemas.py index 7663201d..7818df27 100644 --- a/app/features/forecasting/tests/test_schemas.py +++ b/app/features/forecasting/tests/test_schemas.py @@ -276,3 +276,46 @@ def test_valid_response(self): ) assert response.n_observations == 31 assert response.model_path.endswith(".joblib") + + +class TestRequestJSONDateGotcha: + """Regression for #117 / #115: strict-mode request bodies must accept ISO + date strings. + + FastAPI's ``_compat/v2.py:175`` calls ``TypeAdapter.validate_python`` on the + parsed JSON dict. With ``ConfigDict(strict=True)`` and no field-level + ``strict=False``, Pydantic refuses to coerce ISO date strings into + ``datetime.date`` -- every HTTP caller would 422. These tests exercise the + same ``model_validate`` path FastAPI takes, so a regression is caught at + unit-test time rather than at HTTP-boundary discovery time. + + See ``docs/_base/SECURITY.md`` -> "Pydantic v2 strict mode on FastAPI + request bodies". + """ + + def test_train_request_accepts_iso_string_dates(self): + request = TrainRequest.model_validate( + { + "store_id": 1, + "product_id": 2, + "train_start_date": "2024-01-01", + "train_end_date": "2024-01-31", + "config": {"model_type": "naive"}, + } + ) + assert request.train_start_date == date(2024, 1, 1) + assert request.train_end_date == date(2024, 1, 31) + + def test_predict_request_has_no_date_fields(self): + # PredictRequest has no date/datetime/UUID fields, so the strict-mode + # gotcha does not apply. Pin this expectation so a future contributor + # who adds a date field is forced to update the strict-mode override. + request = PredictRequest.model_validate( + { + "store_id": 1, + "product_id": 2, + "horizon": 14, + "model_path": "/artifacts/models/model_abc123.joblib", + } + ) + assert request.horizon == 14 diff --git a/docs/_base/SECURITY.md b/docs/_base/SECURITY.md index 4c32534d..a08b3e6a 100644 --- a/docs/_base/SECURITY.md +++ b/docs/_base/SECURITY.md @@ -40,6 +40,20 @@ Never log decrypted values, even at DEBUG. Log key NAMES only (`openai_api_key_s - LLM **responses** are not trusted: structured outputs parsed via Pydantic; freeform text never executed. - Allow-lists over deny-lists (e.g., `model_type ∈ {naive, seasonal_naive, moving_average, lightgbm}`; embedding provider ∈ `{openai, ollama}`; model identifier provider ∈ `{anthropic, openai, google-gla, google-vertex}`). +### Pydantic v2 strict mode on FastAPI request bodies + +FastAPI's `_compat/v2.py:175` calls `TypeAdapter.validate_python` (not `validate_json`) on the parsed JSON dict. With `ConfigDict(strict=True)`, Pydantic refuses to coerce ISO-string values into Python types that JSON has no native representation for, so every HTTP caller would 422 (e.g., `{"type":"date_type","loc":["body","cutoff_date"],"msg":"Input should be a valid date"}`). + +**Policy:** +- KEEP `model_config = ConfigDict(strict=True)` on request bodies — it catches subtle coercion bugs on JSON-native types (e.g., `"5"` → 5 on an ID field, `1.0` → 1 on an int). +- ADD field-level `Field(strict=False, ...)` on every field whose type has no native JSON representation: + - `datetime.date`, `datetime.datetime`, `datetime.time` + - `uuid.UUID` + - `decimal.Decimal` +- Every request-body test suite MUST include at least one case that exercises the JSON path — call `Model.model_validate({"field": "iso-string", ...})` (matches FastAPI's `validate_python` path) — so this regression class is caught at unit-test time, not at HTTP-boundary discovery time. + +Reference: PR #115 (issue #109) introduced this pattern on `ComputeFeaturesRequest.cutoff_date` and `PreviewFeaturesRequest.cutoff_date`. Issue #117 extended it repo-wide to `TrainRequest`. + ## Network Security - Backend binds `0.0.0.0:8123` by default (`api_host` / `api_port` in `Settings`, `app/core/config.py:32-33`; the `# noqa: S104` is intentional for single-host LAN demo). Fine on a personal LAN; would need a reverse proxy + TLS for any public exposure.