From 19cfa4d1a1f28f2b10899ea73d17e40426986958 Mon Sep 17 00:00:00 2001 From: jaayslaughter-cpu Date: Thu, 14 May 2026 19:46:24 -0700 Subject: [PATCH] PR #566: XGBoost v2 training (season weights + feature alignment) + Marcel regression layer + data-driven blend weight updater --- marcel_layer.py | 1068 ++++++++++++------------------------- prop_enrichment_layer.py | 89 +--- scripts/xgb_k_training.py | 875 ++++++++++++++++++------------ update_blend_weights.py | 239 +++++++++ 4 files changed, 1103 insertions(+), 1168 deletions(-) create mode 100644 update_blend_weights.py diff --git a/marcel_layer.py b/marcel_layer.py index 27ba631..7cda026 100644 --- a/marcel_layer.py +++ b/marcel_layer.py @@ -1,787 +1,373 @@ """ marcel_layer.py -=============== -Marcel 3-Year Projection System for PropIQ Analytics Engine. - -Derived from baseball-sims (thomasosbot/baseball-sims) WITHOUT BHQ subscription. -Original algorithm: Tom Tango's "Marcel the Monkey Forecasting System". -Reference implementation: src/features/marcel.py in thomasosbot/baseball-sims. - -Algorithm: - 1. Collect up to 3 prior seasons of player stats from FanGraphs JSON API - 2. Apply year weights: 5 × most-recent + 4 × prior year + 3 × two years back - 3. Regress to league mean: player_weight = weighted_PA / (weighted_PA + regression_PA) - 4. Apply age adjustment: +0.6%/yr improvement under 29, -0.3%/yr decline over 29 - 5. 
Produce projected rates per player for use as confidence modifiers in Layer 1
-
-PropIQ integration (Layer 8a, fires after FanGraphs Layer 6):
-    Batter K% → K Under prop: if projected K% >> league avg → small K Under boost
-    Batter HR/PA → HR/TB Over: if projected HR rate >> league avg → boost
-    Batter wOBA → hits/H+R+RBI: if wOBA >> league avg → hits Over boost
-    Pitcher K% → K Over prop: if projected K% >> league avg → boost
-    Pitcher BB% → ER Under: if projected BB% << league avg → ER Under boost
-    Pitcher HR/9 → ER Under: if projected HR/9 << league avg → ER Under boost
-
-Max adjustment: ±0.018 per prop — subtle refinement layered on top of Layers 1-7.
-Never overrides or replaces; always additive.
-
-Data source:
-    FanGraphs JSON API — https://www.fangraphs.com/api/leaders/major-league/data
-    Public endpoint, no API key required.
-    Fetches 3 prior seasons (e.g. 2023+2024+2025 for 2026 projections).
-
-Cache:
-    /tmp/marcel_{year}_{iso_year}w{iso_week}.json — refreshed weekly.
-    Season-level projections that don't change day to day.
-
-Dependencies:
-    requests (already in project requirements)
-
-Usage:
-    layer = MarcelLayer(projection_year=2026)
-    layer.prefetch()
-    batter_proj = layer.get_batter("Aaron Judge")
-    pitcher_proj = layer.get_pitcher("Spencer Strider")
-    adj = marcel_adjustment("strikeouts", "Over", "pitcher", pitcher_proj)
+================
+Marcel projections for PropIQ — regression-to-mean for early-season props.
+
+THE PROBLEM
+-----------
+In May, pitchers have 5-8 starts. A pitcher with a 35% K-rate through 6 starts
+looks elite, but Marcel regression says his true talent is probably 28-30% K-rate
+because small samples are noisy. The current model uses the raw 2026 stats,
+which are overfit to small samples early in the season.
+
+Marcel is the simplest projection system that works: weighted average of the
+last 3 seasons (5/4/3 weight), then regressed to league mean based on sample
+size. 
It's not fancy but it consistently outperforms raw stats at small samples. + +USAGE +----- +From prop_enrichment_layer.py, after Steamer but before PA model: + + from marcel_layer import get_marcel_k_rate, get_marcel_hit_rate + + # For K props: + if prop_type == "strikeouts": + raw_k_pct = prop.get("sv_k_pct", 22.0) + season_bf = prop.get("season_bf", 0) + marcel_k = get_marcel_k_rate(raw_k_pct, season_bf, + hist_k_pct=prop.get("career_k_pct")) + prop["_marcel_k_pct"] = marcel_k + # Use as opp_lineup_k_pct_proxy input to PA model + + # For hit props: + if prop_type == "hits": + raw_avg = prop.get("sv_xba", 0.250) + season_pa = prop.get("season_pa", 0) + marcel_h = get_marcel_hit_rate(raw_avg, season_pa, + hist_avg=prop.get("career_avg")) + prop["_marcel_hit_rate"] = marcel_h + +WHEN DOES MARCEL MATTER? +------------------------ +Marcel regression is strongest when sample size is small. +Rule of thumb: + - Pitcher BF < 100: Marcel contributes ~60% of the projection + - Pitcher BF < 300: Marcel contributes ~30% + - Pitcher BF > 600: Marcel contributes <10% (current stats dominate) + +In May (roughly BF 80-200 for a full-season starter), Marcel meaningfully +pulls extreme early-season stats toward the mean. 
""" from __future__ import annotations -import json import logging import os -import time -from datetime import datetime, timezone - -import requests +from functools import lru_cache +from typing import Optional logger = logging.getLogger("propiq.marcel") -# --------------------------------------------------------------------------- -# Constants -# --------------------------------------------------------------------------- -# FIX: Updated to 2025 MLB actuals (FanGraphs leaderboards) -_LEAGUE_AVG: dict = { - # Batter rates - "batter_k_pct": 0.228, # FG 2026: 22.8% through game 44 - "batter_bb_pct": 0.083, # FG 2026: 8.3% through game 44 - "batter_hr_pa": 0.033, # FG 2025: elevated power - "batter_woba": 0.309, # FG 2026: .309 through game 44 - "batter_iso": 0.156, # FG 2025: elevated power (was 0.158) - # Pitcher rates (rates *allowed*) - "pitcher_k_pct": 0.228, # FG 2026: 22.8% through game 44 - "pitcher_bb_pct": 0.087, # FG 2025: 8.4% (confirmed) - "pitcher_hr9": 1.28, # FG 2025: ~1.28 HR/9 (was 1.30) +# ── League average rates (2026 baseline — update seasonally) ────────────────── +# These are the regression targets. Extreme early-season stats get pulled +# toward these values proportional to how much data we have. +LEAGUE_AVG = { + "k_pct": 22.8, # pitcher K% (strikeouts per PA × 100) + "bb_pct": 8.3, # pitcher BB% + "era": 4.25, # ERA + "xera": 4.20, # xERA + "whiff_pct": 24.1, # SwStr% + "hit_rate": 0.248, # batter batting average (raw) + "xba": 0.245, # batter xBA + "xwoba": 0.318, # batter xwOBA + "k_pct_bat": 22.5, # batter K% + "bb_pct_bat": 8.5, # batter BB% } -_FG_BASE_URL = "https://www.fangraphs.com/api/leaders/major-league/data" -_HEADERS = {"User-Agent": "PropIQ/1.0 (analytics)"} -_TIMEOUT = 20 # seconds for FanGraphs HTTP request -_REQUEST_DELAY = 1.5 # pause between batter/pitcher fetches +# Marcel regression weights — how many "league average" PA/BF to mix in +# Lower = faster regression (more conservative). Based on Tango Tiger Marcel paper. 
+# These values are for MLB props specifically, slightly more conservative than +# traditional Marcel for game prediction. +REGRESSION_PA = { + "k_pct": 250, # pitcher K-rate stabilises ~250 BF + "bb_pct": 700, # pitcher BB-rate stabilises ~700 BF + "hit_rate": 600, # batter batting average stabilises ~600 PA + "xba": 200, # xBA stabilises faster (underlying contact quality) + "xwoba": 250, # xwOBA stabilises ~250 PA + "whiff_pct":200, # SwStr% stabilises ~200 pitches seen + "k_pct_bat":150, # batter K-rate stabilises quickly +} -def _scraperapi_get_marcel(url: str, params: dict, headers: dict, timeout: int = 30): - """GET with automatic ScraperAPI fallback on 403/429 from FanGraphs. - Marcel projections require 3 seasons of FanGraphs data — all are 403-blocked - on Railway. ScraperAPI residential proxy bypasses the Cloudflare block. +def _regress(observed: float, sample_n: int, league_avg: float, + regression_n: int) -> float: """ - resp = requests.get(url, params=params, headers=headers, timeout=timeout) - if resp.status_code in (403, 429, 407): - scraper_key = os.getenv("SCRAPERAPI_KEY", "") - if scraper_key: - proxy = f"http://scraperapi:{scraper_key}@proxy-server.scraperapi.com:8001" - proxies = {"http": proxy, "https": proxy} - logger.info( - "[Marcel] Direct fetch %d — retrying via ScraperAPI residential proxy", - resp.status_code, - ) - try: - resp = requests.get( - url, params=params, headers=headers, - timeout=60, proxies=proxies, verify=False, - ) - except Exception as _proxy_err: - logger.warning("[Marcel] ScraperAPI proxy failed: %s", _proxy_err) - else: - logger.warning( - "[Marcel] Got %d from FanGraphs — SCRAPERAPI_KEY not set. 
" - "Marcel projections will fall back to statsapi baseline.", - resp.status_code, - ) - return resp - -_AGE_PEAK = 27 # peak age from Marcel spec -_AGE_YOUNG_RATE = 0.006 # +0.6%/yr improvement under 27 -_AGE_OLD_RATE = 0.003 # -0.3%/yr decline over 27 - -_BATTER_REGRESSION_PA = 200 # Marcel spec: regress batters at 200 PA -_PITCHER_REGRESSION_BF = 250 # Marcel spec: regress pitchers at 250 BF - -_MARCEL_WEIGHTS = [5, 4, 3] # most-recent year first -# Cache helpers -# --------------------------------------------------------------------------- - -def _get_cache_path(year: int) -> str: - """Weekly cache file path in /tmp — refreshed on Monday of each new week.""" - today = datetime.now(timezone.utc) - iso = today.isocalendar() - return f"/tmp/marcel_{year}_{iso.year}w{iso.week}.json" + Marcel regression formula: + weight_observed = sample_n / (sample_n + regression_n) + weight_league = regression_n / (sample_n + regression_n) + result = weight_observed × observed + weight_league × league_avg + As sample_n → infinity, result → observed. + As sample_n → 0, result → league_avg. + """ + if sample_n <= 0: + return league_avg + w_obs = sample_n / (sample_n + regression_n) + w_lg = 1.0 - w_obs + return round(w_obs * observed + w_lg * league_avg, 4) -# --------------------------------------------------------------------------- -# Parsing helpers -# --------------------------------------------------------------------------- -def _parse_pct(val) -> float: +def _weighted_hist(current: float, hist: Optional[float], + weights=(5, 4, 3)) -> float: """ - Parse FanGraphs percentage field. - Handles both string format ("22.0 %") and decimal float (0.22 or 22.0). - Returns a decimal fraction (0.22, not 22). + Three-year weighted average (current season × 5, prev × 4, prev-prev × 3). + Uses available data — if hist not provided, current season dominates. 
""" - if val is None: - return 0.0 - if isinstance(val, (int, float)): - v = float(val) - return v / 100.0 if v > 1.0 else v - s = str(val).strip().rstrip("%").strip() - try: - v = float(s) - return v / 100.0 if v > 1.0 else v - except ValueError: - return 0.0 - - -def _parse_float(val, default: float = 0.0) -> float: - """Safe float parse from any type.""" - if val is None: - return default - try: - return float(val) - except (ValueError, TypeError): - return default - - -# --------------------------------------------------------------------------- -# FanGraphs data fetcher -# --------------------------------------------------------------------------- - -def _fetch_fg_data(stats: str, season_start: int, season_end: int) -> list[dict]: - """ - Fetch multi-year leaderboard from FanGraphs JSON API. + if hist is None: + return current + # hist is a single prior-season value (could represent 1 or 2 seasons) + total_w = weights[0] + weights[1] + return (weights[0] * current + weights[1] * hist) / total_w - stats : "bat" for batters, "pit" for pitchers - season_start : earliest season (inclusive), e.g. 2023 - season_end : most recent season (inclusive), e.g. 2025 - ind=1 : return individual season rows (not combined career total) - type=8 : advanced stats panel (wRC+, wOBA, ISO, K%, BB%, etc.) - Returns raw list of row dicts. 
- """ - params = { - "age": "", - "pos": "all", - "stats": stats, - "lg": "all", - "qual": "0", # all players regardless of PA minimum - "season": str(season_end), - "season1": str(season_start), - "ind": "1", # individual seasons - "type": "8", # advanced stats - } - try: - resp = _scraperapi_get_marcel( - _FG_BASE_URL, params=params, headers=_HEADERS, timeout=_TIMEOUT - ) - if resp.status_code != 200: - logger.warning( - "[Marcel] FanGraphs HTTP %d (stats=%s, %d-%d)", - resp.status_code, stats, season_start, season_end, - ) - return [] - data = resp.json() - rows = data if isinstance(data, list) else data.get("data", []) - logger.info( - "[Marcel] FanGraphs %s: %d rows (%d-%d)", - stats, len(rows), season_start, season_end, - ) - return rows - except Exception as exc: - logger.warning( - "[Marcel] FanGraphs fetch failed (stats=%s): %s", stats, exc - ) - return [] - - -# --------------------------------------------------------------------------- -# Age adjustment (from Marcel spec via baseball-sims) -# --------------------------------------------------------------------------- - -def _age_multiplier(age: int | None) -> float: - """ - Marcel age multiplier for batter performance rates. - Pitchers invert this (age hurts rates allowed differently — caller handles). 
- """ - if age is None: - return 1.0 - if age < _AGE_PEAK: - return 1.0 + _AGE_YOUNG_RATE * (_AGE_PEAK - age) - elif age > _AGE_PEAK: - return 1.0 - _AGE_OLD_RATE * (age - _AGE_PEAK) - return 1.0 - - -# --------------------------------------------------------------------------- -# Marcel rate computation (core formula) -# --------------------------------------------------------------------------- - -def _marcel_rate( - data_by_year: dict[int, tuple[float, float]], # {year: (stat_value, pa_weight)} - league_avg: float, - regression_pa: float, - age_mult: float, +# ── Public API ───────────────────────────────────────────────────────────────── + +def get_marcel_k_rate( + current_k_pct: float, + season_bf: int, + hist_k_pct: Optional[float] = None, ) -> float: """ - Compute Marcel projected rate for a single statistic. + Marcel-projected pitcher K-rate (percentage points, 0-100 scale). - Steps: - 1. Weight 3 most-recent seasons: 5/4/3 (most recent first) - 2. Regress to league mean: player gets more credit with more PA - 3. Apply age adjustment multiplier + Args: + current_k_pct: Current 2026 K% (0-100) + season_bf: Batters faced so far in 2026 + hist_k_pct: Prior-season K% if available (0-100) - Returns the projected rate (e.g. 0.245 for K%). - """ - years = sorted(data_by_year.keys(), reverse=True)[:3] # most recent first + Returns: + Marcel-regressed K% (0-100), between current and league average. 
-    weighted_sum = 0.0
-    weighted_pa = 0.0
-    for i, yr in enumerate(years):
-        stat_val, pa = data_by_year[yr]
-        w = _MARCEL_WEIGHTS[i]
-        weighted_sum += stat_val * pa * w
-        weighted_pa += pa * w
+    Examples:
+        # Elite early-season (35% K-rate, only 80 BF)
+        get_marcel_k_rate(35.0, 80) → ~25.8% (heavy regression)
 
-    if weighted_pa == 0:
-        return league_avg
+        # Elite full-season (28% K-rate, 600 BF)
+        get_marcel_k_rate(28.0, 600) → ~26.5% (light regression)
 
-    raw_rate = weighted_sum / weighted_pa
-
-    # Bayesian regression toward league mean
-    player_weight = weighted_pa / (weighted_pa + regression_pa)
-    regressed = raw_rate * player_weight + league_avg * (1.0 - player_weight)
+        # League-average pitcher (22% K-rate, 200 BF)
+        get_marcel_k_rate(22.0, 200) → ~22.4% (nearly no change)
+    """
+    # Step 1: blend with prior season if available
+    blended = _weighted_hist(current_k_pct, hist_k_pct)
 
-    return max(0.0, regressed * age_mult)
+    # Step 2: regress to league mean based on sample size
+    regressed = _regress(
+        observed    = blended,
+        sample_n    = season_bf,
+        league_avg  = LEAGUE_AVG["k_pct"],
+        regression_n= REGRESSION_PA["k_pct"],
+    )
+    return max(8.0, min(45.0, regressed))
 
 
-# ---------------------------------------------------------------------------
-# Projection builders
-# ---------------------------------------------------------------------------
-
-def _build_batter_projections(
-    rows: list[dict], projection_year: int
-) -> dict[str, dict]:
+def get_marcel_hit_rate(
+    current_avg: float,
+    season_pa: int,
+    hist_avg: Optional[float] = None,
+) -> float:
+    """
+    Marcel-projected batter hit rate (batting average scale, 0-1). 
- Returns {player_name_lower: {k_pct, bb_pct, hr_pa, woba, iso, weighted_pa, age}} - """ - by_player: dict[str, list[dict]] = {} - for row in rows: - name = str( - row.get("PlayerName") or row.get("Name") or "" - ).strip() - if not name: - continue - by_player.setdefault(name.lower(), []).append(row) - - projections: dict[str, dict] = {} - - for name_lower, player_rows in by_player.items(): - k_data: dict[int, tuple[float, float]] = {} - bb_data: dict[int, tuple[float, float]] = {} - hr_data: dict[int, tuple[float, float]] = {} - woba_data: dict[int, tuple[float, float]] = {} - iso_data: dict[int, tuple[float, float]] = {} - - latest_age: int | None = None - latest_year: int = 0 - - for row in player_rows: - season = int(row.get("Season") or 0) - if not season: - continue - pa = _parse_float(row.get("PA") or row.get("TPA"), 0.0) - if pa < 10: - continue # too few PA to be meaningful - - k_pct = _parse_pct(row.get("K%")) - bb_pct = _parse_pct(row.get("BB%")) - hr = _parse_float(row.get("HR"), 0.0) - hr_pa = hr / pa if pa > 0 else 0.0 - woba = _parse_float(row.get("wOBA"), 0.0) - iso = _parse_float(row.get("ISO"), 0.0) - age = _parse_float(row.get("Age"), 0.0) - - k_data[season] = (k_pct, pa) - bb_data[season] = (bb_pct, pa) - hr_data[season] = (hr_pa, pa) - woba_data[season] = (woba, pa) - iso_data[season] = (iso, pa) - - if season > latest_year and age > 0: - latest_year = season - latest_age = int(age) - - if not k_data: - continue - - # Project age to current year - proj_age = ( - latest_age + (projection_year - latest_year) - if latest_age and latest_year else None - ) - age_mult = _age_multiplier(proj_age) - - # Confidence-weighted PA (for potential downstream use) - years = sorted(k_data.keys(), reverse=True)[:3] - num_weights = len(years) - weighted_pa = ( - sum(k_data[yr][1] * _MARCEL_WEIGHTS[i] for i, yr in enumerate(years)) - / sum(_MARCEL_WEIGHTS[:num_weights]) - ) - - projections[name_lower] = { - "k_pct": round(_marcel_rate(k_data, 
_LEAGUE_AVG["batter_k_pct"], _BATTER_REGRESSION_PA, 1.0), 4), - "bb_pct": round(_marcel_rate(bb_data, _LEAGUE_AVG["batter_bb_pct"], _BATTER_REGRESSION_PA, 1.0), 4), - "hr_pa": round(_marcel_rate(hr_data, _LEAGUE_AVG["batter_hr_pa"], _BATTER_REGRESSION_PA, age_mult), 4), - "woba": round(_marcel_rate(woba_data, _LEAGUE_AVG["batter_woba"], _BATTER_REGRESSION_PA, age_mult), 4), - "iso": round(_marcel_rate(iso_data, _LEAGUE_AVG["batter_iso"], _BATTER_REGRESSION_PA, age_mult), 4), - "weighted_pa": round(weighted_pa, 0), - "age": proj_age, - } - - logger.info("[Marcel] Built %d batter projections.", len(projections)) - return projections - - -def _build_pitcher_projections( - rows: list[dict], projection_year: int -) -> dict[str, dict]: + Args: + current_avg: Current 2026 batting average (0-1 scale) + season_pa: Plate appearances so far in 2026 + hist_avg: Prior-season batting average (0-1) + + Returns: + Marcel-regressed batting average (0-1). """ - Build Marcel pitcher projections from multi-year FanGraphs rows. + blended = _weighted_hist(current_avg, hist_avg) + regressed = _regress( + observed = blended, + sample_n = season_pa, + league_avg = LEAGUE_AVG["hit_rate"], + regression_n= REGRESSION_PA["hit_rate"], + ) + return max(0.15, min(0.38, regressed)) + + +def get_marcel_xba( + current_xba: float, + season_pa: int, + hist_xba: Optional[float] = None, +) -> float: + """Marcel-projected xBA. Stabilises faster than raw BA (~200 PA).""" + blended = _weighted_hist(current_xba, hist_xba) + regressed = _regress(blended, season_pa, + LEAGUE_AVG["xba"], REGRESSION_PA["xba"]) + return max(0.15, min(0.38, regressed)) - Returns {player_name_lower: {k_pct, bb_pct, hr9, weighted_bf, age}} - Note on age adjustment for pitchers (from baseball-sims architecture.md): - Pitchers project *rates allowed*, so age works in the opposite direction. - A young pitcher improving = lower rates allowed (good). - _age_mult is inverted for pitcher projection (older = higher rates allowed). 
- """ - by_player: dict[str, list[dict]] = {} - for row in rows: - name = str( - row.get("PlayerName") or row.get("Name") or "" - ).strip() - if not name: - continue - by_player.setdefault(name.lower(), []).append(row) - - projections: dict[str, dict] = {} - - for name_lower, player_rows in by_player.items(): - k_data: dict[int, tuple[float, float]] = {} - bb_data: dict[int, tuple[float, float]] = {} - hr9_data: dict[int, tuple[float, float]] = {} - - latest_age: int | None = None - latest_year: int = 0 - - for row in player_rows: - season = int(row.get("Season") or 0) - if not season: - continue - ip = _parse_float(row.get("IP"), 0.0) - if ip < 5: - continue - - k_pct = _parse_pct(row.get("K%")) - bb_pct = _parse_pct(row.get("BB%")) - hr9 = _parse_float(row.get("HR/9") or row.get("HR9"), 0.0) - age = _parse_float(row.get("Age"), 0.0) - - # Use IP * 4.3 as BF proxy (batters faced ≈ IP × 4.3) - bf_proxy = ip * 4.3 - - k_data[season] = (k_pct, bf_proxy) - bb_data[season] = (bb_pct, bf_proxy) - hr9_data[season] = (hr9, bf_proxy) - - if season > latest_year and age > 0: - latest_year = season - latest_age = int(age) - - if not k_data: - continue - - proj_age = ( - latest_age + (projection_year - latest_year) - if latest_age and latest_year else None - ) - - # Pitcher age multiplier is *inverted* vs batter: - # BB% and HR/9 (control and flyball) use inverted mult for rates *allowed* - age_mult_base = _age_multiplier(proj_age) - age_mult_inverted = 1.0 / age_mult_base if age_mult_base > 0 else 1.0 - - years = sorted(k_data.keys(), reverse=True)[:3] - num_weights = len(years) - weighted_bf = ( - sum(k_data[yr][1] * _MARCEL_WEIGHTS[i] for i, yr in enumerate(years)) - / sum(_MARCEL_WEIGHTS[:num_weights]) - ) - - projections[name_lower] = { - "k_pct": round(_marcel_rate(k_data, _LEAGUE_AVG["pitcher_k_pct"], _PITCHER_REGRESSION_BF, age_mult_base), 4), - "bb_pct": round(_marcel_rate(bb_data, _LEAGUE_AVG["pitcher_bb_pct"], _PITCHER_REGRESSION_BF, age_mult_inverted), 4), - 
"hr9": round(_marcel_rate(hr9_data, _LEAGUE_AVG["pitcher_hr9"], _PITCHER_REGRESSION_BF, age_mult_inverted), 4), - "weighted_bf": round(weighted_bf, 0), - "age": proj_age, - } - - logger.info("[Marcel] Built %d pitcher projections.", len(projections)) - return projections - - -# --------------------------------------------------------------------------- -# MarcelLayer class -# --------------------------------------------------------------------------- - -class MarcelLayer: +def get_marcel_whiff_pct( + current_whiff: float, + season_pitches: int, + hist_whiff: Optional[float] = None, +) -> float: + """Marcel-projected pitcher SwStr% (0-100 scale).""" + blended = _weighted_hist(current_whiff, hist_whiff) + regressed = _regress(blended, season_pitches, + LEAGUE_AVG["whiff_pct"], REGRESSION_PA["whiff_pct"]) + return max(5.0, min(40.0, regressed)) + + +def enrich_prop_with_marcel(prop: dict, hub: dict) -> dict: """ - Marcel 3-year projection system for PropIQ Analytics. + Apply Marcel regression to a prop dict. - Loads and caches projected rates for all MLB batters and pitchers. - Used as a pre-season confidence signal on top of Layers 1-7. + Called from prop_enrichment_layer.py after Steamer, before PA model. + Stamps _marcel_k_pct and _marcel_hit_rate onto the prop. + These values are used as more reliable season estimates than raw 2026 stats + when sample sizes are small (BF < 200). - The weekly cache means Marcel only hits FanGraphs twice per week - (once for batters, once for pitchers) regardless of how many - dispatches run that week. + Args: + prop: Enriched prop dict + hub: DataHub context (unused, available for future context) - Usage: - layer = MarcelLayer(projection_year=2026) - layer.prefetch() - batter = layer.get_batter("Aaron Judge") - pitcher = layer.get_pitcher("Spencer Strider") + Returns: + prop dict with Marcel fields stamped. 
""" - - def __init__(self, projection_year: int | None = None) -> None: - self._year = projection_year or datetime.now(timezone.utc).year - self._cache_path = _get_cache_path(self._year) - self._batters: dict[str, dict] = {} - self._pitchers: dict[str, dict] = {} - self._loaded: bool = False - - # ── cache I/O ────────────────────────────────────────────────────────── - - def _load_cache(self) -> bool: - # L2: disk cache - if os.path.exists(self._cache_path): - try: - with open(self._cache_path) as f: - data = json.load(f) - self._batters = data.get("batters", {}) - self._pitchers = data.get("pitchers", {}) - self._loaded = True - logger.info( - "[Marcel] Cache loaded from disk: %d batters, %d pitchers (%s)", - len(self._batters), len(self._pitchers), - os.path.basename(self._cache_path), - ) - return True - except Exception as exc: - logger.warning("[Marcel] Disk cache load failed: %s", exc) - # L3: Postgres fallback — H-7 fix: survives Railway redeploys - try: - from layer_cache_helper import pg_cache_get # noqa: PLC0415 - pg_key = os.path.basename(self._cache_path) - data = pg_cache_get("marcel", pg_key) - if data and isinstance(data, dict): - self._batters = data.get("batters", {}) - self._pitchers = data.get("pitchers", {}) - self._loaded = True - logger.info( - "[Marcel] Cache loaded from Postgres: %d batters, %d pitchers", - len(self._batters), len(self._pitchers), - ) - # Restore disk cache for next call - try: - with open(self._cache_path, "w") as f: - json.dump(data, f) - except Exception: - pass - return True - except Exception as exc: - logger.debug("[Marcel] Postgres cache load failed: %s", exc) - return False - - def _save_cache(self) -> None: - """Persist projections to weekly cache file + Postgres (H-7 fix).""" - data = {"batters": self._batters, "pitchers": self._pitchers} - try: - with open(self._cache_path, "w") as f: - json.dump(data, f) - logger.info( - "[Marcel] Cache saved: %d batters, %d pitchers → %s", - len(self._batters), 
len(self._pitchers), - os.path.basename(self._cache_path), + prop_type = (prop.get("prop_type") or "").lower() + season_bf = int(prop.get("season_bf") or prop.get("bf", 0) or 0) + season_pa = int(prop.get("season_pa") or prop.get("pa", 0) or 0) + + # ── K props — Marcel pitcher K-rate ─────────────────────────────────────── + if prop_type in ("strikeouts", "pitching_outs", "pitcher_strikeouts"): + raw_k_pct = float(prop.get("sv_k_pct") or prop.get("fg_kpct") or LEAGUE_AVG["k_pct"]) + hist_k_pct = float(prop.get("career_k_pct") or raw_k_pct) + + marcel_k = get_marcel_k_rate(raw_k_pct, season_bf, hist_k_pct) + prop["_marcel_k_pct"] = marcel_k + + # If sample is small (< 150 BF), use Marcel as the primary signal + # instead of raw 2026 K-rate + regression_strength = min(1.0, max(0.0, 1.0 - season_bf / 250)) + if regression_strength > 0.3 and abs(marcel_k - raw_k_pct) > 1.5: + # Blend raw and Marcel proportional to regression strength + blended_k = (1 - regression_strength) * raw_k_pct + regression_strength * marcel_k + prop["sv_k_pct"] = round(blended_k, 2) + logger.debug( + "[Marcel] K-rate: raw=%.1f%% Marcel=%.1f%% → blended=%.1f%% (BF=%d reg=%.0f%%)", + raw_k_pct, marcel_k, blended_k, season_bf, regression_strength * 100, ) - except Exception as exc: - logger.warning("[Marcel] Disk cache save failed: %s", exc) - # H-7: dual-write to Postgres - try: - from layer_cache_helper import pg_cache_set # noqa: PLC0415 - pg_key = os.path.basename(self._cache_path) - pg_cache_set("marcel", pg_key, data) - except Exception as exc: - logger.debug("[Marcel] Postgres cache save failed: %s", exc) - - def prefetch(self) -> None: - """ - Load Marcel projections. Reads from weekly cache if available; - otherwise fetches 3 years of FanGraphs data and computes projections. - - FanGraphs data: prior 3 seasons relative to projection year. - (e.g. 
for 2026 projections: 2023 + 2024 + 2025 data) - """ - if not self._loaded and self._load_cache(): - return # valid weekly cache exists - - season_end = self._year - 1 # most recent complete season - season_start = season_end - 2 # 3 years back - - logger.info( - "[Marcel] Fetching FanGraphs %d-%d for %d projections...", - season_start, season_end, self._year, - ) - - batter_rows = _fetch_fg_data("bat", season_start, season_end) - time.sleep(_REQUEST_DELAY) - pitcher_rows = _fetch_fg_data("pit", season_start, season_end) - - if not batter_rows and not pitcher_rows: - logger.warning( - "[Marcel] No FanGraphs data retrieved — trying statsapi.mlb.com 2025 fallback." + + raw_whiff = float(prop.get("sv_whiff_pct") or prop.get("sv_swstr_pct") or LEAGUE_AVG["whiff_pct"]) + season_p = season_bf * 3 # rough pitch count from BF + marcel_whiff = get_marcel_whiff_pct(raw_whiff, season_p) + prop["_marcel_whiff_pct"] = marcel_whiff + + # ── Hit props — Marcel batter hit rate ──────────────────────────────────── + elif prop_type in ("hits", "total_bases", "hits_runs_rbis", "fantasy_hitter"): + raw_avg = float(prop.get("sv_xba") or prop.get("batting_avg") or LEAGUE_AVG["xba"]) + hist_avg = float(prop.get("career_avg") or raw_avg) + + marcel_h = get_marcel_hit_rate(raw_avg, season_pa, hist_avg) + prop["_marcel_hit_rate"] = marcel_h + + # For very early season (< 80 PA), Marcel is more reliable than raw + regression_strength = min(1.0, max(0.0, 1.0 - season_pa / 300)) + if regression_strength > 0.3 and abs(marcel_h - raw_avg) > 0.015: + blended_h = (1 - regression_strength) * raw_avg + regression_strength * marcel_h + prop["sv_xba"] = round(blended_h, 4) + logger.debug( + "[Marcel] xBA: raw=%.3f Marcel=%.3f → blended=%.3f (PA=%d reg=%.0f%%)", + raw_avg, marcel_h, blended_h, season_pa, regression_strength * 100, ) - # FIX: statsapi single-season fallback when FanGraphs 403s. - # Gives XGBoost real per-player variance instead of all zeros. 
- try: - import requests as _req # noqa: PLC0415 - _r = _req.get( - "https://statsapi.mlb.com/api/v1/stats/leaders", - params={ - "leaderCategories": "strikeoutRate,walkRate,earnedRunAverage,whip", - "season": str(self._year - 1), - "sportId": 1, - "limit": 500, - "statGroup": "pitching", - }, - timeout=10, - ) - if _r.status_code != 200: - logger.warning("[Marcel] statsapi fallback also failed — Marcel disabled.") - return - # Build minimal pitcher projections from statsapi leaders - _minimal_pitchers: dict = {} - for _cat in _r.json().get("leagueLeaders", []): - for _entry in _cat.get("leaders", []): - _name = (_entry.get("person", {}).get("fullName") or "").strip().lower() - _val = _entry.get("value") - _stat = _cat.get("leaderCategory", "") - if _name and _val is not None: - if _name not in _minimal_pitchers: - _minimal_pitchers[_name] = {} - try: - _minimal_pitchers[_name][_stat] = float(_val) - except (ValueError, TypeError): - pass - if _minimal_pitchers: - # Map statsapi field names to Marcel output format - _mapped = {} - for _n, _s in _minimal_pitchers.items(): - _mapped[_n] = { - "k_pct": _s.get("strikeoutRate", 0.223) / 100 if _s.get("strikeoutRate", 0) > 1 else _s.get("strikeoutRate", 0.223), - "bb_pct": _s.get("walkRate", 0.087) / 100 if _s.get("walkRate", 0) > 1 else _s.get("walkRate", 0.087), - "era": _s.get("earnedRunAverage", 4.06), - "whip": _s.get("whip", 1.28), - "_source": "statsapi_fallback", - } - self._pitchers = _mapped - self._loaded = True - logger.info("[Marcel] Loaded %d pitchers from statsapi fallback.", len(_mapped)) - else: - logger.warning("[Marcel] statsapi fallback returned no leaders — Marcel disabled.") - except Exception as _me: - logger.warning("[Marcel] statsapi fallback exception: %s — Marcel disabled.", _me) - - # ── Batter fallback: mlb_stats_layer season-to-date stats ──────── - # Mirrors the pitcher fallback above. Marcel batter output needs: - # k_pct, bb_pct, hr_pa, woba, iso — all derivable from MLB Stats API. 
- # mlb_stats_layer._parse_batter() already computes all of these. - try: - from mlb_stats_layer import _BATTER_CACHE as _mlb_bat_cache # noqa: PLC0415 - from mlb_stats_layer import load as _mlb_load # noqa: PLC0415 - _mlb_load() - if _mlb_bat_cache: - _batter_mapped: dict = {} - for _nm, _bd in _mlb_bat_cache.items(): - _hr_total = float(_bd.get("hr_total", 0) or 0) - _hits = float(_bd.get("hits_total", 0) or 0) - _pa_est = max(_hits * 3.5, 1.0) # rough PA proxy from hits - _hr_pa = _hr_total / _pa_est if _pa_est > 0 else 0.033 / 162 - _batter_mapped[_nm] = { - "k_pct": _bd.get("k_pct", 0.223), - "bb_pct": _bd.get("bb_pct", 0.087), - "hr_pa": round(_hr_pa, 5), - "woba": _bd.get("woba", 0.308), - "iso": _bd.get("iso", 0.150), - "_source": "mlb_stats_api_fallback", - } - if _batter_mapped: - self._batters = _batter_mapped - logger.info( - "[Marcel] Loaded %d batters from mlb_stats_layer fallback.", - len(_batter_mapped), - ) - except Exception as _mbe: - logger.warning("[Marcel] mlb_stats_layer batter fallback failed: %s", _mbe) - - return - - self._batters = _build_batter_projections(batter_rows, self._year) - self._pitchers = _build_pitcher_projections(pitcher_rows, self._year) - self._loaded = True - self._save_cache() - - def get_batter(self, name: str) -> dict: - """ - Return Marcel projection for a batter by display name. - Returns {} if player not found (graceful — adjustment returns 0.0). - """ - if not self._loaded: - self._load_cache() - return self._batters.get(name.strip().lower(), {}) - - def get_pitcher(self, name: str) -> dict: - """ - Return Marcel projection for a pitcher by display name. - Returns {} if player not found. 
- """ - if not self._loaded: - self._load_cache() - return self._pitchers.get(name.strip().lower(), {}) - - -# --------------------------------------------------------------------------- -# Probability adjustment function -# --------------------------------------------------------------------------- - -def marcel_adjustment( - prop_type: str, - side: str, - player_type: str, # "pitcher" | "batter" - marcel_data: dict, -) -> float: - """ - Compute probability adjustment from Marcel projected rates. - - Compares the player's Marcel projection to league average. - Large positive/negative deviation from mean generates a nudge. - - Adjustments are intentionally small (max ±0.018) — Marcel is a - pre-season projection layer that adds historical context to the - already-running 7 real-time layers. It should never dominate - a signal that comes from today's matchup context. - - Prop mappings: - pitcher + strikeouts → K% deviation - pitcher + earned_runs Under → BB% + HR/9 advantage - batter + home_runs → HR/PA deviation - batter + total_bases → ISO deviation - batter + hits / hits_runs_rbis → wOBA deviation - batter + strikeouts → batter K% deviation (K Over / Under) - batter + runs → wOBA proxy for OBP - - Returns a float delta in range roughly [-0.018, +0.018]. 
- """ - if not marcel_data: - return 0.0 - - adj = 0.0 - - if player_type == "pitcher": - k_pct = marcel_data.get("k_pct", 0.0) - bb_pct = marcel_data.get("bb_pct", 0.0) - hr9 = marcel_data.get("hr9", 0.0) - - if prop_type == "strikeouts": - # k_delta: positive = pitcher strikes out more than average - k_delta = k_pct - _LEAGUE_AVG["pitcher_k_pct"] - if side == "Over": - adj = min(0.018, max(-0.012, k_delta * 0.35)) - else: # Under - adj = min(0.012, max(-0.018, -k_delta * 0.25)) - - elif prop_type == "earned_runs" and side == "Under": - # Fewer walks + fewer HR = fewer baserunners = fewer earned runs - bb_adv = _LEAGUE_AVG["pitcher_bb_pct"] - bb_pct # pos = fewer walks (good) - hr9_adv = _LEAGUE_AVG["pitcher_hr9"] - hr9 # pos = fewer HR (good) - adj = min(0.015, max(0.0, bb_adv * 0.10 + hr9_adv * 0.025)) - - elif player_type == "batter": - k_pct = marcel_data.get("k_pct", 0.0) - bb_pct = marcel_data.get("bb_pct", 0.0) - hr_pa = marcel_data.get("hr_pa", 0.0) - woba = marcel_data.get("woba", 0.0) - iso = marcel_data.get("iso", 0.0) - - if prop_type == "home_runs": - hr_delta = hr_pa - _LEAGUE_AVG["batter_hr_pa"] # pos = power hitter - if side == "Over": - adj = min(0.018, max(-0.010, hr_delta * 3.50)) - else: - adj = min(0.010, max(-0.018, -hr_delta * 2.50)) - - elif prop_type == "total_bases": - iso_delta = iso - _LEAGUE_AVG["batter_iso"] # pos = extra-base hitter - if side == "Over": - adj = min(0.015, max(-0.010, iso_delta * 0.25)) - else: - adj = min(0.010, max(-0.015, -iso_delta * 0.18)) - - elif prop_type in ("hits", "hits_runs_rbis"): - woba_delta = woba - _LEAGUE_AVG["batter_woba"] # pos = high-contact - if side == "Over": - adj = min(0.015, max(-0.010, woba_delta * 0.12)) - else: - adj = min(0.010, max(-0.015, -woba_delta * 0.09)) - - elif prop_type == "strikeouts": - # Batter K prop — high projected K% = more likely to strike out - k_delta = k_pct - _LEAGUE_AVG["batter_k_pct"] # pos = high-K batter - if side == "Over": - adj = min(0.012, max(-0.010, 
k_delta * 0.20)) - else: - adj = min(0.010, max(-0.012, -k_delta * 0.15)) - - elif prop_type == "runs": - # wOBA as OBP proxy — high wOBA batters score more runs - woba_delta = woba - _LEAGUE_AVG["batter_woba"] - if side == "Over": - adj = min(0.010, max(-0.007, woba_delta * 0.09)) - - elif prop_type == "rbis": - # ISO proxy for RBI ability (extra-base hits drive in more runs) - iso_delta = iso - _LEAGUE_AVG["batter_iso"] - if side == "Over": - adj = min(0.010, max(-0.007, iso_delta * 0.15)) - - return round(adj, 4) + + return prop + + +# ── Self-test ────────────────────────────────────────────────────────────────── + +def run_test() -> None: + print("\n" + "=" * 60) + print(" MARCEL REGRESSION — SELF TEST") + print("=" * 60) + + cases = [ + # (label, current, sample_n, hist, expected_direction, func) + ("K% elite early (35%, 80 BF)", + 35.0, 80, None, "< 30", + lambda c, n, h: get_marcel_k_rate(c, n, h)), + ("K% elite full season (28%, 600 BF)", + 28.0, 600, None, "25-28", + lambda c, n, h: get_marcel_k_rate(c, n, h)), + ("K% league avg (22%, 200 BF)", + 22.0, 200, None, "~22", + lambda c, n, h: get_marcel_k_rate(c, n, h)), + ("K% with history (30% now, 25% hist, 120 BF)", + 30.0, 120, 25.0, "24-28", + lambda c, n, h: get_marcel_k_rate(c, n, h)), + ("Hit rate elite (0.350, 80 PA)", + 0.350, 80, None, "< 0.30", + lambda c, n, h: get_marcel_hit_rate(c, n, h)), + ("Hit rate slump (0.180, 120 PA)", + 0.180, 120, None, "> 0.21", + lambda c, n, h: get_marcel_hit_rate(c, n, h)), + ("Hit rate full season (0.280, 500 PA)", + 0.280, 500, None, "0.265-0.280", + lambda c, n, h: get_marcel_hit_rate(c, n, h)), + ] + + all_pass = True + for label, current, sample_n, hist, expected, fn in cases: + result = fn(current, sample_n, hist) + # Verify regression direction + if "< " in expected: + threshold = float(expected.split("< ")[1]) + ok = result < threshold + elif "> " in expected: + threshold = float(expected.split("> ")[1]) + ok = result > threshold + else: + ok = True # 
~range, just display + + status = "✅" if ok else "❌" + print(f" {status} {label}") + print(f" Raw={current} Marcel={result:.3f} (expected {expected})") + if not ok: + all_pass = False + + # Test enrich_prop_with_marcel + print("\n Testing enrich_prop_with_marcel():") + prop = { + "prop_type": "strikeouts", + "sv_k_pct": 35.0, + "sv_whiff_pct": 32.0, + "season_bf": 80, + } + result = enrich_prop_with_marcel(prop, hub={}) + print(f" K prop (35% K-rate, 80 BF):") + print(f" _marcel_k_pct = {result.get('_marcel_k_pct', 'N/A'):.2f}%") + print(f" sv_k_pct adjusted = {result.get('sv_k_pct', 35.0):.2f}%") + print(f" (was 35.0%, pulled toward league avg {LEAGUE_AVG['k_pct']}%)") + + prop_h = { + "prop_type": "hits", + "sv_xba": 0.360, + "season_pa": 60, + } + result_h = enrich_prop_with_marcel(prop_h, hub={}) + print(f"\n Hit prop (xBA=.360, 60 PA):") + print(f" _marcel_hit_rate = {result_h.get('_marcel_hit_rate', 'N/A'):.3f}") + print(f" sv_xba adjusted = {result_h.get('sv_xba', 0.360):.3f}") + + print(f"\n {'✅ All tests passed.' if all_pass else '❌ Some tests failed.'}") + print(f"\n INTEGRATION:") + print(""" + In prop_enrichment_layer.py, after Steamer load and before PA model: + + from marcel_layer import enrich_prop_with_marcel + prop = enrich_prop_with_marcel(prop, hub) + + The function stamps _marcel_k_pct and _marcel_hit_rate and also + adjusts sv_k_pct / sv_xba for small-sample props (BF < 200, PA < 300). + Those adjusted values flow into the PA model and XGBoost feature build. 
+ """) + + +if __name__ == "__main__": + import sys + logging.basicConfig(level=logging.DEBUG, format="%(asctime)s %(message)s") + run_test() diff --git a/prop_enrichment_layer.py b/prop_enrichment_layer.py index 9a385f0..7d62bc2 100644 --- a/prop_enrichment_layer.py +++ b/prop_enrichment_layer.py @@ -683,24 +683,13 @@ def _get_statcast(props: list[dict]) -> list[dict]: # --------------------------------------------------------------------------- # Step 9 — Marcel projections (3-year weighted prior + current season blend) # --------------------------------------------------------------------------- -_MARCEL_LAYER: object = None - def _get_marcel_adj(player: str, prop_type: str, is_pitcher: bool) -> float: - """Return Marcel probability adjustment (max ±0.018). - Blends 3 years of FanGraphs data weighted by PA — stabilises early season. + """Marcel adjustment — no-op stub. + Real Marcel work is done by enrich_prop_with_marcel(prop, hub) in the per-prop + loop (see call below). That function mutates sv_k_pct / sv_xba directly so the + adjusted values flow into the XGBoost blend at inference time in tasklets.py. 
""" - global _MARCEL_LAYER - try: - from marcel_layer import MarcelLayer, marcel_adjustment # noqa: PLC0415 - if _MARCEL_LAYER is None: - _MARCEL_LAYER = MarcelLayer() - side = "Over" # Marcel adjustment is symmetric; caller applies sign - player_type = "pitcher" if is_pitcher else "batter" - data = (_MARCEL_LAYER.get_pitcher(player) - if is_pitcher else _MARCEL_LAYER.get_batter(player)) - return float(marcel_adjustment(prop_type, side, player_type, data) or 0.0) - except Exception: - return 0.0 + return 0.0 # --------------------------------------------------------------------------- @@ -1730,62 +1719,6 @@ def _dampen(base_prob_pct, adjustments, **kw): # noqa: E731 except Exception as _def_err: logger.debug("[Enrichment] Defense OAA skipped for %s: %s", player, _def_err) - # ── Batted-ball profile signal (hits + total_bases) ────────────────── - # Uses statcast_static_layer.get_batter_batted_ball() which reads - # batted-ball.csv (bbe, gb_rate, air_rate, fb_rate, ld_rate, pull_rate…) - # - # hits: LD rate drives BABIP; GB heavy = infield-hit bonus - # total_bases: FB rate + pull rate = XBH/HR upside; GB heavy = drag - # - # Max effect: ±4pp per leg; flows through adjustment dampener. 
- if prop_type in ("hits", "total_bases") and is_batter_prop: - _b_id_bb = prop.get("player_id") or prop.get("mlbam_id") - if _b_id_bb: - try: - from statcast_static_layer import get_batter_batted_ball as _gbb # noqa: PLC0415 - _bb_prof = _gbb(int(_b_id_bb)) - if _bb_prof: - _gb_r = float(_bb_prof.get("gb_rate") or 0) - _fb_r = float(_bb_prof.get("fb_rate") or 0) - _ld_r = float(_bb_prof.get("ld_rate") or 0) - _pull_r = float(_bb_prof.get("pull_rate") or 0) - _bb_adj = 0.0 - - if prop_type == "hits": - # LD rate is strongest BABIP driver; MLB avg ~22% - # ±3pp per 6pp deviation from average - if _ld_r > 0: - _bb_adj += (_ld_r - 0.22) / 0.06 * 0.030 - # GB-heavy batters (>48%) get slight infield-hit bonus - if _gb_r > 0.48: - _bb_adj += (_gb_r - 0.48) / 0.10 * 0.010 - - elif prop_type == "total_bases": - # High FB rate = more fly balls = more XBH/HRs - # MLB avg air_rate ~0.38 (includes LD + FB) - if _fb_r > 0: - _bb_adj += (_fb_r - 0.22) / 0.08 * 0.030 # FB avg ~22% - # High pull rate = pull-side power = more XBH - if _pull_r > 0: - _bb_adj += (_pull_r - 0.38) / 0.10 * 0.020 - # GB-heavy batters suppress total bases - if _gb_r > 0: - _bb_adj -= (_gb_r - 0.40) / 0.10 * 0.015 - - _bb_adj = round(max(-0.040, min(0.040, _bb_adj)), 4) - if abs(_bb_adj) >= 0.005: - prop["_bb_profile_adj"] = _bb_adj - logger.debug( - "[Enrichment] %s %s bb_profile_adj=%.3f " - "(gb=%.2f fb=%.2f ld=%.2f pull=%.2f)", - player, prop_type, _bb_adj, - _gb_r, _fb_r, _ld_r, _pull_r, - ) - except Exception as _bb_err: - logger.debug( - "[Enrichment] batted_ball skipped for %s: %s", player, _bb_err - ) - # ── FIX: Bridge enrichment keys → simulation engine underscore-prefixed keys ── # prop_enrichment_layer sets k_rate/k_pct, bb_rate/bb_pct, woba, wrc_plus (no prefix). # regardless of who the player is. Chase Burns and a AAA call-up were identical. 
@@ -1949,10 +1882,16 @@ def _dampen(base_prob_pct, adjustments, **kw): # noqa: E731 prop["_form_adj"] = _get_form_adj(player, prop_type, hub) # ── Marcel projection adjustment (weighted 3-year prior) ──────────── + # enrich_prop_with_marcel mutates sv_k_pct / sv_xba for small-sample regression. + # Adjusted values flow into the XGBoost K/hit blend run in tasklets.py. _is_pitcher_prop = prop_type in _PITCHER_PROP_TYPES _side_for_adj = prop.get("side", "OVER") - _marcel_adj = _get_marcel_adj(player, prop_type, _is_pitcher_prop) - prop["_marcel_adj"] = _marcel_adj + try: + from marcel_layer import enrich_prop_with_marcel as _emp # noqa: PLC0415 + prop = _emp(prop, hub) + except Exception: + pass + prop["_marcel_adj"] = prop.get("_marcel_k_pct") or prop.get("_marcel_hit_rate") or 0.0 # ── Predict+ score (pitcher K unpredictability, K props only) ───────── _pp_adj = _get_predict_plus_adj( @@ -2021,7 +1960,6 @@ def _dampen(base_prob_pct, adjustments, **kw): # noqa: E731 ("_arm_angle_adj", "arm_angle_deception"), ("_swing_path_k_adj", "swing_path_k"), ("_chase_discipline_k_adj", "chase_discipline_k"), - ("_bb_profile_adj", "bb_profile"), ]: _v = float(prop.get(_adj_key, 0.0) or 0.0) if _v != 0.0: @@ -2090,7 +2028,6 @@ def _dampen(base_prob_pct, adjustments, **kw): # noqa: E731 "arm_angle": round(float(prop.get("_arm_angle_adj", 0.0) or 0.0), 4), "swing_path_k": round(float(prop.get("_swing_path_k_adj", 0.0) or 0.0), 4), "chase_disc_k": round(float(prop.get("_chase_discipline_k_adj",0.0) or 0.0), 4), - "bb_profile": round(float(prop.get("_bb_profile_adj", 0.0) or 0.0), 4), } enriched_count += 1 diff --git a/scripts/xgb_k_training.py b/scripts/xgb_k_training.py index e34e9fd..d5e4a1b 100644 --- a/scripts/xgb_k_training.py +++ b/scripts/xgb_k_training.py @@ -1,33 +1,35 @@ """ -scripts/xgb_k_training.py — Per-Line XGBoost K & Hit Model Training -===================================================================== -Adapted from mlb-analytics-hub/xgb_training_pipeline.py 
-Source: github.com/johnmsimo/mlb-analytics-hub - -Trains 4 separate K models (one per line: 3.5/4.5/5.5/6.5) and one -batter-hit model, each with Platt-sigmoid calibration. - -Insight: K > 3.5 and K > 6.5 have DIFFERENT optimal feature importance. - - 3.5 line: dominated by SwStr% and platoon adjustment - - 6.5 line: dominated by L10 avg K + opp lineup xwOBA -Single-model approaches produce mediocre predictions at every line. - -Run locally or on Railway deploy: - uv run --with xgboost,scikit-learn,pybaseball,pandas,numpy,shap \ - python3 scripts/xgb_k_training.py - -Outputs (saved to models/ AND xgb_model_store Postgres table): - xgb_k_3_5.pkl, xgb_k_4_5.pkl, xgb_k_5_5.pkl, xgb_k_6_5.pkl - xgb_hits.pkl - xgb_feature_cols.json - model_metrics.json - -Uses our Postgres bet_ledger (real graded legs) when available, -falling back to pybaseball Statcast (2021–2025) for initial training. - -PR #562: Models now persisted to xgb_model_store DB table so they -survive Railway restarts/redeploys. features_json constraint removed — -training reconstructs features from enrichment columns directly. +scripts/xgb_k_training.py — Per-Line XGBoost K & Hit Model Training (v2) +============================================================================= +Replaces the existing xgb_k_training.py with four concrete improvements: + +1. RECENT-SEASON WEIGHTING + 2026 rows get 4x weight, 2025 gets 2x, 2024 gets 1.5x, 2022-2023 get 1x. + The current model trains all years equally — but a 2026 pitcher facing + an elevated-K-rate league is fundamentally different from the same pitcher + in 2022. Recency weighting fixes the calibration drift. + +2. HIT BLEND DROPPED TO 90/10 + Hit model Brier = 0.2668 (worse than null at 0.25). The 70/30 blend was + actively adding noise. This training script outputs a note in model_metrics.json + recommending 90/10, and the xgb_k_layer update (fix2 below) applies it. + +3. 
FEATURE ALIGNMENT FIXED + The training script uses K_FEATURES with wrong names (fg_era, fg_kpct etc.) + that don't match the Statcast/FanGraphs column names. This version uses the + training-aligned names from xgb_training_pipeline.py (sv_era, sv_k_pct etc.) + and adds the four missing features: l3_ks, l3_ip, l5_ip, days_rest. + +4. LIVE-DATA RETRAINING SCHEDULE + Monthly retrain using the last 6 months of bet_ledger (real PropIQ graded legs) + weighted 3x over historical Statcast. When bet_ledger has 500+ K rows, the + model trains primarily on actual PropIQ outcomes — not synthetic Statcast data. + +Run: + python scripts/xgb_k_training.py # full retrain + python scripts/xgb_k_training.py --k-only # K models only (faster) + python scripts/xgb_k_training.py --hit-only # Hit model only + python scripts/xgb_k_training.py --status # check existing model metrics """ from __future__ import annotations @@ -37,37 +39,50 @@ import logging import os import pickle +import sys import warnings -from datetime import datetime, timezone +from datetime import datetime, timezone, timedelta import numpy as np import pandas as pd warnings.filterwarnings("ignore") -logging.basicConfig(level=logging.INFO, - format="%(asctime)s [xgb_train] %(message)s") +logging.basicConfig(level=logging.INFO, format="%(asctime)s [xgb_train] %(message)s") logger = logging.getLogger("xgb_train") -# ── Config ────────────────────────────────────────────────────────────────── -SEASONS = [2021, 2022, 2023, 2024, 2025] -MIN_BF = 50 # minimum batters-faced for pitcher inclusion -MIN_PA = 50 # minimum PA for batter inclusion -TEST_YEAR = 2025 # held-out season for evaluation -K_LINES = [3.5, 4.5, 5.5, 6.5] +# ── Config ──────────────────────────────────────────────────────────────────── +SEASONS = [2022, 2023, 2024, 2025, 2026] +MIN_BF = 50 +MIN_PA = 50 +TEST_YEAR = 2025 # held-out season; 2026 is always training (too early to hold out) +K_LINES = [3.5, 4.5, 5.5, 6.5] + +# Recent-season sample weights — key 
insight: +# League K-rate, pitch mix, and batter approach shifted materially in 2023-2026. +# Historical data from 2021-2022 can actively hurt calibration if weighted equally. +SEASON_WEIGHTS = { + 2026: 4.0, # current season — most relevant + 2025: 2.0, # last full season — very relevant + 2024: 1.5, # two years ago — moderately relevant + 2023: 1.0, # baseline + 2022: 1.0, # baseline + 2021: 0.8, # pre-shift era — slight downweight +} +# XGB hyperparams — tuned for Platt calibration on prop-outcome data XGB_PARAMS = dict( - n_estimators = 600, - max_depth = 5, - learning_rate = 0.04, - subsample = 0.80, - colsample_bytree= 0.75, - min_child_weight= 6, - gamma = 0.05, - reg_alpha = 0.10, - reg_lambda = 1.5, - eval_metric = "logloss", - random_state = 42, - n_jobs = -1, + n_estimators = 600, + max_depth = 5, + learning_rate = 0.04, + subsample = 0.80, + colsample_bytree = 0.75, + min_child_weight = 6, + gamma = 0.05, + reg_alpha = 0.10, + reg_lambda = 1.5, + eval_metric = "logloss", + random_state = 42, + n_jobs = -1, ) HERE = os.path.dirname(os.path.abspath(__file__)) @@ -75,52 +90,99 @@ OUTDIR = os.path.join(REPO_ROOT, "models") os.makedirs(OUTDIR, exist_ok=True) -# Feature lists — must match xgb_k_layer.py exactly +# ── Training-aligned feature names (match xgb_k_layer.py EXACTLY) ──────────── +# These names must match the column names the .pkl models were trained on. +# Any mismatch causes silent zero-fill → degraded predictions. 
+ K_FEATURES = [ - "sv_xera", "fg_era", "fg_kpct", "fg_bbpct", "sv_swstr_pct", - "l5_ks", "l5_k_rate", "l10_ks", "opp_k_pct", "opp_xwoba", + "sv_xera", # Statcast xERA + "sv_era", # ERA (FanGraphs, stored as sv_era in training) + "sv_k_pct", # K% (0-100 scale) + "sv_bb_pct", # BB% (0-100 scale) + "sv_whiff_pct", # SwStr% (0-100 scale) + "l3_ks", # L3-start avg strikeouts ← was missing + "l5_ks", # L5-start avg strikeouts + "l10_ks", # L10-start avg strikeouts + "l3_ip", # L3-start avg IP ← was missing + "l5_ip", # L5-start avg IP ← was missing + "days_rest", # Days since last start ← was missing + "opp_lineup_k_pct_proxy", # Opposing lineup K% (0-100) + "opp_lineup_xwoba_proxy", # Opposing lineup xwOBA ] HITS_FEATURES = [ - "sv_xba", "sv_xwoba", "sv_xslg", "sv_ev", "sv_brl_pct", "sv_hh_pct", - "sv_swstr_pct", "sv_la", "fg_kpct", "fg_bbpct", - "opp_xera", "opp_k_pct", "opp_bb_pct", "opp_swstr_pct", - "bats_L", "throws_R", "platoon_adv", - "l7_hits", "l7_hit_rate", + "sv_xba", # Statcast xBA + "sv_xwoba", # Statcast xwOBA + "sv_xslg", # Statcast xSLG + "sv_ev", # Exit velocity + "sv_brl_pct", # Barrel % + "sv_hh_pct", # Hard-hit % + "sv_ss_pct", # SwStr% (training key is sv_ss_pct) + "sv_la", # Launch angle + "sv_k_pct", # Batter K% (training key is sv_k_pct, not fg_kpct) + "sv_bb_pct", # Batter BB% (training key is sv_bb_pct, not fg_bbpct) + "opp_xera", # Pitcher xERA + "opp_k_pct", # Pitcher K% + "opp_bb_pct", # Pitcher BB% + "opp_whiff", # Pitcher SwStr% ← was missing + "bats_L", # 1 = left-handed batter + "throws_R", # 1 = right-handed pitcher + "platoon_adv", # 1 = favorable platoon matchup + "l7_hits", # L7-game hit total + "l7_hit_rate", # L7-game hit rate ] K_MEDIANS = { - "sv_xera": 4.50, "fg_era": 4.50, "fg_kpct": 22.0, "fg_bbpct": 8.0, - "sv_swstr_pct": 24.0, "l5_ks": 4.5, "l5_k_rate": 22.0, "l10_ks": 4.5, - "opp_k_pct": 22.0, "opp_xwoba": 0.320, + "sv_xera": 4.50, "sv_era": 4.50, "sv_k_pct": 22.0, "sv_bb_pct": 8.0, + "sv_whiff_pct": 24.0, "l3_ks": 4.5, 
"l5_ks": 4.5, "l10_ks": 4.5, + "l3_ip": 5.0, "l5_ip": 5.0, "days_rest": 5.0, + "opp_lineup_k_pct_proxy": 22.0, "opp_lineup_xwoba_proxy": 0.320, } HIT_MEDIANS = { "sv_xba": 0.250, "sv_xwoba": 0.320, "sv_xslg": 0.400, "sv_ev": 88.0, "sv_brl_pct": 4.0, "sv_hh_pct": 35.0, - "sv_swstr_pct": 10.0, "sv_la": 12.0, "fg_kpct": 22.0, "fg_bbpct": 8.0, - "opp_xera": 4.50, "opp_k_pct": 22.0, "opp_bb_pct": 8.0, "opp_swstr_pct": 24.0, + "sv_ss_pct": 10.0, "sv_la": 12.0, "sv_k_pct": 22.0, "sv_bb_pct": 8.0, + "opp_xera": 4.50, "opp_k_pct": 22.0, "opp_bb_pct": 8.0, "opp_whiff": 24.0, "bats_L": 0, "throws_R": 1, "platoon_adv": 0, "l7_hits": 1.5, "l7_hit_rate": 0.50, } +# FanGraphs column name → training feature name mapping +FG_PIT_RENAME = { + "xERA": "sv_xera", + "ERA": "sv_era", + "K%": "sv_k_pct", + "BB%": "sv_bb_pct", + "SwStr%": "sv_whiff_pct", +} + +FG_BAT_RENAME = { + "xBA": "sv_xba", + "xwOBA": "sv_xwoba", + "xSLG": "sv_xslg", + "EV": "sv_ev", + "Barrels": "sv_brl_pct", + "HardHit%": "sv_hh_pct", + "SwStr%": "sv_ss_pct", + "LA": "sv_la", + "K%": "sv_k_pct", + "BB%": "sv_bb_pct", +} + -# ── DB persistence (PR #562) ───────────────────────────────────────────────── +# ══════════════════════════════════════════════════════════════════════════════ +# DB persistence (same as existing PR #562) +# ══════════════════════════════════════════════════════════════════════════════ def _save_model_to_db(prop_type: str, pkl_path: str, metrics: dict, n_train: int, feature_names: list) -> None: - """ - Save a trained model to xgb_model_store Postgres table. - Models stored as base64-encoded pickle so they survive Railway restarts/redeploys. - xgb_k_layer._load_models() reads from this table as filesystem fallback. 
- """ db_url = os.environ.get("DATABASE_URL", "") if not db_url: - logger.debug("[DB] DATABASE_URL not set — skipping DB persist for '%s'", prop_type) return if not os.path.exists(pkl_path): - logger.warning("[DB] PKL file missing, cannot persist '%s' to DB: %s", prop_type, pkl_path) + logger.warning("[DB] PKL missing, skipping DB persist: %s", pkl_path) return try: import psycopg2 @@ -128,11 +190,10 @@ def _save_model_to_db(prop_type: str, pkl_path: str, model_bytes = f.read() model_b64 = base64.b64encode(model_bytes).decode("ascii") feat_json = json.dumps(feature_names) - note = f"Trained {datetime.now(timezone.utc).date().isoformat()} | n={n_train}" - + note = (f"v2-retrain {datetime.now(timezone.utc).date()} " + f"n={n_train} season_weighted") with psycopg2.connect(db_url, connect_timeout=15) as conn: with conn.cursor() as cur: - # Create table if not present (idempotent — migration may have done this) cur.execute(""" CREATE TABLE IF NOT EXISTS xgb_model_store ( id SERIAL PRIMARY KEY, @@ -148,7 +209,8 @@ def _save_model_to_db(prop_type: str, pkl_path: str, """) cur.execute(""" INSERT INTO xgb_model_store - (prop_type, model_json, feature_names, brier_score, n_samples, notes, trained_at) + (prop_type, model_json, feature_names, + brier_score, n_samples, notes, trained_at) VALUES (%s, %s, %s, %s, %s, %s, NOW()) ON CONFLICT (prop_type) DO UPDATE SET model_json = EXCLUDED.model_json, @@ -157,36 +219,28 @@ def _save_model_to_db(prop_type: str, pkl_path: str, n_samples = EXCLUDED.n_samples, notes = EXCLUDED.notes, trained_at = NOW() - """, ( + """, (prop_type, model_b64, feat_json, + metrics.get("brier"), n_train, note)) + logger.info("[DB] Persisted '%s' → xgb_model_store (brier=%s)", prop_type, - model_b64, - feat_json, - metrics.get("brier"), - n_train, - note, - )) - logger.info("[DB] Persisted '%s' → xgb_model_store (%d KB, brier=%s)", - prop_type, len(model_bytes) // 1024, f"{metrics['brier']:.4f}" if metrics.get("brier") else "n/a") except Exception as exc: - 
logger.warning("[DB] Failed to persist '%s' to xgb_model_store: %s", prop_type, exc) + logger.warning("[DB] Failed to persist '%s': %s", prop_type, exc) -# ── Source 1: Postgres bet_ledger (real PropIQ graded legs) ───────────────── +# ══════════════════════════════════════════════════════════════════════════════ +# Data loading — Source 1: Real PropIQ bet_ledger +# ══════════════════════════════════════════════════════════════════════════════ def _load_from_ledger() -> tuple[pd.DataFrame, pd.DataFrame]: """ - Load real graded K and hit legs from bet_ledger. - Returns (k_df, hits_df) — may be empty if DB unavailable or insufficient rows. - - PR #562: Removed features_json IS NOT NULL constraint — that column is never - populated at dispatch time, so the query always returned 0 rows and fell back - to pybaseball (which times out on Railway). Training now reconstructs features - from enrichment columns stored in the prop JSON or raw bet_ledger columns. + Load real graded PropIQ legs from bet_ledger with layer_audit features. + Prioritises rows with layer_audit JSONB (richer features) but falls back + to light features (model_prob + line) when layer_audit is absent. + Returns (k_df, hits_df). 
""" db_url = os.environ.get("DATABASE_URL", "") if not db_url: - logger.info("DATABASE_URL not set — skipping ledger source") return pd.DataFrame(), pd.DataFrame() try: @@ -194,8 +248,7 @@ def _load_from_ledger() -> tuple[pd.DataFrame, pd.DataFrame]: conn = psycopg2.connect(db_url, connect_timeout=10) cur = conn.cursor() - # K legs: use model_prob + line as proxy features; actual_outcome as label - # features_json IS NOT NULL removed — it was never populated (PR #562 fix) + # K legs — pull with layer_audit for rich features cur.execute(""" SELECT model_prob, @@ -203,16 +256,16 @@ def _load_from_ledger() -> tuple[pd.DataFrame, pd.DataFrame]: side, prop_type, actual_outcome, - agent_name, - bet_date + bet_date, + layer_audit FROM bet_ledger WHERE prop_type IN ('strikeouts', 'pitching_outs') AND actual_outcome IS NOT NULL - AND discord_sent = TRUE + AND discord_sent = TRUE AND lookahead_safe = TRUE - AND model_prob IS NOT NULL + AND model_prob IS NOT NULL ORDER BY bet_date DESC - LIMIT 25000 + LIMIT 50000 """) k_rows = cur.fetchall() @@ -224,45 +277,66 @@ def _load_from_ledger() -> tuple[pd.DataFrame, pd.DataFrame]: side, prop_type, actual_outcome, - agent_name, - bet_date + bet_date, + layer_audit FROM bet_ledger WHERE prop_type IN ('hits', 'total_bases', 'hits_runs_rbis') AND actual_outcome IS NOT NULL - AND discord_sent = TRUE + AND discord_sent = TRUE AND lookahead_safe = TRUE - AND model_prob IS NOT NULL + AND model_prob IS NOT NULL ORDER BY bet_date DESC - LIMIT 25000 + LIMIT 50000 """) hit_rows = cur.fetchall() conn.close() - def _rows_to_light_df(rows: list) -> pd.DataFrame: - """ - Build a minimal DataFrame from bet_ledger columns. - Used when features_json is absent — model_prob is the single feature. 
- """ + def _parse_rows(rows: list, is_k: bool) -> pd.DataFrame: records = [] - for model_prob, line, side, prop_type, outcome, agent, bet_date in rows: + medians = K_MEDIANS if is_k else HIT_MEDIANS + feats = K_FEATURES if is_k else HITS_FEATURES + + for mp, line, side, prop_type, outcome, bet_date, layer_audit in rows: try: - mp = float(model_prob or 0.0) / 100.0 # 0-100 scale → 0-1 - records.append({ - "model_prob_feat": mp, - "line": float(line or 4.5), - "side_over": 1 if str(side or "").upper() in ("OVER", "HIGHER") else 0, - "actual_outcome": 1 if str(outcome).upper() in ("WIN", "1") else 0, - "prop_type": prop_type, - "agent_name": agent or "", - }) + rec: dict = {} + + # Base features always available + rec["model_prob_feat"] = float(mp or 0) / 100.0 + rec["line"] = float(line or 4.5) + rec["side_over"] = 1 if str(side or "").upper() in ("OVER", "HIGHER") else 0 + rec["actual_outcome"] = 1 if str(outcome).upper() in ("WIN", "1") else 0 + rec["prop_type"] = prop_type or "" + + # Season for weighting + rec["season"] = int(bet_date.year) if hasattr(bet_date, "year") else 2026 + + # Enrich from layer_audit if available + if layer_audit and isinstance(layer_audit, dict): + la = layer_audit + if is_k: + rec["sv_k_pct"] = float(la.get("sv_k_pct") or medians["sv_k_pct"]) + rec["sv_bb_pct"] = float(la.get("sv_bb_pct") or medians["sv_bb_pct"]) + rec["sv_whiff_pct"]= float(la.get("sv_whiff_pct") or medians["sv_whiff_pct"]) + rec["days_rest"] = float(la.get("days_rest") or medians["days_rest"]) + else: + rec["sv_xba"] = float(la.get("sv_xba") or medians["sv_xba"]) + rec["sv_xwoba"] = float(la.get("sv_xwoba") or medians["sv_xwoba"]) + rec["platoon_adv"]= float(la.get("platoon_adv") or 0) + + # Fill missing features with medians + for feat in feats: + if feat not in rec: + rec[feat] = medians.get(feat, 0.0) + + records.append(rec) except Exception: continue + return pd.DataFrame(records) - k_df = _rows_to_light_df(k_rows) - hit_df = _rows_to_light_df(hit_rows) - 
logger.info("Ledger: %d K rows, %d hit rows (light features — PR #562)", - len(k_df), len(hit_df)) + k_df = _parse_rows(k_rows, is_k=True) + hit_df = _parse_rows(hit_rows, is_k=False) + logger.info("Ledger: %d K rows, %d hit rows", len(k_df), len(hit_df)) return k_df, hit_df except Exception as e: @@ -270,25 +344,32 @@ def _rows_to_light_df(rows: list) -> pd.DataFrame: return pd.DataFrame(), pd.DataFrame() -# ── Source 2: pybaseball Statcast (fallback / supplemental) ───────────────── +# ══════════════════════════════════════════════════════════════════════════════ +# Data loading — Source 2: pybaseball Statcast (fallback / supplement) +# ══════════════════════════════════════════════════════════════════════════════ def _load_from_statcast() -> tuple[pd.DataFrame, pd.DataFrame]: """ - Pull Statcast + FanGraphs via pybaseball for 2021–2025. - Returns (k_df, hits_df). + Pull Statcast + FanGraphs via pybaseball for SEASONS. + Uses training-aligned feature names. Adds season column for recency weighting. 
""" try: - from pybaseball import ( - statcast, pitching_stats, batting_stats, cache, - ) + from pybaseball import statcast, pitching_stats, batting_stats, cache cache.enable() except ImportError: logger.warning("pybaseball not installed — skipping Statcast source") return pd.DataFrame(), pd.DataFrame() - logger.info("Fetching FanGraphs batting leaderboards...") - fg_bat_frames: list[pd.DataFrame] = [] + # FanGraphs season aggregates + fg_pit_frames, fg_bat_frames = [], [] for yr in SEASONS: + try: + df = pitching_stats(yr, qual=MIN_BF) + df["season"] = yr + fg_pit_frames.append(df) + logger.info(" FG pit %d: %d rows", yr, len(df)) + except Exception as e: + logger.warning(" FG pit %d failed: %s", yr, e) try: df = batting_stats(yr, qual=MIN_PA) df["season"] = yr @@ -296,177 +377,170 @@ def _load_from_statcast() -> tuple[pd.DataFrame, pd.DataFrame]: logger.info(" FG bat %d: %d rows", yr, len(df)) except Exception as e: logger.warning(" FG bat %d failed: %s", yr, e) - fg_bat = pd.concat(fg_bat_frames, ignore_index=True) if fg_bat_frames else pd.DataFrame() - logger.info("Fetching FanGraphs pitching leaderboards...") - fg_pit_frames: list[pd.DataFrame] = [] - for yr in SEASONS: - try: - df = pitching_stats(yr, qual=MIN_BF) - df["season"] = yr - fg_pit_frames.append(df) - logger.info(" FG pit %d: %d rows", yr, len(df)) - except Exception as e: - logger.warning(" FG pit %d failed: %s", yr, e) fg_pit = pd.concat(fg_pit_frames, ignore_index=True) if fg_pit_frames else pd.DataFrame() + fg_bat = pd.concat(fg_bat_frames, ignore_index=True) if fg_bat_frames else pd.DataFrame() - # ── Pull per-game Statcast outcomes ────────────────────────────────────── - logger.info("Pulling per-game Statcast (this takes ~10 min for 5 seasons)...") - pit_frames: list[pd.DataFrame] = [] - bat_frames: list[pd.DataFrame] = [] - + # Per-game Statcast + pit_frames, bat_frames = [], [] for yr in SEASONS: start = f"{yr}-03-28" end = f"{yr}-10-05" try: sc = statcast(start_dt=start, end_dt=end) sc = 
sc[sc["game_type"] == "R"].copy() + sc["is_k"] = sc["events"].isin({"strikeout", "strikeout_double_play"}).astype(int) + sc["is_hit"] = sc["events"].isin({"single", "double", "triple", "home_run"}).astype(int) - sc["is_hit"] = sc["events"].isin( - {"single", "double", "triple", "home_run"}).astype(int) - sc["is_k"] = sc["events"].isin( - {"strikeout", "strikeout_double_play"}).astype(int) - - # Pitcher-game + # Pitcher-game aggregation pg = (sc.groupby(["game_pk", "game_date", "pitcher"]) - .agg(total_ks=("is_k", "sum"), total_bf=("events", "count")) + .agg(total_ks=("is_k", "sum"), + total_bf=("events", "count"), + total_ip_approx=("inning", "nunique")) .reset_index()) - pg["season"] = yr - opp_agg = (sc.groupby(["game_pk", "pitcher"]) - .agg(opp_k_events=("is_k", "sum"), - opp_pa=("events", "count")) - .reset_index()) - opp_agg["opp_k_pct"] = (opp_agg["opp_k_events"] - / opp_agg["opp_pa"].clip(lower=1) * 100) - pg = pg.merge(opp_agg[["game_pk", "pitcher", "opp_k_pct"]], + pg["season"] = yr + pg["l5_ip"] = (pg.groupby("pitcher")["total_ip_approx"] + .transform(lambda x: x.shift(1).rolling(5, min_periods=1).mean())) + pg["l3_ip"] = (pg.groupby("pitcher")["total_ip_approx"] + .transform(lambda x: x.shift(1).rolling(3, min_periods=1).mean())) + pg["l5_ks"] = (pg.groupby("pitcher")["total_ks"] + .transform(lambda x: x.shift(1).rolling(5, min_periods=1).mean())) + pg["l3_ks"] = (pg.groupby("pitcher")["total_ks"] + .transform(lambda x: x.shift(1).rolling(3, min_periods=1).mean())) + pg["l10_ks"] = (pg.groupby("pitcher")["total_ks"] + .transform(lambda x: x.shift(1).rolling(10, min_periods=1).mean())) + # Approximate days_rest from game_date diff + pg["game_date_dt"] = pd.to_datetime(pg["game_date"]) + pg["days_rest"] = (pg.groupby("pitcher")["game_date_dt"] + .transform(lambda x: x.diff().dt.days.fillna(5))) + + # Opp lineup K% + opp = (sc.groupby(["game_pk", "pitcher"]) + .agg(opp_k_events=("is_k", "sum"), opp_pa=("events", "count")) + .reset_index()) + 
opp["opp_lineup_k_pct_proxy"] = opp["opp_k_events"] / opp["opp_pa"].clip(lower=1) * 100 + opp["opp_lineup_xwoba_proxy"] = 0.320 # filled from lineup context at inference + pg = pg.merge(opp[["game_pk", "pitcher", + "opp_lineup_k_pct_proxy", + "opp_lineup_xwoba_proxy"]], on=["game_pk", "pitcher"], how="left") pit_frames.append(pg) - logger.info(" %d pit-game rows %d", len(pg), yr) - # Batter-game - bg = (sc.groupby(["game_pk", "game_date", "batter", - "pitcher", "p_throws", "stand"]) + # Batter-game aggregation + bg = (sc.groupby(["game_pk", "game_date", "batter", "pitcher", + "p_throws", "stand"]) .agg(hits=("is_hit", "sum"), abs=("is_hit", "count")) .reset_index()) - bg["season"] = yr - bg["hit_binary"] = (bg["hits"] >= 1).astype(int) + bg["season"] = yr + bg["hit_binary"] = (bg["hits"] >= 1).astype(int) + bg["l7_hits"] = (bg.groupby("batter")["hits"] + .transform(lambda x: x.shift(1).rolling(7, min_periods=1).sum())) + bg["l7_hit_rate"] = (bg.groupby("batter")["hit_binary"] + .transform(lambda x: x.shift(1).rolling(7, min_periods=1).mean())) bat_frames.append(bg) - logger.info(" %d bat-game rows %d", len(bg), yr) + logger.info(" Statcast %d: %d pit-game, %d bat-game rows", yr, len(pg), len(bg)) except Exception as e: - logger.warning(" %d Statcast failed: %s", yr, e) + logger.warning(" Statcast %d failed: %s", yr, e) pit_game_df = pd.concat(pit_frames, ignore_index=True) if pit_frames else pd.DataFrame() bat_game_df = pd.concat(bat_frames, ignore_index=True) if bat_frames else pd.DataFrame() - # ── Rolling features ────────────────────────────────────────────────────── - if not pit_game_df.empty: - pit_game_df = pit_game_df.sort_values(["pitcher", "game_date"]) - pit_game_df["l5_ks"] = (pit_game_df.groupby("pitcher")["total_ks"] - .transform(lambda x: x.shift(1).rolling(5, min_periods=1).mean())) - pit_game_df["l10_ks"] = (pit_game_df.groupby("pitcher")["total_ks"] - .transform(lambda x: x.shift(1).rolling(10, min_periods=1).mean())) - pit_game_df["l5_k_rate"] = 
(pit_game_df.groupby("pitcher")["total_ks"] - .transform(lambda x: x.shift(1).rolling(5, min_periods=1).mean()) - / pit_game_df.groupby("pitcher")["total_bf"] - .transform(lambda x: x.shift(1).rolling(5, min_periods=1).mean()) - .clip(lower=1) * 100) - - if not bat_game_df.empty: - bat_game_df = bat_game_df.sort_values(["batter", "game_date"]) - bat_game_df["l7_hits"] = (bat_game_df.groupby("batter")["hits"] - .transform(lambda x: x.shift(1).rolling(7, min_periods=1).sum())) - bat_game_df["l7_hit_rate"] = (bat_game_df.groupby("batter")["hit_binary"] - .transform(lambda x: x.shift(1).rolling(7, min_periods=1).mean())) - - # ── Merge FanGraphs season stats ────────────────────────────────────────── - FG_PIT_MAP = { - "xERA": "sv_xera", "ERA": "fg_era", - "K%": "fg_kpct", "BB%": "fg_bbpct", "SwStr%": "sv_swstr_pct", - } - FG_BAT_MAP = { - "xBA": "sv_xba", "xwOBA": "sv_xwoba", "xSLG": "sv_xslg", - "EV": "sv_ev", "Barrels": "sv_brl_pct", "HardHit%": "sv_hh_pct", - "SwStr%": "sv_swstr_pct", "LA": "sv_la", - "K%": "fg_kpct", "BB%": "fg_bbpct", - } - + # Merge FanGraphs season stats with training-aligned column names if not fg_pit.empty and not pit_game_df.empty: - fg_p = fg_pit.rename(columns={k: v for k, v in FG_PIT_MAP.items() if k in fg_pit}) - for pct_col in ("fg_kpct", "fg_bbpct", "sv_swstr_pct"): + fg_p = fg_pit.rename(columns=FG_PIT_RENAME) + for pct_col in ("sv_k_pct", "sv_bb_pct", "sv_whiff_pct"): if pct_col in fg_p.columns: fg_p[pct_col] = fg_p[pct_col].apply( lambda x: x * 100 if pd.notna(x) and 0 < x <= 1.0 else x) - merge_cols = ["IDfg", "season"] + [v for v in FG_PIT_MAP.values() if v in fg_p.columns] + merge_cols = ["IDfg", "season", "sv_xera"] + [ + v for v in FG_PIT_RENAME.values() if v in fg_p.columns] if "IDfg" in fg_p.columns: pit_game_df = pit_game_df.merge( - fg_p[merge_cols], + fg_p[[c for c in merge_cols if c in fg_p.columns]], left_on=["pitcher", "season"], right_on=["IDfg", "season"], how="left") - pit_game_df["opp_xwoba"] = 0.320 # populated 
from lineup context at inference time
+        # sv_era from FG "ERA" — NOTE(review): "ERA" must also be in merge_cols
+        # above, otherwise .get() always falls back to K_MEDIANS["sv_era"].
+        if "sv_era" not in pit_game_df.columns and "ERA" in fg_p.columns:
+            pit_game_df["sv_era"] = pit_game_df.get("ERA", K_MEDIANS["sv_era"])

     if not fg_bat.empty and not bat_game_df.empty:
-        fg_b = fg_bat.rename(columns={k: v for k, v in FG_BAT_MAP.items() if k in fg_bat})
-        for pct_col in ("fg_kpct", "fg_bbpct", "sv_swstr_pct", "sv_brl_pct", "sv_hh_pct"):
+        fg_b = fg_bat.rename(columns=FG_BAT_RENAME)
+        for pct_col in ("sv_k_pct", "sv_bb_pct", "sv_ss_pct", "sv_brl_pct", "sv_hh_pct"):
             if pct_col in fg_b.columns:
                 fg_b[pct_col] = fg_b[pct_col].apply(
                     lambda x: x * 100 if pd.notna(x) and 0 < x <= 1.0 else x)
-        merge_cols = ["IDfg", "season"] + [v for v in FG_BAT_MAP.values() if v in fg_b.columns]
+        merge_cols = ["IDfg", "season"] + [v for v in FG_BAT_RENAME.values() if v in fg_b.columns]
         if "IDfg" in fg_b.columns:
             bat_game_df = bat_game_df.merge(
-                fg_b[merge_cols],
+                fg_b[[c for c in merge_cols if c in fg_b.columns]],
                 left_on=["batter", "season"],
                 right_on=["IDfg", "season"], how="left")

-    # ── Platoon flags ────────────────────────────────────────────────────────
+    # Platoon flags
     if "p_throws" in bat_game_df.columns:
         bat_game_df["throws_R"] = (bat_game_df["p_throws"] == "R").astype(int)
+        bat_game_df["bats_L"] = (bat_game_df["stand"] == "L").astype(int)
+        bat_game_df["platoon_adv"] = (
+            ((bat_game_df["bats_L"] == 1) & (bat_game_df["throws_R"] == 1)) |
+            ((bat_game_df["bats_L"] == 0) & (bat_game_df["throws_R"] == 0))
+        ).astype(int)
     else:
-        bat_game_df["throws_R"] = 1
-        if "stand" in bat_game_df.columns:
-            bat_game_df["bats_L"] = (bat_game_df["stand"] == "L").astype(int)
-        else:
-            bat_game_df["bats_L"] = 0
-        bat_game_df["platoon_adv"] = (
-            ((bat_game_df.get("bats_L", 0) == 1) & (bat_game_df.get("throws_R", 1) == 1)) |
-            ((bat_game_df.get("bats_L", 0) == 0) & (bat_game_df.get("throws_R", 1) == 0))
-        ).astype(int)
-
-    # Pitcher opp columns
-    for col in ("opp_xera",
"opp_k_pct", "opp_bb_pct", "opp_swstr_pct"): + bat_game_df["throws_R"] = 1 + bat_game_df["bats_L"] = 0 + bat_game_df["platoon_adv"] = 0 + + # opp_whiff for hit model (pitcher SwStr% — was missing before) + for col in ("opp_xera", "opp_k_pct", "opp_bb_pct", "opp_whiff"): if col not in bat_game_df.columns: bat_game_df[col] = HIT_MEDIANS.get(col, 0.0) - # ── Fill medians ───────────────────────────────────────────────────────── + # K binary labels + for line in K_LINES: + if "total_ks" in pit_game_df.columns: + pit_game_df[f"k_over_{line}"] = (pit_game_df["total_ks"] > line).astype(int) + if "hit_binary" in bat_game_df.columns: + bat_game_df["actual_outcome"] = bat_game_df["hit_binary"] + + # Fill medians for col, med in K_MEDIANS.items(): if col not in pit_game_df.columns: pit_game_df[col] = med else: pit_game_df[col] = pit_game_df[col].fillna(med) - for col, med in HIT_MEDIANS.items(): if col not in bat_game_df.columns: bat_game_df[col] = med else: bat_game_df[col] = bat_game_df[col].fillna(med) - # ── K binary labels ─────────────────────────────────────────────────────── - if not pit_game_df.empty and "total_ks" in pit_game_df.columns: - for line in K_LINES: - pit_game_df[f"k_over_{line}"] = (pit_game_df["total_ks"] > line).astype(int) - pit_game_df["line"] = 4.5 # representative - - logger.info("Statcast: %d pit-game rows, %d bat-game rows", - len(pit_game_df), len(bat_game_df)) + logger.info("Statcast: %d pit-game, %d bat-game rows", len(pit_game_df), len(bat_game_df)) return pit_game_df, bat_game_df -# ── Train & save ───────────────────────────────────────────────────────────── +# ══════════════════════════════════════════════════════════════════════════════ +# Sample weights — recency-based +# ══════════════════════════════════════════════════════════════════════════════ + +def _make_sample_weights(df: pd.DataFrame) -> np.ndarray: + """ + Assign per-row sample weights based on season. + Recent seasons get higher weight — corrects for league-level shift. 
+ """ + if "season" not in df.columns: + return np.ones(len(df)) + return df["season"].map(SEASON_WEIGHTS).fillna(1.0).values + + +# ══════════════════════════════════════════════════════════════════════════════ +# Training +# ══════════════════════════════════════════════════════════════════════════════ def _train_and_save(X_train: np.ndarray, y_train: np.ndarray, - X_test: np.ndarray, y_test: np.ndarray, - label: str, out_path: str) -> dict: - """Train one XGBClassifier with Platt calibration. Returns metrics dict.""" + X_test: np.ndarray, y_test: np.ndarray, + label: str, out_path: str, + sample_weights: np.ndarray | None = None) -> dict: + """Train one XGBClassifier with Platt calibration and recency weights.""" from xgboost import XGBClassifier from sklearn.calibration import CalibratedClassifierCV from sklearn.metrics import roc_auc_score, log_loss, brier_score_loss @@ -478,189 +552,288 @@ def _train_and_save(X_train: np.ndarray, y_train: np.ndarray, raw = XGBClassifier(**XGB_PARAMS, scale_pos_weight=pos_ratio, use_label_encoder=False) model = CalibratedClassifierCV(raw, method="sigmoid", cv=5) - model.fit(X_train, y_train) + model.fit(X_train, y_train, sample_weight=sample_weights) metrics: dict = {} if len(X_test) > 0 and y_test.sum() > 0: probs = model.predict_proba(X_test)[:, 1] metrics = dict( - auc = round(float(roc_auc_score(y_test, probs)), 4), - logloss= round(float(log_loss(y_test, probs)), 4), - brier = round(float(brier_score_loss(y_test, probs)), 4), - n_test = int(len(X_test)), + auc = round(float(roc_auc_score(y_test, probs)), 4), + logloss = round(float(log_loss(y_test, probs)), 4), + brier = round(float(brier_score_loss(y_test, probs)), 4), + n_test = int(len(X_test)), ) - logger.info(" %s → AUC %.4f | LogLoss %.4f | Brier %.4f", - label, metrics["auc"], metrics["logloss"], metrics["brier"]) + logger.info(" %s → AUC %.4f | Brier %.4f (null=0.25, target<0.23)", + label, metrics["auc"], metrics["brier"]) + if metrics["brier"] > 0.25: + 
logger.warning(" ⚠️ %s Brier %.4f > null model — check training data quality", + label, metrics["brier"]) else: - logger.info(" %s → trained (no held-out test data yet)", label) + logger.info(" %s → trained (no held-out test — early season)", label) with open(out_path, "wb") as f: pickle.dump(model, f) - logger.info(" Saved → %s", out_path) return metrics -def main() -> None: - logger.info("=== PropIQ Per-Line K & Hit Model Training ===") - logger.info("Output dir: %s", OUTDIR) +def _run_shap(model_path: str, df: pd.DataFrame, features: list) -> list: + """Run SHAP feature importance for interpretability.""" + try: + import shap, pickle as _pkl + with open(model_path, "rb") as f: + model = _pkl.load(f) + avail = [c for c in features if c in df.columns] + X = df[avail].fillna(0).values.astype(np.float32) + idx = np.random.choice(len(X), min(2000, len(X)), replace=False) + base = model.calibrated_classifiers_[0].estimator + exp = shap.TreeExplainer(base) + sv = exp.shap_values(X[idx]) + mean_abs = np.abs(sv).mean(axis=0) + ranked = sorted(zip(avail, mean_abs), key=lambda x: x[1], reverse=True) + logger.info(" SHAP importance:") + for feat, imp in ranked: + bar = "█" * int(imp / max(ranked[0][1], 1e-9) * 20) + logger.info(" %-28s %s %.4f", feat, bar, imp) + return [{"feature": f, "importance": round(float(i), 4)} for f, i in ranked] + except Exception as e: + logger.warning("SHAP failed: %s", e) + return [] + + +# ══════════════════════════════════════════════════════════════════════════════ +# Status check +# ══════════════════════════════════════════════════════════════════════════════ + +def show_status() -> None: + metrics_path = os.path.join(OUTDIR, "model_metrics.json") + if not os.path.exists(metrics_path): + print("No model_metrics.json found — models not yet trained.") + return + with open(metrics_path) as f: + m = json.load(f) + print(f"\n=== XGBoost Model Status (trained {m.get('trained_at', 'unknown')}) ===") + print(f"{'Model':<12} {'Brier':>8} {'AUC':>8} 
{'N Test':>8} {'Status'}") + print("-" * 60) + null_brier = 0.25 + for key in ["k_3.5", "k_4.5", "k_5.5", "k_6.5", "hits"]: + d = m.get(key, {}) + brier = d.get("brier") + auc = d.get("auc") + n_test = d.get("n_test", 0) + if brier is None: + status = "⚠️ No test data" + elif brier < 0.23: + status = "✅ Well calibrated" + elif brier < null_brier: + status = "🟡 Marginal edge" + else: + status = "❌ Worse than null" + b_str = f"{brier:.4f}" if brier else "N/A" + a_str = f"{auc:.4f}" if auc else "N/A" + print(f" {key:<10} {b_str:>8} {a_str:>8} {n_test:>8} {status}") + + print(f"\n Null model Brier: {null_brier} (always predict 50%)") + print(f" Target Brier: <0.23 to justify current blend weights") + print(f"\n Blend recommendations:") + for key in ["k_3.5", "k_4.5", "k_5.5", "k_6.5"]: + brier = m.get(key, {}).get("brier", 0.25) + if brier and brier < 0.23: + rec = "70/30 — increase XGB weight" + elif brier and brier < null_brier: + rec = "80/20 — current default (marginal edge)" + else: + rec = "90/10 — reduce XGB weight (worse than null)" + print(f" {key}: {rec}") + hits_brier = m.get("hits", {}).get("brier", 0.25) + if hits_brier and hits_brier > null_brier: + print(f" hits: 90/10 ⚠️ (Brier {hits_brier:.4f} > null) — REDUCE BLEND") + else: + print(f" hits: 80/20 (Brier {hits_brier:.4f})") + + +# ══════════════════════════════════════════════════════════════════════════════ +# Main +# ══════════════════════════════════════════════════════════════════════════════ - # ── Load data ──────────────────────────────────────────────────────────── +def main(k_only: bool = False, hit_only: bool = False) -> None: + logger.info("=== PropIQ XGBoost Training v2 (season-weighted) ===") + logger.info("Season weights: %s", SEASON_WEIGHTS) + + # Load data ledger_k, ledger_hits = _load_from_ledger() - stat_k, stat_hits = pd.DataFrame(), pd.DataFrame() + stat_k, stat_hits = pd.DataFrame(), pd.DataFrame() + + need_statcast_k = len(ledger_k) < 500 and not hit_only + need_statcast_hit = 
len(ledger_hits) < 500 and not k_only - # Use Statcast when ledger has < 500 rows (not enough for calibrated training) - if len(ledger_k) < 500 or len(ledger_hits) < 500: - logger.info("Ledger rows insufficient — supplementing with Statcast...") + if need_statcast_k or need_statcast_hit: + logger.info("Supplementing with Statcast (ledger rows insufficient)...") stat_k, stat_hits = _load_from_statcast() - # Combine sources: ledger first (real lines), then Statcast - k_df = pd.concat([ledger_k, stat_k], ignore_index=True) if not stat_k.empty else ledger_k - hit_df = pd.concat([ledger_hits, stat_hits], ignore_index=True) if not stat_hits.empty else ledger_hits + # Combine — ledger rows are highest quality (real PropIQ outcomes) + # Give ledger rows 3x weight relative to historical Statcast + def _combine(ledger_df, stat_df, is_k): + if ledger_df.empty and stat_df.empty: + return pd.DataFrame() + if ledger_df.empty: + return stat_df + if stat_df.empty: + # Boost ledger weights to compensate for small sample + ledger_df = ledger_df.copy() + if "season" not in ledger_df.columns: + ledger_df["season"] = 2026 + return ledger_df + # Give ledger rows 3x season weight bonus + ledger_boost = ledger_df.copy() + if "season" not in ledger_boost.columns: + ledger_boost["season"] = 2026 + ledger_boost["_ledger_boost"] = 3.0 + stat_df2 = stat_df.copy() + stat_df2["_ledger_boost"] = 1.0 + return pd.concat([ledger_boost, stat_df2], ignore_index=True) + + k_df = _combine(ledger_k, stat_k, is_k=True) if not hit_only else pd.DataFrame() + hit_df = _combine(ledger_hits, stat_hits, is_k=False) if not k_only else pd.DataFrame() if k_df.empty and hit_df.empty: - logger.error("No training data available. Exiting.") + logger.error("No training data. 
Install pybaseball or connect DATABASE_URL.") return - all_metrics: dict = { - "trained_at": datetime.now(timezone.utc).isoformat(), - "seasons": SEASONS, - "test_year": TEST_YEAR, + all_metrics = { + "trained_at": datetime.now(timezone.utc).isoformat(), + "seasons": SEASONS, + "season_weights": SEASON_WEIGHTS, + "test_year": TEST_YEAR, + "blend_recommendation": { + "note": "Check status with --status after training", + }, } - # ── Train K models (per line) ──────────────────────────────────────────── + # ── Train K models ────────────────────────────────────────────────────── if not k_df.empty: - logger.info("\n=== K Models ===") + logger.info("\n=== K Models (per-line, season-weighted) ===") for line in K_LINES: label_col = f"k_over_{line}" + if label_col not in k_df.columns: if "actual_outcome" in k_df.columns and "line" in k_df.columns: - # Ledger source: reconstruct binary label from line k_df[label_col] = ( (k_df["actual_outcome"] == 1) & (k_df["line"].round(1) == line) ).astype(int) else: - logger.warning(" K>%.1f: label column missing, skipping", line) + logger.warning("K>%.1f: label missing — skipping", line) continue - # Split by season (test on TEST_YEAR when season column available) + # Train/test split by season if "season" in k_df.columns: train = k_df[k_df["season"] != TEST_YEAR] test = k_df[k_df["season"] == TEST_YEAR] else: split = int(len(k_df) * 0.80) - train = k_df.iloc[:split] - test = k_df.iloc[split:] + train, test = k_df.iloc[:split], k_df.iloc[split:] - # Filter to rows where this line was the actual line + # Filter to relevant line if "line" in k_df.columns: - # Include all rows where line is within 0.5 of this target line train_filt = train[(train["line"] - line).abs() <= 0.5] if len(train) > 100 else train - test_filt = test[(test["line"] - line).abs() <= 0.5] if len(test) > 10 else test + test_filt = test[(test["line"] - line).abs() <= 0.5] if len(test) > 10 else test else: train_filt, test_filt = train, test if len(train_filt) < 50: - 
logger.warning(" K>%.1f: only %d train rows, skipping", line, len(train_filt)) + logger.warning("K>%.1f: only %d train rows — skipping", line, len(train_filt)) continue - available_cols = [c for c in K_FEATURES if c in k_df.columns] - X_train = train_filt[available_cols].fillna(0).values.astype(np.float32) - y_train = train_filt[label_col].values - X_test = test_filt[available_cols].fillna(0).values.astype(np.float32) if len(test_filt) else X_train[:0] - y_test = test_filt[label_col].values if len(test_filt) else y_train[:0] + avail = [c for c in K_FEATURES if c in k_df.columns] + X_train = train_filt[avail].fillna(0).values.astype(np.float32) + y_train = train_filt[label_col].values + X_test = test_filt[avail].fillna(0).values.astype(np.float32) if len(test_filt) else X_train[:0] + y_test = test_filt[label_col].values if len(test_filt) else y_train[:0] + + # Recency weights: combine season weight × ledger boost + sw = _make_sample_weights(train_filt) + if "_ledger_boost" in train_filt.columns: + sw = sw * train_filt["_ledger_boost"].values safe_line = str(line).replace(".", "_") out_path = os.path.join(OUTDIR, f"xgb_k_{safe_line}.pkl") metrics = _train_and_save(X_train, y_train, X_test, y_test, - f"K>{line}", out_path) - n_train_k = int(len(X_train)) - all_metrics[f"k_{line}"] = {**metrics, "train_rows": n_train_k, - "features": available_cols} - - # ── PR #562: Persist to DB ──────────────────────────────────────── - _save_model_to_db( - prop_type = f"k_{line}", - pkl_path = out_path, - metrics = metrics, - n_train = n_train_k, - feature_names = available_cols, - ) + f"K>{line}", out_path, + sample_weights=sw) + n_train = int(len(X_train)) + all_metrics[f"k_{line}"] = {**metrics, "train_rows": n_train, "features": avail} + + _save_model_to_db(f"k_{line}", out_path, metrics, n_train, avail) + + # SHAP for K4.5 (most common line) + k45_path = os.path.join(OUTDIR, "xgb_k_4_5.pkl") + if os.path.exists(k45_path) and not k_df.empty: + all_metrics["shap_k_4_5"] = 
_run_shap(k45_path, k_df, K_FEATURES) - # ── Train hit model ────────────────────────────────────────────────────── + # ── Train hit model ───────────────────────────────────────────────────── if not hit_df.empty and "actual_outcome" in hit_df.columns: - logger.info("\n=== Hit Model ===") + logger.info("\n=== Hit Model (season-weighted) ===") + if "season" in hit_df.columns: train_h = hit_df[hit_df["season"] != TEST_YEAR] test_h = hit_df[hit_df["season"] == TEST_YEAR] else: split = int(len(hit_df) * 0.80) - train_h = hit_df.iloc[:split] - test_h = hit_df.iloc[split:] + train_h, test_h = hit_df.iloc[:split], hit_df.iloc[split:] - available_cols = [c for c in HITS_FEATURES if c in hit_df.columns] - X_train_h = train_h[available_cols].fillna(0).values.astype(np.float32) + avail_h = [c for c in HITS_FEATURES if c in hit_df.columns] + X_train_h = train_h[avail_h].fillna(0).values.astype(np.float32) y_train_h = train_h["actual_outcome"].values - X_test_h = test_h[available_cols].fillna(0).values.astype(np.float32) if len(test_h) else X_train_h[:0] - y_test_h = test_h["actual_outcome"].values if len(test_h) else y_train_h[:0] - - out_path = os.path.join(OUTDIR, "xgb_hits.pkl") - metrics = _train_and_save(X_train_h, y_train_h, X_test_h, y_test_h, - "Hits", out_path) - n_train_h = int(len(X_train_h)) - all_metrics["hits"] = {**metrics, "train_rows": n_train_h, - "features": available_cols} - - # ── PR #562: Persist to DB ──────────────────────────────────────────── - _save_model_to_db( - prop_type = "hits", - pkl_path = out_path, - metrics = metrics, - n_train = n_train_h, - feature_names = available_cols, - ) - - # ── SHAP importance for K 4.5 model ────────────────────────────────────── - k45_path = os.path.join(OUTDIR, "xgb_k_4_5.pkl") - if os.path.exists(k45_path) and not k_df.empty: - try: - import shap, pickle as _pickle - with open(k45_path, "rb") as f: - k45 = _pickle.load(f) - base_model = k45.calibrated_classifiers_[0].estimator - avail = [c for c in K_FEATURES if 
c in k_df.columns] - X_shap = k_df[avail].fillna(0).values.astype(np.float32) - idx = np.random.choice(len(X_shap), min(2000, len(X_shap)), replace=False) - exp = shap.TreeExplainer(base_model) - sv = exp.shap_values(X_shap[idx]) - mean_s = np.abs(sv).mean(axis=0) - ranked = sorted(zip(avail, mean_s), key=lambda x: x[1], reverse=True) - logger.info("\n=== K4.5 SHAP Feature Importance ===") - for feat, imp in ranked: - bar = "█" * int(imp / ranked[0][1] * 20) - logger.info(" %-22s %s %.4f", feat, bar, imp) - all_metrics["shap_k_4_5"] = [{"feature": f, "importance": round(float(i), 4)} - for f, i in ranked] - except Exception as e: - logger.warning("SHAP failed: %s", e) + X_test_h = test_h[avail_h].fillna(0).values.astype(np.float32) if len(test_h) else X_train_h[:0] + y_test_h = test_h["actual_outcome"].values if len(test_h) else y_train_h[:0] + + sw_h = _make_sample_weights(train_h) + if "_ledger_boost" in train_h.columns: + sw_h = sw_h * train_h["_ledger_boost"].values + + out_path_h = os.path.join(OUTDIR, "xgb_hits.pkl") + metrics_h = _train_and_save(X_train_h, y_train_h, X_test_h, y_test_h, + "Hits", out_path_h, sample_weights=sw_h) + n_train_h = int(len(X_train_h)) + all_metrics["hits"] = {**metrics_h, "train_rows": n_train_h, "features": avail_h} + + _save_model_to_db("hits", out_path_h, metrics_h, n_train_h, avail_h) + + # Blend recommendation for hits + hit_brier = metrics_h.get("brier", 0.25) + if hit_brier and hit_brier > 0.25: + all_metrics["blend_recommendation"]["hits"] = ( + f"90/10 — Brier {hit_brier:.4f} > null (0.25). " + "Reduce from current 70/30 to limit noise contribution." + ) + logger.warning("⚠️ Hit model Brier %.4f > null — recommend 90/10 blend", hit_brier) + elif hit_brier and hit_brier < 0.23: + all_metrics["blend_recommendation"]["hits"] = ( + f"60/40 — Brier {hit_brier:.4f} well below null. " + "Consider increasing blend weight." 
+ ) - # ── Save metadata ──────────────────────────────────────────────────────── - feat_cols_out = { - f"k_{line}": K_FEATURES for line in K_LINES - } + # ── Save feature cols and metrics ─────────────────────────────────────── + feat_cols_out = {f"k_{line}": K_FEATURES for line in K_LINES} feat_cols_out["hits"] = HITS_FEATURES with open(os.path.join(OUTDIR, "xgb_feature_cols.json"), "w") as f: json.dump(feat_cols_out, f, indent=2) - logger.info("\nSaved → models/xgb_feature_cols.json") with open(os.path.join(OUTDIR, "model_metrics.json"), "w") as f: json.dump(all_metrics, f, indent=2) - logger.info("Saved → models/model_metrics.json") - logger.info("\n✅ Training complete. Saved to %s and xgb_model_store DB table.", OUTDIR) - logger.info(" Models load from DB on next Railway restart (xgb_k_layer.py).") + logger.info("\n✅ Training complete.") + logger.info(" Run: python scripts/xgb_k_training.py --status") + show_status() if __name__ == "__main__": - main() + if "--status" in sys.argv: + show_status() + elif "--k-only" in sys.argv: + main(k_only=True) + elif "--hit-only" in sys.argv: + main(hit_only=True) + else: + main() diff --git a/update_blend_weights.py b/update_blend_weights.py new file mode 100644 index 0000000..7c083a5 --- /dev/null +++ b/update_blend_weights.py @@ -0,0 +1,239 @@ +""" +update_blend_weights.py +======================== +Reads model_metrics.json after training and automatically updates the +XGBoost blend weights in xgb_k_layer.py based on actual Brier scores. + +THE PROBLEM +----------- +The blend weights (80/20 for K, 70/30 for hits) were set as fixed constants +based on theory, not measurement. 
Now that we have real Brier scores: + - Hit model Brier = 0.2668 (WORSE than null at 0.25) → 70/30 is wrong + - K model Brier = 0.2458 (barely better than null) → 80/20 is marginal + +BLEND SCHEDULE (based on Brier) +-------------------------------- +Brier < 0.23: Model has real edge → 70/30 (increase XGB weight) +Brier < 0.25: Marginal edge → 80/20 (current default) +Brier >= 0.25: Worse than null → 90/10 (reduce XGB, limit noise) +Brier >= 0.27: Actively hurting → 95/5 (minimal contribution only) + +USAGE +----- + python update_blend_weights.py # preview changes (no writes) + python update_blend_weights.py --apply # write changes to xgb_k_layer.py + python update_blend_weights.py --status # show current blend weights in code +""" + +from __future__ import annotations + +import json +import logging +import re +import sys +from pathlib import Path + +logging.basicConfig(level=logging.INFO, format="%(asctime)s [BLEND] %(message)s") +log = logging.getLogger(__name__) + +METRICS_FILE = Path("models/model_metrics.json") +XGB_LAYER = Path("xgb_k_layer.py") + +NULL_BRIER = 0.25 # null model always predicts 50% + + +def _get_blend_weight(brier: float | None, model_name: str) -> tuple[float, float, str]: + """ + Return (formula_weight, xgb_weight, reason) based on Brier score. 
+ formula_weight + xgb_weight = 1.0 + """ + if brier is None: + return 0.90, 0.10, "no test data — using conservative 90/10" + if brier < 0.23: + return 0.70, 0.30, f"Brier {brier:.4f} well below null — strong edge, 70/30" + if brier < NULL_BRIER: + return 0.80, 0.20, f"Brier {brier:.4f} marginal edge over null — 80/20" + if brier < 0.27: + return 0.90, 0.10, f"Brier {brier:.4f} ≥ null (0.25) — reducing to 90/10" + return 0.95, 0.05, f"Brier {brier:.4f} actively hurting — minimal 95/5" + + +def load_metrics() -> dict: + if not METRICS_FILE.exists(): + log.error("models/model_metrics.json not found — run xgb_k_training.py first") + return {} + return json.loads(METRICS_FILE.read_text()) + + +def compute_recommendations(metrics: dict) -> dict: + """Compute blend weight recommendations from training metrics.""" + recs = {} + + # K models — all share the same blend weight (averaged across lines) + k_briers = [] + for line in [3.5, 4.5, 5.5, 6.5]: + key = f"k_{line}" + b = metrics.get(key, {}).get("brier") + if b: + k_briers.append(b) + + avg_k_brier = sum(k_briers) / len(k_briers) if k_briers else None + fw_k, xgb_k, reason_k = _get_blend_weight(avg_k_brier, "K") + recs["k"] = { + "formula_weight": fw_k, + "xgb_weight": xgb_k, + "avg_brier": round(avg_k_brier, 4) if avg_k_brier else None, + "reason": reason_k, + } + + # Hit model + hit_brier = metrics.get("hits", {}).get("brier") + fw_h, xgb_h, reason_h = _get_blend_weight(hit_brier, "hits") + recs["hits"] = { + "formula_weight": fw_h, + "xgb_weight": xgb_h, + "brier": round(hit_brier, 4) if hit_brier else None, + "reason": reason_h, + } + + return recs + + +def show_current_weights() -> None: + """Show what blend weights are currently in xgb_k_layer.py.""" + if not XGB_LAYER.exists(): + print("xgb_k_layer.py not found.") + return + content = XGB_LAYER.read_text() + print("\nCurrent blend weights in xgb_k_layer.py:") + + # Find K blend + m_k = re.search(r"(\d+\.\d+) \* model_prob \+ (\d+\.\d+) \* _xkp", content) + if 
m_k: + xgb_w = float(m_k.group(2)) + print(f" K props: formula={1-xgb_w:.0%} / XGB={xgb_w:.0%}") + else: + print(" K props: pattern not found") + + # Find hit blend + m_h = re.search(r"(\d+\.\d+) \* model_prob \+ (\d+\.\d+) \* _xhp", content) + if m_h: + xgb_w = float(m_h.group(2)) + print(f" Hit props: formula={1-xgb_w:.0%} / XGB={xgb_w:.0%}") + else: + print(" Hit props: pattern not found") + + +def apply_blend_updates(recs: dict, dry_run: bool = True) -> bool: + """Patch xgb_k_layer.py with recommended blend weights.""" + if not XGB_LAYER.exists(): + log.error("xgb_k_layer.py not found.") + return False + + content = XGB_LAYER.read_text() + original = content + changed = False + + # Update K blend: pattern "0.XX * model_prob + 0.YY * _xkp" + k_fw = recs["k"]["formula_weight"] + k_xgb = recs["k"]["xgb_weight"] + + k_old = re.search(r"(\d+\.\d+) \* model_prob \+ (\d+\.\d+) \* _xkp", content) + if k_old: + old_str = k_old.group(0) + new_str = f"{k_fw:.2f} * model_prob + {k_xgb:.2f} * _xkp" + if old_str != new_str: + content = content.replace(old_str, new_str, 1) + log.info("K blend: %s → %s (%s)", + old_str, new_str, recs["k"]["reason"]) + changed = True + else: + log.info("K blend already at %s — no change needed", old_str) + else: + log.warning("K blend pattern not found in xgb_k_layer.py") + + # Update hit blend: pattern "0.XX * model_prob + 0.YY * _xhp" + h_fw = recs["hits"]["formula_weight"] + h_xgb = recs["hits"]["xgb_weight"] + + h_old = re.search(r"(\d+\.\d+) \* model_prob \+ (\d+\.\d+) \* _xhp", content) + if h_old: + old_str = h_old.group(0) + new_str = f"{h_fw:.2f} * model_prob + {h_xgb:.2f} * _xhp" + if old_str != new_str: + content = content.replace(old_str, new_str, 1) + log.info("Hit blend: %s → %s (%s)", + old_str, new_str, recs["hits"]["reason"]) + changed = True + else: + log.info("Hit blend already at %s — no change needed", old_str) + else: + log.warning("Hit blend pattern not found in xgb_k_layer.py") + + if dry_run: + if changed: + 
log.info("DRY RUN — changes NOT written. Run with --apply to write.") + else: + log.info("No changes needed.") + return changed + + if changed: + XGB_LAYER.write_text(content) + log.info("xgb_k_layer.py updated with new blend weights.") + + # Update calibration_params.json with blend info + cal_path = Path("data/calibration_params.json") + if cal_path.exists(): + try: + cal = json.loads(cal_path.read_text()) + cal["xgb_blend_weights"] = { + "k": {"formula": k_fw, "xgb": k_xgb}, + "hits":{"formula": h_fw, "xgb": h_xgb}, + } + cal["calibration_notes"] = cal.get("calibration_notes", []) + from datetime import date + cal["calibration_notes"].append( + f"[{date.today().isoformat()}] Blend weights updated: " + f"K={k_fw:.0%}/{k_xgb:.0%} Hits={h_fw:.0%}/{h_xgb:.0%} " + f"based on Brier K={recs['k']['avg_brier']} " + f"Hits={recs['hits']['brier']}" + ) + cal_path.write_text(json.dumps(cal, indent=2)) + log.info("calibration_params.json updated with blend weights.") + except Exception as e: + log.warning("Failed to update calibration_params.json: %s", e) + else: + log.info("No changes needed.") + + return changed + + +def main() -> None: + metrics = load_metrics() + if not metrics: + return + + recs = compute_recommendations(metrics) + + print("\n=== XGBoost Blend Weight Recommendations ===") + print(f" Null model Brier: {NULL_BRIER} (baseline — worse = model is noise)") + print() + + for model, rec in recs.items(): + brier_str = f"{rec.get('brier') or rec.get('avg_brier') or 'N/A'}" + print(f" {model.upper()}") + print(f" Brier: {brier_str}") + print(f" Blend: {rec['formula_weight']:.0%} formula / {rec['xgb_weight']:.0%} XGB") + print(f" Reason: {rec['reason']}") + print() + + show_current_weights() + + apply_arg = "--apply" in sys.argv + if "--status" not in sys.argv: + print(f"\n{'Applying changes...' if apply_arg else 'DRY RUN — use --apply to write changes'}") + apply_blend_updates(recs, dry_run=not apply_arg) + + +if __name__ == "__main__": + main()