Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
58 changes: 58 additions & 0 deletions prop_enrichment_layer.py
Original file line number Diff line number Diff line change
Expand Up @@ -1730,6 +1730,62 @@ def _dampen(base_prob_pct, adjustments, **kw): # noqa: E731
except Exception as _def_err:
logger.debug("[Enrichment] Defense OAA skipped for %s: %s", player, _def_err)

# ── Batted-ball profile signal (hits + total_bases) ──────────────────
# Uses statcast_static_layer.get_batter_batted_ball() which reads
# batted-ball.csv (bbe, gb_rate, air_rate, fb_rate, ld_rate, pull_rate…)
#
# hits: LD rate drives BABIP; GB heavy = infield-hit bonus
# total_bases: FB rate + pull rate = XBH/HR upside; GB heavy = drag
#
# Max effect: ±4pp per leg; flows through adjustment dampener.
if prop_type in ("hits", "total_bases") and is_batter_prop:
_b_id_bb = prop.get("player_id") or prop.get("mlbam_id")
if _b_id_bb:
try:
from statcast_static_layer import get_batter_batted_ball as _gbb # noqa: PLC0415
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

Importing get_batter_batted_ball inside the per-prop loop is inefficient. Although Python caches imported modules, the import statement still performs a module-cache lookup and rebinds the name for every prop of type hits or total_bases. It is better to move this import to the top of the enrich_props function, especially since statcast_static_layer is already being imported multiple times in this function.

_bb_prof = _gbb(int(_b_id_bb))
if _bb_prof:
_gb_r = float(_bb_prof.get("gb_rate") or 0)
_fb_r = float(_bb_prof.get("fb_rate") or 0)
_ld_r = float(_bb_prof.get("ld_rate") or 0)
_pull_r = float(_bb_prof.get("pull_rate") or 0)
_bb_adj = 0.0

if prop_type == "hits":
# LD rate is strongest BABIP driver; MLB avg ~22%
# ±3pp per 6pp deviation from average
if _ld_r > 0:
_bb_adj += (_ld_r - 0.22) / 0.06 * 0.030
# GB-heavy batters (>48%) get slight infield-hit bonus
if _gb_r > 0.48:
_bb_adj += (_gb_r - 0.48) / 0.10 * 0.010

elif prop_type == "total_bases":
# High FB rate = more fly balls = more XBH/HRs
# MLB avg air_rate ~0.38 (includes LD + FB)
if _fb_r > 0:
_bb_adj += (_fb_r - 0.22) / 0.08 * 0.030 # FB avg ~22%
Comment on lines +1765 to +1767
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

There is a numerical inconsistency between the comments and the adjustment logic for total_bases. Line 1765 states that the average air_rate (LD + FB) is ~0.38. Given that the LD average is cited as 0.22 (line 1755), this implies an FB average of 0.16 (0.38 − 0.22). However, line 1767 uses 0.22 as the center for the FB adjustment calculation and includes a comment stating FB avg ~22%. This internal contradiction between the air_rate and fb_rate averages will likely result in a systematic bias in the calculated adjustment for league-average hitters.

# High pull rate = pull-side power = more XBH
if _pull_r > 0:
_bb_adj += (_pull_r - 0.38) / 0.10 * 0.020
# GB-heavy batters suppress total bases
if _gb_r > 0:
_bb_adj -= (_gb_r - 0.40) / 0.10 * 0.015

_bb_adj = round(max(-0.040, min(0.040, _bb_adj)), 4)
if abs(_bb_adj) >= 0.005:
prop["_bb_profile_adj"] = _bb_adj
logger.debug(
"[Enrichment] %s %s bb_profile_adj=%.3f "
"(gb=%.2f fb=%.2f ld=%.2f pull=%.2f)",
player, prop_type, _bb_adj,
_gb_r, _fb_r, _ld_r, _pull_r,
)
except Exception as _bb_err:
logger.debug(
"[Enrichment] batted_ball skipped for %s: %s", player, _bb_err
)

# ── FIX: Bridge enrichment keys → simulation engine underscore-prefixed keys ──
# prop_enrichment_layer sets k_rate/k_pct, bb_rate/bb_pct, woba, wrc_plus (no prefix).
# regardless of who the player is. Chase Burns and a AAA call-up were identical.
Expand Down Expand Up @@ -1965,6 +2021,7 @@ def _dampen(base_prob_pct, adjustments, **kw): # noqa: E731
("_arm_angle_adj", "arm_angle_deception"),
("_swing_path_k_adj", "swing_path_k"),
("_chase_discipline_k_adj", "chase_discipline_k"),
("_bb_profile_adj", "bb_profile"),
]:
_v = float(prop.get(_adj_key, 0.0) or 0.0)
if _v != 0.0:
Expand Down Expand Up @@ -2033,6 +2090,7 @@ def _dampen(base_prob_pct, adjustments, **kw): # noqa: E731
"arm_angle": round(float(prop.get("_arm_angle_adj", 0.0) or 0.0), 4),
"swing_path_k": round(float(prop.get("_swing_path_k_adj", 0.0) or 0.0), 4),
"chase_disc_k": round(float(prop.get("_chase_discipline_k_adj",0.0) or 0.0), 4),
"bb_profile": round(float(prop.get("_bb_profile_adj", 0.0) or 0.0), 4),
}

enriched_count += 1
Expand Down