diff --git a/v1/energy_grid_planning/energy_grid_planning.py b/v1/energy_grid_planning/energy_grid_planning.py index 5668fdc..201e5e1 100644 --- a/v1/energy_grid_planning/energy_grid_planning.py +++ b/v1/energy_grid_planning/energy_grid_planning.py @@ -935,7 +935,7 @@ def _query_flag(relationship, flag_name): investment_levels_df = model.select( InvestmentLevel.name.alias("level"), InvestmentLevel.budget_cap.alias("budget"), -).to_df().sort_values("budget") +).to_df().sort_values("budget").reset_index(drop=True) pareto_rows = [] for _, lvl in investment_levels_df.iterrows(): @@ -968,18 +968,132 @@ def _query_flag(relationship, flag_name): "net_value": net_value, }) -# Print Pareto frontier +# -------------------------------------------------- +# Materialize InvestmentPortfolio(InvestmentLevel) Concept +# One row per InvestmentLevel scenario. Marginal & knee detection are +# inter-row computations, so they're computed in pandas and bound back. +# The Pareto frontier is then queried from the ontology, not pareto_rows. 
+# -------------------------------------------------- + +InvestmentPortfolio = model.Concept( + "InvestmentPortfolio", identify_by={"investment_level_name": String} +) +InvestmentPortfolio.investment_level = model.Relationship( + f"{InvestmentPortfolio} for {InvestmentLevel}" +) +InvestmentPortfolio.dc_count = model.Property(f"{InvestmentPortfolio} has {Integer:dc_count}") +InvestmentPortfolio.total_mw = model.Property(f"{InvestmentPortfolio} has {Float:total_mw}") +InvestmentPortfolio.annual_revenue = model.Property( + f"{InvestmentPortfolio} has {Float:annual_revenue}" +) +InvestmentPortfolio.upgrade_cost = model.Property( + f"{InvestmentPortfolio} has {Float:upgrade_cost}" +) +InvestmentPortfolio.net_value = model.Property(f"{InvestmentPortfolio} has {Float:net_value}") +InvestmentPortfolio.marginal_per_m_to_next_level = model.Property( + f"{InvestmentPortfolio} has {Float:marginal_per_m_to_next_level}" +) +InvestmentPortfolio.is_knee_point = model.Property( + f"{InvestmentPortfolio} has {Boolean:is_knee_point}" +) + +# Build the dataframe with marginal & knee — inter-row computation in pandas. +portfolio_df = pd.DataFrame(pareto_rows).sort_values("budget").reset_index(drop=True) +# Note: upgrade_cost is dollars (cost_million * 1e6), to match revenue scale and net_value definition. 
+portfolio_df["upgrade_cost_dollars"] = portfolio_df["upgrade_cost_m"].astype(float) * 1e6 + +# marginal_per_m_to_next_level: ($net_value_next - $net_value_this) / ($M_next - $M_this); null at last +marginal = [] +for j in range(len(portfolio_df)): + if j == len(portfolio_df) - 1: + marginal.append(None) + else: + d_val = portfolio_df.loc[j + 1, "net_value"] - portfolio_df.loc[j, "net_value"] + d_budget = portfolio_df.loc[j + 1, "budget"] - portfolio_df.loc[j, "budget"] + marginal.append(d_val / d_budget if abs(d_budget) > 1e-6 else None) +portfolio_df["marginal_per_m_to_next_level"] = marginal + +# is_knee_point: row whose marginal-rate represents the largest jump from the prior row's rate. +# We measure the "jump" at row j as |rate[j-1]| / |rate[j]| (steepest drop from a high marginal +# to a lower one, i.e. diminishing-returns inflection). The knee is the row where this ratio peaks. +is_knee = [False] * len(portfolio_df) +rates = [r for r in marginal if r is not None] +if len(rates) >= 2: + max_jump, knee_idx = 0.0, 1 + for j in range(len(rates) - 1): + prev_rate, next_rate = rates[j], rates[j + 1] + if abs(next_rate) > 1e-6: + jump = abs(prev_rate / next_rate) + elif abs(prev_rate) > 1e-6: + jump = float("inf") + else: + jump = 0.0 + if jump > max_jump: + max_jump = jump + knee_idx = j + 1 + is_knee[knee_idx] = True +portfolio_df["is_knee_point"] = is_knee + +# Bind portfolio rows back as ontology instances. 
+portfolio_src = model.data(portfolio_df.rename(columns={ + "level": "investment_level_name", + "n_approved": "dc_count", + "total_mw": "total_mw", + "revenue": "annual_revenue", + "upgrade_cost_dollars": "upgrade_cost", + "net_value": "net_value", + "marginal_per_m_to_next_level": "marginal_per_m_to_next_level", + "is_knee_point": "is_knee_point", +})) + +model.define(InvestmentPortfolio.new( + investment_level_name=portfolio_src.INVESTMENT_LEVEL_NAME, + investment_level=InvestmentLevel.filter_by(name=portfolio_src.INVESTMENT_LEVEL_NAME), + dc_count=portfolio_src.DC_COUNT, + total_mw=portfolio_src.TOTAL_MW, + annual_revenue=portfolio_src.ANNUAL_REVENUE, + upgrade_cost=portfolio_src.UPGRADE_COST, + net_value=portfolio_src.NET_VALUE, + marginal_per_m_to_next_level=portfolio_src.MARGINAL_PER_M_TO_NEXT_LEVEL, + is_knee_point=portfolio_src.IS_KNEE_POINT, +)) + +# Query InvestmentPortfolio rows from the ontology for rendering. +InvLvlRef = InvestmentLevel.ref() +PortRef = InvestmentPortfolio.ref() +portfolio_query_df = ( + model.where(PortRef.investment_level(InvLvlRef)) + .select( + PortRef.investment_level_name.alias("level"), + InvLvlRef.budget_cap.alias("budget"), + PortRef.dc_count.alias("dc_count"), + PortRef.total_mw.alias("total_mw"), + PortRef.annual_revenue.alias("annual_revenue"), + PortRef.upgrade_cost.alias("upgrade_cost"), + PortRef.net_value.alias("net_value"), + PortRef.marginal_per_m_to_next_level.alias("marginal"), + PortRef.is_knee_point.alias("is_knee"), + ) + .to_df() + .sort_values("budget") + .reset_index(drop=True) +) +portfolio_query_df["budget"] = portfolio_query_df["budget"].astype(float) +portfolio_query_df["dc_count"] = portfolio_query_df["dc_count"].astype(int) + +# Print Pareto frontier (read from ontology, not pareto_rows) print( f"\n {'#':>3} {'Level':>8} {'Budget $M':>10} {'DCs':>5} {'DC MW':>8} " - f"{'Revenue $/yr':>14} {'Upg $M':>8} {'Upg MW':>8} {'Net Value':>14}" + f"{'Revenue $/yr':>14} {'Upg $M':>8} {'Net Value':>14} {'Knee':>5}" 
) print(f" {'-' * 85}") -for j, pt in enumerate(pareto_rows): +for j, row in portfolio_query_df.iterrows(): + knee_flag = " *" if bool(row["is_knee"]) else "" print( - f" {j + 1:>3} {pt['level']:>8} {pt['budget']:>10,.0f} " - f"{pt['n_approved']:>5} {pt['total_mw']:>8,.0f} " - f"{pt['revenue']:>14,.0f} {pt['upgrade_cost_m']:>8,.1f} " - f"{pt['upgrade_mw']:>8,.1f} {pt['net_value']:>14,.0f}" + f" {j + 1:>3} {row['level']:>8} {float(row['budget']):>10,.0f} " + f"{int(row['dc_count']):>5} {float(row['total_mw']):>8,.0f} " + f"{float(row['annual_revenue']):>14,.0f} {float(row['upgrade_cost']) / 1e6:>8,.1f} " + f"{float(row['net_value']):>14,.0f} {knee_flag:>5}" ) # Detailed results per investment level @@ -1005,42 +1119,31 @@ def _query_flag(relationship, flag_name): for _, row in level_upg.iterrows(): print(f" {row['upgrade_id']}: +{float(row['capacity_mw']):.0f} MW, ${float(row['cost_m']):.1f}M") -# Marginal analysis + knee detection -if len(pareto_rows) >= 3: +# Marginal analysis + knee detection (read from ontology) +if len(portfolio_query_df) >= 3: print("\n MARGINAL ANALYSIS (value gained per additional $M budget):") - rates = [] - for j in range(len(pareto_rows) - 1): - d_val = pareto_rows[j + 1]["net_value"] - pareto_rows[j]["net_value"] - d_budget = pareto_rows[j + 1]["budget"] - pareto_rows[j]["budget"] - rate = d_val / d_budget if abs(d_budget) > 1e-6 else 0 - rates.append(rate) - d_mw = pareto_rows[j + 1]["total_mw"] - pareto_rows[j]["total_mw"] - d_dcs = pareto_rows[j + 1]["n_approved"] - pareto_rows[j]["n_approved"] + for j in range(len(portfolio_query_df) - 1): + cur = portfolio_query_df.iloc[j] + nxt = portfolio_query_df.iloc[j + 1] + d_val = float(nxt["net_value"]) - float(cur["net_value"]) + d_budget = float(nxt["budget"]) - float(cur["budget"]) + d_mw = float(nxt["total_mw"]) - float(cur["total_mw"]) + d_dcs = int(nxt["dc_count"]) - int(cur["dc_count"]) + rate = float(cur["marginal"]) if pd.notna(cur["marginal"]) else 0.0 print( - f" 
{pareto_rows[j]['level']:>6} -> {pareto_rows[j+1]['level']:<6}: " + f" {cur['level']:>6} -> {nxt['level']:<6}: " f"dValue={d_val:>+14,.0f}, dBudget={d_budget:>+6,.0f}$M, " f"dMW={d_mw:>+8,.0f}, dDCs={d_dcs:>+3}, " f"marginal={rate:>+12,.0f}$/M$" ) - if len(rates) >= 2: - max_jump, knee_idx = 0, 1 - for j in range(len(rates) - 1): - if abs(rates[j + 1]) > 1e-6: - jump = abs(rates[j] / rates[j + 1]) - elif abs(rates[j]) > 1e-6: - jump = float("inf") - else: - jump = 0 - if jump > max_jump: - max_jump = jump - knee_idx = j + 1 - - knee = pareto_rows[knee_idx] + knee_rows = portfolio_query_df[portfolio_query_df["is_knee"].astype(bool)] + if len(knee_rows) > 0: + knee = knee_rows.iloc[0] print( - f"\n KNEE POINT: {knee['level']} -- ${knee['budget']:,.0f}M budget, " - f"${knee['net_value']:,.0f} net value, {knee['n_approved']} DCs, " - f"{knee['total_mw']:,.0f} MW" + f"\n KNEE POINT: {knee['level']} -- ${float(knee['budget']):,.0f}M budget, " + f"${float(knee['net_value']):,.0f} net value, {int(knee['dc_count'])} DCs, " + f"{float(knee['total_mw']):,.0f} MW" ) print(" Diminishing returns beyond this investment level.") diff --git a/v1/energy_grid_planning/runbook.md b/v1/energy_grid_planning/runbook.md new file mode 100644 index 0000000..051f6ab --- /dev/null +++ b/v1/energy_grid_planning/runbook.md @@ -0,0 +1,88 @@ +# Runbook: Energy Grid Planning — Multi-Reasoner Walkthrough + +ERCOT processes 10 hyperscaler interconnection requests (2,930 MW) against a 12-substation Texas grid. The chain forecasts substation load, finds structural bottlenecks, screens compliance, and produces a Pareto frontier across 5 budget levels — no single reasoner can answer this end-to-end. + +## The chain + +``` +ERCOT has 10 hyperscaler interconnection requests totalling 2,930 MW +on a 12-substation grid where DFW is the binding capacity bottleneck. 
+The chain produces a Pareto frontier across 5 budget levels — the knee at +$300M unlocks 5 DCs (1,500 MW, $264M net value) including xAI Colossus. + + ───────────────────────────────────────────────────────────────── + STAGE 1 Predictive ──► Substation.predicted_load (12) + DFW: 1,100 → 1,700 MW (+54.6%) ── breaches + 1,600 MW capacity at 24mo. The only + substation predicted to breach. + ───────────────────────────────────────────────────────────────── + STAGE 2 Graph ──► Substation.betweenness (12) + (WCC/ Substation.grid_community (3 regions) + Louvain/ Substation.is_structurally_critical (3) + centrality) DFW, Houston, San Antonio dominate. 7 of 10 + DC requests target critical substations. + ───────────────────────────────────────────────────────────────── + STAGE 3 Rules ──► DataCenterRequest.is_compliant (2) + fails_capacity / fails_structural / + fails_low_carbon flags written back. + Only Crusoe (Midland) and Oracle + (Corpus Christi) pass all three. + ───────────────────────────────────────────────────────────────── + STAGE 4 Prescriptive ──► DataCenterRequest.x_approve (per InvestmentLevel) + SubstationUpgrade.x_upgrade (per InvestmentLevel) + OPTIMAL across 5 budget levels in one solve. + Knee $300M · 5 DCs · 1,500 MW · $264M net. + Google + Lambda never approved — DFW full. + ───────────────────────────────────────────────────────────────── +``` + +## Workflow + +### 1. Build ontology + +- Prompt: `/rai-build-starter-ontology Build an ontology for grid infrastructure planning from the CSVs in data/ covering substations, generators, transmission lines, demand forecasts, data center requests, and substation upgrades.` +- Response: Concepts: `Substation`, `Generator`, `TransmissionLine`, `LoadZone`, `DemandPeriod`, `RenewableProfile`, `MaintenanceWindow`, `Customer`, `DataCenterRequest`, `SubstationUpgrade`, `DemandForecast`, `LoadHistory`, `DCAnnouncement` — bound to the bundled CSVs (12 substations, 10 DC requests, 18 transmission lines). + +### 2. 
Examine ontology + +- Prompt: `/rai-querying Show the ontology as a concept-relationship diagram and report row counts per concept.` +- Response: 13 concepts: 12 `Substation`, 15 `Generator`, 18 `TransmissionLine`, 8 `LoadZone`, 24 `DemandPeriod`, 12 `RenewableProfile`, 8 `MaintenanceWindow`, 100 `Customer`, 10 `DataCenterRequest` (2,930 MW total), 10 `SubstationUpgrade` ($630M total), historical `LoadHistory` and forward `DemandForecast` rows backing the predictive stage. + +### 3. Discover reasoner questions + +- Prompt: `/rai-discovery We have 10 hyperscaler interconnection requests against a 12-substation grid. Which to approve, which substation upgrades to fund, at what budget level?` +- Response: Plan routing sub-questions to predictive, graph, rules, and prescriptive reasoners. + +### 4. Forecast substation load + +- Prompt: `/rai-predictive-modeling + /rai-predictive-training Forecast substation load growth over the next 24 months from historical demand, planned generator additions, and the DC request pipeline. Use the pre-trained model from the bundled DemandForecast table if available, or train fresh. Bind each substation's predicted peak load back to the ontology so the rules engine and optimizer can read it.` +- Response: `Substation.predicted_load` for all 12; DFW breaches at 1,700 MW vs 1,600 MW cap at 24 months (+54.6%). + +### 5. Find structural bottlenecks + +- Prompt: `/rai-graph-analysis Which substations are most critical to power flow based on grid topology? Flag the top 3 most structurally critical, surface any regional clustering, and persist the structural-criticality scores back to the ontology.` +- Response: 1 connected component, 3 Louvain communities (North Texas, West Texas, Gulf Coast); DFW, Houston, San Antonio flagged `is_structurally_critical`; 7 of 10 DC requests target critical nodes. + +### 6. 
Screen DC requests + +- Prompt: `/rai-rules-authoring Screen each data center request against three criteria: (1) substation must have enough capacity after predicted load, (2) substation's low-carbon (renewable + nuclear) generation share must meet the DC's low-carbon requirement, (3) substation shouldn't be one of the top-3 structurally critical. Which requests pass all three?` +- Response: `fails_capacity` / `fails_structural` / `fails_low_carbon` + `is_compliant`; 2 pass (Crusoe, Oracle), 8 flagged. + +### 7. Approve DCs and fund upgrades + +- Prompt: `/rai-prescriptive-problem-formulation Decide which data center requests to approve and which substation upgrades to fund at $200M, $300M, $400M, $500M, and $600M investment levels. Maximize annual revenue across all five levels in a single solve. A request can only be approved if its substation has enough capacity after upgrades, and total upgrade spend at each level must stay within that level's budget. Consider all 10 requests — the Stage 3 compliance flags are informational, not hard filters.` +- Response: OPTIMAL MIP across 5 `InvestmentLevel` values in one solve; `x_approve` and `x_upgrade` written back per level. + +### 8. Read the frontier + +- Prompt: `/rai-prescriptive-results-interpretation Which data centers get approved, which upgrades are selected, and where's the biggest return on investment at each budget level?` +- Response: Pareto frontier with knee at $300M (5 DCs, 1,500 MW, $264M net); marginal $995K/$M at knee, declining to $400K/$M by $600M; Google + Lambda never approved (DFW full). + +### 9. 
Persist solution concepts into the ontology + +- Prompt: `/rai-ontology-design Add an InvestmentPortfolio concept indexed by InvestmentLevel that materializes the per-budget aggregates (approved-DC count, total MW, annual revenue, upgrade cost, net value, marginal value per added $M) and flags the knee point.` +- Response: Ontology gains an `InvestmentPortfolio(InvestmentLevel)` Concept (5 rows, one per budget) with `dc_count`, `total_mw`, `annual_revenue`, `upgrade_cost`, `net_value`, `marginal_per_m_to_next_level`, `is_knee_point`. All five frontier rows — $200M ($165M net) → $300M ($264M net, knee) → $600M ($395M net) — are queryable as ontology rather than stdout. + +## Data + +Bundled CSVs in `data/`: 12 substations, 15 generators, 18 transmission lines, 10 DC requests (2,930 MW), 10 substation upgrades ($630M total), plus historical load and forecast tables. Full chain implemented in `energy_grid_planning.py`. diff --git a/v1/machine_maintenance/machine_maintenance.py b/v1/machine_maintenance/machine_maintenance.py index 331cf40..573e52d 100644 --- a/v1/machine_maintenance/machine_maintenance.py +++ b/v1/machine_maintenance/machine_maintenance.py @@ -33,7 +33,16 @@ from pathlib import Path from pandas import read_csv -from relationalai.semantics import Float, Integer, Model, String, max, sum +from relationalai.semantics import ( + Boolean, + Float, + Integer, + Model, + String, + distinct, + max, + sum, +) from relationalai.semantics.reasoners.graph import Graph from relationalai.semantics.reasoners.prescriptive import Problem from relationalai.semantics.std import aggregates as aggs @@ -189,6 +198,33 @@ CertificationExpiry.technician_id == Technician.technician_id ) +# TrainingOption concept: cross-training options per (technician, machine_type). +# Used by Stage 4 resilience analysis to recommend the cheapest non-local +# cross-training candidate for each concentrated machine type. 
+TrainingOption = model.Concept( + "TrainingOption", + identify_by={"technician_id": String, "machine_type": String}, +) +TrainingOption.training_cost = model.Property( + f"{TrainingOption} costs {Float:training_cost}" +) +TrainingOption.training_weeks = model.Property( + f"{TrainingOption} takes {Integer:training_weeks} weeks" +) +TrainingOption.technician = model.Property(f"{TrainingOption} for {Technician}") +training_data = model.data(training_df) +model.define( + to_ := TrainingOption.new( + technician_id=training_data["technician_id"], + machine_type=training_data["machine_type"], + ), + to_.training_cost(training_data["training_cost"]), + to_.training_weeks(training_data["training_weeks"]), +) +model.define(TrainingOption.technician(Technician)).where( + TrainingOption.technician_id == Technician.technician_id +) + # Period concept: discrete planning periods (1..PERIOD_HORIZON). Period = model.Concept("Period", identify_by={"pid": Integer}) period_data = model.data([{"pid": t} for t in range(1, PERIOD_HORIZON + 1)]) @@ -1250,6 +1286,313 @@ print("STAGE 4: Resilience -- Concentration Risk Analysis") print("=" * 70) +# -------------------------------------------------- +# Materialize prescriptive output as ontology concepts. +# These bindings turn the post-solve x_maintain / x_assigned / x_vulnerable +# property values into queryable ontology rather than ad-hoc pandas frames. +# -------------------------------------------------- + +# MaintenancePlan: singleton capturing the optimizer's cost breakdown. 
+MaintenancePlan = model.Concept( + "MaintenancePlan", identify_by={"key": Integer} +) +MaintenancePlan.objective = model.Property( + f"{MaintenancePlan} has objective {Float:objective}" +) +MaintenancePlan.failure_cost = model.Property( + f"{MaintenancePlan} has failure cost {Float:failure_cost}" +) +MaintenancePlan.labor_cost = model.Property( + f"{MaintenancePlan} has labor cost {Float:labor_cost}" +) +MaintenancePlan.travel_cost = model.Property( + f"{MaintenancePlan} has travel cost {Float:travel_cost}" +) +MaintenancePlan.total_jobs = model.Property( + f"{MaintenancePlan} has total jobs {Integer:total_jobs}" +) + +# Seed the singleton and bind the optimizer's reported objective onto it. +plan_data = model.data([{"key": 1, "obj_val": float(si.objective_value)}]) +model.define( + plan_seed := MaintenancePlan.new(key=plan_data["key"]), + plan_seed.objective(plan_data["obj_val"]), +) + +# Aggregate the cost components and job count off the post-solve properties. +plan_ref = MaintenancePlan.ref() +mp_fc = MachinePeriod.ref() +m_fc = Machine.ref() +model.define( + plan_ref.failure_cost( + aggs.sum( + mp_fc.x_vulnerable + * mp_fc.predicted_fp + * m_fc.estimated_parts_cost + * m_fc.criticality + * (1 + CENTRALITY_WEIGHT * m_fc.betweenness) + ).where(mp_fc.machine(m_fc)) + ) +) + +tmp_lc = TechnicianMachinePeriod.ref() +m_lc = Machine.ref() +t_lc = Technician.ref() +model.define( + plan_ref.labor_cost( + aggs.sum( + tmp_lc.x_assigned + * m_lc.maintenance_duration_hours + * t_lc.hourly_rate + ).where( + tmp_lc.machine(m_lc), + tmp_lc.technician(t_lc), + ) + ) +) + +tmp_tc = TechnicianMachinePeriod.ref() +m_tc = Machine.ref() +model.define( + plan_ref.travel_cost( + aggs.sum( + tmp_tc.x_assigned + * (1 - tmp_tc.same_location) + * m_tc.maintenance_duration_hours + * TRAVEL_COST_PER_HOUR + ).where(tmp_tc.machine(m_tc)) + ) +) + +mp_jobs = MachinePeriod.ref() +model.define( + plan_ref.total_jobs( + aggs.count(mp_jobs).where(mp_jobs.x_maintain > 0.5) + ) +) + +# 
TypeConcentration: per-machine-type concentration analysis. +TypeConcentration = model.Concept( + "TypeConcentration", identify_by={"machine_type": String} +) +TypeConcentration.qualified_tech_count = model.Property( + f"{TypeConcentration} has {Integer:qualified_tech_count} qualified techs" +) +TypeConcentration.qualified_tech_locations = model.Property( + f"{TypeConcentration} has tech locations {String:qualified_tech_locations}" +) +TypeConcentration.is_concentrated = model.Property( + f"{TypeConcentration} concentration flag {Boolean:is_concentrated}" +) +TypeConcentration.scheduled_jobs_total = model.Property( + f"{TypeConcentration} has {Integer:scheduled_jobs_total} scheduled jobs" +) +TypeConcentration.scheduled_jobs_traveling = model.Property( + f"{TypeConcentration} has {Integer:scheduled_jobs_traveling} traveling jobs" +) +TypeConcentration.travel_pct = model.Property( + f"{TypeConcentration} has travel pct {Float:travel_pct}" +) + +# Seed: one TypeConcentration per distinct machine_type appearing in +# Qualification (the population of types we have any tech for). +qref_seed = Qualification.ref() +model.define( + TypeConcentration.new(machine_type=qref_seed.machine_type_str) +) + +# qualified_tech_count: distinct techs qualified for this machine_type. +tc_qc = TypeConcentration.ref() +qref_qc = Qualification.ref() +tref_qc = Technician.ref() +model.define( + tc_qc.qualified_tech_count( + aggs.count(distinct(tref_qc)) + .where( + qref_qc.machine_type_str == tc_qc.machine_type, + qref_qc.technician(tref_qc), + ) + .per(tc_qc) + ) +) + +# Helper concept: distinct (machine_type, location) pairs derived from the +# qualified-technician join. Compound identity gives one entity per unique +# pair; used downstream by distinct_loc_count. 
+TypeLocation = model.Concept( + "TypeLocation", + identify_by={"machine_type": String, "location": String}, +) +qref_tl = Qualification.ref() +tref_tl = Technician.ref() +model.define( + TypeLocation.new( + machine_type=qref_tl.machine_type_str, + location=tref_tl.base_location, + ) +).where(qref_tl.technician(tref_tl)) + +# qualified_tech_locations: comma-joined distinct base_locations of +# qualified techs. Built in pandas because string_join is not yet supported +# by the LQP backend; bound onto TypeConcentration via model.data. +_loc_pairs = ( + qualifications_df.merge( + technicians_df[["technician_id", "base_location"]], on="technician_id" + )[["machine_type", "base_location"]] + .drop_duplicates() + .sort_values(["machine_type", "base_location"]) +) +_loc_str_rows = [ + {"mtype": mt, "loc_str": ", ".join(sorted(g["base_location"].unique()))} + for mt, g in _loc_pairs.groupby("machine_type") +] +loc_str_data = model.data(_loc_str_rows) +tc_locs = TypeConcentration.ref() +model.define(tc_locs.qualified_tech_locations(loc_str_data["loc_str"])).where( + tc_locs.machine_type == loc_str_data["mtype"] +) + +# distinct_loc_count: helper to drive the is_concentrated flag, computed +# off the TypeLocation pairs (one entity per distinct location). +TypeConcentration.distinct_loc_count = model.Property( + f"{TypeConcentration} has {Integer:distinct_loc_count} distinct tech locations" +) +tc_dlc = TypeConcentration.ref() +tl_dlc = TypeLocation.ref() +model.define( + tc_dlc.distinct_loc_count( + aggs.count(tl_dlc) + .where(tl_dlc.machine_type == tc_dlc.machine_type) + .per(tc_dlc) + ) +) + +# is_concentrated: True iff all qualified techs share a single base_location. +model.where(TypeConcentration.distinct_loc_count == 1).define( + TypeConcentration.is_concentrated(True) +) +model.where(TypeConcentration.distinct_loc_count > 1).define( + TypeConcentration.is_concentrated(False) +) + +# scheduled_jobs_total: count of scheduled (machine, period) jobs for this type. 
+tc_sjt = TypeConcentration.ref() +mp_sjt = MachinePeriod.ref() +m_sjt = Machine.ref() +model.define( + tc_sjt.scheduled_jobs_total( + aggs.count(mp_sjt) + .where( + mp_sjt.machine(m_sjt), + m_sjt.machine_type == tc_sjt.machine_type, + mp_sjt.x_maintain > 0.5, + ) + .per(tc_sjt) + | 0 + ) +) + +# scheduled_jobs_traveling: count of scheduled assignments where the +# assigned technician's base_location differs from the machine's location. +tc_sjr = TypeConcentration.ref() +tmp_sjr = TechnicianMachinePeriod.ref() +m_sjr = Machine.ref() +model.define( + tc_sjr.scheduled_jobs_traveling( + aggs.count(tmp_sjr) + .where( + tmp_sjr.machine(m_sjr), + m_sjr.machine_type == tc_sjr.machine_type, + tmp_sjr.x_assigned > 0.5, + tmp_sjr.same_location == 0, + ) + .per(tc_sjr) + | 0 + ) +) + +# travel_pct: 100 * traveling / total (only when total > 0). +model.where(TypeConcentration.scheduled_jobs_total > 0).define( + TypeConcentration.travel_pct( + floats.float(TypeConcentration.scheduled_jobs_traveling) + / floats.float(TypeConcentration.scheduled_jobs_total) + * 100.0 + ) +) + +# CrossTrainingRecommendation: cheapest non-local cross-training candidate +# per concentrated machine type. One row per concentrated machine_type. 
+CrossTrainingRecommendation = model.Concept( + "CrossTrainingRecommendation", identify_by={"machine_type": String} +) +CrossTrainingRecommendation.tech_id = model.Property( + f"{CrossTrainingRecommendation} has {String:tech_id}" +) +CrossTrainingRecommendation.tech_name = model.Property( + f"{CrossTrainingRecommendation} has {String:tech_name}" +) +CrossTrainingRecommendation.cost = model.Property( + f"{CrossTrainingRecommendation} has {Float:cost}" +) +CrossTrainingRecommendation.duration_weeks = model.Property( + f"{CrossTrainingRecommendation} has {Integer:duration_weeks} weeks" +) +CrossTrainingRecommendation.is_best_candidate = model.Property( + f"{CrossTrainingRecommendation} best flag {Boolean:is_best_candidate}" +) + +# A TrainingOption is "non-local" for a concentrated machine type when the +# candidate technician's base_location is NOT one of the locations where the +# qualified techs already sit. For singly-concentrated types that simplifies +# to: candidate.base_location != the (single) qualified-tech location string. +# Pre-compute the cheapest non-local cost per concentrated type as a derived +# property on TypeConcentration so the recommendation seeding stays simple. +TypeConcentration.min_nonlocal_cost = model.Property( + f"{TypeConcentration} has {Float:min_nonlocal_cost} cheapest non-local training cost" +) +tc_min = TypeConcentration.ref() +to_min = TrainingOption.ref() +t_min = Technician.ref() +model.where(tc_min.is_concentrated == True).define( + tc_min.min_nonlocal_cost( + aggs.min(to_min.training_cost) + .where( + to_min.machine_type == tc_min.machine_type, + to_min.technician(t_min), + t_min.base_location != tc_min.qualified_tech_locations, + ) + .per(tc_min) + ) +) + +# Seed CrossTrainingRecommendation: one row per concentrated type that has +# at least one non-local candidate (min_nonlocal_cost is populated). 
+tc_seed_ctr = TypeConcentration.ref() +model.where(tc_seed_ctr.min_nonlocal_cost).define( + CrossTrainingRecommendation.new(machine_type=tc_seed_ctr.machine_type) +) + +# Bind cheapest-candidate attributes onto each recommendation by joining the +# TrainingOption whose cost matches the pre-computed min_nonlocal_cost AND +# whose tech sits outside the concentrated location. +ctr_bind = CrossTrainingRecommendation.ref() +tc_bind = TypeConcentration.ref() +to_bind = TrainingOption.ref() +t_bind = Technician.ref() +model.where( + ctr_bind.machine_type == tc_bind.machine_type, + to_bind.machine_type == ctr_bind.machine_type, + to_bind.technician(t_bind), + t_bind.base_location != tc_bind.qualified_tech_locations, + to_bind.training_cost == tc_bind.min_nonlocal_cost, +).define( + ctr_bind.tech_id(t_bind.technician_id), + ctr_bind.tech_name(t_bind.technician_name), + ctr_bind.cost(to_bind.training_cost), + ctr_bind.duration_weeks(to_bind.training_weeks), + ctr_bind.is_best_candidate(True), +) + # 4a. Technician utilization from the optimal schedule. tech_assignments = ( assign_df.groupby( @@ -1273,105 +1616,102 @@ f"{row['assignment_count']} assignments ({pct:.0f}%)" ) -# 4b. Geographic concentration analysis by machine type. -# For each machine type, check if all qualified technicians are in one location. -# This reveals structural fragility invisible in the per-assignment view. +# 4b. MaintenancePlan singleton: cost breakdown from the optimizer. 
+plan_df = ( + model.select( + MaintenancePlan.objective.alias("objective"), + MaintenancePlan.failure_cost.alias("failure_cost"), + MaintenancePlan.labor_cost.alias("labor_cost"), + MaintenancePlan.travel_cost.alias("travel_cost"), + MaintenancePlan.total_jobs.alias("total_jobs"), + ) + .to_df() +) +plan_row = plan_df.iloc[0] +print("\nMaintenancePlan (cost breakdown):") +print(f" Objective: ${plan_row['objective']:.2f}") +print(f" Failure cost: ${plan_row['failure_cost']:.2f}") +print(f" Labor cost: ${plan_row['labor_cost']:.2f}") +print(f" Travel cost: ${plan_row['travel_cost']:.2f}") +print(f" Total jobs: {int(plan_row['total_jobs'])}") + +# 4c. TypeConcentration: per-machine-type concentration analysis. +type_conc_df = ( + model.select( + TypeConcentration.machine_type.alias("machine_type"), + TypeConcentration.qualified_tech_count.alias("qualified_tech_count"), + TypeConcentration.qualified_tech_locations.alias("qualified_tech_locations"), + TypeConcentration.is_concentrated.alias("is_concentrated"), + TypeConcentration.scheduled_jobs_total.alias("scheduled_jobs_total"), + TypeConcentration.scheduled_jobs_traveling.alias("scheduled_jobs_traveling"), + TypeConcentration.travel_pct.alias("travel_pct"), + ) + .to_df() + .sort_values("machine_type") +) + print("\nQualification coverage by machine type:") -concentrated_types = [] -machine_types = sorted(qualifications_df["machine_type"].unique()) -for mtype in machine_types: - qual_techs = qualifications_df[ - qualifications_df["machine_type"] == mtype - ]["technician_id"].tolist() - tech_info = technicians_df[technicians_df["technician_id"].isin(qual_techs)] - locations = tech_info["base_location"].unique().tolist() - tech_count = len(qual_techs) - - # Machines of this type and their locations. 
- type_machines = machines_df[machines_df["machine_type"] == mtype] - machine_locations = type_machines["location"].unique().tolist() - uncovered_locs = [loc for loc in machine_locations if loc not in locations] - - status = "OK" - if len(locations) == 1: - concentrated_types.append((mtype, locations[0], tech_count)) - status = f"CONCENTRATED -- all {tech_count} techs in {locations[0]}" - elif uncovered_locs: - status = f"gaps at {', '.join(uncovered_locs)}" - - print(f" {mtype}: {tech_count} techs in {', '.join(sorted(locations))} -- {status}") - -# 4c. Impact analysis for concentrated types. -if concentrated_types: +for _, row in type_conc_df.iterrows(): + tag = ( + f"CONCENTRATED -- all {int(row['qualified_tech_count'])} techs in " + f"{row['qualified_tech_locations']}" + if row["is_concentrated"] + else "OK" + ) + print( + f" {row['machine_type']}: {int(row['qualified_tech_count'])} techs " + f"in {row['qualified_tech_locations']} -- {tag}" + ) + +concentrated_df = type_conc_df[type_conc_df["is_concentrated"]] +if not concentrated_df.empty: print("\nConcentration risk detail:") - for mtype, conc_loc, tech_count in concentrated_types: - type_machines = machines_df[machines_df["machine_type"] == mtype] - remote_machines = type_machines[type_machines["location"] != conc_loc] - local_machines = type_machines[type_machines["location"] == conc_loc] - - # How many scheduled assignments for this type required travel? 
- type_assign = assign_df[assign_df["machine_type"] == mtype] - travel_assign = type_assign[type_assign["base_location"] != type_assign["location"]] - - print(f"\n {mtype}: {len(type_machines)} machines across " - f"{len(type_machines['facility'].unique())} facilities, " - f"all {tech_count} qualified techs in {conc_loc}") - print(f" Local machines ({conc_loc}): {len(local_machines)}") - print(f" Remote machines (require travel): {len(remote_machines)}") - if not remote_machines.empty: - for _, m in remote_machines.iterrows(): - print(f" {m['machine_id']} ({m['facility']}, {m['location']})") - if not type_assign.empty: - print(f" Scheduled {mtype} jobs: {len(type_assign)}, " - f"of which {len(travel_assign)} require travel " - f"({len(travel_assign)/len(type_assign)*100:.0f}%)") - - # Show qualified techs. - qual_techs = qualifications_df[ - qualifications_df["machine_type"] == mtype - ]["technician_id"].tolist() - tech_detail = technicians_df[technicians_df["technician_id"].isin(qual_techs)] - print(f" Qualified techs (all {conc_loc}):") - for _, t in tech_detail.iterrows(): - print(f" {t['technician_id']} ({t['technician_name']}, " - f"{t['skill_level']})") - - # 4d. Cross-training recommendation. + for _, row in concentrated_df.iterrows(): + total_jobs = int(row["scheduled_jobs_total"]) if row["scheduled_jobs_total"] else 0 + travel_jobs = ( + int(row["scheduled_jobs_traveling"]) if row["scheduled_jobs_traveling"] else 0 + ) + pct = float(row["travel_pct"]) if total_jobs else 0.0 + print( + f"\n {row['machine_type']}: all {int(row['qualified_tech_count'])} " + f"qualified techs in {row['qualified_tech_locations']}" + ) + if total_jobs: + print( + f" Scheduled {row['machine_type']} jobs: {total_jobs}, " + f"of which {travel_jobs} require travel ({pct:.0f}%)" + ) + else: + print(f" Scheduled {row['machine_type']} jobs: 0") + + # 4d. CrossTrainingRecommendation: cheapest non-local candidate per type. 
print(f"\n{'=' * 70}") print("RECOMMENDATION: Cross-Training to Eliminate Concentration Risk") print("=" * 70) - for mtype, conc_loc, _ in concentrated_types: - candidates = training_df[training_df["machine_type"] == mtype].merge( - technicians_df[["technician_id", "technician_name", "base_location", - "skill_level"]], - on="technician_id", - ).sort_values("training_cost") - - # Prefer candidates NOT in the concentrated location. - non_local = candidates[candidates["base_location"] != conc_loc] - if not non_local.empty: - best = non_local.iloc[0] - elif not candidates.empty: - best = candidates.iloc[0] - else: - print(f"\n No {mtype} cross-training options available.") - continue - - print(f"\n {mtype} -- add coverage outside {conc_loc}:") - print(f" Best candidate: {best['technician_id']} " - f"({best['technician_name']}, {best['skill_level']}, " - f"{best['base_location']})") - print(f" Cost: ${int(best['training_cost']):,}, " - f"Duration: {int(best['training_weeks'])} weeks") - - if len(candidates) > 1: - print(" All candidates:") - for _, cand in candidates.iterrows(): - local_tag = " (same location)" if cand["base_location"] == conc_loc else "" - print(f" {cand['technician_id']} ({cand['technician_name']}, " - f"{cand['base_location']}): " - f"${int(cand['training_cost']):,}, " - f"{int(cand['training_weeks'])} weeks{local_tag}") + rec_df = ( + model.select( + CrossTrainingRecommendation.machine_type.alias("machine_type"), + CrossTrainingRecommendation.tech_id.alias("tech_id"), + CrossTrainingRecommendation.tech_name.alias("tech_name"), + CrossTrainingRecommendation.cost.alias("cost"), + CrossTrainingRecommendation.duration_weeks.alias("duration_weeks"), + CrossTrainingRecommendation.is_best_candidate.alias("is_best_candidate"), + ) + .to_df() + .sort_values("machine_type") + ) + + if rec_df.empty: + print("\n No non-local cross-training options available.") + for _, row in rec_df.iterrows(): + conc_loc = concentrated_df[ + concentrated_df["machine_type"] == 
row["machine_type"] + ]["qualified_tech_locations"].iloc[0] + print(f"\n {row['machine_type']} -- add coverage outside {conc_loc}:") + print( + f" Best candidate: {row['tech_id']} ({row['tech_name']}): " + f"${int(row['cost']):,}, {int(row['duration_weeks'])} weeks" + ) else: print("\nNo geographic concentration risk detected.") diff --git a/v1/machine_maintenance/runbook.md b/v1/machine_maintenance/runbook.md new file mode 100644 index 0000000..cd0c166 --- /dev/null +++ b/v1/machine_maintenance/runbook.md @@ -0,0 +1,91 @@ +# Runbook: Machine Maintenance — Multi-Reasoner Walkthrough + +Schedules preventive maintenance for a 30-machine, 3-plant operation. OEE alone misranks the plants; sensor counts don't quantify forward risk; rules flag machines but don't allocate scarce technician time; the optimizer produces a feasible schedule but can't see that all Turbine techs sit in one city. The chain threads querying, graph, rules, and prescriptive reasoners through one ontology so each stage's enrichments feed the next. + +## The chain + +``` +Plant_B looks worst on OEE (61.4%). Plant_A looks mid-tier (68.2%). +The chain shows Plant_A is actually the highest-risk plant — and that +all 3 Turbine techs sit in one city, a $3,200 fix away from resolved. + + ───────────────────────────────────────────────────────────────── + STAGE 0 Querying ──► Machine.performance_ratio (30) + Machine.quality_ratio (30) + Machine.anomaly_count (30) + MachinePeriod.predicted_fp (120) + Plant_C 79.8% > Plant_A 68.2% > Plant_B 61.4% + 7 of 9 sensor anomalies are at Plant_A. + ───────────────────────────────────────────────────────────────── + STAGE 1 Graph ──► Machine.betweenness (30) + 30 machines → 1 connected component. + Pumps tie for top centrality (24.0). 
+ ───────────────────────────────────────────────────────────────── + STAGE 2 Rules ──► Machine.is_overdue_maintenance (6) + Machine.is_high_risk (1) + Machine.is_chronic_downtime (3) + Machine.risk_tier (30) + M013 (Pump, Plant_A) = Critical (3 of 3). + ───────────────────────────────────────────────────────────────── + STAGE 3 Prescriptive ──► MachinePeriod.x_maintain (120 binary) + MachinePeriod.x_vulnerable (120 binary) + TechnicianMachinePeriod.x_assigned + OPTIMAL · 20 jobs · 4 periods · $605,241 + ───────────────────────────────────────────────────────────────── + STAGE 4 Resilience ──► Concentration analysis on the solve + Turbine: all 3 techs in Houston_TX. + 67% of scheduled Turbine jobs travel. + Cross-train T006 (Chicago) — $3,200 / 5 wks. + ───────────────────────────────────────────────────────────────── +``` + +## Workflow + +### 1. Build ontology + +- Prompt: `/rai-build-starter-ontology Build a manufacturing maintenance ontology from the CSVs in data/ covering machines, technicians, qualifications, periods, sensor readings, failure predictions, downtime events, production runs, parts inventory, and certification expiry.` +- Response: Concepts: `Machine`, `Technician`, `Qualification`, `Period`, `MachinePeriod`, `TechnicianPeriod`, `TechnicianMachinePeriod`, `Sensor`, `SensorReading`, `FailurePrediction`, `DowntimeEvent`, `ProductionRun`, `PartsInventory`, `CertificationExpiry` — bound to the bundled CSVs (30 machines × 3 plants, 10 technicians, 4 periods). `training_options.csv` is loaded as a DataFrame (read in Stage 4), not modeled as a concept. + +### 2. 
Examine ontology + +- Prompt: `/rai-querying Show the ontology as a concept-relationship diagram and report row counts per concept.` +- Response: Concepts wired to the bundled CSVs: 30 `Machine` (3 plants × 5 types), 10 `Technician` (3 cities), 16 `Qualification`, 4 `Period`, 120 `MachinePeriod`, 60 `Sensor` and 240 `SensorReading`, 120 `FailurePrediction`, 129 `DowntimeEvent`, 120 `ProductionRun`, plus parts inventory and certification-expiry rows. + +### 3. Discover reasoner questions + +- Prompt: `/rai-discovery We need to schedule preventive maintenance for 30 machines across 3 plants. Where does OEE alone mislead us, and what structural risks won't a pure optimizer surface?` +- Response: Plan routing sub-questions to querying, graph, rules, prescriptive, and resilience skills. + +### 4. Diagnose plant operations + +- Prompt: `/rai-querying What's the OEE by plant? Which machines have the most sensor anomalies, and which are most likely to fail by the end of the planning horizon?` +- Response: Plant_C 79.8% > Plant_A 68.2% > Plant_B 61.4%; 7 of 9 anomalies at Plant_A; `MachinePeriod.predicted_fp` written for 120 rows. + +### 5. Find scheduling bottlenecks + +- Prompt: `/rai-graph-analysis Which machines share qualified technicians? Score each machine by how central it is in the qualification network so the optimizer can prioritize the bottlenecks.` +- Response: 30 machines → 1 connected component; Pumps tie at top centrality (normalized to 1.0); `Machine.betweenness` stored. + +### 6. Classify machine risk + +- Prompt: `/rai-rules-authoring Rate each machine's risk: chronic if >8 downtime events, high-risk if failure prob >0.3 AND criticality 4+, plus overdue for maintenance. All three flags = Critical, two = Elevated, otherwise Standard.` +- Response: 6 overdue, 1 high-risk, 3 chronic; M013 (Pump, Plant_A) = Critical; M016 (Turbine, Plant_A) = Elevated. + +### 7. 
Schedule maintenance + +- Prompt: `/rai-prescriptive-problem-formulation Schedule preventive maintenance across the 30 machines and 4 periods. Each machine in each period is either maintained or left exposed to failure risk for that period. Cap maintenance at 5 jobs per period (parts/bay limit). Every overdue machine must be maintained by period 2. Each maintained machine needs a qualified technician assigned, and technicians can't exceed their available hours per period. Minimize expected failure cost on machines left exposed (weighted by criticality and centrality) plus technician labor and travel.` +- Response: Decision variables for maintain / vulnerable / technician-assignment over the 30 machines × 4 periods (assignment restricted to qualified tech-machine pairs); 5 constraint families: cumulative coverage, assignment-maintenance linkage, technician hours, parts/bay capacity, and the overdue-by-period-2 deadline carried over from Stage 2. + +### 8. Stress-test concentration + +- Prompt: `/rai-prescriptive-solver-management + /rai-prescriptive-results-interpretation For each machine type, check whether all qualified technicians sit in one location and recommend the cheapest cross-training fix.` +- Response: OPTIMAL · 20 jobs · $605,241; Turbine concentrated in Houston_TX (67% of jobs travel); cross-train T006 (Chicago_IL, Senior) for $3,200 / 5 weeks. + +### 9. Persist solution concepts into the ontology + +- Prompt: `/rai-ontology-design Materialize the prescriptive output as ontology. 
Add a MaintenancePlan concept with the cost breakdown (failure / labor / travel) and total job count; a TypeConcentration concept per machine type capturing where the qualified techs sit and how much travel that forces; and a CrossTrainingRecommendation concept ranking candidates by cost so the cheapest fix is queryable.` +- Response: Ontology gains: `MaintenancePlan` (singleton, with `objective`, `failure_cost`, `labor_cost`, `travel_cost`, `total_jobs`); `TypeConcentration(machine_type)` (5 rows, with `qualified_tech_count`, `qualified_tech_locations`, `is_concentrated`, `scheduled_jobs_total`, `scheduled_jobs_traveling`, `travel_pct`); `CrossTrainingRecommendation` (one row per concentrated type, ranked candidates with `tech_id`, `cost`, `duration_weeks`, `is_best_candidate`). The Turbine-in-Houston concentration and T006 / Chicago_IL / $3,200 / 5w recommendation are now queryable as ontology rather than stdout. + +## Data + +Bundled CSVs in `data/`: 30 machines (3 plants × 5 types), 10 technicians (3 cities), 16 qualifications, 4 periods, 60 sensors / 240 readings, 120 failure predictions, 129 downtime events, 120 production runs, 13 training options. All five stages run in `machine_maintenance.py`. 
diff --git a/v1/portfolio_balancing/portfolio_balancing.py b/v1/portfolio_balancing/portfolio_balancing.py index abd4698..42ae716 100644 --- a/v1/portfolio_balancing/portfolio_balancing.py +++ b/v1/portfolio_balancing/portfolio_balancing.py @@ -29,8 +29,8 @@ from pathlib import Path -from pandas import read_csv -from relationalai.semantics import Float, Integer, Model, String, sum +from pandas import DataFrame, read_csv +from relationalai.semantics import Boolean, Float, Integer, Model, String, sum from relationalai.semantics.reasoners.graph import Graph from relationalai.semantics.reasoners.prescriptive import Problem from relationalai.semantics.std import aggregates as aggs @@ -1011,12 +1011,175 @@ def solve_epsilon(eps_rate=None): print(f"(PSD-preserving correlation shrinkage, alpha = {CRISIS_ALPHA})") print("=" * 70) -# Side-by-side vol (sqrt variance) by budget x lambda. +# Build the per-(scenario, eps_label) frontier table and materialize it as +# the `FrontierPoint` Concept. Each Pareto point's metadata -- return, risk, +# inter-row marginals, the knee flag, and the base/crisis vol comparison -- +# becomes ontology data instead of stdout. +fp_rows = [] +for sn in scenario_names: + pts = pareto[sn] + if not pts: + continue + rates = [] + for j, pt in enumerate(pts): + if j == 0: + marginal = None + else: + dr = pt["risk"] - pts[j - 1]["risk"] + dret = pt["return_actual"] - pts[j - 1]["return_actual"] + marginal = (dr / dret) if abs(dret) > 1e-6 else 0.0 + rates.append(marginal) + fp_rows.append({ + "scenario_label": sn, + "eps_label": pt["label"], + "return": pt["return_actual"], + "risk": pt["risk"], + "marginal_risk_per_return": marginal, + "is_knee": False, + }) + + # Knee = the eps point with the largest jump in marginal vs the prior + # point (per scenario). rates[0] is None (min_risk has no marginal), + # so we start scanning from index 2 against rates[1..]. 
+ knee_idx = None + max_jump = 0.0 + for j in range(2, len(rates)): + prev = rates[j - 1] + curr = rates[j] + if prev is None or curr is None: + continue + if abs(prev) > 1e-6: + jump = curr / prev + else: + jump = curr if curr and curr > 0 else 0.0 + if jump > max_jump: + max_jump = jump + knee_idx = j + if knee_idx is not None: + # fp_rows for this scenario starts at len(fp_rows) - len(pts). + scenario_start = len(fp_rows) - len(pts) + fp_rows[scenario_start + knee_idx]["is_knee"] = True + +# Pair base and crisis rows by (budget, eps_label) so vol_base / vol_crisis +# carry on BOTH the base-regime row and its matching crisis-regime row. +risk_by_key = { + (r["scenario_label"], r["eps_label"]): r["risk"] for r in fp_rows +} +for r in fp_rows: + sn = r["scenario_label"] + eps = r["eps_label"] + # Strip "base_" / "crisis_" prefix to get the budget suffix. + budget_suffix = sn.split("_", 1)[1] + base_risk = risk_by_key.get((f"base_{budget_suffix}", eps)) + crisis_risk = risk_by_key.get((f"crisis_{budget_suffix}", eps)) + vol_base = base_risk ** 0.5 if base_risk is not None else 0.0 + vol_crisis = crisis_risk ** 0.5 if crisis_risk is not None else 0.0 + vol_gap = vol_crisis - vol_base + vol_gap_pct = (vol_gap / vol_base * 100.0) if vol_base > 1e-9 else 0.0 + r["vol_base"] = vol_base + r["vol_crisis"] = vol_crisis + r["vol_gap"] = vol_gap + r["vol_gap_pct"] = vol_gap_pct + +fp_df = DataFrame(fp_rows) + +# FrontierPoint Concept -- one row per (Scenario, eps_label). 
+FrontierPoint = model.Concept( + "FrontierPoint", + identify_by={"scenario_label": String, "eps_label": String}, +) +FrontierPoint.scenario = model.Property(f"{FrontierPoint} for {Scenario}") +FrontierPoint.return_value = model.Property( + f"{FrontierPoint} has return {Float:fp_return}" +) +FrontierPoint.risk = model.Property(f"{FrontierPoint} has risk {Float:fp_risk}") +FrontierPoint.marginal_risk_per_return = model.Property( + f"{FrontierPoint} has marginal {Float:fp_marginal}" +) +FrontierPoint.is_knee = model.Property( + f"{FrontierPoint} is knee {Boolean:fp_is_knee}" +) +FrontierPoint.vol_base = model.Property( + f"{FrontierPoint} has vol_base {Float:fp_vol_base}" +) +FrontierPoint.vol_crisis = model.Property( + f"{FrontierPoint} has vol_crisis {Float:fp_vol_crisis}" +) +FrontierPoint.vol_gap = model.Property( + f"{FrontierPoint} has vol_gap {Float:fp_vol_gap}" +) +FrontierPoint.vol_gap_pct = model.Property( + f"{FrontierPoint} has vol_gap_pct {Float:fp_vol_gap_pct}" +) + +# Two-pass load: marginal_risk_per_return is null at min_risk, so it +# can't sit on the same model.data() frame as the all-rows columns +# (NaN breaks model.data()). +fp_main_df = fp_df[[ + "scenario_label", "eps_label", "return", "risk", "is_knee", + "vol_base", "vol_crisis", "vol_gap", "vol_gap_pct", +]].reset_index(drop=True) +fp_data = model.data(fp_main_df) +model.define( + fp := FrontierPoint.new( + scenario_label=fp_data["scenario_label"], + eps_label=fp_data["eps_label"], + ), + fp.return_value(fp_data["return"]), + fp.risk(fp_data["risk"]), + fp.is_knee(fp_data["is_knee"]), + fp.vol_base(fp_data["vol_base"]), + fp.vol_crisis(fp_data["vol_crisis"]), + fp.vol_gap(fp_data["vol_gap"]), + fp.vol_gap_pct(fp_data["vol_gap_pct"]), +) + +# Link FrontierPoint to its Scenario by matching scenario_label == Scenario.name. 
+fp_link_ref = FrontierPoint.ref() +sc_link_ref = Scenario.ref() +model.where( + fp_link_ref.scenario_label == sc_link_ref.name, +).define(fp_link_ref.scenario(sc_link_ref)) + +# Second pass: marginal_risk_per_return (only the 30 non-min_risk rows). +fp_marg_df = ( + fp_df[fp_df["marginal_risk_per_return"].notna()][ + ["scenario_label", "eps_label", "marginal_risk_per_return"] + ] + .reset_index(drop=True) +) +fp_marg_data = model.data(fp_marg_df) +fp_marg_ref = FrontierPoint.ref() +model.where( + fp_marg_ref.scenario_label == fp_marg_data["scenario_label"], + fp_marg_ref.eps_label == fp_marg_data["eps_label"], +).define( + fp_marg_ref.marginal_risk_per_return(fp_marg_data["marginal_risk_per_return"]) +) + +# Side-by-side vol (sqrt variance) by budget x lambda -- now sourced from +# the FrontierPoint Concept rather than the in-memory pareto dict. print("\n Volatility comparison (sqrt risk) -- base vs crisis at each lambda:") +fp_q = FrontierPoint.ref() +fp_query_df = ( + model.select( + fp_q.scenario_label.alias("scenario_label"), + fp_q.eps_label.alias("eps_label"), + fp_q.vol_base.alias("vol_base"), + fp_q.vol_crisis.alias("vol_crisis"), + fp_q.vol_gap.alias("vol_gap"), + fp_q.vol_gap_pct.alias("vol_gap_pct"), + ) + .to_df() +) + +eps_order = ["min_risk", "eps_1", "eps_2", "eps_3", "eps_4", "eps_5"] for budget in budgets: base_sn = f"base_{budget}" crisis_sn = f"crisis_{budget}" - if len(pareto[base_sn]) < 2 or len(pareto[crisis_sn]) < 2: + base_rows = fp_query_df[fp_query_df["scenario_label"] == base_sn] + crisis_rows = fp_query_df[fp_query_df["scenario_label"] == crisis_sn] + if base_rows.empty or crisis_rows.empty: continue print(f"\n Budget {budget}:") print( @@ -1024,16 +1187,16 @@ def solve_epsilon(eps_rate=None): f"{'gap':>10} {'gap_%':>8}" ) print(f" {'-' * 56}") - for j in range(min(len(pareto[base_sn]), len(pareto[crisis_sn]))): - b_pt = pareto[base_sn][j] - c_pt = pareto[crisis_sn][j] - vol_b = b_pt["risk"] ** 0.5 - vol_c = c_pt["risk"] ** 0.5 - gap = 
vol_c - vol_b - gap_pct = (gap / vol_b * 100.0) if vol_b > 1e-9 else 0.0 + base_by_eps = {row["eps_label"]: row for _, row in base_rows.iterrows()} + for eps in eps_order: + if eps not in base_by_eps: + continue + row = base_by_eps[eps] print( - f" {b_pt['label']:>10} {vol_b:>12.4f} {vol_c:>12.4f} " - f"{gap:>+10.4f} {gap_pct:>+7.1f}%" + f" {eps:>10} {float(row['vol_base']):>12.4f} " + f"{float(row['vol_crisis']):>12.4f} " + f"{float(row['vol_gap']):>+10.4f} " + f"{float(row['vol_gap_pct']):>+7.1f}%" ) print( diff --git a/v1/portfolio_balancing/runbook.md b/v1/portfolio_balancing/runbook.md new file mode 100644 index 0000000..2a87ca5 --- /dev/null +++ b/v1/portfolio_balancing/runbook.md @@ -0,0 +1,88 @@ +# Runbook: Portfolio Balancing — Multi-Reasoner Walkthrough + +Rebalance an 8-stock book under compliance + crisis stress. Rules surface broken positions, graph collapses redundant bets via correlation clustering, prescriptive solves a Markowitz QP across 6 (budget, regime) scenarios. No single reasoner does all three: rules don't allocate, graph doesn't optimize, prescriptive on the full universe stacks near-duplicate cluster members. + +## The chain + +``` +The current book breaks compliance on 4 holdings + 2 sectors. Naive +"diversification" hides correlated bets. The chain collapses 8 stocks +into 5 distinct cluster representatives, traces the efficient frontier +under base + crisis covariance, and shows crisis vol sits 25-30% above +base at every lambda — without the cluster collapse, the gap would grow. + + ───────────────────────────────────────────────────────────────── + STAGE 1 Rules ──► Holding.is_overconcentrated (4) + Holding.is_sector_concentrated (2) + User.is_high_risk_trader (2) + 4 holdings > 15% of balance, 2 sectors + > 30%, 2 traders with risk > 0.8 + flagged. 
+ ───────────────────────────────────────────────────────────────── + STAGE 2 Graph ──► Stock.variance / volatility / correlation + Stock.cluster, Stock.is_representative (5) + 4 edges (|rho| >= 0.3), 5 Louvain clusters, + intra +0.683 vs inter +0.131. + ───────────────────────────────────────────────────────────────── + STAGE 3 Prescriptive ──► Stock.x_quantity (per Scenario) + (QP) 6 scenarios = 3 budgets x 2 regimes. + Min-risk anchor + 5 epsilon points = 6-point + frontier per scenario. Knee at eps_1. + ───────────────────────────────────────────────────────────────── + STAGE 4 Stress ──► Stock.regime_covar (PSD-preserving) + Crisis vol 25-30% above base across + the frontier. Gap peaks mid-frontier + (eps_1..eps_2 at +29.8%), narrows + toward the concentrated end (+25.2%). + ───────────────────────────────────────────────────────────────── +``` + +## Workflow + +### 1. Build ontology + +- Prompt: `/rai-build-starter-ontology Build a portfolio ontology from the CSVs in data/ covering stocks, sectors, the covariance matrix, accounts, holdings, users, and transactions.` +- Response: Concepts: `Stock` (with binary `Stock.covar(Stock, Stock)` property carrying covariance), `Sector`, `User`, `Account`, `Holding`, `Transaction` — bound to the bundled CSVs (8 stocks, 64 covariance entries). Stage 3 adds the `Regime` and `Scenario` Concepts (2 regimes x 3 budgets = 6 scenarios). + +### 2. Examine ontology + +- Prompt: `/rai-querying Show the ontology as a concept-relationship diagram and report row counts per concept.` +- Response: Concepts: 8 `Stock` across 5 `Sector`, the binary `Stock.covar` covariance property (64 entries), 6 `User`, 4 `Account`, 15 `Holding`, 21 `Transaction` — Stage 3 will introduce `Regime` and `Scenario`. + +### 3. Discover reasoner questions + +- Prompt: `/rai-discovery Our 8-stock book breaks compliance and concentrates risk. 
Rebuild it under Markowitz mean-variance with caps, deduplicate redundant bets via correlation clustering, and stress-test under crisis. What questions does each reasoner family handle?` +- Response: Plan: rules for compliance flags, graph for correlation clustering + representatives, prescriptive QP indexed by Scenario, stress as regime-swap re-solve. + +### 4. Compliance scan + +- Prompt: `/rai-rules-authoring Flag any holding worth more than 15% of its account, any sector worth more than 30% of the account, and any user with a risk score above 0.8 and more than five flagged transactions.` +- Response: 4 holdings flagged (AAPL/MSFT on Account 1, JNJ/PFE on Account 4); 2 (account, sector) pairs flagged (Account 1 Tech 34.0%, Account 4 Healthcare 32.2%); 2 users flagged (Alice Chen 0.85, Eve Taylor 0.92). + +### 5. Cluster correlated bets + +- Prompt: `/rai-graph-analysis Cluster stocks where absolute return correlation is at least 0.3 — those are redundant bets. Pick one representative per cluster by highest Sharpe ratio and flag the rest as non-representatives so downstream optimization can exclude them.` +- Response: 4 edges (|rho| >= 0.3), 5 Louvain clusters, intra +0.683 vs inter +0.131. 5 representatives picked: PFE, GOOGL, JPM, PG, XOM. AAPL/MSFT/JNJ flagged `is_non_representative`. + +### 6. Solve mean-variance frontier + +- Prompt: `/rai-prescriptive-problem-formulation Build a Markowitz mean-variance frontier across 6 scenarios = 3 budgets (500, 1000, 2000) x 2 regimes (base, crisis). Each scenario must be fully invested; cap any single position at 30% of budget and any sector at 30%. Only invest in cluster representatives. Show 6 frontier points per scenario from min-risk through high-return.` +- Response: 48 decision vars (`Stock.x_quantity`, 8 stocks x 6 scenarios; non-reps forced to 0). 
Constraint families: non-negativity, budget equality (sum = budget per scenario), position cap (30%), sector cap (30%), non-representative = 0, plus epsilon return-rate floor on sweep solves. Return-rate range [0.0634, 0.0840]. 6-point frontier per scenario (min-risk anchor + 5 epsilon points); 7 solves per scenario x 6 scenarios = 42 `LOCALLY_SOLVED` portfolios via Ipopt. + +### 7. Read the frontier + +- Prompt: `/rai-prescriptive-results-interpretation For each scenario, list the six-point Pareto frontier and find the knee — where does the marginal risk per unit return jump the most?` +- Response: base_500 frontier: returns 32.43 -> 40.28, risk 1160 -> 1742. Marginal `delta_risk/delta_return` jumps ~3x at eps_1 (knee). Same shape across all 6 scenarios — risk scales as budget^2, rate-form frontier is budget-independent. + +### 8. Stress under crisis + +- Prompt: `/rai-pyrel-coding + /rai-prescriptive-results-interpretation Add a regime-conditioned covariance to the ontology so the base regime keeps the original covariance and the crisis regime shrinks correlations 30% of the way toward all-ones (PSD-preserving). Re-solve the same frontier under crisis covariance and report how much volatility expands at each frontier point versus the base regime.` +- Response: Crisis vol 25-30% above base across the frontier (budget 500: min_risk 34.06 -> 43.74 at +28.4%, eps_1 34.30 -> 44.54 at +29.8% peak). Gap peaks mid-frontier (eps_1..eps_2 at +29.8%), narrows to +25.2% at eps_5 — the cluster-collapse payoff. + +### 9. 
Persist solution concepts into the ontology + +- Prompt: `/rai-ontology-design Add a FrontierPoint concept indexed by (Scenario, eps_label) that materializes each Pareto point's metadata: return, risk, marginal risk-per-return, knee flag, base-regime volatility, crisis-regime volatility, and the percentage gap between them.` +- Response: Ontology gains a `FrontierPoint(Scenario, eps_label)` Concept (36 rows = 6 scenarios x 6 points) with `return`, `risk`, `marginal_risk_per_return`, `is_knee`, `vol_base`, `vol_crisis`, `vol_gap`, `vol_gap_pct`. The frontier shape (32.43->40.28 / 1160->1742 in base_500), knee at eps_1, and crisis vol gap (+28.4% min_risk -> +29.8% peak -> +25.2% eps_5) are now queryable as ontology rather than stdout. + +## Data + +Bundled CSVs in `data/`: `returns.csv` (8 stocks across 5 sectors), `covar.csv` (64 symmetric covariance entries), plus `users.csv` (6), `accounts.csv` (4), `holdings.csv` (15), `transactions.csv` (21). All four stages run in `portfolio_balancing.py`. diff --git a/v1/supply_chain_resilience/runbook.md b/v1/supply_chain_resilience/runbook.md new file mode 100644 index 0000000..042bdeb --- /dev/null +++ b/v1/supply_chain_resilience/runbook.md @@ -0,0 +1,85 @@ +# Runbook: Supply Chain Resilience — Multi-Reasoner Walkthrough + +Risk-adjusted network flow with disruption scenarios, traced across four RAI reasoning stages. Each stage writes properties back to the same ontology that downstream stages consume, so the optimizer can hard-block bad suppliers, surcharge watch suppliers, and weight bottleneck hubs using upstream graph and rules signals. + +## The chain + +``` +Two HIGH-priority customers depend on 6 upstream suppliers — one of which +(PowerCell, B003) is flagged "watch" by rules. The chain produces a +$1,865 baseline plan, then quantifies disruption: top hub offline = +88.5%, +watch->avoid downgrade = +0.0% (optimizer already routed around it). 
+ + ───────────────────────────────────────────────────────────────── + STAGE 0 Reachability ──► Business.is_high_priority_customer (2) + Upstream supplier dependency map for + each HIGH-priority customer (B008, B009). + ───────────────────────────────────────────────────────────────── + STAGE 1 Graph ──► Site.centrality (normalized) + Top hubs: S004 TechAssembly 1.000, + S006 West Coast DC 0.776, S003 PowerCell 0.735. + 2 weakly-connected components. + ───────────────────────────────────────────────────────────────── + STAGE 2 Rules ──► Business.is_unreliable (1) + Business.has_high_delay_risk (2) + Business.is_watch_level (2) + Demand.is_escalated (9) + [X] B017 avoid · [!] B003 watch + 37 of 262 shipments late (14%). + ───────────────────────────────────────────────────────────────── + STAGE 3 Prescriptive ──► Operation.x_flow / Demand.x_unmet + OPTIMAL · $1,865 · 8 active flows · 0 unmet + + 2 scenario re-solves (S004 offline, watch->avoid) + ───────────────────────────────────────────────────────────────── +``` + +## Workflow + +### 1. Build ontology + +- Prompt: `/rai-build-starter-ontology Build a supply chain ontology from the CSVs in data/ covering sites, businesses, SKUs, shipping operations, demand orders, historical shipments, and quarterly delay predictions.` +- Response: Concepts: `Site`, `Business`, `SKU`, `Operation`, `Demand`, `Shipment`, `DelayPrediction` — bound to the bundled CSVs (31 sites, 31 businesses, 9 SKUs, 70 operations, 20 demands, 262 shipments, 36 delay predictions). + +### 2. Examine ontology + +- Prompt: `/rai-querying Show the ontology as a concept-relationship diagram and report row counts per concept.` +- Response: 7 concepts: 31 `Site` (APAC / AMERICAS / EMEA), 31 `Business` (suppliers / manufacturers / warehouses / buyers), 9 `SKU`, 70 `Operation`, 20 `Demand` (9 HIGH-priority), 262 `Shipment` (37 late, 14%), 36 quarterly `DelayPrediction`. + +### 3. 
Discover reasoner questions + +- Prompt: `/rai-discovery We need a risk-adjusted routing plan. What's our exposure to each supplier, which sites are bottlenecks, which suppliers are unreliable, what does the minimum-cost flow look like once those risks are priced in, and how robust is that plan to disruptions?` +- Response: Reasoner-routing plan: (1) Graph reachability for upstream supplier exposure, (2) Graph centrality for hub identification, (3) Rules for supplier reliability classification, (4) Prescriptive MILP for risk-adjusted flow, (5) Scenario re-solves for disruption quantification. + +### 4. Map upstream supplier exposure + +- Prompt: `/rai-graph-analysis If a key supplier goes offline, which downstream buyers and finished products are at risk? For each HIGH-priority customer, list the suppliers it transitively depends on through the shipment graph, with their reliability scores.` +- Response: `Business.is_high_priority_customer` (2 buyers: B008 MegaCorp Enterprise, B009 TechGiant Inc); each transitively depends on the same 6 SUPPLIER-typed upstream nodes (B015, B016, B017, B018, B019, B020). + +### 5. Rank network hubs + +- Prompt: `/rai-graph-analysis Which sites are the most influential hubs in the supply network — sites that connect to other influential sites, not just sites with many direct connections? Persist the centrality score back to each site so the optimizer can use it as a bottleneck weight.` +- Response: `Site.centrality` normalized [0,1]: S004=1.000, S006=0.776, S003=0.735; 2 weakly-connected components. + +### 6. Classify supplier reliability + +- Prompt: `/rai-rules-authoring Rate each supplier's delivery reliability. Flag any with reliability score below 0.80 as unreliable, and any with a Q1 delay prediction above 0.15 as high-delay-risk. Classify suppliers as 'avoid' (both flags fire), 'watch' (either flag fires), or 'reliable' (neither). 
Also flag HIGH-priority demand orders as escalated so downstream solves can prioritize them.` +- Response: `is_unreliable` (1: B017), `has_high_delay_risk` (2: B003, B017), `is_watch_level` (2), `Demand.is_escalated` (9). + +### 7. Solve risk-adjusted flow + +- Prompt: `/rai-prescriptive-problem-formulation Find the minimum-cost shipping plan that fulfills all open demand. Don't ship from 'avoid' suppliers at all, add a $5/unit surcharge for 'watch' suppliers, prefer non-bottleneck sites, and charge $100/unit for any unmet demand.` +- Response: MILP on `Operation.x_flow` + `Demand.x_unmet`; objective = transport + risk surcharge + centrality weight + unmet penalty. + +### 8. Quantify disruption scenarios + +- Prompt: `/rai-prescriptive-solver-management + /rai-prescriptive-results-interpretation Re-solve with the highest-centrality site offline, and again with watch-level suppliers downgraded to avoid. What's the cost delta in each, and why are they asymmetric?` +- Response: Baseline OPTIMAL $1,865 / 8 flows / 0 unmet; S004 offline +88.5%; watch->avoid +0.0% (B003 already off optimal lanes). + +### 9. Persist solution concepts into the ontology + +- Prompt: `/rai-ontology-design Add a RoutingScenario concept that materializes each scenario solve (Baseline, S004-offline, Watch->Avoid) with its status, total cost, cost delta versus baseline, active flow count, unmet total, and any blocked businesses.` +- Response: Ontology gains a `RoutingScenario` Concept (3 rows: Baseline, S004-offline, Watch-Avoid) with `status`, `total_cost`, `cost_delta_pct`, `active_flow_count`, `unmet_total`, `blocked_businesses`. The disruption deltas — Baseline $1,865 / 8 flows / 0 unmet, S004-offline +88.5%, Watch->Avoid +0.0% — are queryable as ontology rather than scenario-comparison stdout. + +## Data + +Bundled CSVs in `data/`: 31 sites (APAC/AMERICAS/EMEA), 31 businesses, 9 SKUs, 70 operations, 20 demand orders, 262 shipments (37 late), 36 quarterly delay predictions. 
Combined script with stage banners: `supply_chain_resilience.py`. diff --git a/v1/supply_chain_resilience/supply_chain_resilience.py b/v1/supply_chain_resilience/supply_chain_resilience.py index 314fd69..7be0335 100644 --- a/v1/supply_chain_resilience/supply_chain_resilience.py +++ b/v1/supply_chain_resilience/supply_chain_resilience.py @@ -439,6 +439,13 @@ model.where(Business.is_unreliable()).define(Business.is_watch_level()) model.where(Business.has_high_delay_risk()).define(Business.is_watch_level()) +# Rule: Business is avoid when BOTH unreliable AND high delay risk fire. +Business.is_avoid = model.Relationship(f"{Business} is avoid") +model.define(Business.is_avoid()).where( + Business.is_unreliable(), + Business.has_high_delay_risk(), +) + # Derive risk classification in Python from RAI flags. # The optimization uses is_unreliable() and has_high_delay_risk() directly. unreliable_df = ( @@ -465,14 +472,12 @@ ) print("\nSupplier risk classification:") -avoid_ids = set() for _, row in all_biz_df.sort_values("reliability").iterrows(): biz_id = row["id"] is_unrel = biz_id in unreliable_ids is_delay = biz_id in high_delay_ids if is_unrel and is_delay: rc, marker = "avoid", "[X]" - avoid_ids.add(biz_id) elif is_unrel or is_delay: rc, marker = "watch", "[!]" else: @@ -482,6 +487,15 @@ f"reliability={row['reliability']:.2f}, class={rc}" ) +# Pull avoid ids from the Relationship for the Watch->Avoid scenario set math +# and for printing/per-scenario blocked-businesses bookkeeping. +avoid_df = ( + model.select(Business.id.alias("id")) + .where(Business.is_avoid()) + .to_df() +) +avoid_ids = set(avoid_df["id"]) if len(avoid_df) > 0 else set() + # Rule 4: Demand is escalated when priority is HIGH. 
Demand.is_escalated = model.Relationship(f"{Demand} is escalated") model.where(Demand.priority == "HIGH").define(Demand.is_escalated()) @@ -508,13 +522,19 @@ Demand.x_unmet = model.Property(f"{Demand} has {Float:unmet}") -def solve_flow(label, exclude_site_id=None, block_business_ids=None): +def solve_flow(label, exclude_site_id=None, block_business_ids=None, use_avoid_relationship=True): """Solve the network flow, optionally disabling a site or blocking suppliers. Args: label: Display name for this scenario. exclude_site_id: Site ID string to take offline (all ops from this site get zero flow). block_business_ids: Set of Business ID strings whose source operations get zero flow. + Used for scenario-specific extras (e.g., downgrade watch->avoid). Combined with + the ontology Business.is_avoid() block when use_avoid_relationship=True. + use_avoid_relationship: If True, hard-block all Business.is_avoid() suppliers via a + single ontology-driven constraint. The baseline and Site-offline scenarios use + this; the Watch->Avoid scenario sets it False because its block_business_ids set + already supersedes the ontology avoid set. """ if block_business_ids is None: block_business_ids = set() @@ -555,9 +575,22 @@ def solve_flow(label, exclude_site_id=None, block_business_ids=None): name=["demand_sat", D.id], ) - # Constraint: block operations sourced from blocked businesses. - # Uses explicit Python-side business IDs passed per scenario, so the - # constraint is guaranteed to differ across scenarios. + # Constraint: block operations sourced from "avoid" businesses, read + # directly from the ontology Business.is_avoid() Relationship. 
+ if use_avoid_relationship: + biz_avoid = Business.ref() + op_avoid = Operation.ref() + problem.satisfy( + model.require(op_avoid.x_flow == 0).where( + op_avoid.source_business(biz_avoid), + biz_avoid.is_avoid(), + ), + name=["block_avoid", op_avoid.id], + ) + + # Scenario-specific extras: block additional businesses from a Python set + # (e.g., the Watch->Avoid scenario blocks the union of unreliable and + # high-delay-risk suppliers, a strict superset of the avoid Relationship). for biz_id in sorted(block_business_ids): biz_block = Business.ref() op_block = Operation.ref() @@ -653,12 +686,20 @@ def solve_flow(label, exclude_site_id=None, block_business_ids=None): else: print(" All demand satisfied") - return {"label": label, "status": status, "objective": obj, "unmet": total_unmet} + return { + "label": label, + "status": status, + "objective": obj, + "unmet": total_unmet, + "active_flows": n_active, + "blocked": sorted(block_business_ids), + } -# Baseline solve: block only "avoid" suppliers (both unreliable AND high delay). +# Baseline solve: block only "avoid" suppliers (both unreliable AND high delay) +# via the Business.is_avoid() Relationship from the ontology. 
results = [] -results.append(solve_flow("Baseline", block_business_ids=avoid_ids)) +results.append(solve_flow("Baseline")) print(f" Blocked businesses (avoid): {sorted(avoid_ids) if avoid_ids else 'none'}") # -------------------------------------------------- @@ -677,7 +718,6 @@ def solve_flow(label, exclude_site_id=None, block_business_ids=None): solve_flow( f"Site {top_site_id} offline", exclude_site_id=top_site_id, - block_business_ids=avoid_ids, ) ) @@ -687,23 +727,82 @@ def solve_flow(label, exclude_site_id=None, block_business_ids=None): print("\nScenario: All 'watch' suppliers downgraded to 'avoid'") print(f" Blocked businesses: {sorted(watch_and_avoid_ids)}") results.append( - solve_flow("Watch->Avoid", block_business_ids=watch_and_avoid_ids) + solve_flow( + "Watch->Avoid", + block_business_ids=watch_and_avoid_ids, + use_avoid_relationship=False, + ) ) -# Summary table. +# -------------------------------------------------- +# Persist scenario solves into the ontology (RoutingScenario Concept) +# -------------------------------------------------- +# Per /rai-prescriptive-problem-formulation/scenario-analysis.md "Scenario +# Concept" pattern: load scenario rows via model.data(list_of_tuples, +# columns=[...]) and bind via Concept.new(...).to_schema(). 
+ +canonical_names = { + "Baseline": "Baseline", + f"Site {top_site_id} offline": f"{top_site_id}-offline", + "Watch->Avoid": "Watch-Avoid", +} + +RoutingScenario = model.Concept("RoutingScenario", identify_by={"name": String}) +RoutingScenario.status = model.Property(f"{RoutingScenario} has {String:status}") +RoutingScenario.total_cost = model.Property(f"{RoutingScenario} has {Float:total_cost}") +RoutingScenario.cost_delta_pct = model.Property(f"{RoutingScenario} has {Float:cost_delta_pct}") +RoutingScenario.active_flow_count = model.Property(f"{RoutingScenario} has {Integer:active_flow_count}") +RoutingScenario.unmet_total = model.Property(f"{RoutingScenario} has {Float:unmet_total}") +RoutingScenario.blocked_businesses = model.Property(f"{RoutingScenario} has {String:blocked_businesses}") + +# Materialize per-scenario results as RoutingScenario instances. Pre-create +# the three instances by name, then bind each property via filter_by — this +# avoids the row-collapse seen with model.data() multi-row binding here. +baseline_obj = results[0]["objective"] +for r in results: + name = canonical_names.get(r["label"], r["label"]) + obj = float(r["objective"]) if r["objective"] is not None else 0.0 + delta_pct = 0.0 + if r["label"] != "Baseline" and baseline_obj and r["objective"] is not None: + delta_pct = (r["objective"] - baseline_obj) / baseline_obj * 100 + blocked_str = ",".join(r["blocked"]) if r["blocked"] else "" + unmet_val = float(r["unmet"]) if r["unmet"] else 0.0 + + model.define(RoutingScenario.new(name=name)) + rs = RoutingScenario.filter_by(name=name) + model.define(rs.status(r["status"] or "UNKNOWN")) + model.define(rs.total_cost(obj)) + model.define(rs.cost_delta_pct(float(delta_pct))) + model.define(rs.active_flow_count(int(r["active_flows"]))) + model.define(rs.unmet_total(unmet_val)) + model.define(rs.blocked_businesses(blocked_str)) + +# Summary table — query the RoutingScenario Concept rather than Python state. 
print(f"\n{'=' * 70}") print("SCENARIO COMPARISON") print(f"{'=' * 70}") print(f" {'Scenario':<25} {'Status':<18} {'Cost':>12} {'Unmet':>10}") print(f" {'-' * 65}") -baseline_obj = results[0]["objective"] -for r in results: - cost_str = f"{r['objective']:,.2f}" if r["objective"] else "N/A" - unmet_str = f"{r['unmet']:,.0f}" if r["unmet"] else "0" + +rs_df = ( + model.select( + RoutingScenario.name.alias("name"), + RoutingScenario.status.alias("status"), + RoutingScenario.total_cost.alias("total_cost"), + RoutingScenario.cost_delta_pct.alias("cost_delta_pct"), + RoutingScenario.unmet_total.alias("unmet_total"), + ) + .to_df() +) +order = [canonical_names[r["label"]] for r in results] +rs_df = rs_df.set_index("name").loc[order].reset_index() +for _, row in rs_df.iterrows(): + cost_str = f"{row['total_cost']:,.2f}" + unmet_str = f"{row['unmet_total']:,.0f}" if row["unmet_total"] else "0" delta = "" - if r["objective"] and baseline_obj and r["label"] != "Baseline": - pct = (r["objective"] - baseline_obj) / baseline_obj * 100 + if row["name"] != "Baseline": + pct = row["cost_delta_pct"] delta = f" (+{pct:.1f}%)" if pct > 0 else f" ({pct:.1f}%)" print( - f" {r['label']:<25} {r['status']:<18} {cost_str:>12}{delta} {unmet_str:>10}" + f" {row['name']:<25} {row['status']:<18} {cost_str:>12}{delta} {unmet_str:>10}" ) diff --git a/v1/telco_network_recovery/runbook.md b/v1/telco_network_recovery/runbook.md new file mode 100644 index 0000000..688f01e --- /dev/null +++ b/v1/telco_network_recovery/runbook.md @@ -0,0 +1,92 @@ +# Runbook: Telco WEST Recovery — Multi-Reasoner Walkthrough + +WEST revenue collapsed ~29% in Q4 2024 (a ~$2.7M shortfall vs the other-regions average) while every other region held flat or grew. 
No single reasoner can answer where to spend a $5M recovery budget: descriptive scopes the crisis, rules flag broken towers, graph weights them by social blast radius, predictive forecasts forward demand, and prescriptive composes the other three signals into the upgrade plan. Each stage writes derived properties back to the same ontology that downstream stages consume. 
+
+## The chain
+
+```
+WEST Q4 revenue is down ~29% (~$2.7M gap vs the other-regions avg).
+The chain produces a $5M plan that recovers 122 Gbps capacity
+across all 15 critical towers, prioritized by social blast radius.
+
+  ─────────────────────────────────────────────────────────────────
+  STAGE 1  Descriptive  ──►  WEST: Q4 revenue −29% vs H1 baseline,
+                             avail 94.6 vs 99.5, 15 of 81 DEGRADED.
+                             Retention angle? No — this is
+                             operational, not subscriber churn.
+  ─────────────────────────────────────────────────────────────────
+  STAGE 2  Rules        ──►  CellTower.is_critical_restore (15)
+                             4 derived health metrics + a compound
+                             flag: WEST + DEGRADED + health < 0.85.
+  ─────────────────────────────────────────────────────────────────
+  STAGE 3  Graph        ──►  Subscriber.influence_score (PageRank)
+                             CellTower.weighted_impact (15)
+                             404 distinct subs (33% of base) route
+                             calls through a critical tower.
+  ─────────────────────────────────────────────────────────────────
+  STAGE 4  Predictive   ──►  CellTower.projected_demand_growth (250)
+           (GNN)             WEST: 0.9998× ── flat/slightly contracting
+                             while 8 other regions sit at +0.45 to +0.91%/day.
+  ─────────────────────────────────────────────────────────────────
+  STAGE 5  Prescriptive ──►  TowerUpgradeOption.selected (15)
+                             OPTIMAL · 12 GOLD · 2 SILVER · 1 BRONZE
+                             $4.96M of $5M (binding) · 122 Gbps
+                             164 of 200 install-weeks (slack)
+  ─────────────────────────────────────────────────────────────────
+```
+
+## Workflow
+
+### 1. 
Build ontology + +- Prompt: `/rai-build-starter-ontology Build a telco network ontology from the eight CSVs in data/: cell_towers, network_equipment, equipment_health, network_performance, subscribers, call_detail_records, tower_upgrade_options, time_series_metrics. The time-series file has one row per (date, region); make sure that's modelled as a composite-key concept since we'll want to forecast region-level trends later.` +- Response: Concepts: `CellTower`, `NetworkEquipment`, `EquipmentHealth`, `NetworkPerformance`, `Subscriber`, `CallDetailRecord` (edge concept: caller → callee, routed_through tower), `TowerUpgradeOption` (composite key tower_id+tier), `RegionMetric` (composite key metric_date+region) — all bound to the bundled CSVs. Step 7 (predictive) introduces a same-region 1-day-lag `TemporalEdge` concept derived from `RegionMetric` for GNN message passing. + +### 2. Examine ontology + +- Prompt: `/rai-querying Show the ontology as a concept-relationship diagram and report row counts per concept and the data range covered.` +- Response: 9 concepts wired to the bundled CSVs: 250 `CellTower`, 1,200 `Subscriber`, 6,000 `CallDetailRecord`, ~5,000 `NetworkPerformance`, 544 `NetworkEquipment`, 544 `EquipmentHealth`, 360 `TowerUpgradeOption`, 3,285 `RegionMetric` (365d × 9 regions), plus same-region 1-day-lag `TemporalEdge` rows for the GNN. + +### 3. Discover reasoner questions + +- Prompt: `/rai-discovery WEST is missing revenue targets while every other region grows. We have a $5M capex budget and 200 install crew-weeks to allocate to tower upgrades. 
Which RAI reasoners do we need, in what order, to land on a defensible upgrade plan grounded in the available data (towers, subscribers, calls, equipment health, performance, daily KPIs, and tiered upgrade options)?` +- Response: Plans the 4-reasoner chain on the shared ontology — descriptive (`/rai-querying`) to scope the WEST crisis and rule out a retention angle; rules (`/rai-rules-authoring`) to flag critical-restore towers; graph (`/rai-graph-analysis`) to score subscriber influence and aggregate per-tower blast radius; predictive (`/rai-predictive-modeling` + `/rai-predictive-training`) to forecast per-region growth and bind it as a per-tower demand multiplier; prescriptive (`/rai-prescriptive-problem-formulation` + `/rai-prescriptive-results-interpretation`) to compose all three signals into the tier-selection MIP and explain the binding constraint. + +### 4. Diagnose WEST + +- Prompt: `/rai-querying Where are we missing revenue targets, and which region has the worst Q4 2024 network availability? For the worst region, show the cell tower fleet by status and how bad the degraded towers look on packet loss.` +- Response: WEST Q4 avail 94.6% vs 99.5% in every other region; WEST Q4 revenue $6.6M vs ~$9.0–9.5M everywhere else (≈$2.7M Q4 deficit, −29% vs WEST's own H1 baseline); 81 WEST towers split into 49 ACTIVE / 17 MAINTENANCE / 15 DEGRADED, with the 15 DEGRADED towers averaging 7.6–10.3% packet loss (median ~8.2%). Subscriber-churn signals stay flat — this is an operational network failure, not retention. + +### 5. 
Flag critical-restore towers + +- Prompt: `/rai-rules-authoring Flag towers as critical-restore if they're in WEST and either (a) DEGRADED with poor average equipment health (below 0.85), or (b) showing high average packet loss (above 5%) with poor health — the second branch catches ACTIVE-but-failing towers.` +- Response: 4 derived health properties (`avg_packet_loss`, `avg_latency_ms`, `avg_error_rate`, `avg_health_score`) computed for all 250 towers via `aggs.avg(...).per(CellTower)`. The two-branch `CellTower.is_critical_restore` relationship fires on 15 towers — all 15 are WEST + DEGRADED + health < 0.85, so Branch 1 alone produces the same set, but Branch 2 is kept as a guard against ACTIVE-but-failing failure modes. + +### 6. Score subscriber blast radius + +- Prompt: `/rai-graph-analysis Who are our most socially influential subscribers based on call patterns? For each critical-restore tower, score its blast radius — how many distinct subscribers route calls through it, weighted by their influence.` +- Response: `Subscriber.influence_score` (PageRank) on all 1,200 subs; `CellTower.weighted_impact` on 15 critical towers; 404 distinct subs (33% of base) route through a critical tower; TWR-0014 has the largest footprint (61 subs, 0.0502). + +### 7. Forecast regional demand + +- Prompt: `/rai-predictive-modeling + /rai-predictive-training Predict next-quarter subscriber growth per region from the daily KPI history (train on data through October, validate Nov, test Dec). Bind each region's forecast back to its towers as a demand multiplier.` +- Response: GNN node regression on 365d × 9 regions with same-region 1-day-lag temporal edges; per-region mean of the Dec test predictions yields WEST multiplier ≈0.9998× (flat/slightly contracting) while the 8 other regions sit at +0.45% to +0.91%/day. 
The multiplier is loaded into a `RegionGrowth` concept and joined to `CellTower.projected_demand_growth` via region — populating all 250 towers (CellTower covers 5 regions; the other 4 RegionMetric regions are forecast but have no towers to bind to). + +### 8. Optimize tier selection + +- Prompt: `/rai-prescriptive-problem-formulation Recover WEST capacity within $5M and 200 install-weeks. For each critical-restore tower, pick at most one upgrade tier (BRONZE, SILVER, or GOLD) that maximizes capacity restored, weighted by each tower's blast radius and the regional demand forecast.` +- Response: Status OPTIMAL with all 15 critical towers covered (one tier each). Tier mix: 12 GOLD / 2 SILVER / 1 BRONZE. Total capacity restored 122 Gbps. Total cost $4,956,843 of the $5M budget (binding). Total install crew-weeks 164 of 200 (slack). The tier mix skews toward GOLD because the per-Gbps cost on GOLD is competitive once it is multiplied by `weighted_impact` and `projected_demand_growth` in the objective. + +### 9. Interpret the plan + +- Prompt: `/rai-prescriptive-results-interpretation Summarize the plan: total cost, capacity restored, tier mix, towers covered. Which constraint is binding, and what would relaxing it by 10-20% unlock?` +- Response: Budget is binding at $4.96M/$5M (only $43K of headroom); a re-solve at higher budget would quantify which BRONZE/SILVER swaps unlock — TWR-0009 BRONZE → GOLD is the next-cheapest tier upgrade in the candidate set. Install-weeks have 36 weeks of slack (164/200) so crew capacity is not the bottleneck. All 15 critical towers are covered, so the 404 service-affected subscribers identified by the graph stage are addressed within the rollout window. + +### 10. Persist solution concepts into the ontology + +- Prompt: `/rai-ontology-design Materialize the optimal plan and the 15 selected upgrades as queryable ontology. 
Add a RestorePlan concept holding the plan summary (total cost, install-weeks, capacity restored, tier-mix counts, towers covered, binding constraint) and a SelectedUpgrade view restricted to the chosen tower-tier rows.` +- Response: Ontology gains a singleton `RestorePlan` Concept with `total_cost`, `total_install_weeks`, `capacity_restored_gbps`, `gold_count`, `silver_count`, `bronze_count`, `towers_covered`, `binding_constraint`; plus `SelectedUpgrade` (a view over the 15 chosen rows of `TowerUpgradeOption`). All headline plan numbers — $4,956,843 / 164 weeks / 122 Gbps / 12 GOLD / 2 SILVER / 1 BRONZE / 15 covered — are now queryable as ontology, not stdout. + +## Data + +Bundled CSVs in `data/`: 250 cell towers (15 WEST DEGRADED), 1,200 subscribers, 6,000 directed CDRs, ~5,000 NetworkPerformance measurements, 544 NetworkEquipment + EquipmentHealth rows, 360 TowerUpgradeOptions (3 tiers × 120 in-scope towers), 3,285 daily KPI rows (365 days × 9 regions). All stages run end-to-end via `telco_network_recovery.py`. diff --git a/v1/telco_network_recovery/telco_network_recovery.py b/v1/telco_network_recovery/telco_network_recovery.py index 7a29d7e..2318241 100644 --- a/v1/telco_network_recovery/telco_network_recovery.py +++ b/v1/telco_network_recovery/telco_network_recovery.py @@ -674,6 +674,131 @@ print(f" Tier mix: {selected['tier'].value_counts().to_dict()}") print(f" Towers covered: {len(selected)} of {plan_df['tower_id'].nunique()} critical") +# -------------------------------------------------- +# Stage 5: Persist solution concepts into the ontology +# -------------------------------------------------- + +print(f"\n{'=' * 60}") +print("STAGE 5: ONTOLOGY -- materialize RestorePlan + SelectedUpgrade") +print("=" * 60) + +# SelectedUpgrade view-concept: unary relationship narrowing +# TowerUpgradeOption to the 15 chosen rows (selected == 1). 
+TowerUpgradeOption.is_selected_upgrade = model.Relationship( + f"{TowerUpgradeOption} is selected upgrade" +) +model.where(TowerUpgradeOption.selected == 1).define( + TowerUpgradeOption.is_selected_upgrade() +) + +# RestorePlan singleton: one row keyed by key=1 holding the plan summary. +RestorePlan = model.Concept("RestorePlan", identify_by={"key": Integer}) +RestorePlan.total_cost = model.Property(f"{RestorePlan} has {Float:total_cost}") +RestorePlan.total_install_weeks = model.Property( + f"{RestorePlan} has {Integer:total_install_weeks}" +) +RestorePlan.capacity_restored_gbps = model.Property( + f"{RestorePlan} has {Integer:capacity_restored_gbps}" +) +RestorePlan.gold_count = model.Property(f"{RestorePlan} has {Integer:gold_count}") +RestorePlan.silver_count = model.Property(f"{RestorePlan} has {Integer:silver_count}") +RestorePlan.bronze_count = model.Property(f"{RestorePlan} has {Integer:bronze_count}") +RestorePlan.towers_covered = model.Property(f"{RestorePlan} has {Integer:towers_covered}") +RestorePlan.binding_constraint = model.Property( + f"{RestorePlan} has {String:binding_constraint}" +) + +model.define(RestorePlan.new(key=1)) + +# Bind aggregations off the SelectedUpgrade view back onto the singleton. 
+rp = RestorePlan.ref() +model.define( + rp.total_cost( + aggs.sum(TowerUpgradeOption.cost).where( + TowerUpgradeOption.is_selected_upgrade() + ) + ) +) +model.define( + rp.total_install_weeks( + aggs.sum(TowerUpgradeOption.install_weeks).where( + TowerUpgradeOption.is_selected_upgrade() + ) + ) +) +model.define( + rp.capacity_restored_gbps( + aggs.sum(TowerUpgradeOption.capacity_increase_gbps).where( + TowerUpgradeOption.is_selected_upgrade() + ) + ) +) +model.define( + rp.gold_count( + aggs.count(TowerUpgradeOption).where( + TowerUpgradeOption.is_selected_upgrade(), + TowerUpgradeOption.tier == "GOLD", + ) + ) +) +model.define( + rp.silver_count( + aggs.count(TowerUpgradeOption).where( + TowerUpgradeOption.is_selected_upgrade(), + TowerUpgradeOption.tier == "SILVER", + ) + ) +) +model.define( + rp.bronze_count( + aggs.count(TowerUpgradeOption).where( + TowerUpgradeOption.is_selected_upgrade(), + TowerUpgradeOption.tier == "BRONZE", + ) + ) +) +model.define( + rp.towers_covered( + aggs.count(distinct(CellTower)).where( + TowerUpgradeOption.for_tower(CellTower), + TowerUpgradeOption.is_selected_upgrade(), + ) + ) +) + +# Binding-constraint classification: +# "budget" if total_cost is within $50k of $5M +# else "install_weeks" if total_install_weeks is within 5 of 200 +# else "neither" +model.where( + rp.total_cost >= BUDGET_USD - 50_000, +).define(rp.binding_constraint("budget")) +model.where( + rp.total_cost < BUDGET_USD - 50_000, + rp.total_install_weeks >= INSTALL_WEEKS_BUDGET - 5, +).define(rp.binding_constraint("install_weeks")) +model.where( + rp.total_cost < BUDGET_USD - 50_000, + rp.total_install_weeks < INSTALL_WEEKS_BUDGET - 5, +).define(rp.binding_constraint("neither")) + +# Read the singleton back from the ontology and surface it. 
+plan_summary_df = ( + model.select( + RestorePlan.total_cost.alias("total_cost"), + RestorePlan.total_install_weeks.alias("total_install_weeks"), + RestorePlan.capacity_restored_gbps.alias("capacity_restored_gbps"), + RestorePlan.gold_count.alias("gold_count"), + RestorePlan.silver_count.alias("silver_count"), + RestorePlan.bronze_count.alias("bronze_count"), + RestorePlan.towers_covered.alias("towers_covered"), + RestorePlan.binding_constraint.alias("binding_constraint"), + ) + .to_df() +) +print("\n RestorePlan (queried from ontology):") +print(plan_summary_df.to_string(index=False)) + print(f"\n{'=' * 60}") print("PIPELINE COMPLETE: 4 stages executed on the shared Telco ontology") print("=" * 60)