Merged (22 commits)
5324d7f  Add reproducibility runbooks to four multi-reasoner templates (cafzal, May 5, 2026)
c391890  Hybrid runbook pass: discovery + per-stage skill/prompt blocks (cafzal, May 6, 2026)
20c4bfa  Rewrite runbook prompts in user-question style (cafzal, May 6, 2026)
9681e2c  Inline skill invocation in prompt callouts (cafzal, May 6, 2026)
78323e3  Trim runbooks to overview + workflow table + data footer (cafzal, May 6, 2026)
95277d5  Convert workflow tables to bullets for easier copy (cafzal, May 6, 2026)
818a417  Reformat workflow steps with descriptive headers (cafzal, May 6, 2026)
317960d  Audit-pass fixes against template scripts and READMEs (cafzal, May 6, 2026)
ec8ae9a  Strip mechanical bits from prompts (cafzal, May 6, 2026)
26833b4  Add ontology-build step at the front of each runbook (cafzal, May 6, 2026)
c9984ff  Audit-pass corrections + rename Discovery step (cafzal, May 6, 2026)
c203458  Move runbook.md to template root (cafzal, May 6, 2026)
a0760fb  Add closing rai-ontology-design step to each runbook (cafzal, May 6, 2026)
fd758e3  Insert 'Examine ontology' step after Build ontology (cafzal, May 6, 2026)
d28127e  Portfolio frontier is 6 points per scenario, not 7 (cafzal, May 6, 2026)
a755ae5  Tighten closing rai-ontology-design step to cover the actual gap (cafzal, May 6, 2026)
affa12f  Rename closing step: 'Persist solution concepts into the ontology' (cafzal, May 6, 2026)
7cd4154  Naturalize closing-step prompts (cafzal, May 6, 2026)
3911ebe  Naturalize all prompts: drop algorithm/solver/feature mechanics (cafzal, May 6, 2026)
445036b  Prompt-sufficiency audit: minor sharpens for 4 of 5 templates (cafzal, May 6, 2026)
bb03238  Prompt-path audit: minor sharpens to runbook only (script untouched) (cafzal, May 6, 2026)
e008761  Materialize closing-step solution Concepts in each script (cafzal, May 6, 2026)
175 changes: 139 additions & 36 deletions v1/energy_grid_planning/energy_grid_planning.py
@@ -935,7 +935,7 @@ def _query_flag(relationship, flag_name):
investment_levels_df = model.select(
InvestmentLevel.name.alias("level"),
InvestmentLevel.budget_cap.alias("budget"),
).to_df().sort_values("budget")
).to_df().sort_values("budget").reset_index(drop=True)

pareto_rows = []
for _, lvl in investment_levels_df.iterrows():
@@ -968,18 +968,132 @@ def _query_flag(relationship, flag_name):
"net_value": net_value,
})

# Print Pareto frontier
# --------------------------------------------------
# Materialize InvestmentPortfolio(InvestmentLevel) Concept
# One row per InvestmentLevel scenario. Marginal & knee detection are
# inter-row computations, so they're computed in pandas and bound back.
# The Pareto frontier is then queried from the ontology, not pareto_rows.
# --------------------------------------------------

InvestmentPortfolio = model.Concept(
"InvestmentPortfolio", identify_by={"investment_level_name": String}
)
InvestmentPortfolio.investment_level = model.Relationship(
f"{InvestmentPortfolio} for {InvestmentLevel}"
)
InvestmentPortfolio.dc_count = model.Property(f"{InvestmentPortfolio} has {Integer:dc_count}")
InvestmentPortfolio.total_mw = model.Property(f"{InvestmentPortfolio} has {Float:total_mw}")
InvestmentPortfolio.annual_revenue = model.Property(
f"{InvestmentPortfolio} has {Float:annual_revenue}"
)
InvestmentPortfolio.upgrade_cost = model.Property(
f"{InvestmentPortfolio} has {Float:upgrade_cost}"
)
InvestmentPortfolio.net_value = model.Property(f"{InvestmentPortfolio} has {Float:net_value}")
InvestmentPortfolio.marginal_per_m_to_next_level = model.Property(
f"{InvestmentPortfolio} has {Float:marginal_per_m_to_next_level}"
)
InvestmentPortfolio.is_knee_point = model.Property(
f"{InvestmentPortfolio} has {Boolean:is_knee_point}"
)

# Build the dataframe with marginal & knee — inter-row computation in pandas.
portfolio_df = pd.DataFrame(pareto_rows).sort_values("budget").reset_index(drop=True)
# Note: upgrade_cost is dollars (cost_million * 1e6), to match revenue scale and net_value definition.
portfolio_df["upgrade_cost_dollars"] = portfolio_df["upgrade_cost_m"].astype(float) * 1e6

# marginal_per_m_to_next_level: ($net_value_next - $net_value_this) / ($M_next - $M_this); null at last
marginal = []
for j in range(len(portfolio_df)):
if j == len(portfolio_df) - 1:
marginal.append(None)
else:
d_val = portfolio_df.loc[j + 1, "net_value"] - portfolio_df.loc[j, "net_value"]
d_budget = portfolio_df.loc[j + 1, "budget"] - portfolio_df.loc[j, "budget"]
marginal.append(d_val / d_budget if abs(d_budget) > 1e-6 else None)
portfolio_df["marginal_per_m_to_next_level"] = marginal

# is_knee_point: row whose marginal-rate represents the largest jump from the prior row's rate.
# We measure the "jump" at row j as |rate[j-1]| / |rate[j]| (steepest drop from a high marginal
# to a lower one, i.e. diminishing-returns inflection). The knee is the row where this ratio peaks.
is_knee = [False] * len(portfolio_df)
rates = [r for r in marginal if r is not None]
if len(rates) >= 2:
max_jump, knee_idx = 0.0, 1
for j in range(len(rates) - 1):
prev_rate, next_rate = rates[j], rates[j + 1]
if abs(next_rate) > 1e-6:
jump = abs(prev_rate / next_rate)
elif abs(prev_rate) > 1e-6:
jump = float("inf")
else:
jump = 0.0
if jump > max_jump:
max_jump = jump
knee_idx = j + 1
is_knee[knee_idx] = True
portfolio_df["is_knee_point"] = is_knee

# Bind portfolio rows back as ontology instances.
portfolio_src = model.data(portfolio_df.rename(columns={
"level": "investment_level_name",
"n_approved": "dc_count",
"total_mw": "total_mw",
"revenue": "annual_revenue",
"upgrade_cost_dollars": "upgrade_cost",
"net_value": "net_value",
"marginal_per_m_to_next_level": "marginal_per_m_to_next_level",
"is_knee_point": "is_knee_point",
}))

model.define(InvestmentPortfolio.new(
investment_level_name=portfolio_src.INVESTMENT_LEVEL_NAME,
investment_level=InvestmentLevel.filter_by(name=portfolio_src.INVESTMENT_LEVEL_NAME),
dc_count=portfolio_src.DC_COUNT,
total_mw=portfolio_src.TOTAL_MW,
annual_revenue=portfolio_src.ANNUAL_REVENUE,
upgrade_cost=portfolio_src.UPGRADE_COST,
net_value=portfolio_src.NET_VALUE,
marginal_per_m_to_next_level=portfolio_src.MARGINAL_PER_M_TO_NEXT_LEVEL,
is_knee_point=portfolio_src.IS_KNEE_POINT,
))

# Query InvestmentPortfolio rows from the ontology for rendering.
InvLvlRef = InvestmentLevel.ref()
PortRef = InvestmentPortfolio.ref()
portfolio_query_df = (
model.where(PortRef.investment_level(InvLvlRef))
.select(
PortRef.investment_level_name.alias("level"),
InvLvlRef.budget_cap.alias("budget"),
PortRef.dc_count.alias("dc_count"),
PortRef.total_mw.alias("total_mw"),
PortRef.annual_revenue.alias("annual_revenue"),
PortRef.upgrade_cost.alias("upgrade_cost"),
PortRef.net_value.alias("net_value"),
PortRef.marginal_per_m_to_next_level.alias("marginal"),
PortRef.is_knee_point.alias("is_knee"),
)
.to_df()
.sort_values("budget")
.reset_index(drop=True)
)
portfolio_query_df["budget"] = portfolio_query_df["budget"].astype(float)
portfolio_query_df["dc_count"] = portfolio_query_df["dc_count"].astype(int)

# Print Pareto frontier (read from ontology, not pareto_rows)
print(
f"\n {'#':>3} {'Level':>8} {'Budget $M':>10} {'DCs':>5} {'DC MW':>8} "
f"{'Revenue $/yr':>14} {'Upg $M':>8} {'Upg MW':>8} {'Net Value':>14}"
f"{'Revenue $/yr':>14} {'Upg $M':>8} {'Net Value':>14} {'Knee':>5}"
)
print(f" {'-' * 85}")
for j, pt in enumerate(pareto_rows):
for j, row in portfolio_query_df.iterrows():
knee_flag = " *" if bool(row["is_knee"]) else ""
print(
f" {j + 1:>3} {pt['level']:>8} {pt['budget']:>10,.0f} "
f"{pt['n_approved']:>5} {pt['total_mw']:>8,.0f} "
f"{pt['revenue']:>14,.0f} {pt['upgrade_cost_m']:>8,.1f} "
f"{pt['upgrade_mw']:>8,.1f} {pt['net_value']:>14,.0f}"
f" {j + 1:>3} {row['level']:>8} {float(row['budget']):>10,.0f} "
f"{int(row['dc_count']):>5} {float(row['total_mw']):>8,.0f} "
f"{float(row['annual_revenue']):>14,.0f} {float(row['upgrade_cost']) / 1e6:>8,.1f} "
f"{float(row['net_value']):>14,.0f} {knee_flag:>5}"
)

# Detailed results per investment level
@@ -1005,42 +1119,31 @@ def _query_flag(relationship, flag_name):
for _, row in level_upg.iterrows():
print(f" {row['upgrade_id']}: +{float(row['capacity_mw']):.0f} MW, ${float(row['cost_m']):.1f}M")

# Marginal analysis + knee detection
if len(pareto_rows) >= 3:
# Marginal analysis + knee detection (read from ontology)
if len(portfolio_query_df) >= 3:
print("\n MARGINAL ANALYSIS (value gained per additional $M budget):")
rates = []
for j in range(len(pareto_rows) - 1):
d_val = pareto_rows[j + 1]["net_value"] - pareto_rows[j]["net_value"]
d_budget = pareto_rows[j + 1]["budget"] - pareto_rows[j]["budget"]
rate = d_val / d_budget if abs(d_budget) > 1e-6 else 0
rates.append(rate)
d_mw = pareto_rows[j + 1]["total_mw"] - pareto_rows[j]["total_mw"]
d_dcs = pareto_rows[j + 1]["n_approved"] - pareto_rows[j]["n_approved"]
for j in range(len(portfolio_query_df) - 1):
cur = portfolio_query_df.iloc[j]
nxt = portfolio_query_df.iloc[j + 1]
d_val = float(nxt["net_value"]) - float(cur["net_value"])
d_budget = float(nxt["budget"]) - float(cur["budget"])
d_mw = float(nxt["total_mw"]) - float(cur["total_mw"])
d_dcs = int(nxt["dc_count"]) - int(cur["dc_count"])
rate = float(cur["marginal"]) if pd.notna(cur["marginal"]) else 0.0
print(
f" {pareto_rows[j]['level']:>6} -> {pareto_rows[j+1]['level']:<6}: "
f" {cur['level']:>6} -> {nxt['level']:<6}: "
f"dValue={d_val:>+14,.0f}, dBudget={d_budget:>+6,.0f}$M, "
f"dMW={d_mw:>+8,.0f}, dDCs={d_dcs:>+3}, "
f"marginal={rate:>+12,.0f}$/M$"
)

if len(rates) >= 2:
max_jump, knee_idx = 0, 1
for j in range(len(rates) - 1):
if abs(rates[j + 1]) > 1e-6:
jump = abs(rates[j] / rates[j + 1])
elif abs(rates[j]) > 1e-6:
jump = float("inf")
else:
jump = 0
if jump > max_jump:
max_jump = jump
knee_idx = j + 1

knee = pareto_rows[knee_idx]
knee_rows = portfolio_query_df[portfolio_query_df["is_knee"].astype(bool)]
if len(knee_rows) > 0:
knee = knee_rows.iloc[0]
print(
f"\n KNEE POINT: {knee['level']} -- ${knee['budget']:,.0f}M budget, "
f"${knee['net_value']:,.0f} net value, {knee['n_approved']} DCs, "
f"{knee['total_mw']:,.0f} MW"
f"\n KNEE POINT: {knee['level']} -- ${float(knee['budget']):,.0f}M budget, "
f"${float(knee['net_value']):,.0f} net value, {int(knee['dc_count'])} DCs, "
f"{float(knee['total_mw']):,.0f} MW"
)
print(" Diminishing returns beyond this investment level.")

88 changes: 88 additions & 0 deletions v1/energy_grid_planning/runbook.md
@@ -0,0 +1,88 @@
# Runbook: Energy Grid Planning — Multi-Reasoner Walkthrough

ERCOT processes 10 hyperscaler interconnection requests (2,930 MW) against a 12-substation Texas grid. The chain forecasts substation load, finds structural bottlenecks, screens compliance, and produces a Pareto frontier across 5 budget levels — no single reasoner can answer this end-to-end.

## The chain

```
ERCOT has 10 hyperscaler interconnection requests totalling 2,930 MW
on a 12-substation grid where DFW is the binding capacity bottleneck.
The chain produces a Pareto frontier across 5 budget levels — the knee at
$300M unlocks 5 DCs (1,500 MW, $264M net value) including xAI Colossus.

─────────────────────────────────────────────────────────────────
STAGE 1 Predictive ──► Substation.predicted_load (12)
DFW: 1,100 → 1,700 MW (+54.6%) ── breaches
1,600 MW capacity at 24mo. The only
substation predicted to breach.
─────────────────────────────────────────────────────────────────
STAGE 2 Graph ──► Substation.betweenness (12)
(WCC/ Substation.grid_community (3 regions)
Louvain/ Substation.is_structurally_critical (3)
centrality) DFW, Houston, San Antonio dominate. 7 of 10
DC requests target critical substations.
─────────────────────────────────────────────────────────────────
STAGE 3 Rules ──► DataCenterRequest.is_compliant (2)
fails_capacity / fails_structural /
fails_low_carbon flags written back.
Only Crusoe (Midland) and Oracle
(Corpus Christi) pass all three.
─────────────────────────────────────────────────────────────────
STAGE 4 Prescriptive ──► DataCenterRequest.x_approve (per InvestmentLevel)
SubstationUpgrade.x_upgrade (per InvestmentLevel)
OPTIMAL across 5 budget levels in one solve.
Knee $300M · 5 DCs · 1,500 MW · $264M net.
Google + Lambda never approved — DFW full.
─────────────────────────────────────────────────────────────────
```

## Workflow

### 1. Build ontology

- Prompt: `/rai-build-starter-ontology Build an ontology for grid infrastructure planning from the CSVs in data/ covering substations, generators, transmission lines, demand forecasts, data center requests, and substation upgrades.`
- Response: Concepts: `Substation`, `Generator`, `TransmissionLine`, `LoadZone`, `DemandPeriod`, `RenewableProfile`, `MaintenanceWindow`, `Customer`, `DataCenterRequest`, `SubstationUpgrade`, `DemandForecast`, `LoadHistory`, `DCAnnouncement` — bound to the bundled CSVs (12 substations, 10 DC requests, 18 transmission lines).

### 2. Examine ontology

- Prompt: `/rai-querying Show the ontology as a concept-relationship diagram and report row counts per concept.`
- Response: 13 concepts: 12 `Substation`, 15 `Generator`, 18 `TransmissionLine`, 8 `LoadZone`, 24 `DemandPeriod`, 12 `RenewableProfile`, 8 `MaintenanceWindow`, 100 `Customer`, 10 `DataCenterRequest` (2,930 MW total), 10 `SubstationUpgrade` ($630M total), historical `LoadHistory` and forward `DemandForecast` rows backing the predictive stage.

### 3. Discover reasoner questions

- Prompt: `/rai-discovery We have 10 hyperscaler interconnection requests against a 12-substation grid. Which to approve, which substation upgrades to fund, at what budget level?`
- Response: A routing plan that assigns sub-questions to the predictive, graph, rules, and prescriptive reasoners.

### 4. Forecast substation load

- Prompt: `/rai-predictive-modeling + /rai-predictive-training Forecast substation load growth over the next 24 months from historical demand, planned generator additions, and the DC request pipeline. Use the pre-trained model from the bundled DemandForecast table if available, or train fresh. Bind each substation's predicted peak load back to the ontology so the rules engine and optimizer can read it.`
- Response: `Substation.predicted_load` for all 12; DFW breaches at 1,700 MW vs 1,600 MW cap at 24 months (+54.6%).
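
The forecast itself comes from the predictive reasoner, but the underlying idea can be sketched as a linear trend over monthly peak loads. A minimal stdlib sketch with an invented history series; only the 1,100 MW starting load and 1,600 MW cap echo the DFW story:

```python
def forecast_peak_load(history_mw, months_ahead=24):
    """Least-squares linear trend over monthly peaks, extrapolated forward."""
    n = len(history_mw)
    mean_x = (n - 1) / 2
    mean_y = sum(history_mw) / n
    cov = sum((x - mean_x) * (y - mean_y) for x, y in enumerate(history_mw))
    var = sum((x - mean_x) ** 2 for x in range(n))
    slope = cov / var
    intercept = mean_y - slope * mean_x
    return intercept + slope * (n - 1 + months_ahead)

# Invented history: a substation at 1,100 MW growing ~25 MW per month.
dfw_history = [1100 + 25 * m for m in range(12)]
predicted = forecast_peak_load(dfw_history)  # well past a 1,600 MW cap
```

The real stage also folds in planned generator additions and the DC pipeline; this sketch shows only the trend-extrapolation core.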

### 5. Find structural bottlenecks

- Prompt: `/rai-graph-analysis Which substations are most critical to power flow based on grid topology? Flag the top 3 most structurally critical, surface any regional clustering, and persist the structural-criticality scores back to the ontology.`
- Response: 1 connected component, 3 Louvain communities (North Texas, West Texas, Gulf Coast); DFW, Houston, San Antonio flagged `is_structurally_critical`; 7 of 10 DC requests target critical nodes.
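
The structural-criticality signal is shortest-path betweenness. A pure-Python sketch of Brandes' algorithm on a toy 4-node line graph; the real stage runs on the 18-line ERCOT topology and also computes Louvain communities, which this sketch omits:

```python
from collections import deque

def betweenness(adj):
    """Unweighted shortest-path betweenness centrality (Brandes' algorithm)."""
    bc = {v: 0.0 for v in adj}
    for s in adj:
        stack = []
        pred = {v: [] for v in adj}
        sigma = {v: 0 for v in adj}
        sigma[s] = 1
        dist = {v: -1 for v in adj}
        dist[s] = 0
        queue = deque([s])
        while queue:  # BFS from s, counting shortest paths
            v = queue.popleft()
            stack.append(v)
            for w in adj[v]:
                if dist[w] < 0:
                    dist[w] = dist[v] + 1
                    queue.append(w)
                if dist[w] == dist[v] + 1:
                    sigma[w] += sigma[v]
                    pred[w].append(v)
        delta = {v: 0.0 for v in adj}
        while stack:  # accumulate dependencies in reverse BFS order
            w = stack.pop()
            for v in pred[w]:
                delta[v] += sigma[v] / sigma[w] * (1 + delta[w])
            if w != s:
                bc[w] += delta[w]
    # each undirected pair was counted from both endpoints, so halve
    return {v: b / 2 for v, b in bc.items()}

# Toy line grid A - B - C - D: the interior nodes carry every A-to-D path.
adj = {"A": ["B"], "B": ["A", "C"], "C": ["B", "D"], "D": ["C"]}
scores = betweenness(adj)
```

Flagging the top-k scores gives the `is_structurally_critical` property from this stage.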

### 6. Screen DC requests

- Prompt: `/rai-rules-authoring Screen each data center request against three criteria: (1) substation must have enough capacity after predicted load, (2) substation's low-carbon (renewable + nuclear) generation share must meet the DC's low-carbon requirement, (3) substation shouldn't be one of the top-3 structurally critical. Which requests pass all three?`
- Response: `fails_capacity` / `fails_structural` / `fails_low_carbon` + `is_compliant`; 2 pass (Crusoe, Oracle), 8 flagged.
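
The three criteria translate directly into boolean flags. A hypothetical sketch; the field names (`predicted_load_mw`, `low_carbon_share`, and so on) are invented stand-ins for whatever the rules engine reads from the ontology:

```python
def screen_request(req, sub):
    """Stage 3 screening as three boolean flags plus the combined verdict."""
    flags = {
        "fails_capacity": sub["predicted_load_mw"] + req["mw"] > sub["capacity_mw"],
        "fails_low_carbon": sub["low_carbon_share"] < req["min_low_carbon_share"],
        "fails_structural": sub["is_structurally_critical"],
    }
    flags["is_compliant"] = not any(flags.values())
    return flags

# A Midland-style request: spare capacity, high low-carbon share, not critical.
verdict = screen_request(
    {"mw": 200, "min_low_carbon_share": 0.4},
    {"predicted_load_mw": 300, "capacity_mw": 800,
     "low_carbon_share": 0.7, "is_structurally_critical": False},
)
```

A request passes only when all three flags are clear, which is why just 2 of the 10 bundled requests survive this stage.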

### 7. Approve DCs and fund upgrades

- Prompt: `/rai-prescriptive-problem-formulation Decide which data center requests to approve and which substation upgrades to fund at $200M, $300M, $400M, $500M, and $600M investment levels. Maximize annual revenue across all five levels in a single solve. A request can only be approved if its substation has enough capacity after upgrades, and total upgrade spend at each level must stay within that level's budget. Consider all 10 requests — the Stage 3 compliance flags are informational, not hard filters.`
- Response: OPTIMAL MIP across 5 `InvestmentLevel` values in one solve; `x_approve` and `x_upgrade` written back per level.
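
The actual stage is a MIP solved across all five budget levels in one pass; on a toy instance the same decision structure can be brute-forced. A sketch with invented numbers, a single substation, and exhaustive enumeration standing in for the solver:

```python
from itertools import product

def best_portfolio(requests, upgrades, base_capacity_mw, budget_m):
    """Exhaustive stand-in for the Stage 4 MIP on one substation:
    choose upgrades within budget, then approvals within the upgraded
    capacity, maximizing annual revenue."""
    best = (0.0, (), ())
    for up_sel in product((0, 1), repeat=len(upgrades)):
        cost = sum(u["cost_m"] for u, s in zip(upgrades, up_sel) if s)
        if cost > budget_m:
            continue
        cap = base_capacity_mw + sum(u["mw"] for u, s in zip(upgrades, up_sel) if s)
        for rq_sel in product((0, 1), repeat=len(requests)):
            mw = sum(r["mw"] for r, s in zip(requests, rq_sel) if s)
            if mw > cap:
                continue
            revenue = sum(r["revenue"] for r, s in zip(requests, rq_sel) if s)
            if revenue > best[0]:
                best = (revenue, up_sel, rq_sel)
    return best

# Invented instance: one $40M upgrade unlocks approving both requests.
requests = [{"mw": 80, "revenue": 8.0}, {"mw": 60, "revenue": 5.0}]
upgrades = [{"mw": 50, "cost_m": 40.0}]
revenue_funded = best_portfolio(requests, upgrades, base_capacity_mw=100, budget_m=50)[0]
revenue_unfunded = best_portfolio(requests, upgrades, base_capacity_mw=100, budget_m=0)[0]
```

Sweeping `budget_m` over the five investment levels is what traces out the Pareto frontier in the next step.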

### 8. Read the frontier

- Prompt: `/rai-prescriptive-results-interpretation Which data centers get approved, which upgrades are selected, and where's the biggest return on investment at each budget level?`
- Response: Pareto frontier with knee at $300M (5 DCs, 1,500 MW, $264M net); marginal $995K/$M at knee, declining to $400K/$M by $600M; Google + Lambda never approved (DFW full).

### 9. Persist solution concepts into the ontology

- Prompt: `/rai-ontology-design Add an InvestmentPortfolio concept indexed by InvestmentLevel that materializes the per-budget aggregates (approved-DC count, total MW, annual revenue, upgrade cost, net value, marginal value per added $M) and flags the knee point.`
- Response: Ontology gains an `InvestmentPortfolio(InvestmentLevel)` Concept (5 rows, one per budget) with `dc_count`, `total_mw`, `annual_revenue`, `upgrade_cost`, `net_value`, `marginal_per_m_to_next_level`, `is_knee_point`. All five frontier rows — $200M ($165M net) → $300M ($264M net, knee) → $600M ($395M net) — are queryable as ontology rather than stdout.
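
The `is_knee_point` flag persisted here comes from a marginal-rate ratio test. A sketch mirroring the script's logic; the $165M, $264M, and $395M net values match the frontier above, while the $310M and $360M intermediates are invented for illustration:

```python
def find_knee(budgets_m, net_values):
    """Flag the frontier row where marginal value per added $M drops most
    sharply, mirroring the |rate[j-1]| / |rate[j]| test in the script."""
    rates = [
        (net_values[j + 1] - net_values[j]) / (budgets_m[j + 1] - budgets_m[j])
        for j in range(len(budgets_m) - 1)
    ]
    max_jump, knee_idx = 0.0, 1
    for j in range(len(rates) - 1):
        prev_rate, next_rate = rates[j], rates[j + 1]
        if abs(next_rate) > 1e-6:
            jump = abs(prev_rate / next_rate)
        elif abs(prev_rate) > 1e-6:
            jump = float("inf")
        else:
            jump = 0.0
        if jump > max_jump:
            max_jump, knee_idx = jump, j + 1
    return knee_idx

knee = find_knee([200, 300, 400, 500, 600], [165.0, 264.0, 310.0, 360.0, 395.0])
```

With these inputs the marginal rate falls from roughly $0.99M to $0.46M per added $M after the second row, so the knee lands on the $300M level, matching the runbook's headline.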

## Data

Bundled CSVs in `data/`: 12 substations, 15 generators, 18 transmission lines, 10 DC requests (2,930 MW), 10 substation upgrades ($630M total), plus historical load and forecast tables. Full chain implemented in `energy_grid_planning.py`.