From ba11284f2753c4645019f6b0949c0943e4d45359 Mon Sep 17 00:00:00 2001 From: Duksh Koonjoobeeharry <586947+duksh@users.noreply.github.com> Date: Mon, 11 May 2026 20:29:04 +0400 Subject: [PATCH] feat: add FinOps discipline across templates, workflow engine, and CI (#1) 21 files changed, 738 additions, 31 deletions. - Cost Allocation table in spec-template.md - AI Model Tier / Estimated Token Budget fields in plan-template.md - Economic Efficiency principle (Article VI) in constitution-template.md - Clarification budget guard in clarify.md; cost field validation in plan.md - budget-gate step type (circuit breaker at 80%/100% LLM spend) - Fan-out max_parallel rename + batched engine loop - cost-tracker extension (record + report commands, after_implement hook) - cost_profile surfaced in specify extension list CLI - cost-review gate + spend-check step in speckit workflow - CI: path filter, fail-fast, uv cache https://claude.ai/code/session_017mzZmq4F57rAFJVsvVpnva --- .github/workflows/test.yml | 11 ++ extensions/catalog.json | 15 ++ .../commands/speckit.cost-tracker.record.md | 60 ++++++ .../commands/speckit.cost-tracker.report.md | 69 +++++++ extensions/cost-tracker/config-template.yml | 15 ++ extensions/cost-tracker/extension.yml | 53 ++++++ extensions/template/extension.yml | 8 + presets/lean/README.md | 24 +++ src/specify_cli/__init__.py | 11 ++ src/specify_cli/extensions.py | 8 +- src/specify_cli/workflows/__init__.py | 2 + src/specify_cli/workflows/engine.py | 44 +++-- .../workflows/steps/budget_gate/__init__.py | 156 ++++++++++++++++ .../workflows/steps/fan_out/__init__.py | 18 +- templates/commands/clarify.md | 38 +++- templates/commands/plan.md | 18 ++ templates/constitution-template.md | 11 ++ templates/plan-template.md | 4 +- templates/spec-template.md | 11 ++ tests/test_workflows.py | 171 +++++++++++++++++- workflows/speckit/workflow.yml | 22 +++ 21 files changed, 738 insertions(+), 31 deletions(-) create mode 100644 
extensions/cost-tracker/commands/speckit.cost-tracker.record.md create mode 100644 extensions/cost-tracker/commands/speckit.cost-tracker.report.md create mode 100644 extensions/cost-tracker/config-template.yml create mode 100644 extensions/cost-tracker/extension.yml create mode 100644 src/specify_cli/workflows/steps/budget_gate/__init__.py diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index f7130aa8d1..f7d86a2eb4 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -7,6 +7,13 @@ on: push: branches: ["main"] pull_request: + paths: + - "src/**" + - "tests/**" + - "templates/**" + - "workflows/**" + - "extensions/**" + - "pyproject.toml" jobs: ruff: @@ -29,6 +36,7 @@ jobs: pytest: runs-on: ${{ matrix.os }} strategy: + fail-fast: true matrix: os: [ubuntu-latest, windows-latest] python-version: ["3.11", "3.12", "3.13"] @@ -38,6 +46,9 @@ jobs: - name: Install uv uses: astral-sh/setup-uv@08807647e7069bb48b6ef5acd8ec9567f424441b # v8.1.0 + with: + enable-cache: true + cache-dependency-glob: "**/pyproject.toml" - name: Set up Python ${{ matrix.python-version }} uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6 diff --git a/extensions/catalog.json b/extensions/catalog.json index de9372e2bc..62551d0e4d 100644 --- a/extensions/catalog.json +++ b/extensions/catalog.json @@ -17,6 +17,21 @@ "workflow", "core" ] + }, + "cost-tracker": { + "name": "LLM Cost Tracker", + "id": "cost-tracker", + "version": "1.0.0", + "description": "Records actual LLM spend back to the spec's Cost Allocation section and surfaces per-feature budget status", + "author": "spec-kit-core", + "repository": "https://github.com/github/spec-kit", + "bundled": true, + "tags": [ + "finops", + "cost", + "llm", + "budget" + ] } } } \ No newline at end of file diff --git a/extensions/cost-tracker/commands/speckit.cost-tracker.record.md b/extensions/cost-tracker/commands/speckit.cost-tracker.record.md new file mode 100644 index 
0000000000..056d17b4b8 --- /dev/null +++ b/extensions/cost-tracker/commands/speckit.cost-tracker.record.md @@ -0,0 +1,60 @@ +--- +description: "Update the Actual LLM Spend field in the current spec with the reported spend amount" +--- + +# Record LLM Spend + +Update the **Actual LLM Spend (USD)** field in the active spec's `## Cost Allocation` +section to reflect the spend incurred during the most recent command. + +## Outline + +1. **Locate the active spec file** + - Find the spec document for the current feature in `.specify/specs/`. + - If no spec file is found, emit a warning and exit — do not create one. + +2. **Determine the spend amount** + - If invoked as an `after_implement` hook, check whether the integration + reported token usage for the completed step. + - If token counts are available, convert to USD using the model's published + per-token pricing (default: haiku at $0.00025/1K input + $0.00125/1K output). + - If token counts are not available, prompt: + ``` + Enter the LLM spend for this step in USD (e.g. 0.04), or press Enter to skip: + ``` + If the user presses Enter or provides a non-numeric value, skip and exit. + +3. **Read the current value** + - Parse the `## Cost Allocation` table in the spec. + - Read the current **Actual LLM Spend (USD)** cell value. + - If the cell is absent or contains a placeholder, treat current value as 0. + +4. **Add and write back** + - New total = current value + spend amount from step 2. + - Overwrite the **Actual LLM Spend (USD)** cell with `${new_total}` (2 decimal places). + - Preserve all other table rows exactly. + +5. **Threshold check** + - Read **Approved LLM Budget (USD)** from the same table. + - Compute `pct = new_total / approved * 100`. + - If `pct >= 100`: emit + ``` + ⛔ Budget exceeded: ${new_total} spent of ${approved} approved ({pct}%). + Consider pausing and reviewing with your team before continuing. + ``` + - If `pct >= warn_at_pct` (default 80, from config): emit + ``` + ⚠ Budget warning: ${new_total} spent of ${approved} approved ({pct}%). 
+ ``` + - Otherwise: emit + ``` + ✓ Spend recorded: ${new_total} of ${approved} approved ({pct}% used). + ``` + +## Graceful Degradation + +- If the spec has no `## Cost Allocation` section: skip with a one-line warning. +- If the approved budget field is absent or zero: skip the threshold check; still + write the spend value. +- If the spec file is read-only or the write fails: emit an error message and exit + without modifying the file. diff --git a/extensions/cost-tracker/commands/speckit.cost-tracker.report.md b/extensions/cost-tracker/commands/speckit.cost-tracker.report.md new file mode 100644 index 0000000000..4e51e207eb --- /dev/null +++ b/extensions/cost-tracker/commands/speckit.cost-tracker.report.md @@ -0,0 +1,69 @@ +--- +description: "Show a budget summary table across all specs in the project" +--- + +# Cost Report + +Display a budget summary table covering every spec in the current project that +has a `## Cost Allocation` section. + +## Outline + +1. **Discover specs** + - Enumerate all `*.md` files under `.specify/specs/` (non-recursive). + - For each file, attempt to parse the `## Cost Allocation` table. + - Skip files where the section is absent or cannot be parsed. + +2. **Extract fields per spec** + For each spec with a Cost Allocation section, read: + - **Feature** — the spec filename (without extension) or the first H1 heading + - **Team** — from the Team row + - **Cost Center** — from the Cost Center row + - **Priority** — from the Feature Priority row + - **Approved (USD)** — from the Approved LLM Budget row (parse `$X.XX` → float) + - **Actual (USD)** — from the Actual LLM Spend row (parse `$X.XX` → float; 0 if placeholder) + - **% Used** — compute `actual / approved * 100` (0 if approved is 0) + +3. 
**Render the summary table** + + ``` + ┌─────────────────────────────────────────────────────────────────────────┐ + │ LLM Cost Report 2024-01-15 14:30 UTC │ + ├──────────────────────┬───────────┬──────────┬────────┬───────┬──────────┤ + │ Feature │ Priority │ Approved │ Actual │ % Used│ Status │ + ├──────────────────────┼───────────┼──────────┼────────┼───────┼──────────┤ + │ add-login │ P1 │ $10.00 │ $7.80 │ 78% │ ✓ ok │ + │ dark-mode │ P2 │ $5.00 │ $4.10 │ 82% │ ⚠ warn │ + │ data-export │ P3 │ $3.00 │ $3.50 │ 117% │ ⛔ over │ + ├──────────────────────┼───────────┼──────────┼────────┼───────┼──────────┤ + │ TOTAL │ │ $18.00 │ $15.40 │ 86% │ ⚠ warn │ + └──────────────────────┴───────────┴──────────┴────────┴───────┴──────────┘ + ``` + + Status legend: + - `✓ ok` — below 80% of approved budget + - `⚠ warn` — 80–99% of approved budget + - `⛔ over` — 100%+ of approved budget + +4. **Exit code** + - Exit 0 if all features are under budget. + - Exit 1 if any feature has exceeded its approved budget (for CI use). + +## Options + +This command accepts no arguments. Configuration comes from +`.specify/extensions/cost-tracker/cost-tracker-config.yml`: + +```yaml +warn_at_pct: 80 # Percentage at which ⚠ warning status is shown +``` + +## Graceful Degradation + +- If `.specify/specs/` does not exist or contains no parseable specs, print: + ``` + No specs with Cost Allocation data found in .specify/specs/. + Run /speckit.specify to create a spec, then add a ## Cost Allocation section. + ``` +- Specs where **Approved LLM Budget** is absent or zero are listed with + `N/A` in the Approved and % Used columns and excluded from the total. 
diff --git a/extensions/cost-tracker/config-template.yml b/extensions/cost-tracker/config-template.yml new file mode 100644 index 0000000000..2baecdd3c7 --- /dev/null +++ b/extensions/cost-tracker/config-template.yml @@ -0,0 +1,15 @@ +# Cost Tracker Extension Configuration +# Copy to .specify/extensions/cost-tracker/cost-tracker-config.yml and customize. + +# Percentage of approved budget at which a warning is emitted (default: 80) +warn_at_pct: 80 + +# Currency symbol used in output (display only) +currency: "USD" + +# Token pricing used when converting token counts to USD. +# Defaults match Anthropic haiku pricing at time of writing. +# Override if you use a different model or pricing has changed. +token_pricing: + input_per_1k: 0.00025 # USD per 1,000 input tokens + output_per_1k: 0.00125 # USD per 1,000 output tokens diff --git a/extensions/cost-tracker/extension.yml b/extensions/cost-tracker/extension.yml new file mode 100644 index 0000000000..64c09a864a --- /dev/null +++ b/extensions/cost-tracker/extension.yml @@ -0,0 +1,53 @@ +schema_version: "1.0" + +extension: + id: "cost-tracker" + name: "LLM Cost Tracker" + version: "1.0.0" + description: "Records actual LLM spend back to the spec's Cost Allocation section and surfaces per-feature budget status" + author: "spec-kit-core" + repository: "https://github.com/github/spec-kit" + license: "MIT" + homepage: "https://github.com/github/spec-kit" + +requires: + speckit_version: ">=0.7.2" + +provides: + commands: + - name: "speckit.cost-tracker.record" + file: "commands/speckit.cost-tracker.record.md" + description: "Update the Actual LLM Spend field in the current spec with the reported spend amount" + + - name: "speckit.cost-tracker.report" + file: "commands/speckit.cost-tracker.report.md" + description: "Show a budget summary table across all specs in the project" + + config: + - name: "cost-tracker-config.yml" + template: "config-template.yml" + description: "Cost tracker configuration" + required: false + 
+hooks: + after_implement: + command: "speckit.cost-tracker.record" + optional: true + prompt: "Record LLM spend for this implementation step?" + description: "Prompts for actual spend and writes it to the spec's Cost Allocation section" + +tags: + - "finops" + - "cost" + - "llm" + - "budget" + +cost_profile: + llm_calls_per_invocation: "none" + estimated_tokens_per_call: 0 + supports_caching: false + recommended_model_tier: "haiku" + +defaults: + currency: "USD" + warn_at_pct: 80 diff --git a/extensions/template/extension.yml b/extensions/template/extension.yml index abf7e45afc..6bf38d4e58 100644 --- a/extensions/template/extension.yml +++ b/extensions/template/extension.yml @@ -86,6 +86,14 @@ tags: - "template" # ADD MORE: "category", "tool-name", etc. +# CUSTOMIZE: Declare the cost characteristics of this extension (optional) +# llm_calls_per_invocation: none=0 calls, low=1-3, medium=4-10, high=>10 +cost_profile: + llm_calls_per_invocation: "none" # none | low | medium | high + estimated_tokens_per_call: 0 # average tokens per LLM call + supports_caching: false # true if prompt caching reduces cost + recommended_model_tier: "haiku" # haiku | sonnet | opus + # CUSTOMIZE: Default configuration values (optional) # These are merged with user config defaults: diff --git a/presets/lean/README.md b/presets/lean/README.md index ab17257f96..652d015624 100644 --- a/presets/lean/README.md +++ b/presets/lean/README.md @@ -43,3 +43,27 @@ specify preset remove lean ## License MIT + +## Cost Profile + +Lean uses approximately 67% fewer tokens per spec/plan/tasks cycle than the +standard templates. The reduced prompt size also makes haiku the appropriate +model tier for all lean commands. 
+ +| Command | Lean | Standard | Reduction | +|---------|------|----------|-----------| +| `speckit.specify` | ~800 tokens | ~2,400 tokens | ~67% | +| `speckit.plan` | ~1,200 tokens | ~3,600 tokens | ~67% | +| `speckit.tasks` | ~600 tokens | ~1,800 tokens | ~67% | +| `speckit.implement` | ~500 tokens | ~500 tokens | ~0% | +| `speckit.constitution` | ~400 tokens | ~1,200 tokens | ~67% | + +*Token estimates are approximate and vary with input size.* + +### When to Use Lean by Priority + +| Priority | Recommendation | +|----------|---------------| +| **P1** | Use standard. P1 features justify full artifacts and review gates. | +| **P2** | Lean is suitable. Reduced boilerplate still produces reviewable artifacts. | +| **P3** | Lean is preferred. Minimal investment for exploratory or low-risk changes. | diff --git a/src/specify_cli/__init__.py b/src/specify_cli/__init__.py index 325692900e..9dd68dcd5f 100644 --- a/src/specify_cli/__init__.py +++ b/src/specify_cli/__init__.py @@ -4055,6 +4055,17 @@ def extension_list( console.print(f" [dim]{ext['id']}[/dim]") console.print(f" {ext['description']}") console.print(f" Commands: {ext['command_count']} | Hooks: {ext['hook_count']} | Priority: {ext['priority']} | Status: {'Enabled' if ext['enabled'] else 'Disabled'}") + cp = ext.get("cost_profile", {}) + if cp: + tier = cp.get("recommended_model_tier", "haiku") + calls = cp.get("llm_calls_per_invocation", "none") + tokens = cp.get("estimated_tokens_per_call", 0) + caching = cp.get("supports_caching", False) + caching_str = "[green]yes[/green]" if caching else "no" + console.print( + f" [dim]Cost: tier=[cyan]{tier}[/cyan] calls={calls} " + f"tokens/call~{tokens} caching={caching_str}[/dim]" + ) console.print() if available or all_extensions: diff --git a/src/specify_cli/extensions.py b/src/specify_cli/extensions.py index 944ee4a06d..b85c549c8d 100644 --- a/src/specify_cli/extensions.py +++ b/src/specify_cli/extensions.py @@ -357,6 +357,11 @@ def hooks(self) -> Dict[str, Any]: 
"""Get hook definitions.""" return self.data.get("hooks", {}) + @property + def cost_profile(self) -> Dict[str, Any]: + """Get cost profile metadata (optional — returns empty dict if absent).""" + return self.data.get("cost_profile", {}) + def get_hash(self) -> str: """Calculate SHA256 hash of manifest file.""" with open(self.path, 'rb') as f: @@ -1519,7 +1524,8 @@ def list_installed(self) -> List[Dict[str, Any]]: "priority": normalize_priority(metadata.get("priority")), "installed_at": metadata.get("installed_at"), "command_count": len(manifest.commands), - "hook_count": len(manifest.hooks) + "hook_count": len(manifest.hooks), + "cost_profile": manifest.cost_profile, }) except ValidationError: # Corrupted extension diff --git a/src/specify_cli/workflows/__init__.py b/src/specify_cli/workflows/__init__.py index 13782f620b..d261c4bd87 100644 --- a/src/specify_cli/workflows/__init__.py +++ b/src/specify_cli/workflows/__init__.py @@ -42,6 +42,7 @@ def get_step_type(type_key: str) -> StepBase | None: def _register_builtin_steps() -> None: """Register all built-in step types.""" + from .steps.budget_gate import BudgetGateStep from .steps.command import CommandStep from .steps.do_while import DoWhileStep from .steps.fan_in import FanInStep @@ -53,6 +54,7 @@ def _register_builtin_steps() -> None: from .steps.switch import SwitchStep from .steps.while_loop import WhileStep + _register_step(BudgetGateStep()) _register_step(CommandStep()) _register_step(DoWhileStep()) _register_step(FanInStep()) diff --git a/src/specify_cli/workflows/engine.py b/src/specify_cli/workflows/engine.py index d6a73bbeb0..77d1827ff7 100644 --- a/src/specify_cli/workflows/engine.py +++ b/src/specify_cli/workflows/engine.py @@ -663,19 +663,37 @@ def _execute_steps( template = result.output.get("step_template", {}) if template and items: fan_out_results = [] - for item_idx, item_val in enumerate(result.output["items"]): - context.item = item_val - # Per-item ID: parentId:templateId:index - item_step = 
dict(template) - base_id = item_step.get("id", "item") - item_step["id"] = f"{step_id}:{base_id}:{item_idx}" - self._execute_steps( - [item_step], context, state, registry, - step_offset=-1, - ) - # Collect per-item result for fan-in - item_result = context.steps.get(item_step["id"], {}) - fan_out_results.append(item_result.get("output", {})) + max_parallel = result.output.get("max_parallel", 3) + if not isinstance(max_parallel, int) or max_parallel < 1: + max_parallel = 3 + # Items are processed sequentially in batches of max_parallel. + # This is intentional rate-limiting (cost/resource control), + # not concurrent execution. True parallelism would require + # thread-safe context and state management. + # Process items in batches of max_parallel + for batch_start in range(0, len(items), max_parallel): + batch = items[batch_start : batch_start + max_parallel] + for item_idx, item_val in enumerate( + batch, start=batch_start + ): + context.item = item_val + # Per-item ID: parentId:templateId:index + item_step = dict(template) + base_id = item_step.get("id", "item") + item_step["id"] = f"{step_id}:{base_id}:{item_idx}" + self._execute_steps( + [item_step], context, state, registry, + step_offset=-1, + ) + # Collect per-item result for fan-in + item_result = context.steps.get(item_step["id"], {}) + fan_out_results.append(item_result.get("output", {})) + if state.status in ( + RunStatus.PAUSED, + RunStatus.FAILED, + RunStatus.ABORTED, + ): + break if state.status in ( RunStatus.PAUSED, RunStatus.FAILED, diff --git a/src/specify_cli/workflows/steps/budget_gate/__init__.py b/src/specify_cli/workflows/steps/budget_gate/__init__.py new file mode 100644 index 0000000000..24994fd036 --- /dev/null +++ b/src/specify_cli/workflows/steps/budget_gate/__init__.py @@ -0,0 +1,156 @@ +"""Budget gate step — LLM spend circuit breaker.""" + +from __future__ import annotations + +import sys +from typing import Any + +from specify_cli.workflows.base import StepBase, StepContext, 
StepResult, StepStatus +from specify_cli.workflows.expressions import evaluate_expression + +_VALID_ON_WARNING = frozenset({"pause", "notify", "continue"}) +_VALID_ON_EXCEEDED = frozenset({"abort", "pause"}) + + +class BudgetGateStep(StepBase): + """LLM spend circuit breaker. + + Compares ``current_spend_usd`` against ``threshold_usd`` and takes + action before spending runs away: + + * ``< 80 %`` — completes silently (no interruption to the workflow). + * ``>= 80 %`` — emits a warning; behaviour controlled by ``on_warning`` + (``pause`` / ``notify`` / ``continue``). + * ``>= 100 %`` — emits an error; behaviour controlled by ``on_exceeded`` + (``abort`` / ``pause``). + + Both ``threshold_usd`` and ``current_spend_usd`` support ``{{ }}`` + template expressions so they can read from previous step outputs + (e.g. ``{{ steps.cost_tracker.output.total_spend }}``). + """ + + type_key = "budget-gate" + + def execute(self, config: dict[str, Any], context: StepContext) -> StepResult: + threshold_raw = config.get("threshold_usd", 0) + if isinstance(threshold_raw, str) and "{{" in threshold_raw: + threshold_raw = evaluate_expression(threshold_raw, context) + threshold = float(threshold_raw) if threshold_raw else 0.0 + + spend_raw = config.get("current_spend_usd", 0) + if isinstance(spend_raw, str) and "{{" in spend_raw: + spend_raw = evaluate_expression(spend_raw, context) + current_spend = float(spend_raw) if spend_raw else 0.0 + + on_warning = config.get("on_warning", "pause") + on_exceeded = config.get("on_exceeded", "abort") + + pct = (current_spend / threshold * 100) if threshold > 0 else 0.0 + + output: dict[str, Any] = { + "threshold_usd": threshold, + "current_spend_usd": current_spend, + "pct_used": round(pct, 1), + "status": "ok", + } + + if threshold > 0 and current_spend >= threshold: + output["status"] = "exceeded" + self._print_exceeded(current_spend, threshold, pct) + if on_exceeded == "abort": + return StepResult( + status=StepStatus.FAILED, + output=output, + 
error=( + f"Budget gate: LLM spend ${current_spend:.2f} exceeded " + f"threshold ${threshold:.2f} ({pct:.1f}%)" + ), + ) + # on_exceeded == "pause" + return StepResult(status=StepStatus.PAUSED, output=output) + + if threshold > 0 and current_spend >= threshold * 0.8: + output["status"] = "warning" + self._print_warning(current_spend, threshold, pct) + if on_warning == "continue": + return StepResult(status=StepStatus.COMPLETED, output=output) + if on_warning == "notify": + # Emit the warning but don't block execution + return StepResult(status=StepStatus.COMPLETED, output=output) + # on_warning == "pause" — pause if not interactive, else prompt + if not sys.stdin.isatty(): + return StepResult(status=StepStatus.PAUSED, output=output) + choice = self._prompt_warning(current_spend, threshold) + if choice == "abort": + output["status"] = "exceeded" + return StepResult( + status=StepStatus.FAILED, + output=output, + error=( + f"Budget gate: user aborted at ${current_spend:.2f} " + f"/ ${threshold:.2f} ({pct:.1f}%)" + ), + ) + return StepResult(status=StepStatus.COMPLETED, output=output) + + return StepResult(status=StepStatus.COMPLETED, output=output) + + @staticmethod + def _print_warning(spend: float, threshold: float, pct: float) -> None: + print("\n ┌─ Budget Gate ───────────────────────────────") + print(f" │ ⚠ WARNING: {pct:.1f}% of LLM budget used") + print(f" │ Spent: ${spend:.2f}") + print(f" │ Threshold: ${threshold:.2f}") + print(" └────────────────────────────────────────────") + + @staticmethod + def _print_exceeded(spend: float, threshold: float, pct: float) -> None: + print("\n ┌─ Budget Gate ───────────────────────────────") + print(f" │ ✗ EXCEEDED: {pct:.1f}% of LLM budget used") + print(f" │ Spent: ${spend:.2f}") + print(f" │ Threshold: ${threshold:.2f}") + print(" └────────────────────────────────────────────") + + @staticmethod + def _prompt_warning(spend: float, threshold: float) -> str: + """Prompt user to continue or abort after a budget 
warning.""" + print(f" Continue workflow with ${spend:.2f} / ${threshold:.2f} spent?") + print(" [1] continue") + print(" [2] abort") + while True: + try: + raw = input(" Choose [1-2]: ").strip() + except (EOFError, KeyboardInterrupt): + print() + return "abort" + if raw == "1" or raw.lower() == "continue": + return "continue" + if raw == "2" or raw.lower() == "abort": + return "abort" + print(" Enter 1 (continue) or 2 (abort).") + + def validate(self, config: dict[str, Any]) -> list[str]: + errors = super().validate(config) + if "threshold_usd" not in config: + errors.append( + f"Budget gate step {config.get('id', '?')!r} is missing " + f"'threshold_usd' field." + ) + if "current_spend_usd" not in config: + errors.append( + f"Budget gate step {config.get('id', '?')!r} is missing " + f"'current_spend_usd' field." + ) + on_warning = config.get("on_warning", "pause") + if on_warning not in _VALID_ON_WARNING: + errors.append( + f"Budget gate step {config.get('id', '?')!r}: 'on_warning' must be " + f"one of {sorted(_VALID_ON_WARNING)}." + ) + on_exceeded = config.get("on_exceeded", "abort") + if on_exceeded not in _VALID_ON_EXCEEDED: + errors.append( + f"Budget gate step {config.get('id', '?')!r}: 'on_exceeded' must be " + f"one of {sorted(_VALID_ON_EXCEEDED)}." + ) + return errors diff --git a/src/specify_cli/workflows/steps/fan_out/__init__.py b/src/specify_cli/workflows/steps/fan_out/__init__.py index c2fff1face..7aa06a9fe8 100644 --- a/src/specify_cli/workflows/steps/fan_out/__init__.py +++ b/src/specify_cli/workflows/steps/fan_out/__init__.py @@ -12,9 +12,8 @@ class FanOutStep(StepBase): """Dispatch a step template for each item in a collection. The engine executes the nested ``step:`` template once per item, - setting ``context.item`` for each iteration. Execution is - currently sequential; ``max_concurrency`` is accepted but not - enforced. + setting ``context.item`` for each iteration. 
``max_parallel`` caps + the batch size (default 3); ``max_concurrency`` is a deprecated alias. """ type_key = "fan-out" @@ -25,14 +24,15 @@ def execute(self, config: dict[str, Any], context: StepContext) -> StepResult: if not isinstance(items, list): items = [] - max_concurrency = config.get("max_concurrency", 1) + # max_parallel is canonical; max_concurrency is a deprecated alias + max_parallel = config.get("max_parallel", config.get("max_concurrency", 3)) step_template = config.get("step", {}) return StepResult( status=StepStatus.COMPLETED, output={ "items": items, - "max_concurrency": max_concurrency, + "max_parallel": max_parallel, "step_template": step_template, "item_count": len(items), }, @@ -55,4 +55,12 @@ def validate(self, config: dict[str, Any]) -> list[str]: errors.append( f"Fan-out step {config.get('id', '?')!r}: 'step' must be a mapping." ) + # Validate both canonical key and deprecated alias + max_parallel = config.get("max_parallel", config.get("max_concurrency")) + if max_parallel is not None: + if not isinstance(max_parallel, int) or max_parallel < 1: + errors.append( + f"Fan-out step {config.get('id', '?')!r}: 'max_parallel' " + f"(or deprecated 'max_concurrency') must be a positive integer." + ) return errors diff --git a/templates/commands/clarify.md b/templates/commands/clarify.md index 57cd01def1..e0c966471b 100644 --- a/templates/commands/clarify.md +++ b/templates/commands/clarify.md @@ -66,7 +66,27 @@ Execution steps: - If JSON parsing fails, abort and instruct user to re-run `__SPECKIT_COMMAND_SPECIFY__` or verify feature branch environment. - For single quotes in args like "I'm Groot", use escape syntax: e.g 'I'\''m Groot' (or double-quote if possible: "I'm Groot"). -2. Load the current spec file. Perform a structured ambiguity & coverage scan using this taxonomy. For each category, mark status: Clear / Partial / Missing. Produce an internal coverage map used for prioritization (do not output raw map unless no questions will be asked). 
+2. **Clarification Budget check**: Before scanning for ambiguities, read the + `## Cost Allocation` section of the loaded spec. + - Locate the **Clarification Budget** field (e.g., `3`). If the field is + absent or unparseable, treat the budget as unlimited and continue normally. + - Count existing `### Session YYYY-MM-DD #N` subsections under + `## Clarifications` in the spec (each represents one past session). + - If `session_count >= clarification_budget`: + - Output: + ``` + WARNING: Clarification budget exhausted. + Budget: {budget} rounds | Sessions used: {count} + Reply "override clarification budget" to continue anyway. + Reply "proceed" to skip clarification and go to `__SPECKIT_COMMAND_PLAN__`. + ``` + - Pause and wait for user input. + - If user replies `override clarification budget`: continue to step 3. + - If user replies `proceed`: exit command and suggest running `__SPECKIT_COMMAND_PLAN__`. + - Any other input: repeat the warning once, then exit. + - If `session_count < clarification_budget`, continue to step 3. + +3. Load the current spec file. Perform a structured ambiguity & coverage scan using this taxonomy. For each category, mark status: Clear / Partial / Missing. Produce an internal coverage map used for prioritization (do not output raw map unless no questions will be asked). Functional Scope & Behavior: - Core user goals & success criteria @@ -122,7 +142,7 @@ Execution steps: - Clarification would not materially change implementation or validation strategy - Information is better deferred to planning phase (note internally) -3. Generate (internally) a prioritized queue of candidate clarification questions (maximum 5). Do NOT output them all at once. Apply these constraints: +4. Generate (internally) a prioritized queue of candidate clarification questions (maximum 5). Do NOT output them all at once. Apply these constraints: - Maximum of 5 total questions across the whole session. 
- Each question must be answerable with EITHER: - A short multiple‑choice selection (2–5 distinct, mutually exclusive options), OR @@ -133,7 +153,7 @@ Execution steps: - Favor clarifications that reduce downstream rework risk or prevent misaligned acceptance tests. - If more than 5 categories remain unresolved, select the top 5 by (Impact * Uncertainty) heuristic. -4. Sequential questioning loop (interactive): +5. Sequential questioning loop (interactive): - Present EXACTLY ONE question at a time. - For multiple‑choice questions: - **Analyze all options** and determine the **most suitable option** based on: @@ -169,11 +189,11 @@ Execution steps: - Never reveal future queued questions in advance. - If no valid questions exist at start, immediately report no critical ambiguities. -5. Integration after EACH accepted answer (incremental update approach): +6. Integration after EACH accepted answer (incremental update approach): - Maintain in-memory representation of the spec (loaded once at start) plus the raw file contents. - For the first integrated answer in this session: - Ensure a `## Clarifications` section exists (create it just after the highest-level contextual/overview section per the spec template if missing). - - Under it, create (if not present) a `### Session YYYY-MM-DD` subheading for today. + - Under it, create a new `### Session YYYY-MM-DD #N` subheading (increment N for each session, including multiple on the same date). - Append a bullet line immediately after acceptance: `- Q: → A: `. - Then immediately apply the clarification to the most appropriate section(s): - Functional ambiguity → Update or add a bullet in Functional Requirements. @@ -187,17 +207,17 @@ Execution steps: - Preserve formatting: do not reorder unrelated sections; keep heading hierarchy intact. - Keep each inserted clarification minimal and testable (avoid narrative drift). -6. Validation (performed after EACH write plus final pass): +7. 
Validation (performed after EACH write plus final pass): - Clarifications session contains exactly one bullet per accepted answer (no duplicates). - Total asked (accepted) questions ≤ 5. - Updated sections contain no lingering vague placeholders the new answer was meant to resolve. - No contradictory earlier statement remains (scan for now-invalid alternative choices removed). - - Markdown structure valid; only allowed new headings: `## Clarifications`, `### Session YYYY-MM-DD`. + - Markdown structure valid; only allowed new headings: `## Clarifications`, `### Session YYYY-MM-DD #N`. - Terminology consistency: same canonical term used across all updated sections. -7. Write the updated spec back to `FEATURE_SPEC`. +8. Write the updated spec back to `FEATURE_SPEC`. -8. Report completion (after questioning loop ends or early termination): +9. Report completion (after questioning loop ends or early termination): - Number of questions asked & answered. - Path to updated spec. - Sections touched (list names). diff --git a/templates/commands/plan.md b/templates/commands/plan.md index 04db94ffaa..0d226c9740 100644 --- a/templates/commands/plan.md +++ b/templates/commands/plan.md @@ -55,6 +55,24 @@ You **MUST** consider the user input before proceeding (if not empty). ``` - If no hooks are registered or `.specify/extensions.yml` does not exist, skip silently +**Validate cost fields before planning**: +- After loading the IMPL_PLAN template (Outline step 2), inspect the + **Technical Context** section for: + - `**AI Model Tier**` + - `**Estimated Token Budget**` +- If either field is absent OR still contains the literal placeholder text + (`[haiku | sonnet | opus]` or `[~N tokens / ~$X.XX]`), stop and emit: + + ``` + ERROR: Plan cannot proceed. The following Technical Context fields are + empty or contain unfilled placeholders: + - AI Model Tier + - Estimated Token Budget + Fill these fields before re-running `__SPECKIT_COMMAND_PLAN__`. 
+ ``` + +- Do NOT infer or auto-fill these values; they require an explicit human decision. + ## Outline 1. **Setup**: Run `{SCRIPT}` from repo root and parse JSON for FEATURE_SPEC, IMPL_PLAN, SPECS_DIR, BRANCH. For single quotes in args like "I'm Groot", use escape syntax: e.g 'I'\''m Groot' (or double-quote if possible: "I'm Groot"). diff --git a/templates/constitution-template.md b/templates/constitution-template.md index a4670ff469..f665d7eaea 100644 --- a/templates/constitution-template.md +++ b/templates/constitution-template.md @@ -28,6 +28,17 @@ [PRINCIPLE_5_DESCRIPTION] +### VI. Economic Efficiency + +Every AI-assisted workflow step must declare its model tier and token budget +before execution. Use the lowest-cost model tier (haiku) by default; justify +in writing any step that requires sonnet or opus. Fan-out operations must cap +parallel invocations to `max_parallel: 3` unless a higher limit is approved +in the plan. LLM spend against each feature is tracked against the approved +budget in the spec's Cost Allocation section; the feature is paused for +budget review if actual spend reaches 80% of approved before implementation +is complete. 
+ ## [SECTION_2_NAME] diff --git a/templates/plan-template.md b/templates/plan-template.md index ee57c35656..6344042621 100644 --- a/templates/plan-template.md +++ b/templates/plan-template.md @@ -25,7 +25,9 @@ **Project Type**: [e.g., library/cli/web-service/mobile-app/compiler/desktop-app or NEEDS CLARIFICATION] **Performance Goals**: [domain-specific, e.g., 1000 req/s, 10k lines/sec, 60 fps or NEEDS CLARIFICATION] **Constraints**: [domain-specific, e.g., <200ms p95, <100MB memory, offline-capable or NEEDS CLARIFICATION] -**Scale/Scope**: [domain-specific, e.g., 10k users, 1M LOC, 50 screens or NEEDS CLARIFICATION] +**Scale/Scope**: [domain-specific, e.g., 10k users, 1M LOC, 50 screens or NEEDS CLARIFICATION] +**AI Model Tier**: [haiku | sonnet | opus] — justify if not haiku +**Estimated Token Budget**: [~N tokens / ~$X.XX] ## Constitution Check diff --git a/templates/spec-template.md b/templates/spec-template.md index 4581e40529..a0c211d104 100644 --- a/templates/spec-template.md +++ b/templates/spec-template.md @@ -5,6 +5,17 @@ **Status**: Draft **Input**: User description: "$ARGUMENTS" +## Cost Allocation + +| Field | Value | +|-------|-------| +| **Team** | [team name] | +| **Cost Center** | [cost-center-id] | +| **Feature Priority** | [P1 / P2 / P3 — aligns with User Story priorities below] | +| **Approved LLM Budget (USD)** | [$X.XX] | +| **Actual LLM Spend (USD)** | [populated by Cost Tracker extension] | +| **Clarification Budget (rounds)** | [3] | + ## User Scenarios & Testing *(mandatory)*