From ba11284f2753c4645019f6b0949c0943e4d45359 Mon Sep 17 00:00:00 2001 From: Duksh Koonjoobeeharry <586947+duksh@users.noreply.github.com> Date: Mon, 11 May 2026 20:29:04 +0400 Subject: [PATCH] feat: add FinOps discipline across templates, workflow engine, and CI (#1) 21 files changed, 738 additions, 31 deletions. - Cost Allocation table in spec-template.md - AI Model Tier / Estimated Token Budget fields in plan-template.md - Economic Efficiency principle (Article VI) in constitution-template.md - Clarification budget guard in clarify.md; cost field validation in plan.md - budget-gate step type (circuit breaker at 80%/100% LLM spend) - Fan-out max_parallel rename + batched engine loop - cost-tracker extension (record + report commands, after_implement hook) - cost_profile surfaced in specify extension list CLI - cost-review gate + spend-check step in speckit workflow - CI: path filter, fail-fast, uv cache https://claude.ai/code/session_017mzZmq4F57rAFJVsvVpnva --- .github/workflows/test.yml | 11 ++ extensions/catalog.json | 15 ++ .../commands/speckit.cost-tracker.record.md | 60 ++++++ .../commands/speckit.cost-tracker.report.md | 69 +++++++ extensions/cost-tracker/config-template.yml | 15 ++ extensions/cost-tracker/extension.yml | 53 ++++++ extensions/template/extension.yml | 8 + presets/lean/README.md | 24 +++ src/specify_cli/__init__.py | 11 ++ src/specify_cli/extensions.py | 8 +- src/specify_cli/workflows/__init__.py | 2 + src/specify_cli/workflows/engine.py | 44 +++-- .../workflows/steps/budget_gate/__init__.py | 156 ++++++++++++++++ .../workflows/steps/fan_out/__init__.py | 18 +- templates/commands/clarify.md | 38 +++- templates/commands/plan.md | 18 ++ templates/constitution-template.md | 11 ++ templates/plan-template.md | 4 +- templates/spec-template.md | 11 ++ tests/test_workflows.py | 171 +++++++++++++++++- workflows/speckit/workflow.yml | 22 +++ 21 files changed, 738 insertions(+), 31 deletions(-) create mode 100644 
extensions/cost-tracker/commands/speckit.cost-tracker.record.md create mode 100644 extensions/cost-tracker/commands/speckit.cost-tracker.report.md create mode 100644 extensions/cost-tracker/config-template.yml create mode 100644 extensions/cost-tracker/extension.yml create mode 100644 src/specify_cli/workflows/steps/budget_gate/__init__.py diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index f7130aa8d1..f7d86a2eb4 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -7,6 +7,13 @@ on: push: branches: ["main"] pull_request: + paths: + - "src/**" + - "tests/**" + - "templates/**" + - "workflows/**" + - "extensions/**" + - "pyproject.toml" jobs: ruff: @@ -29,6 +36,7 @@ jobs: pytest: runs-on: ${{ matrix.os }} strategy: + fail-fast: true matrix: os: [ubuntu-latest, windows-latest] python-version: ["3.11", "3.12", "3.13"] @@ -38,6 +46,9 @@ jobs: - name: Install uv uses: astral-sh/setup-uv@08807647e7069bb48b6ef5acd8ec9567f424441b # v8.1.0 + with: + enable-cache: true + cache-dependency-glob: "**/pyproject.toml" - name: Set up Python ${{ matrix.python-version }} uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6 diff --git a/extensions/catalog.json b/extensions/catalog.json index de9372e2bc..62551d0e4d 100644 --- a/extensions/catalog.json +++ b/extensions/catalog.json @@ -17,6 +17,21 @@ "workflow", "core" ] + }, + "cost-tracker": { + "name": "LLM Cost Tracker", + "id": "cost-tracker", + "version": "1.0.0", + "description": "Records actual LLM spend back to the spec's Cost Allocation section and surfaces per-feature budget status", + "author": "spec-kit-core", + "repository": "https://github.com/github/spec-kit", + "bundled": true, + "tags": [ + "finops", + "cost", + "llm", + "budget" + ] } } } \ No newline at end of file diff --git a/extensions/cost-tracker/commands/speckit.cost-tracker.record.md b/extensions/cost-tracker/commands/speckit.cost-tracker.record.md new file mode 100644 index 
0000000000..056d17b4b8 --- /dev/null +++ b/extensions/cost-tracker/commands/speckit.cost-tracker.record.md @@ -0,0 +1,60 @@ +--- +description: "Update the Actual LLM Spend field in the current spec with the reported spend amount" +--- + +# Record LLM Spend + +Update the **Actual LLM Spend (USD)** field in the active spec's `## Cost Allocation` +section to reflect the spend incurred during the most recent command. + +## Outline + +1. **Locate the active spec file** + - Find the spec document for the current feature in `.specify/specs/`. + - If no spec file is found, emit a warning and exit — do not create one. + +2. **Determine the spend amount** + - If invoked as an `after_implement` hook, check whether the integration + reported token usage for the completed step. + - If token counts are available, convert to USD using the model's published + per-token pricing (default: haiku at $0.00025/1K input + $0.00125/1K output). + - If token counts are not available, prompt: + ``` + Enter the LLM spend for this step in USD (e.g. 0.04), or press Enter to skip: + ``` + If the user presses Enter or provides a non-numeric value, skip and exit. + +3. **Read the current value** + - Parse the `## Cost Allocation` table in the spec. + - Read the current **Actual LLM Spend (USD)** cell value. + - If the cell is absent or contains a placeholder, treat current value as 0. + +4. **Add and write back** + - New total = current value + spend amount from step 2. + - Overwrite the **Actual LLM Spend (USD)** cell with `${new_total}` (2 decimal places). + - Preserve all other table rows exactly. + +5. **Threshold check** + - Read **Approved LLM Budget (USD)** from the same table. + - Compute `pct = new_total / approved * 100`. + - If `pct >= 100`: emit + ``` + ⛔ Budget exceeded: ${new_total} spent of ${approved} approved ({pct}%). + Consider pausing and reviewing with your team before continuing. + ``` + - If `pct >= warn_at_pct` (default 80, from config): emit + ``` + ⚠ Budget warning: ${new_total} spent of ${approved} approved ({pct}%). 
+ ``` + - Otherwise: emit + ``` + ✓ Spend recorded: ${new_total} of ${approved} approved ({pct}% used). + ``` + +## Graceful Degradation + +- If the spec has no `## Cost Allocation` section: skip with a one-line warning. +- If the approved budget field is absent or zero: skip the threshold check; still + write the spend value. +- If the spec file is read-only or the write fails: emit an error message and exit + without modifying the file. diff --git a/extensions/cost-tracker/commands/speckit.cost-tracker.report.md b/extensions/cost-tracker/commands/speckit.cost-tracker.report.md new file mode 100644 index 0000000000..4e51e207eb --- /dev/null +++ b/extensions/cost-tracker/commands/speckit.cost-tracker.report.md @@ -0,0 +1,69 @@ +--- +description: "Show a budget summary table across all specs in the project" +--- + +# Cost Report + +Display a budget summary table covering every spec in the current project that +has a `## Cost Allocation` section. + +## Outline + +1. **Discover specs** + - Enumerate all `*.md` files under `.specify/specs/` (non-recursive). + - For each file, attempt to parse the `## Cost Allocation` table. + - Skip files where the section is absent or cannot be parsed. + +2. **Extract fields per spec** + For each spec with a Cost Allocation section, read: + - **Feature** — the spec filename (without extension) or the first H1 heading + - **Team** — from the Team row + - **Cost Center** — from the Cost Center row + - **Priority** — from the Feature Priority row + - **Approved (USD)** — from the Approved LLM Budget row (parse `$X.XX` → float) + - **Actual (USD)** — from the Actual LLM Spend row (parse `$X.XX` → float; 0 if placeholder) + - **% Used** — compute `actual / approved * 100` (0 if approved is 0) + +3. 
**Render the summary table** + + ``` + ┌─────────────────────────────────────────────────────────────────────────┐ + │ LLM Cost Report 2024-01-15 14:30 UTC │ + ├──────────────────────┬───────────┬──────────┬────────┬───────┬──────────┤ + │ Feature │ Priority │ Approved │ Actual │ % Used│ Status │ + ├──────────────────────┼───────────┼──────────┼────────┼───────┼──────────┤ + │ add-login │ P1 │ $10.00 │ $7.80 │ 78% │ ✓ ok │ + │ dark-mode │ P2 │ $5.00 │ $4.10 │ 82% │ ⚠ warn │ + │ data-export │ P3 │ $3.00 │ $3.50 │ 117% │ ⛔ over │ + ├──────────────────────┼───────────┼──────────┼────────┼───────┼──────────┤ + │ TOTAL │ │ $18.00 │ $15.40 │ 86% │ ⚠ warn │ + └──────────────────────┴───────────┴──────────┴────────┴───────┴──────────┘ + ``` + + Status legend: + - `✓ ok` — below 80% of approved budget + - `⚠ warn` — 80–99% of approved budget + - `⛔ over` — 100%+ of approved budget + +4. **Exit code** + - Exit 0 if all features are under budget. + - Exit 1 if any feature has exceeded its approved budget (for CI use). + +## Options + +This command accepts no arguments. Configuration comes from +`.specify/extensions/cost-tracker/cost-tracker-config.yml`: + +```yaml +warn_at_pct: 80 # Percentage at which ⚠ warning status is shown +``` + +## Graceful Degradation + +- If `.specify/specs/` does not exist or contains no parseable specs, print: + ``` + No specs with Cost Allocation data found in .specify/specs/. + Run /speckit.specify to create a spec, then add a ## Cost Allocation section. + ``` +- Specs where **Approved LLM Budget** is absent or zero are listed with + `N/A` in the Approved and % Used columns and excluded from the total. 
diff --git a/extensions/cost-tracker/config-template.yml b/extensions/cost-tracker/config-template.yml new file mode 100644 index 0000000000..2baecdd3c7 --- /dev/null +++ b/extensions/cost-tracker/config-template.yml @@ -0,0 +1,15 @@ +# Cost Tracker Extension Configuration +# Copy to .specify/extensions/cost-tracker/cost-tracker-config.yml and customize. + +# Percentage of approved budget at which a warning is emitted (default: 80) +warn_at_pct: 80 + +# Currency symbol used in output (display only) +currency: "USD" + +# Token pricing used when converting token counts to USD. +# Defaults match Anthropic haiku pricing at time of writing. +# Override if you use a different model or pricing has changed. +token_pricing: + input_per_1k: 0.00025 # USD per 1,000 input tokens + output_per_1k: 0.00125 # USD per 1,000 output tokens diff --git a/extensions/cost-tracker/extension.yml b/extensions/cost-tracker/extension.yml new file mode 100644 index 0000000000..64c09a864a --- /dev/null +++ b/extensions/cost-tracker/extension.yml @@ -0,0 +1,53 @@ +schema_version: "1.0" + +extension: + id: "cost-tracker" + name: "LLM Cost Tracker" + version: "1.0.0" + description: "Records actual LLM spend back to the spec's Cost Allocation section and surfaces per-feature budget status" + author: "spec-kit-core" + repository: "https://github.com/github/spec-kit" + license: "MIT" + homepage: "https://github.com/github/spec-kit" + +requires: + speckit_version: ">=0.7.2" + +provides: + commands: + - name: "speckit.cost-tracker.record" + file: "commands/speckit.cost-tracker.record.md" + description: "Update the Actual LLM Spend field in the current spec with the reported spend amount" + + - name: "speckit.cost-tracker.report" + file: "commands/speckit.cost-tracker.report.md" + description: "Show a budget summary table across all specs in the project" + + config: + - name: "cost-tracker-config.yml" + template: "config-template.yml" + description: "Cost tracker configuration" + required: false + 
+hooks: + after_implement: + command: "speckit.cost-tracker.record" + optional: true + prompt: "Record LLM spend for this implementation step?" + description: "Prompts for actual spend and writes it to the spec's Cost Allocation section" + +tags: + - "finops" + - "cost" + - "llm" + - "budget" + +cost_profile: + llm_calls_per_invocation: "none" + estimated_tokens_per_call: 0 + supports_caching: false + recommended_model_tier: "haiku" + +defaults: + currency: "USD" + warn_at_pct: 80 diff --git a/extensions/template/extension.yml b/extensions/template/extension.yml index abf7e45afc..6bf38d4e58 100644 --- a/extensions/template/extension.yml +++ b/extensions/template/extension.yml @@ -86,6 +86,14 @@ tags: - "template" # ADD MORE: "category", "tool-name", etc. +# CUSTOMIZE: Declare the cost characteristics of this extension (optional) +# llm_calls_per_invocation: none=0 calls, low=1-3, medium=4-10, high=>10 +cost_profile: + llm_calls_per_invocation: "none" # none | low | medium | high + estimated_tokens_per_call: 0 # average tokens per LLM call + supports_caching: false # true if prompt caching reduces cost + recommended_model_tier: "haiku" # haiku | sonnet | opus + # CUSTOMIZE: Default configuration values (optional) # These are merged with user config defaults: diff --git a/presets/lean/README.md b/presets/lean/README.md index ab17257f96..652d015624 100644 --- a/presets/lean/README.md +++ b/presets/lean/README.md @@ -43,3 +43,27 @@ specify preset remove lean ## License MIT + +## Cost Profile + +Lean uses approximately 67% fewer tokens per spec/plan/tasks cycle than the +standard templates. The reduced prompt size also makes haiku the appropriate +model tier for all lean commands. 
+ +| Command | Lean | Standard | Reduction | +|---------|------|----------|-----------| +| `speckit.specify` | ~800 tokens | ~2,400 tokens | ~67% | +| `speckit.plan` | ~1,200 tokens | ~3,600 tokens | ~67% | +| `speckit.tasks` | ~600 tokens | ~1,800 tokens | ~67% | +| `speckit.implement` | ~500 tokens | ~500 tokens | ~0% | +| `speckit.constitution` | ~400 tokens | ~1,200 tokens | ~67% | + +*Token estimates are approximate and vary with input size.* + +### When to Use Lean by Priority + +| Priority | Recommendation | +|----------|---------------| +| **P1** | Use standard. P1 features justify full artifacts and review gates. | +| **P2** | Lean is suitable. Reduced boilerplate still produces reviewable artifacts. | +| **P3** | Lean is preferred. Minimal investment for exploratory or low-risk changes. | diff --git a/src/specify_cli/__init__.py b/src/specify_cli/__init__.py index 325692900e..9dd68dcd5f 100644 --- a/src/specify_cli/__init__.py +++ b/src/specify_cli/__init__.py @@ -4055,6 +4055,17 @@ def extension_list( console.print(f" [dim]{ext['id']}[/dim]") console.print(f" {ext['description']}") console.print(f" Commands: {ext['command_count']} | Hooks: {ext['hook_count']} | Priority: {ext['priority']} | Status: {'Enabled' if ext['enabled'] else 'Disabled'}") + cp = ext.get("cost_profile", {}) + if cp: + tier = cp.get("recommended_model_tier", "haiku") + calls = cp.get("llm_calls_per_invocation", "none") + tokens = cp.get("estimated_tokens_per_call", 0) + caching = cp.get("supports_caching", False) + caching_str = "[green]yes[/green]" if caching else "no" + console.print( + f" [dim]Cost: tier=[cyan]{tier}[/cyan] calls={calls} " + f"tokens/call~{tokens} caching={caching_str}[/dim]" + ) console.print() if available or all_extensions: diff --git a/src/specify_cli/extensions.py b/src/specify_cli/extensions.py index 944ee4a06d..b85c549c8d 100644 --- a/src/specify_cli/extensions.py +++ b/src/specify_cli/extensions.py @@ -357,6 +357,11 @@ def hooks(self) -> Dict[str, Any]: 
"""Get hook definitions.""" return self.data.get("hooks", {}) + @property + def cost_profile(self) -> Dict[str, Any]: + """Get cost profile metadata (optional — returns empty dict if absent).""" + return self.data.get("cost_profile", {}) + def get_hash(self) -> str: """Calculate SHA256 hash of manifest file.""" with open(self.path, 'rb') as f: @@ -1519,7 +1524,8 @@ def list_installed(self) -> List[Dict[str, Any]]: "priority": normalize_priority(metadata.get("priority")), "installed_at": metadata.get("installed_at"), "command_count": len(manifest.commands), - "hook_count": len(manifest.hooks) + "hook_count": len(manifest.hooks), + "cost_profile": manifest.cost_profile, }) except ValidationError: # Corrupted extension diff --git a/src/specify_cli/workflows/__init__.py b/src/specify_cli/workflows/__init__.py index 13782f620b..d261c4bd87 100644 --- a/src/specify_cli/workflows/__init__.py +++ b/src/specify_cli/workflows/__init__.py @@ -42,6 +42,7 @@ def get_step_type(type_key: str) -> StepBase | None: def _register_builtin_steps() -> None: """Register all built-in step types.""" + from .steps.budget_gate import BudgetGateStep from .steps.command import CommandStep from .steps.do_while import DoWhileStep from .steps.fan_in import FanInStep @@ -53,6 +54,7 @@ def _register_builtin_steps() -> None: from .steps.switch import SwitchStep from .steps.while_loop import WhileStep + _register_step(BudgetGateStep()) _register_step(CommandStep()) _register_step(DoWhileStep()) _register_step(FanInStep()) diff --git a/src/specify_cli/workflows/engine.py b/src/specify_cli/workflows/engine.py index d6a73bbeb0..77d1827ff7 100644 --- a/src/specify_cli/workflows/engine.py +++ b/src/specify_cli/workflows/engine.py @@ -663,19 +663,37 @@ def _execute_steps( template = result.output.get("step_template", {}) if template and items: fan_out_results = [] - for item_idx, item_val in enumerate(result.output["items"]): - context.item = item_val - # Per-item ID: parentId:templateId:index - item_step = 
dict(template) - base_id = item_step.get("id", "item") - item_step["id"] = f"{step_id}:{base_id}:{item_idx}" - self._execute_steps( - [item_step], context, state, registry, - step_offset=-1, - ) - # Collect per-item result for fan-in - item_result = context.steps.get(item_step["id"], {}) - fan_out_results.append(item_result.get("output", {})) + max_parallel = result.output.get("max_parallel", 3) + if not isinstance(max_parallel, int) or max_parallel < 1: + max_parallel = 3 + # Items are processed sequentially in batches of max_parallel. + # This is intentional rate-limiting (cost/resource control), + # not concurrent execution. True parallelism would require + # thread-safe context and state management. + # Process items in batches of max_parallel + for batch_start in range(0, len(items), max_parallel): + batch = items[batch_start : batch_start + max_parallel] + for item_idx, item_val in enumerate( + batch, start=batch_start + ): + context.item = item_val + # Per-item ID: parentId:templateId:index + item_step = dict(template) + base_id = item_step.get("id", "item") + item_step["id"] = f"{step_id}:{base_id}:{item_idx}" + self._execute_steps( + [item_step], context, state, registry, + step_offset=-1, + ) + # Collect per-item result for fan-in + item_result = context.steps.get(item_step["id"], {}) + fan_out_results.append(item_result.get("output", {})) + if state.status in ( + RunStatus.PAUSED, + RunStatus.FAILED, + RunStatus.ABORTED, + ): + break if state.status in ( RunStatus.PAUSED, RunStatus.FAILED, diff --git a/src/specify_cli/workflows/steps/budget_gate/__init__.py b/src/specify_cli/workflows/steps/budget_gate/__init__.py new file mode 100644 index 0000000000..24994fd036 --- /dev/null +++ b/src/specify_cli/workflows/steps/budget_gate/__init__.py @@ -0,0 +1,156 @@ +"""Budget gate step — LLM spend circuit breaker.""" + +from __future__ import annotations + +import sys +from typing import Any + +from specify_cli.workflows.base import StepBase, StepContext, 
StepResult, StepStatus +from specify_cli.workflows.expressions import evaluate_expression + +_VALID_ON_WARNING = frozenset({"pause", "notify", "continue"}) +_VALID_ON_EXCEEDED = frozenset({"abort", "pause"}) + + +class BudgetGateStep(StepBase): + """LLM spend circuit breaker. + + Compares ``current_spend_usd`` against ``threshold_usd`` and takes + action before spending runs away: + + * ``< 80 %`` — completes silently (no interruption to the workflow). + * ``>= 80 %`` — emits a warning; behaviour controlled by ``on_warning`` + (``pause`` / ``notify`` / ``continue``). + * ``>= 100 %`` — emits an error; behaviour controlled by ``on_exceeded`` + (``abort`` / ``pause``). + + Both ``threshold_usd`` and ``current_spend_usd`` support ``{{ }}`` + template expressions so they can read from previous step outputs + (e.g. ``{{ steps.cost_tracker.output.total_spend }}``). + """ + + type_key = "budget-gate" + + def execute(self, config: dict[str, Any], context: StepContext) -> StepResult: + threshold_raw = config.get("threshold_usd", 0) + if isinstance(threshold_raw, str) and "{{" in threshold_raw: + threshold_raw = evaluate_expression(threshold_raw, context) + threshold = float(threshold_raw) if threshold_raw else 0.0 + + spend_raw = config.get("current_spend_usd", 0) + if isinstance(spend_raw, str) and "{{" in spend_raw: + spend_raw = evaluate_expression(spend_raw, context) + current_spend = float(spend_raw) if spend_raw else 0.0 + + on_warning = config.get("on_warning", "pause") + on_exceeded = config.get("on_exceeded", "abort") + + pct = (current_spend / threshold * 100) if threshold > 0 else 0.0 + + output: dict[str, Any] = { + "threshold_usd": threshold, + "current_spend_usd": current_spend, + "pct_used": round(pct, 1), + "status": "ok", + } + + if threshold > 0 and current_spend >= threshold: + output["status"] = "exceeded" + self._print_exceeded(current_spend, threshold, pct) + if on_exceeded == "abort": + return StepResult( + status=StepStatus.FAILED, + output=output, + 
error=( + f"Budget gate: LLM spend ${current_spend:.2f} exceeded " + f"threshold ${threshold:.2f} ({pct:.1f}%)" + ), + ) + # on_exceeded == "pause" + return StepResult(status=StepStatus.PAUSED, output=output) + + if threshold > 0 and current_spend >= threshold * 0.8: + output["status"] = "warning" + self._print_warning(current_spend, threshold, pct) + if on_warning == "continue": + return StepResult(status=StepStatus.COMPLETED, output=output) + if on_warning == "notify": + # Emit the warning but don't block execution + return StepResult(status=StepStatus.COMPLETED, output=output) + # on_warning == "pause" — pause if not interactive, else prompt + if not sys.stdin.isatty(): + return StepResult(status=StepStatus.PAUSED, output=output) + choice = self._prompt_warning(current_spend, threshold) + if choice == "abort": + output["status"] = "exceeded" + return StepResult( + status=StepStatus.FAILED, + output=output, + error=( + f"Budget gate: user aborted at ${current_spend:.2f} " + f"/ ${threshold:.2f} ({pct:.1f}%)" + ), + ) + return StepResult(status=StepStatus.COMPLETED, output=output) + + return StepResult(status=StepStatus.COMPLETED, output=output) + + @staticmethod + def _print_warning(spend: float, threshold: float, pct: float) -> None: + print("\n ┌─ Budget Gate ───────────────────────────────") + print(f" │ ⚠ WARNING: {pct:.1f}% of LLM budget used") + print(f" │ Spent: ${spend:.2f}") + print(f" │ Threshold: ${threshold:.2f}") + print(" └────────────────────────────────────────────") + + @staticmethod + def _print_exceeded(spend: float, threshold: float, pct: float) -> None: + print("\n ┌─ Budget Gate ───────────────────────────────") + print(f" │ ✗ EXCEEDED: {pct:.1f}% of LLM budget used") + print(f" │ Spent: ${spend:.2f}") + print(f" │ Threshold: ${threshold:.2f}") + print(" └────────────────────────────────────────────") + + @staticmethod + def _prompt_warning(spend: float, threshold: float) -> str: + """Prompt user to continue or abort after a budget 
warning.""" + print(f" Continue workflow with ${spend:.2f} / ${threshold:.2f} spent?") + print(" [1] continue") + print(" [2] abort") + while True: + try: + raw = input(" Choose [1-2]: ").strip() + except (EOFError, KeyboardInterrupt): + print() + return "abort" + if raw == "1" or raw.lower() == "continue": + return "continue" + if raw == "2" or raw.lower() == "abort": + return "abort" + print(" Enter 1 (continue) or 2 (abort).") + + def validate(self, config: dict[str, Any]) -> list[str]: + errors = super().validate(config) + if "threshold_usd" not in config: + errors.append( + f"Budget gate step {config.get('id', '?')!r} is missing " + f"'threshold_usd' field." + ) + if "current_spend_usd" not in config: + errors.append( + f"Budget gate step {config.get('id', '?')!r} is missing " + f"'current_spend_usd' field." + ) + on_warning = config.get("on_warning", "pause") + if on_warning not in _VALID_ON_WARNING: + errors.append( + f"Budget gate step {config.get('id', '?')!r}: 'on_warning' must be " + f"one of {sorted(_VALID_ON_WARNING)}." + ) + on_exceeded = config.get("on_exceeded", "abort") + if on_exceeded not in _VALID_ON_EXCEEDED: + errors.append( + f"Budget gate step {config.get('id', '?')!r}: 'on_exceeded' must be " + f"one of {sorted(_VALID_ON_EXCEEDED)}." + ) + return errors diff --git a/src/specify_cli/workflows/steps/fan_out/__init__.py b/src/specify_cli/workflows/steps/fan_out/__init__.py index c2fff1face..7aa06a9fe8 100644 --- a/src/specify_cli/workflows/steps/fan_out/__init__.py +++ b/src/specify_cli/workflows/steps/fan_out/__init__.py @@ -12,9 +12,8 @@ class FanOutStep(StepBase): """Dispatch a step template for each item in a collection. The engine executes the nested ``step:`` template once per item, - setting ``context.item`` for each iteration. Execution is - currently sequential; ``max_concurrency`` is accepted but not - enforced. + setting ``context.item`` for each iteration. 
``max_parallel`` caps + the batch size (default 3); ``max_concurrency`` is a deprecated alias. """ type_key = "fan-out" @@ -25,14 +24,15 @@ def execute(self, config: dict[str, Any], context: StepContext) -> StepResult: if not isinstance(items, list): items = [] - max_concurrency = config.get("max_concurrency", 1) + # max_parallel is canonical; max_concurrency is a deprecated alias + max_parallel = config.get("max_parallel", config.get("max_concurrency", 3)) step_template = config.get("step", {}) return StepResult( status=StepStatus.COMPLETED, output={ "items": items, - "max_concurrency": max_concurrency, + "max_parallel": max_parallel, "step_template": step_template, "item_count": len(items), }, @@ -55,4 +55,12 @@ def validate(self, config: dict[str, Any]) -> list[str]: errors.append( f"Fan-out step {config.get('id', '?')!r}: 'step' must be a mapping." ) + # Validate both canonical key and deprecated alias + max_parallel = config.get("max_parallel", config.get("max_concurrency")) + if max_parallel is not None: + if not isinstance(max_parallel, int) or max_parallel < 1: + errors.append( + f"Fan-out step {config.get('id', '?')!r}: 'max_parallel' " + f"(or deprecated 'max_concurrency') must be a positive integer." + ) return errors diff --git a/templates/commands/clarify.md b/templates/commands/clarify.md index 57cd01def1..e0c966471b 100644 --- a/templates/commands/clarify.md +++ b/templates/commands/clarify.md @@ -66,7 +66,27 @@ Execution steps: - If JSON parsing fails, abort and instruct user to re-run `__SPECKIT_COMMAND_SPECIFY__` or verify feature branch environment. - For single quotes in args like "I'm Groot", use escape syntax: e.g 'I'\''m Groot' (or double-quote if possible: "I'm Groot"). -2. Load the current spec file. Perform a structured ambiguity & coverage scan using this taxonomy. For each category, mark status: Clear / Partial / Missing. Produce an internal coverage map used for prioritization (do not output raw map unless no questions will be asked). 
+2. **Clarification Budget check**: Before scanning for ambiguities, read the + `## Cost Allocation` section of the loaded spec. + - Locate the **Clarification Budget** field (e.g., `3`). If the field is + absent or unparseable, treat the budget as unlimited and continue normally. + - Count existing `### Session YYYY-MM-DD #N` subsections under + `## Clarifications` in the spec (each represents one past session). + - If `session_count >= clarification_budget`: + - Output: + ``` + WARNING: Clarification budget exhausted. + Budget: {budget} rounds | Sessions used: {count} + Reply "override clarification budget" to continue anyway. + Reply "proceed" to skip clarification and go to `__SPECKIT_COMMAND_PLAN__`. + ``` + - Pause and wait for user input. + - If user replies `override clarification budget`: continue to step 3. + - If user replies `proceed`: exit command and suggest running `__SPECKIT_COMMAND_PLAN__`. + - Any other input: repeat the warning once, then exit. + - If `session_count < clarification_budget`, continue to step 3. + +3. Load the current spec file. Perform a structured ambiguity & coverage scan using this taxonomy. For each category, mark status: Clear / Partial / Missing. Produce an internal coverage map used for prioritization (do not output raw map unless no questions will be asked). Functional Scope & Behavior: - Core user goals & success criteria @@ -122,7 +142,7 @@ Execution steps: - Clarification would not materially change implementation or validation strategy - Information is better deferred to planning phase (note internally) -3. Generate (internally) a prioritized queue of candidate clarification questions (maximum 5). Do NOT output them all at once. Apply these constraints: +4. Generate (internally) a prioritized queue of candidate clarification questions (maximum 5). Do NOT output them all at once. Apply these constraints: - Maximum of 5 total questions across the whole session. 
- Each question must be answerable with EITHER: - A short multiple‑choice selection (2–5 distinct, mutually exclusive options), OR @@ -133,7 +153,7 @@ Execution steps: - Favor clarifications that reduce downstream rework risk or prevent misaligned acceptance tests. - If more than 5 categories remain unresolved, select the top 5 by (Impact * Uncertainty) heuristic. -4. Sequential questioning loop (interactive): +5. Sequential questioning loop (interactive): - Present EXACTLY ONE question at a time. - For multiple‑choice questions: - **Analyze all options** and determine the **most suitable option** based on: @@ -169,11 +189,11 @@ Execution steps: - Never reveal future queued questions in advance. - If no valid questions exist at start, immediately report no critical ambiguities. -5. Integration after EACH accepted answer (incremental update approach): +6. Integration after EACH accepted answer (incremental update approach): - Maintain in-memory representation of the spec (loaded once at start) plus the raw file contents. - For the first integrated answer in this session: - Ensure a `## Clarifications` section exists (create it just after the highest-level contextual/overview section per the spec template if missing). - - Under it, create (if not present) a `### Session YYYY-MM-DD` subheading for today. + - Under it, create a new `### Session YYYY-MM-DD #N` subheading (increment N for each session, including multiple on the same date). - Append a bullet line immediately after acceptance: `- Q: → A: `. - Then immediately apply the clarification to the most appropriate section(s): - Functional ambiguity → Update or add a bullet in Functional Requirements. @@ -187,17 +207,17 @@ Execution steps: - Preserve formatting: do not reorder unrelated sections; keep heading hierarchy intact. - Keep each inserted clarification minimal and testable (avoid narrative drift). -6. Validation (performed after EACH write plus final pass): +7. 
Validation (performed after EACH write plus final pass): - Clarifications session contains exactly one bullet per accepted answer (no duplicates). - Total asked (accepted) questions ≤ 5. - Updated sections contain no lingering vague placeholders the new answer was meant to resolve. - No contradictory earlier statement remains (scan for now-invalid alternative choices removed). - - Markdown structure valid; only allowed new headings: `## Clarifications`, `### Session YYYY-MM-DD`. + - Markdown structure valid; only allowed new headings: `## Clarifications`, `### Session YYYY-MM-DD #N`. - Terminology consistency: same canonical term used across all updated sections. -7. Write the updated spec back to `FEATURE_SPEC`. +8. Write the updated spec back to `FEATURE_SPEC`. -8. Report completion (after questioning loop ends or early termination): +9. Report completion (after questioning loop ends or early termination): - Number of questions asked & answered. - Path to updated spec. - Sections touched (list names). diff --git a/templates/commands/plan.md b/templates/commands/plan.md index 04db94ffaa..0d226c9740 100644 --- a/templates/commands/plan.md +++ b/templates/commands/plan.md @@ -55,6 +55,24 @@ You **MUST** consider the user input before proceeding (if not empty). ``` - If no hooks are registered or `.specify/extensions.yml` does not exist, skip silently +**Validate cost fields before planning**: +- After loading the IMPL_PLAN template (Outline step 2), inspect the + **Technical Context** section for: + - `**AI Model Tier**` + - `**Estimated Token Budget**` +- If either field is absent OR still contains the literal placeholder text + (`[haiku | sonnet | opus]` or `[~N tokens / ~$X.XX]`), stop and emit: + + ``` + ERROR: Plan cannot proceed. The following Technical Context fields are + empty or contain unfilled placeholders: + - AI Model Tier + - Estimated Token Budget + Fill these fields before re-running `__SPECKIT_COMMAND_PLAN__`. 
+ ``` + +- Do NOT infer or auto-fill these values; they require an explicit human decision. + ## Outline 1. **Setup**: Run `{SCRIPT}` from repo root and parse JSON for FEATURE_SPEC, IMPL_PLAN, SPECS_DIR, BRANCH. For single quotes in args like "I'm Groot", use escape syntax: e.g 'I'\''m Groot' (or double-quote if possible: "I'm Groot"). diff --git a/templates/constitution-template.md b/templates/constitution-template.md index a4670ff469..f665d7eaea 100644 --- a/templates/constitution-template.md +++ b/templates/constitution-template.md @@ -28,6 +28,17 @@ [PRINCIPLE_5_DESCRIPTION] +### VI. Economic Efficiency + +Every AI-assisted workflow step must declare its model tier and token budget +before execution. Use the lowest-cost model tier (haiku) by default; justify +in writing any step that requires sonnet or opus. Fan-out operations must cap +parallel invocations to `max_parallel: 3` unless a higher limit is approved +in the plan. LLM spend against each feature is tracked against the approved +budget in the spec's Cost Allocation section; the feature is paused for +budget review if actual spend reaches 80% of approved before implementation +is complete. 
+ ## [SECTION_2_NAME] diff --git a/templates/plan-template.md b/templates/plan-template.md index ee57c35656..6344042621 100644 --- a/templates/plan-template.md +++ b/templates/plan-template.md @@ -25,7 +25,9 @@ **Project Type**: [e.g., library/cli/web-service/mobile-app/compiler/desktop-app or NEEDS CLARIFICATION] **Performance Goals**: [domain-specific, e.g., 1000 req/s, 10k lines/sec, 60 fps or NEEDS CLARIFICATION] **Constraints**: [domain-specific, e.g., <200ms p95, <100MB memory, offline-capable or NEEDS CLARIFICATION] -**Scale/Scope**: [domain-specific, e.g., 10k users, 1M LOC, 50 screens or NEEDS CLARIFICATION] +**Scale/Scope**: [domain-specific, e.g., 10k users, 1M LOC, 50 screens or NEEDS CLARIFICATION] +**AI Model Tier**: [haiku | sonnet | opus] — justify if not haiku +**Estimated Token Budget**: [~N tokens / ~$X.XX] ## Constitution Check diff --git a/templates/spec-template.md b/templates/spec-template.md index 4581e40529..a0c211d104 100644 --- a/templates/spec-template.md +++ b/templates/spec-template.md @@ -5,6 +5,17 @@ **Status**: Draft **Input**: User description: "$ARGUMENTS" +## Cost Allocation + +| Field | Value | +|-------|-------| +| **Team** | [team name] | +| **Cost Center** | [cost-center-id] | +| **Feature Priority** | [P1 / P2 / P3 — aligns with User Story priorities below] | +| **Approved LLM Budget (USD)** | [$X.XX] | +| **Actual LLM Spend (USD)** | [populated by Cost Tracker extension] | +| **Clarification Budget (rounds)** | [3] | + ## User Scenarios & Testing *(mandatory)*