Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 5 additions & 5 deletions CLAUDE.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ SHIP: cf pr create → cf pr merge
LOOP: Glitch → cf proof capture → New REQ → Enforced forever
```

**Status: CLI ✅ | Server ✅ | ReAct agent ✅ | Web UI ✅ | Agent adapters ✅ | Multi-provider LLM ✅ | Next: Phase 3.5B** — See `docs/PRODUCT_ROADMAP.md`.
**Status: CLI ✅ | Server ✅ | ReAct agent ✅ | Web UI ✅ | Agent adapters ✅ | Multi-provider LLM ✅ | Next: Phase 3.5C** — See `docs/PRODUCT_ROADMAP.md`.

If you are an agent working in this repo: **do not improvise architecture**. Follow the documents listed below.

Expand All @@ -34,11 +34,11 @@ If you are an agent working in this repo: **do not improvise architecture**. Fol

**Rule 0:** If a change does not directly support the Think → Build → Prove → Ship pipeline, do not implement it.

### Current Focus: Phase 3.5B
### Current Focus: Phase 3.5C

**Run quality gates from the web UI** — `POST /api/v2/proof/run` backend is ready; the proof page needs a [Run Gates] button, gate progress view, per-gate evidence display, and run history panel.
**Phase 3.5B is complete** — `[Run Gates]` button, live gate progress, per-gate evidence display (`GateEvidencePanel`), and run history panel (`RunHistoryPanel`) are all shipped. New backend endpoints: `GET /api/v2/proof/runs` and `GET /api/v2/proof/runs/{run_id}/evidence`.

After that, in order:
Next, in order:
- **3.5C**: Glitch capture web UI
- **4A**: PR status tracking + PROOF9 merge gate
- **4B**: Post-merge glitch capture loop
Expand Down Expand Up @@ -90,7 +90,7 @@ Shipped pages: `/`, `/prd`, `/tasks`, `/execution`, `/execution/[taskId]`, `/blo
Testing: `cd web-ui && npm test` must pass; `npm run build` must succeed. The `frontend-tests` CI job enforces this on every PR.

### What's implemented
Full feature list in `docs/PRODUCT_ROADMAP.md`. Key capabilities: ReAct agent execution, batch execution (serial/parallel/auto), task dependencies, stall detection, self-correction, GitHub PR workflow, SSE streaming, API auth, rate limiting, OpenAPI docs, multi-provider LLM (Anthropic/OpenAI-compatible), agent adapters (ClaudeCode/Codex/OpenCode/Kilocode), worktree isolation, E2B cloud execution, interactive agent sessions (WebSocket chat + XTerm.js terminal), PROOF9 quality system.
Full feature list in `docs/PRODUCT_ROADMAP.md`. Key capabilities: ReAct agent execution, batch execution (serial/parallel/auto), task dependencies, stall detection, self-correction, GitHub PR workflow, SSE streaming, API auth, rate limiting, OpenAPI docs, multi-provider LLM (Anthropic/OpenAI-compatible), agent adapters (ClaudeCode/Codex/OpenCode/Kilocode), worktree isolation, E2B cloud execution, interactive agent sessions (WebSocket chat + XTerm.js terminal), PROOF9 quality system (gate runs, per-gate evidence, run history).

---

Expand Down
127 changes: 123 additions & 4 deletions codeframe/core/proof/ledger.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
Gate,
GlitchType,
Obligation,
ProofRun,
ReqStatus,
Requirement,
RequirementScope,
Expand Down Expand Up @@ -70,6 +71,19 @@ def init_proof_tables(workspace: Workspace) -> None:
)
""")

cursor.execute("""
CREATE TABLE IF NOT EXISTS proof_runs (
run_id TEXT NOT NULL,
workspace_id TEXT NOT NULL,
started_at TEXT NOT NULL,
completed_at TEXT,
triggered_by TEXT NOT NULL DEFAULT 'human',
overall_passed INTEGER NOT NULL DEFAULT 0,
duration_ms INTEGER,
PRIMARY KEY (run_id, workspace_id)
)
""")

conn.commit()
conn.close()

Expand All @@ -81,11 +95,15 @@ def _ensure_tables(workspace: Workspace) -> None:
cursor.execute(
"SELECT name FROM sqlite_master WHERE type='table' AND name='proof_requirements'"
)
if not cursor.fetchone():
conn.close()
missing = not cursor.fetchone()
if not missing:
cursor.execute(
"SELECT name FROM sqlite_master WHERE type='table' AND name='proof_runs'"
)
missing = not cursor.fetchone()
conn.close()
if missing:
init_proof_tables(workspace)
else:
conn.close()


# --- Serialization helpers ---
Expand Down Expand Up @@ -341,6 +359,107 @@ def waive_requirement(
return get_requirement(workspace, req_id)


def save_run(workspace: Workspace, run: ProofRun) -> None:
    """Insert or replace a proof run record.

    Uses ``INSERT OR REPLACE``, so saving a run that conflicts with an
    existing row's key overwrites that row instead of raising.

    Args:
        workspace: Workspace whose database receives the record. Its ``id``
            is what gets written to the ``workspace_id`` column
            (``run.workspace_id`` is not read here).
        run: The run to persist. ``started_at`` / ``completed_at`` are stored
            as ISO-8601 text (``completed_at`` may be ``None``) and
            ``overall_passed`` is stored as an integer 0/1.
    """
    _ensure_tables(workspace)
    conn = get_db_connection(workspace)
    try:
        cursor = conn.cursor()
        cursor.execute(
            """INSERT OR REPLACE INTO proof_runs
               (run_id, workspace_id, started_at, completed_at, triggered_by,
                overall_passed, duration_ms)
               VALUES (?, ?, ?, ?, ?, ?, ?)""",
            (
                run.run_id,
                workspace.id,
                run.started_at.isoformat(),
                run.completed_at.isoformat() if run.completed_at else None,
                run.triggered_by,
                int(run.overall_passed),
                run.duration_ms,
            ),
        )
        conn.commit()
    finally:
        # Always release the connection, even when the insert or commit
        # raises; previously an exception here leaked the open handle.
        conn.close()


def get_run(workspace: Workspace, run_id: str) -> Optional[ProofRun]:
    """Fetch a single proof run by run_id.

    Args:
        workspace: Workspace to look in; only rows whose ``workspace_id``
            equals ``workspace.id`` are considered.
        run_id: Identifier of the run to load.

    Returns:
        The matching ``ProofRun`` with timestamps parsed back from ISO-8601
        text, or ``None`` when no such row exists.
    """
    _ensure_tables(workspace)
    conn = get_db_connection(workspace)
    try:
        cursor = conn.cursor()
        cursor.execute(
            """SELECT run_id, workspace_id, started_at, completed_at, triggered_by,
                      overall_passed, duration_ms
               FROM proof_runs WHERE run_id = ? AND workspace_id = ?""",
            (run_id, workspace.id),
        )
        row = cursor.fetchone()
    finally:
        # Close on every path so a failing query cannot leak the connection.
        conn.close()

    if not row:
        return None

    # Column order below mirrors the SELECT list above.
    return ProofRun(
        run_id=row[0],
        workspace_id=row[1],
        started_at=datetime.fromisoformat(row[2]),
        # completed_at is nullable in the table; keep None as None.
        completed_at=datetime.fromisoformat(row[3]) if row[3] else None,
        triggered_by=row[4],
        # Stored as an integer; convert back to a real bool.
        overall_passed=bool(row[5]),
        duration_ms=row[6],
    )


def list_runs(workspace: Workspace, limit: int = 5) -> list[ProofRun]:
    """List the most recent proof runs for this workspace.

    Args:
        workspace: Workspace whose runs are listed (matched on
            ``workspace.id``).
        limit: Maximum number of runs to return; passed straight to the SQL
            ``LIMIT`` clause. NOTE(review): SQLite treats a negative LIMIT as
            "no upper bound" — confirm callers validate this value.

    Returns:
        Runs ordered by the stored ``started_at`` text, descending (newest
        first as long as all timestamps share one ISO-8601 format/offset).
        Empty list when the workspace has no runs.
    """
    _ensure_tables(workspace)
    conn = get_db_connection(workspace)
    try:
        cursor = conn.cursor()
        cursor.execute(
            """SELECT run_id, workspace_id, started_at, completed_at, triggered_by,
                      overall_passed, duration_ms
               FROM proof_runs WHERE workspace_id = ?
               ORDER BY started_at DESC LIMIT ?""",
            (workspace.id, limit),
        )
        rows = cursor.fetchall()
    finally:
        # Close on every path so a failing query cannot leak the connection.
        conn.close()

    # Column order below mirrors the SELECT list above.
    return [
        ProofRun(
            run_id=r[0],
            workspace_id=r[1],
            started_at=datetime.fromisoformat(r[2]),
            # completed_at is nullable in the table; keep None as None.
            completed_at=datetime.fromisoformat(r[3]) if r[3] else None,
            triggered_by=r[4],
            # Stored as an integer; convert back to a real bool.
            overall_passed=bool(r[5]),
            duration_ms=r[6],
        )
        for r in rows
    ]


def get_run_evidence(workspace: Workspace, run_id: str) -> list[Evidence]:
    """List all evidence records for a specific run_id.

    Args:
        workspace: Workspace to look in; only rows whose ``workspace_id``
            equals ``workspace.id`` are returned.
        run_id: Identifier of the run whose evidence is wanted.

    Returns:
        ``Evidence`` objects ordered by their stored ``timestamp`` text,
        ascending. Empty list when the run has no evidence rows.

    Raises:
        ValueError: If a stored ``gate`` value is not a valid ``Gate`` member
            or a stored timestamp is not valid ISO-8601 (raised by the
            ``Gate(...)`` / ``datetime.fromisoformat`` conversions).
    """
    _ensure_tables(workspace)
    conn = get_db_connection(workspace)
    try:
        cursor = conn.cursor()
        cursor.execute(
            """SELECT req_id, gate, satisfied, artifact_path, artifact_checksum,
                      timestamp, run_id
               FROM proof_evidence WHERE run_id = ? AND workspace_id = ?
               ORDER BY timestamp ASC""",
            (run_id, workspace.id),
        )
        rows = cursor.fetchall()
    finally:
        # Close on every path so a failing query cannot leak the connection.
        conn.close()

    # Column order below mirrors the SELECT list above.
    return [
        Evidence(
            req_id=r[0],
            gate=Gate(r[1]),
            # Stored as an integer; convert back to a real bool.
            satisfied=bool(r[2]),
            artifact_path=r[3],
            artifact_checksum=r[4],
            timestamp=datetime.fromisoformat(r[5]),
            run_id=r[6],
        )
        for r in rows
    ]


def check_expired_waivers(workspace: Workspace) -> list[Requirement]:
"""Find and revert expired waivers to open status."""
_ensure_tables(workspace)
Expand Down
13 changes: 13 additions & 0 deletions codeframe/core/proof/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,19 @@ class Evidence:
run_id: str


@dataclass
class ProofRun:
    """A single proof gate run record.

    Plain data holder describing one proof run for one workspace. All fields
    are required (no defaults), so every value must be supplied when
    constructing an instance.
    """

    run_id: str  # identifier of this run
    workspace_id: str  # id of the workspace the run belongs to
    started_at: datetime  # when the run started
    completed_at: Optional[datetime]  # when the run finished; None if not set
    triggered_by: str  # 'human' | 'auto'
    overall_passed: bool  # whether the run as a whole passed
    duration_ms: Optional[int]  # run duration in milliseconds; None if not set


@dataclass
class Requirement:
"""A proof obligation born from a glitch.
Expand Down
35 changes: 34 additions & 1 deletion codeframe/core/proof/runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,12 @@

import logging
import uuid
from datetime import datetime, timezone
from typing import Optional

from codeframe.core.proof import ledger
from codeframe.core.proof.evidence import attach_evidence
from codeframe.core.proof.models import Gate, ReqStatus
from codeframe.core.proof.models import Gate, ProofRun, ReqStatus
from codeframe.core.proof.scope import get_changed_scope, intersects
from codeframe.core.workspace import Workspace

Expand Down Expand Up @@ -68,6 +69,8 @@ def run_proof(
if not run_id:
run_id = str(uuid.uuid4())[:8]

started_at = datetime.now(timezone.utc)

# Expire any stale waivers
expired = ledger.check_expired_waivers(workspace)
if expired:
Expand All @@ -76,6 +79,19 @@ def run_proof(
# Get all open requirements
reqs = ledger.list_requirements(workspace, status=ReqStatus.OPEN)
if not reqs:
completed_at = datetime.now(timezone.utc)
ledger.save_run(
workspace,
ProofRun(
run_id=run_id,
workspace_id=workspace.id,
started_at=started_at,
completed_at=completed_at,
triggered_by="human",
overall_passed=True,
duration_ms=int((completed_at - started_at).total_seconds() * 1000),
),
Comment on lines +83 to +93
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

triggered_by is hardcoded to "human" in both persisted run paths.

If run_proof is ever invoked by automation, run history will be misattributed. Consider threading a triggered_by parameter through run_proof and using it in both ProofRun(...) constructions (Line 90 and Line 157).

Suggested fix
 def run_proof(
     workspace: Workspace,
     *,
     full: bool = False,
     gate_filter: Optional[Gate] = None,
     run_id: Optional[str] = None,
+    triggered_by: str = "human",
 ) -> dict[str, list[tuple[Gate, bool]]]:
@@
             ProofRun(
                 run_id=run_id,
                 workspace_id=workspace.id,
                 started_at=started_at,
                 completed_at=completed_at,
-                triggered_by="human",
+                triggered_by=triggered_by,
                 overall_passed=True,
                 duration_ms=int((completed_at - started_at).total_seconds() * 1000),
             ),
@@
         ProofRun(
             run_id=run_id,
             workspace_id=workspace.id,
             started_at=started_at,
             completed_at=completed_at,
-            triggered_by="human",
+            triggered_by=triggered_by,
             overall_passed=overall_passed,
             duration_ms=duration_ms,
         ),

Also applies to: 150-160

🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@codeframe/core/proof/runner.py` around lines 83 - 93, The run history is
misattributed because triggered_by is hardcoded to "human"; update the run_proof
function signature to accept a triggered_by parameter (default "human") and use
that parameter when constructing both ProofRun instances (the one created in
run_proof and the second one later around the other ProofRun(...) construction),
ensuring the passed-in value is threaded into ledger.save_run calls; also update
all call sites of run_proof to pass an appropriate triggered_by value where
automation may invoke it.

)
return {}

# Get changed scope (skip if running full)
Expand Down Expand Up @@ -127,4 +143,21 @@ def run_proof(
req.status = ReqStatus.SATISFIED
ledger.save_requirement(workspace, req)

completed_at = datetime.now(timezone.utc)
duration_ms = int((completed_at - started_at).total_seconds() * 1000)
executed = [passed for gate_results in results.values() for _, passed in gate_results]
overall_passed = all(executed) if executed else True
ledger.save_run(
workspace,
ProofRun(
run_id=run_id,
workspace_id=workspace.id,
started_at=started_at,
completed_at=completed_at,
triggered_by="human",
overall_passed=overall_passed,
duration_ms=duration_ms,
),
)

return results
Loading
Loading