From 695d3abe8d91e02e18a9351e9fc41234597389d3 Mon Sep 17 00:00:00 2001
From: Test User <test@example.com>
Date: Thu, 9 Apr 2026 11:14:28 -0700
Subject: [PATCH 1/3] feat(web-ui): per-gate evidence display and run history
 for PROOF9 (#567)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Backend: add proof_runs table to ledger (save_run, list_runs, get_run_evidence)
- Backend: populate proof_runs on every run in runner.py
- Backend: two new endpoints — GET /api/v2/proof/runs and GET /api/v2/proof/runs/{run_id}/evidence
- Frontend: ProofRunSummary, ProofEvidenceWithContent, ProofRunDetail types
- Frontend: GateEvidencePanel — expandable per-gate artifact text, 200-line truncation with toggle
- Frontend: RunHistoryPanel — last 5 runs table with pass/fail badge, click to select
- Frontend: wire RunHistoryPanel into /proof page, GateEvidencePanel into /proof/[req_id] page
- Tests: 14 new backend tests (47 total, all pass); 2 new frontend test files (703 total, all pass)
---
 codeframe/core/proof/ledger.py                | 101 ++++++++++++-
 codeframe/core/proof/models.py                |  13 ++
 codeframe/core/proof/runner.py                |  23 ++-
 codeframe/ui/routers/proof_v2.py              | 136 ++++++++++++++++++
 tests/ui/test_proof_v2.py                     | 114 +++++++++++++++
 .../proof/GateEvidencePanel.test.tsx          |  76 ++++++++++
 .../components/proof/ProofPage.test.tsx       |   3 +
 .../components/proof/RunHistoryPanel.test.tsx |  82 +++++++++++
 web-ui/src/app/proof/[req_id]/page.tsx        |  28 +++-
 web-ui/src/app/proof/page.tsx                 |   8 +-
 .../components/proof/GateEvidencePanel.tsx    |  91 ++++++++++++
 .../src/components/proof/RunHistoryPanel.tsx  | 113 +++++++++++++++
 web-ui/src/components/proof/index.ts          |   2 +
 web-ui/src/lib/api.ts                         |  17 +++
 web-ui/src/types/index.ts                     |  18 +++
 15 files changed, 817 insertions(+), 8 deletions(-)
 create mode 100644 web-ui/src/__tests__/components/proof/GateEvidencePanel.test.tsx
 create mode 100644 web-ui/src/__tests__/components/proof/RunHistoryPanel.test.tsx
 create mode 100644 web-ui/src/components/proof/GateEvidencePanel.tsx
 create mode 100644 web-ui/src/components/proof/RunHistoryPanel.tsx

diff --git a/codeframe/core/proof/ledger.py b/codeframe/core/proof/ledger.py
index 28cf5eac..3116bc80 100644
--- a/codeframe/core/proof/ledger.py
+++ b/codeframe/core/proof/ledger.py
@@ -14,6 +14,7 @@
     Gate,
     GlitchType,
     Obligation,
+    ProofRun,
     ReqStatus,
     Requirement,
     RequirementScope,
@@ -70,6 +71,19 @@ def init_proof_tables(workspace: Workspace) -> None:
         )
     """)
 
+    cursor.execute("""
+        CREATE TABLE IF NOT EXISTS proof_runs (
+            run_id TEXT NOT NULL,
+            workspace_id TEXT NOT NULL,
+            started_at TEXT NOT NULL,
+            completed_at TEXT,
+            triggered_by TEXT NOT NULL DEFAULT 'human',
+            overall_passed INTEGER NOT NULL DEFAULT 0,
+            duration_ms INTEGER,
+            PRIMARY KEY (run_id, workspace_id)
+        )
+    """)
+
     conn.commit()
     conn.close()
 
@@ -81,11 +95,15 @@ def _ensure_tables(workspace: Workspace) -> None:
     cursor.execute(
         "SELECT name FROM sqlite_master WHERE type='table' AND name='proof_requirements'"
     )
-    if not cursor.fetchone():
-        conn.close()
+    missing = not cursor.fetchone()
+    if not missing:
+        cursor.execute(
+            "SELECT name FROM sqlite_master WHERE type='table' AND name='proof_runs'"
+        )
+        missing = not cursor.fetchone()
+    conn.close()
+    if missing:
         init_proof_tables(workspace)
-    else:
-        conn.close()
 
 
 # --- Serialization helpers ---
@@ -341,6 +359,81 @@ def waive_requirement(
     return get_requirement(workspace, req_id)
 
 
+def save_run(workspace: Workspace, run: ProofRun) -> None:
+    """Insert or replace a proof run row, stored under workspace.id (not run.workspace_id)."""
+    _ensure_tables(workspace)
+    sql = """INSERT OR REPLACE INTO proof_runs
+           (run_id, workspace_id, started_at, completed_at, triggered_by,
+            overall_passed, duration_ms)
+           VALUES (?, ?, ?, ?, ?, ?, ?)"""
+    params = (  # built before connecting so a bad field cannot leak the connection
+        run.run_id, workspace.id,
+        run.started_at.isoformat(),
+        run.completed_at.isoformat() if run.completed_at else None,
+        run.triggered_by,
+        int(run.overall_passed),  # stored as INTEGER 0/1
+        run.duration_ms,
+    )
+    conn = get_db_connection(workspace)
+    try:
+        conn.cursor().execute(sql, params)
+        conn.commit()
+    finally:  # always release the connection, even if the write raises
+        conn.close()
+
+
+def list_runs(workspace: Workspace, limit: int = 5) -> list[ProofRun]:
+    """Return up to `limit` proof runs for this workspace, newest started_at first."""
+    _ensure_tables(workspace)
+    sql = """SELECT run_id, workspace_id, started_at, completed_at, triggered_by,
+                  overall_passed, duration_ms
+           FROM proof_runs WHERE workspace_id = ?
+           ORDER BY started_at DESC LIMIT ?"""
+    conn = get_db_connection(workspace)
+    try:
+        cursor = conn.cursor()
+        cursor.execute(sql, (workspace.id, limit))
+        rows = cursor.fetchall()
+    finally:  # always release the connection, even if the query raises
+        conn.close()
+    return [
+        ProofRun(
+            run_id=r[0],
+            workspace_id=r[1],
+            started_at=datetime.fromisoformat(r[2]),
+            completed_at=datetime.fromisoformat(r[3]) if r[3] else None,  # NULL -> None
+            triggered_by=r[4],
+            overall_passed=bool(r[5]),  # stored as INTEGER 0/1
+            duration_ms=r[6],
+        )
+        for r in rows
+    ]
+
+
+def get_run_evidence(workspace: Workspace, run_id: str) -> list[Evidence]:
+    """Return this workspace's evidence rows for run_id (all requirements), oldest first."""
+    _ensure_tables(workspace)
+    sql = """SELECT req_id, gate, satisfied, artifact_path, artifact_checksum,
+                  timestamp, run_id
+           FROM proof_evidence WHERE run_id = ? AND workspace_id = ?
+           ORDER BY timestamp ASC"""
+    conn = get_db_connection(workspace)
+    try:
+        cursor = conn.cursor()
+        cursor.execute(sql, (run_id, workspace.id))
+        rows = cursor.fetchall()
+    finally:  # always release the connection, even if the query raises
+        conn.close()
+    return [
+        Evidence(
+            req_id=r[0], gate=Gate(r[1]), satisfied=bool(r[2]),
+            artifact_path=r[3], artifact_checksum=r[4],
+            timestamp=datetime.fromisoformat(r[5]), run_id=r[6],
+        )
+        for r in rows
+    ]
+
+
 def check_expired_waivers(workspace: Workspace) -> list[Requirement]:
     """Find and revert expired waivers to open status."""
     _ensure_tables(workspace)
diff --git a/codeframe/core/proof/models.py b/codeframe/core/proof/models.py
index 49c4ffcc..4f3aa2c1 100644
--- a/codeframe/core/proof/models.py
+++ b/codeframe/core/proof/models.py
@@ -114,6 +114,19 @@ class Evidence:
     run_id: str
 
 
+@dataclass
+class ProofRun:
+    """A single proof gate run record."""
+
+    run_id: str
+    workspace_id: str
+    started_at: datetime
+    completed_at: Optional[datetime]
+    triggered_by: str  # 'human' | 'auto'
+    overall_passed: bool
+    duration_ms: Optional[int]
+
+
 @dataclass
 class Requirement:
     """A proof obligation born from a glitch.
diff --git a/codeframe/core/proof/runner.py b/codeframe/core/proof/runner.py
index 0e664097..12ca5d9b 100644
--- a/codeframe/core/proof/runner.py
+++ b/codeframe/core/proof/runner.py
@@ -7,11 +7,12 @@
 
 import logging
 import uuid
+from datetime import datetime, timezone
 from typing import Optional
 
 from codeframe.core.proof import ledger
 from codeframe.core.proof.evidence import attach_evidence
-from codeframe.core.proof.models import Gate, ReqStatus
+from codeframe.core.proof.models import Gate, ProofRun, ReqStatus
 from codeframe.core.proof.scope import get_changed_scope, intersects
 from codeframe.core.workspace import Workspace
 
@@ -68,6 +69,8 @@ def run_proof(
     if not run_id:
         run_id = str(uuid.uuid4())[:8]
 
+    started_at = datetime.now(timezone.utc)
+
     # Expire any stale waivers
     expired = ledger.check_expired_waivers(workspace)
     if expired:
@@ -127,4 +130,22 @@ def run_proof(
                 req.status = ReqStatus.SATISFIED
             ledger.save_requirement(workspace, req)
 
+    completed_at = datetime.now(timezone.utc)
+    duration_ms = int((completed_at - started_at).total_seconds() * 1000)
+    overall_passed = bool(results) and all(
+        passed for gate_results in results.values() for _, passed in gate_results
+    )
+    ledger.save_run(
+        workspace,
+        ProofRun(
+            run_id=run_id,
+            workspace_id=workspace.id,
+            started_at=started_at,
+            completed_at=completed_at,
+            triggered_by="human",
+            overall_passed=overall_passed,
+            duration_ms=duration_ms,
+        ),
+    )
+
     return results
diff --git a/codeframe/ui/routers/proof_v2.py b/codeframe/ui/routers/proof_v2.py
index 5cdf48a3..6d209a12 100644
--- a/codeframe/ui/routers/proof_v2.py
+++ b/codeframe/ui/routers/proof_v2.py
@@ -25,8 +25,10 @@
 from codeframe.core.proof.capture import capture_requirement
 from codeframe.core.proof.ledger import (
     get_requirement,
+    get_run_evidence,
     list_evidence,
     list_requirements,
+    list_runs,
     waive_requirement,
 )
 from codeframe.core.proof.models import (
@@ -204,6 +206,29 @@ class EvidenceResponse(BaseModel):
     run_id: str
 
 
+class EvidenceWithContentResponse(EvidenceResponse):
+    """Evidence record including artifact file contents."""
+
+    artifact_text: Optional[str] = None
+
+
+class ProofRunSummaryResponse(BaseModel):
+    """Summary of a single proof gate run."""
+
+    run_id: str
+    started_at: str
+    completed_at: Optional[str]
+    triggered_by: str
+    overall_passed: bool
+    duration_ms: Optional[int]
+
+
+class ProofRunDetailResponse(ProofRunSummaryResponse):
+    """Proof run detail including per-gate evidence with artifact content."""
+
+    evidence: list[EvidenceWithContentResponse]
+
+
 # ============================================================================
 # Helper
 # ============================================================================
@@ -476,6 +501,117 @@ async def proof_status_endpoint(
     )
 
 
+@router.get("/runs", response_model=list[ProofRunSummaryResponse])
+@rate_limit_standard()
+async def list_runs_endpoint(
+    request: Request,
+    limit: int = Query(default=5, ge=1, le=50, description="Maximum number of runs to return"),
+    workspace: Workspace = Depends(get_v2_workspace),
+) -> list[ProofRunSummaryResponse]:
+    """List the most recent proof gate runs for this workspace."""
+    runs = list_runs(workspace, limit=limit)
+    return [
+        ProofRunSummaryResponse(
+            run_id=r.run_id,
+            started_at=r.started_at.isoformat(),
+            completed_at=r.completed_at.isoformat() if r.completed_at else None,
+            triggered_by=r.triggered_by,
+            overall_passed=r.overall_passed,
+            duration_ms=r.duration_ms,
+        )
+        for r in runs
+    ]
+
+
+def _read_artifact_text(artifact_path: str) -> Optional[str]:
+    """Return the artifact's text (UTF-8, bad bytes replaced), or None if it cannot be read."""
+    from pathlib import Path
+    if not artifact_path:
+        return None  # Path("") is the current directory, never an artifact file
+    try:
+        # Read directly instead of exists()-then-read, which races with file removal.
+        return Path(artifact_path).read_text(encoding="utf-8", errors="replace")
+    except (OSError, ValueError):  # missing/unreadable/directory, or NUL byte in the path
+        return None
+
+
+@router.get("/runs/{run_id}/evidence", response_model=ProofRunDetailResponse)
+@rate_limit_standard()
+async def get_run_evidence_endpoint(
+    request: Request,
+    run_id: str,
+    workspace: Workspace = Depends(get_v2_workspace),
+) -> ProofRunDetailResponse:
+    """Return a run's metadata and per-gate evidence with artifact text (404 if unknown)."""
+    # NOTE(review): the DB lookup scans only the 100 newest runs; older ones fall to cache/404.
+    runs = list_runs(workspace, limit=100)
+    run = next((r for r in runs if r.run_id == run_id), None)
+
+    if run is None:
+        # Fall back to cache for very recent runs not yet in DB
+        cached = _run_cache.get((str(workspace.repo_path), run_id))
+        if cached is None:
+            raise HTTPException(
+                status_code=404,
+                detail=api_error(
+                    f"Run not found: {run_id}",
+                    ErrorCodes.NOT_FOUND,
+                    f"No proof run with id {run_id}",
+                ),
+            )
+        # Build a minimal response from cache: paths, checksums, timestamps and text stay empty
+        evidence_list: list[EvidenceWithContentResponse] = []
+        for req_id, gate_results in cached["results"].items():
+            for gate_result in gate_results:
+                evidence_list.append(EvidenceWithContentResponse(
+                    req_id=req_id,
+                    gate=gate_result["gate"],
+                    satisfied=gate_result["satisfied"],
+                    artifact_path="",
+                    artifact_checksum="",
+                    timestamp="",
+                    run_id=run_id,
+                    artifact_text=None,
+                ))
+        import time as _time
+        ts = cached.get("_ts", _time.time())  # current time when the entry has no "_ts"
+        from datetime import datetime as _dt, timezone as _tz
+        ts_str = _dt.fromtimestamp(ts, tz=_tz.utc).isoformat()
+        return ProofRunDetailResponse(
+            run_id=run_id,
+            started_at=ts_str,  # the cache timestamp is reused for both start and completion
+            completed_at=ts_str,
+            triggered_by="human",
+            overall_passed=cached["passed"],
+            duration_ms=None,
+            evidence=evidence_list,
+        )
+
+    evidence_records = get_run_evidence(workspace, run_id)
+    evidence_out = [
+        EvidenceWithContentResponse(
+            req_id=e.req_id,
+            gate=e.gate.value,
+            satisfied=e.satisfied,
+            artifact_path=e.artifact_path,
+            artifact_checksum=e.artifact_checksum,
+            timestamp=e.timestamp.isoformat(),
+            run_id=e.run_id,
+            artifact_text=_read_artifact_text(e.artifact_path),  # None when unreadable
+        )
+        for e in evidence_records
+    ]
+    return ProofRunDetailResponse(
+        run_id=run.run_id,
+        started_at=run.started_at.isoformat(),
+        completed_at=run.completed_at.isoformat() if run.completed_at else None,
+        triggered_by=run.triggered_by,
+        overall_passed=run.overall_passed,
+        duration_ms=run.duration_ms,
+        evidence=evidence_out,
+    )
+
+
 @router.get("/requirements/{req_id}/evidence", response_model=list[EvidenceResponse])
 @rate_limit_standard()
 async def list_evidence_endpoint(
diff --git a/tests/ui/test_proof_v2.py b/tests/ui/test_proof_v2.py
index 35535fe2..5c8fd3af 100644
--- a/tests/ui/test_proof_v2.py
+++ b/tests/ui/test_proof_v2.py
@@ -539,3 +539,117 @@ def test_400_format(self, test_client):
         detail = response.json()["detail"]
         assert "error" in detail
         assert "code" in detail
+
+
+# ============================================================================
+# GET /api/v2/proof/runs — list run history
+# ============================================================================
+
+
+class TestListRuns:
+    """Tests for GET /api/v2/proof/runs."""
+
+    def _capture_req(self, test_client):
+        return test_client.post(
+            "/api/v2/proof/requirements",
+            json={
+                "title": "Run history test req",
+                "description": "A requirement for run history testing",
+                "where": "core/tasks.py",
+                "severity": "low",
+                "source": "qa",
+            },
+        ).json()["id"]
+
+    def test_list_runs_empty_initially(self, test_client):
+        """No runs recorded before any proof run is triggered."""
+        response = test_client.get("/api/v2/proof/runs")
+        assert response.status_code == 200
+        assert response.json() == []
+
+    def test_list_runs_after_run(self, test_client):
+        """A completed run appears in the list."""
+        self._capture_req(test_client)
+        test_client.post("/api/v2/proof/run", json={"full": True})
+        response = test_client.get("/api/v2/proof/runs")
+        assert response.status_code == 200
+        runs = response.json()
+        assert len(runs) >= 1
+
+    def test_list_runs_response_shape(self, test_client):
+        """Each run summary has the expected fields."""
+        self._capture_req(test_client)
+        test_client.post("/api/v2/proof/run", json={"full": True})
+        runs = test_client.get("/api/v2/proof/runs").json()
+        assert len(runs) >= 1
+        run = runs[0]
+        for field in ["run_id", "started_at", "completed_at", "triggered_by",
+                      "overall_passed", "duration_ms"]:
+            assert field in run, f"Missing field: {field}"
+
+    def test_list_runs_limit(self, test_client):
+        """Limit parameter is respected."""
+        self._capture_req(test_client)
+        for _ in range(3):
+            test_client.post("/api/v2/proof/run", json={"full": True})
+        runs_limited = test_client.get("/api/v2/proof/runs?limit=2").json()
+        assert len(runs_limited) <= 2
+
+    def test_list_runs_ordered_newest_first(self, test_client):
+        """Runs are returned newest-first."""
+        self._capture_req(test_client)
+        for _ in range(2):
+            test_client.post("/api/v2/proof/run", json={"full": True})
+        runs = test_client.get("/api/v2/proof/runs").json()
+        if len(runs) >= 2:
+            assert runs[0]["started_at"] >= runs[1]["started_at"]
+
+
+# ============================================================================
+# GET /api/v2/proof/runs/{run_id}/evidence — run evidence detail
+# ============================================================================
+
+
+class TestGetRunEvidence:
+    """Tests for GET /api/v2/proof/runs/{run_id}/evidence."""
+
+    def _capture_req(self, test_client):
+        return test_client.post(
+            "/api/v2/proof/requirements",
+            json={
+                "title": "Run evidence test req",
+                "description": "A requirement for run evidence testing",
+                "where": "core/tasks.py",
+                "severity": "low",
+                "source": "qa",
+            },
+        ).json()["id"]
+
+    def test_get_run_evidence_shape(self, test_client):
+        """Run evidence response has expected fields including evidence list."""
+        self._capture_req(test_client)
+        run_resp = test_client.post("/api/v2/proof/run", json={"full": True}).json()
+        run_id = run_resp["run_id"]
+
+        response = test_client.get(f"/api/v2/proof/runs/{run_id}/evidence")
+        assert response.status_code == 200
+        data = response.json()
+        for field in ["run_id", "started_at", "completed_at", "triggered_by",
+                      "overall_passed", "duration_ms", "evidence"]:
+            assert field in data, f"Missing field: {field}"
+        assert isinstance(data["evidence"], list)
+
+    def test_get_run_evidence_unknown_returns_404(self, test_client):
+        """Unknown run_id returns 404."""
+        response = test_client.get("/api/v2/proof/runs/nonexistent-run/evidence")
+        assert response.status_code == 404
+
+    def test_get_run_evidence_each_item_has_artifact_text(self, test_client):
+        """Each evidence item has an artifact_text field."""
+        self._capture_req(test_client)
+        run_resp = test_client.post("/api/v2/proof/run", json={"full": True}).json()
+        run_id = run_resp["run_id"]
+
+        data = test_client.get(f"/api/v2/proof/runs/{run_id}/evidence").json()
+        for ev in data["evidence"]:
+            assert "artifact_text" in ev, "Evidence item missing artifact_text"
diff --git a/web-ui/src/__tests__/components/proof/GateEvidencePanel.test.tsx b/web-ui/src/__tests__/components/proof/GateEvidencePanel.test.tsx
new file mode 100644
index 00000000..85d18be4
--- /dev/null
+++ b/web-ui/src/__tests__/components/proof/GateEvidencePanel.test.tsx
@@ -0,0 +1,76 @@
+import React from 'react';
+import { render, screen, fireEvent } from '@testing-library/react';
+import { GateEvidencePanel } from '@/components/proof/GateEvidencePanel';
+import type { ProofEvidenceWithContent } from '@/types';
+
+function makeEvidence(overrides: Partial<ProofEvidenceWithContent> = {}): ProofEvidenceWithContent {
+  return {
+    req_id: 'REQ-001',
+    gate: 'unit',
+    satisfied: true,
+    artifact_path: '/tmp/REQ-001_unit_abc.txt',
+    artifact_checksum: 'abc123',
+    timestamp: '2026-04-09T12:00:00Z',
+    run_id: 'abc12345',
+    artifact_text: 'test output line 1\ntest output line 2',
+    ...overrides,
+  };
+}
+
+describe('GateEvidencePanel', () => {
+  it('renders nothing for empty evidence', () => {
+    const { container } = render(<GateEvidencePanel evidence={[]} />);
+    expect(container.firstChild).toBeNull();
+  });
+
+  it('renders a row per evidence item', () => {
+    const evidence = [
+      makeEvidence({ gate: 'unit', satisfied: true }),
+      makeEvidence({ gate: 'sec', satisfied: false }),
+    ];
+    render(<GateEvidencePanel evidence={evidence} />);
+    expect(screen.getByText('unit')).toBeInTheDocument();
+    expect(screen.getByText('sec')).toBeInTheDocument();
+  });
+
+  it('shows pass badge for satisfied evidence', () => {
+    render(<GateEvidencePanel evidence={[makeEvidence({ satisfied: true })]} />);
+    expect(screen.getByText('pass')).toBeInTheDocument();
+  });
+
+  it('shows fail badge for unsatisfied evidence', () => {
+    render(<GateEvidencePanel evidence={[makeEvidence({ satisfied: false })]} />);
+    expect(screen.getByText('fail')).toBeInTheDocument();
+  });
+
+  it('expands to show artifact text on click', () => {
+    const ev = makeEvidence({ artifact_text: 'hello output' });
+    render(<GateEvidencePanel evidence={[ev]} />);
+    // Artifact text should not be visible before click
+    expect(screen.queryByText('hello output')).not.toBeInTheDocument();
+    fireEvent.click(screen.getByRole('button', { name: /unit/i }));
+    expect(screen.getByText('hello output')).toBeInTheDocument();
+  });
+
+  it('shows "No output captured" when artifact_text is null', () => {
+    const ev = makeEvidence({ artifact_text: null });
+    render(<GateEvidencePanel evidence={[ev]} />);
+    fireEvent.click(screen.getByRole('button', { name: /unit/i }));
+    expect(screen.getByText('No output captured')).toBeInTheDocument();
+  });
+
+  it('shows "Show full output" toggle when text exceeds 200 lines', () => {
+    const longText = Array.from({ length: 250 }, (_, i) => `line ${i + 1}`).join('\n');
+    const ev = makeEvidence({ artifact_text: longText });
+    render(<GateEvidencePanel evidence={[ev]} />);
+    fireEvent.click(screen.getByRole('button', { name: /unit/i }));
+    expect(screen.getByText('Show full output')).toBeInTheDocument();
+  });
+
+  it('does not show "Show full output" for short text', () => {
+    const ev = makeEvidence({ artifact_text: 'short output' });
+    render(<GateEvidencePanel evidence={[ev]} />);
+    fireEvent.click(screen.getByRole('button', { name: /unit/i }));
+    expect(screen.queryByText('Show full output')).not.toBeInTheDocument();
+  });
+});
diff --git a/web-ui/src/__tests__/components/proof/ProofPage.test.tsx b/web-ui/src/__tests__/components/proof/ProofPage.test.tsx
index b27f736d..fabcc768 100644
--- a/web-ui/src/__tests__/components/proof/ProofPage.test.tsx
+++ b/web-ui/src/__tests__/components/proof/ProofPage.test.tsx
@@ -23,6 +23,9 @@ jest.mock('@/lib/api', () => ({
 jest.mock('@/components/proof', () => ({
   ProofStatusBadge: ({ status }: { status: string }) => <span data-testid="status-badge">{status}</span>,
   WaiveDialog: () => null,
+  GateRunPanel: () => null,
+  GateRunBanner: () => null,
+  RunHistoryPanel: () => null,
 }));
 jest.mock('next/link', () => {
   const MockLink = ({ href, children }: { href: string; children: React.ReactNode }) => (
diff --git a/web-ui/src/__tests__/components/proof/RunHistoryPanel.test.tsx b/web-ui/src/__tests__/components/proof/RunHistoryPanel.test.tsx
new file mode 100644
index 00000000..9950a5a2
--- /dev/null
+++ b/web-ui/src/__tests__/components/proof/RunHistoryPanel.test.tsx
@@ -0,0 +1,82 @@
+import React from 'react';
+import { render, screen, fireEvent } from '@testing-library/react';
+import useSWR from 'swr';
+import { RunHistoryPanel } from '@/components/proof/RunHistoryPanel';
+import type { ProofRunSummary } from '@/types';
+
+jest.mock('swr');
+jest.mock('@/lib/api', () => ({
+  proofApi: {
+    listRuns: jest.fn(),
+  },
+}));
+
+const mockUseSWR = useSWR as jest.MockedFunction<typeof useSWR>;
+
+function makeRun(overrides: Partial<ProofRunSummary> = {}): ProofRunSummary {
+  return {
+    run_id: 'abc12345',
+    started_at: '2026-04-09T12:00:00Z',
+    completed_at: '2026-04-09T12:00:05Z',
+    triggered_by: 'human',
+    overall_passed: true,
+    duration_ms: 5000,
+    ...overrides,
+  };
+}
+
+const WORKSPACE = '/home/user/project';
+
+describe('RunHistoryPanel', () => {
+  afterEach(() => jest.clearAllMocks());
+
+  it('shows loading skeletons while loading', () => {
+    mockUseSWR.mockReturnValue({ data: undefined, error: undefined, isLoading: true } as ReturnType<typeof useSWR>);
+    render(<RunHistoryPanel workspacePath={WORKSPACE} onSelectRun={jest.fn()} selectedRunId={null} />);
+    expect(screen.getAllByRole('generic').some((el) => el.className.includes('animate-pulse'))).toBe(true);
+  });
+
+  it('shows error message on fetch failure', () => {
+    mockUseSWR.mockReturnValue({ data: undefined, error: new Error('fail'), isLoading: false } as ReturnType<typeof useSWR>);
+    render(<RunHistoryPanel workspacePath={WORKSPACE} onSelectRun={jest.fn()} selectedRunId={null} />);
+    expect(screen.getByText('Failed to load run history.')).toBeInTheDocument();
+  });
+
+  it('shows empty state when no runs', () => {
+    mockUseSWR.mockReturnValue({ data: [], error: undefined, isLoading: false } as ReturnType<typeof useSWR>);
+    render(<RunHistoryPanel workspacePath={WORKSPACE} onSelectRun={jest.fn()} selectedRunId={null} />);
+    expect(screen.getByText('No runs recorded yet.')).toBeInTheDocument();
+  });
+
+  it('renders run rows', () => {
+    const runs = [makeRun({ run_id: 'run1' }), makeRun({ run_id: 'run2', overall_passed: false })];
+    mockUseSWR.mockReturnValue({ data: runs, error: undefined, isLoading: false } as ReturnType<typeof useSWR>);
+    render(<RunHistoryPanel workspacePath={WORKSPACE} onSelectRun={jest.fn()} selectedRunId={null} />);
+    expect(screen.getAllByText('pass').length + screen.getAllByText('fail').length).toBeGreaterThanOrEqual(1);
+  });
+
+  it('calls onSelectRun with run_id when row is clicked', () => {
+    const onSelectRun = jest.fn();
+    const run = makeRun({ run_id: 'abc12345' });
+    mockUseSWR.mockReturnValue({ data: [run], error: undefined, isLoading: false } as ReturnType<typeof useSWR>);
+    render(<RunHistoryPanel workspacePath={WORKSPACE} onSelectRun={onSelectRun} selectedRunId={null} />);
+    // Find the clickable row
+    const rows = screen.getAllByRole('button');
+    fireEvent.click(rows[0]);
+    expect(onSelectRun).toHaveBeenCalledWith('abc12345');
+  });
+
+  it('highlights selected run row', () => {
+    const run = makeRun({ run_id: 'abc12345' });
+    mockUseSWR.mockReturnValue({ data: [run], error: undefined, isLoading: false } as ReturnType<typeof useSWR>);
+    render(<RunHistoryPanel workspacePath={WORKSPACE} onSelectRun={jest.fn()} selectedRunId="abc12345" />);
+    const rows = screen.getAllByRole('button');
+    expect(rows[0].className).toContain('bg-muted');
+  });
+
+  it('shows "Recent Runs" heading', () => {
+    mockUseSWR.mockReturnValue({ data: [], error: undefined, isLoading: false } as ReturnType<typeof useSWR>);
+    render(<RunHistoryPanel workspacePath={WORKSPACE} onSelectRun={jest.fn()} selectedRunId={null} />);
+    expect(screen.getByText('Recent Runs')).toBeInTheDocument();
+  });
+});
diff --git a/web-ui/src/app/proof/[req_id]/page.tsx b/web-ui/src/app/proof/[req_id]/page.tsx
index 3051d507..7f26300d 100644
--- a/web-ui/src/app/proof/[req_id]/page.tsx
+++ b/web-ui/src/app/proof/[req_id]/page.tsx
@@ -6,10 +6,10 @@ import { useParams } from 'next/navigation';
 import useSWR from 'swr';
 import { Button } from '@/components/ui/button';
 import { Input } from '@/components/ui/input';
-import { ProofStatusBadge, WaiveDialog } from '@/components/proof';
+import { ProofStatusBadge, WaiveDialog, GateEvidencePanel } from '@/components/proof';
 import { proofApi } from '@/lib/api';
 import { getSelectedWorkspacePath } from '@/lib/workspace-storage';
-import type { ProofRequirement, ProofEvidence, ProofEvidenceSortCol, SortDir } from '@/types';
+import type { ProofRequirement, ProofEvidence, ProofEvidenceSortCol, SortDir, ProofEvidenceWithContent } from '@/types';
 
 function sessionKey(reqId: string) {
   return `proof-evidence-filters:${reqId}`;
@@ -98,6 +98,22 @@ export default function ProofDetailPage() {
       () => proofApi.getEvidence(workspacePath!, reqId)
     );
 
+  // Get the most recent run_id from evidence to show artifact content
+  const latestRunId = useMemo(() => {
+    if (!Array.isArray(evidence) || evidence.length === 0) return null;
+    return [...evidence].sort((a, b) => b.timestamp.localeCompare(a.timestamp))[0]?.run_id ?? null;
+  }, [evidence]);
+
+  const { data: latestRunDetail } = useSWR<import('@/types').ProofRunDetail>(
+    workspacePath && latestRunId ? `/api/v2/proof/runs/${latestRunId}/evidence?path=${workspacePath}` : null,
+    () => proofApi.getRunDetail(workspacePath!, latestRunId!)
+  );
+
+  const latestEvidence: ProofEvidenceWithContent[] = useMemo(
+    () => (latestRunDetail?.evidence ?? []).filter((ev) => ev.req_id === reqId),
+    [latestRunDetail, reqId] // run detail spans every requirement; keep only this page's
+  );
+
   const hasActiveFilters = filterGate !== '' || filterResult !== '' || search !== '';
 
   const gateOptions = useMemo(() => {
@@ -256,6 +272,14 @@ export default function ProofDetailPage() {
               </section>
             )}
 
+            {/* Latest run gate evidence */}
+            {latestEvidence.length > 0 && (
+              <section>
+                <h2 className="mb-3 text-base font-semibold">Latest Run Evidence</h2>
+                <GateEvidencePanel evidence={latestEvidence} />
+              </section>
+            )}
+
             {/* Evidence history */}
             <section>
               <h2 className="mb-3 text-base font-semibold">Evidence History</h2>
diff --git a/web-ui/src/app/proof/page.tsx b/web-ui/src/app/proof/page.tsx
index 18ac3267..19b2c398 100644
--- a/web-ui/src/app/proof/page.tsx
+++ b/web-ui/src/app/proof/page.tsx
@@ -12,7 +12,7 @@ import {
   TooltipProvider,
 } from '@/components/ui/tooltip';
 import { Button } from '@/components/ui/button';
-import { ProofStatusBadge, WaiveDialog, GateRunPanel, GateRunBanner } from '@/components/proof';
+import { ProofStatusBadge, WaiveDialog, GateRunPanel, GateRunBanner, RunHistoryPanel } from '@/components/proof';
 import { proofApi } from '@/lib/api';
 import { useProofRun } from '@/hooks/useProofRun';
 import { getSelectedWorkspacePath } from '@/lib/workspace-storage';
@@ -103,6 +103,7 @@ function ProofPageContent() {
   const [workspacePath, setWorkspacePath] = useState<string | null>(null);
   const [workspaceReady, setWorkspaceReady] = useState(false);
   const [waivedReq, setWaivedReq] = useState<ProofRequirement | null>(null);
+  const [selectedRunId, setSelectedRunId] = useState<string | null>(null);
 
   const { runState, gateEntries, passed, runMessage, errorMessage, startRun, retry } = useProofRun();
 
@@ -466,6 +467,11 @@ function ProofPageContent() {
                 </tbody>
               </table>
             </div>
+          <RunHistoryPanel
+            workspacePath={workspacePath}
+            onSelectRun={setSelectedRunId}
+            selectedRunId={selectedRunId}
+          />
           </>
           );
         })()}
diff --git a/web-ui/src/components/proof/GateEvidencePanel.tsx b/web-ui/src/components/proof/GateEvidencePanel.tsx
new file mode 100644
index 00000000..481d222d
--- /dev/null
+++ b/web-ui/src/components/proof/GateEvidencePanel.tsx
@@ -0,0 +1,91 @@
+'use client';
+
+import { useState } from 'react';
+import { Button } from '@/components/ui/button';
+import type { ProofEvidenceWithContent } from '@/types';
+
+const MAX_LINES = 200;
+
+function truncateLines(text: string, max: number): { lines: string[]; truncated: boolean } {
+  const all = text.split('\n'); // at most `max` of these are handed back
+  const truncated = all.length > max;
+  return { lines: truncated ? all.slice(0, max) : all, truncated };
+}
+
+interface GateEvidencePanelProps {
+  evidence: ProofEvidenceWithContent[];
+}
+
+interface GateEvidenceRowProps {
+  ev: ProofEvidenceWithContent;
+}
+
+function GateEvidenceRow({ ev }: GateEvidenceRowProps) {
+  const [expanded, setExpanded] = useState(false); // artifact area open/closed
+  const [showFull, setShowFull] = useState(false); // set once "Show full output" is clicked
+
+  const hasText = ev.artifact_text != null && ev.artifact_text.trim().length > 0; // whitespace-only counts as no output
+  const { lines, truncated } = hasText
+    ? truncateLines(ev.artifact_text!, MAX_LINES)
+    : { lines: [], truncated: false };
+  const displayLines = showFull && hasText ? ev.artifact_text!.split('\n') : lines; // hasText guard: showFull can outlive a refetch that nulls artifact_text
+
+  return (
+    <div className="border-b last:border-0">
+      <button
+        type="button"
+        aria-expanded={expanded}
+        onClick={() => setExpanded((v) => !v)}
+        className="flex w-full items-center gap-3 px-4 py-2 text-left hover:bg-muted/30 focus-visible:outline-none focus-visible:ring-2 focus-visible:ring-ring focus-visible:ring-inset"
+      >
+        <span className="font-mono text-xs text-muted-foreground w-16 shrink-0 capitalize">{ev.gate}</span>
+        <span
+          className={`rounded-full px-2 py-0.5 text-xs font-medium ${
+            ev.satisfied
+              ? 'bg-green-100 text-green-700 dark:bg-green-900/30 dark:text-green-400'
+              : 'bg-red-100 text-red-700 dark:bg-red-900/30 dark:text-red-400'
+          }`}
+        >
+          {ev.satisfied ? 'pass' : 'fail'}
+        </span>
+        <span className="ml-auto text-xs text-muted-foreground">{expanded ? '▲' : '▼'}</span>
+      </button>
+
+      {expanded && (
+        <div className="px-4 pb-3">
+          {!hasText ? (
+            <p className="text-xs text-muted-foreground italic">No output captured</p>
+          ) : (
+            <>
+              <pre className="max-h-64 overflow-auto rounded-md bg-muted p-3 text-xs leading-relaxed whitespace-pre-wrap break-words">
+                {displayLines.join('\n')}
+              </pre>
+              {truncated && !showFull && (
+                <Button
+                  variant="ghost"
+                  size="sm"
+                  className="mt-1 text-xs"
+                  onClick={(e) => { e.stopPropagation(); setShowFull(true); }}
+                >
+                  Show full output
+                </Button>
+              )}
+            </>
+          )}
+        </div>
+      )}
+    </div>
+  );
+}
+
+export function GateEvidencePanel({ evidence }: GateEvidencePanelProps) {
+  // With no evidence the panel renders nothing, not even the bordered wrapper.
+  if (evidence.length === 0) return null;
+
+  // The index is part of the key so rows stay unique even if run_id + gate repeats.
+  const rows = evidence.map((item, idx) => (
+    <GateEvidenceRow key={`${item.run_id}:${item.gate}:${idx}`} ev={item} />
+  ));
+
+  return <div className="rounded-lg border bg-background" aria-label="Gate evidence">{rows}</div>;
+}
diff --git a/web-ui/src/components/proof/RunHistoryPanel.tsx b/web-ui/src/components/proof/RunHistoryPanel.tsx
new file mode 100644
index 00000000..cce91a07
--- /dev/null
+++ b/web-ui/src/components/proof/RunHistoryPanel.tsx
@@ -0,0 +1,113 @@
+'use client';
+
+import useSWR from 'swr';
+import { proofApi } from '@/lib/api';
+import type { ProofRunSummary } from '@/types';
+
+interface RunHistoryPanelProps {
+  workspacePath: string;
+  onSelectRun: (runId: string) => void;
+  selectedRunId: string | null;
+}
+
+function formatDuration(ms: number | null): string {
+  // No duration → em dash placeholder; below one second stay in ms, otherwise one-decimal seconds.
+  if (ms == null) return '—';
+  return ms < 1000 ? `${ms}ms` : `${(ms / 1000).toFixed(1)}s`;
+}
+
+function formatTimestamp(iso: string): string {
+  // An unparseable string yields an Invalid Date instead of throwing, so a
+  // try/catch never fires; test the parsed time and fall back to the raw value.
+  const parsed = new Date(iso);
+  if (Number.isNaN(parsed.getTime())) return iso;
+  return parsed.toLocaleString();
+}
+
+export function RunHistoryPanel({ workspacePath, onSelectRun, selectedRunId }: RunHistoryPanelProps) {
+  const { data, error, isLoading } = useSWR<ProofRunSummary[]>(
+    workspacePath ? `/api/v2/proof/runs?path=${workspacePath}` : null, // cache key only; a null key makes SWR skip the fetch
+    () => proofApi.listRuns(workspacePath, 5) // second argument is the row limit
+  );
+
+  return (
+    <section aria-label="Recent runs" className="mt-6">
+      <h2 className="mb-3 text-base font-semibold">Recent Runs</h2>
+
+      {isLoading && (
+        <div className="space-y-2">
+          {[...Array(3)].map((_, i) => (
+            <div key={i} className="h-10 animate-pulse rounded-md bg-muted" />
+          ))}
+        </div>
+      )}
+
+      {error && (
+        <p className="text-sm text-destructive">Failed to load run history.</p>
+      )}
+
+      {!isLoading && !error && (!data || data.length === 0) && (
+        <p className="text-sm text-muted-foreground">No runs recorded yet.</p>
+      )}
+
+      {data && data.length > 0 && (
+        <div className="overflow-x-auto rounded-lg border">
+          <table className="min-w-[560px] w-full text-sm">
+            <thead className="border-b bg-muted/50">
+              <tr>
+                <th className="px-4 py-2 text-left font-medium">Timestamp</th>
+                <th className="px-4 py-2 text-left font-medium">Result</th>
+                <th className="px-4 py-2 text-left font-medium">Duration</th>
+                <th className="px-4 py-2 text-left font-medium">Triggered by</th>
+              </tr>
+            </thead>
+            <tbody>
+              {data.map((run) => {
+                const isSelected = run.run_id === selectedRunId; // drives aria-pressed and the highlight class
+                return (
+                  <tr
+                    key={run.run_id}
+                    role="button"
+                    tabIndex={0}
+                    aria-pressed={isSelected}
+                    onClick={() => onSelectRun(run.run_id)}
+                    onKeyDown={(e) => {
+                      if (e.key === 'Enter' || e.key === ' ') { // keyboard activation mirrors onClick
+                        e.preventDefault();
+                        onSelectRun(run.run_id);
+                      }
+                    }}
+                    className={`cursor-pointer border-b last:border-0 hover:bg-muted/30 focus-visible:outline-none focus-visible:ring-2 focus-visible:ring-ring focus-visible:ring-inset${
+                      isSelected ? ' bg-muted/60' : ''
+                    }`}
+                  >
+                    <td className="px-4 py-2 text-muted-foreground">
+                      {formatTimestamp(run.started_at)}
+                    </td>
+                    <td className="px-4 py-2">
+                      <span
+                        className={`rounded-full px-2 py-0.5 text-xs font-medium ${
+                          run.overall_passed
+                            ? 'bg-green-100 text-green-700 dark:bg-green-900/30 dark:text-green-400'
+                            : 'bg-red-100 text-red-700 dark:bg-red-900/30 dark:text-red-400'
+                        }`}
+                      >
+                        {run.overall_passed ? 'pass' : 'fail'}
+                      </span>
+                    </td>
+                    <td className="px-4 py-2 text-muted-foreground">
+                      {formatDuration(run.duration_ms)}
+                    </td>
+                    <td className="px-4 py-2 text-muted-foreground capitalize">
+                      {run.triggered_by}
+                    </td>
+                  </tr>
+                );
+              })}
+            </tbody>
+          </table>
+        </div>
+      )}
+    </section>
+  );
+}
diff --git a/web-ui/src/components/proof/index.ts b/web-ui/src/components/proof/index.ts
index 9c934588..47480292 100644
--- a/web-ui/src/components/proof/index.ts
+++ b/web-ui/src/components/proof/index.ts
@@ -3,3 +3,5 @@ export { ProofStatusWidget } from './ProofStatusWidget';
 export { WaiveDialog } from './WaiveDialog';
 export { GateRunPanel } from './GateRunPanel';
 export { GateRunBanner } from './GateRunBanner';
+export { GateEvidencePanel } from './GateEvidencePanel';
+export { RunHistoryPanel } from './RunHistoryPanel';
diff --git a/web-ui/src/lib/api.ts b/web-ui/src/lib/api.ts
index 874ab49e..c4c09615 100644
--- a/web-ui/src/lib/api.ts
+++ b/web-ui/src/lib/api.ts
@@ -46,6 +46,8 @@ import type {
   RunProofRequest,
   RunProofResponse,
   RunStatusResponse,
+  ProofRunSummary,
+  ProofRunDetail,
   Session,
   SessionState,
   SessionListResponse,
@@ -652,6 +654,21 @@ export const proofApi = {
     );
     return response.data;
   },
+
+  listRuns: async (workspacePath: string, limit = 5): Promise<ProofRunSummary[]> => {
+    const response = await api.get<ProofRunSummary[]>('/api/v2/proof/runs', {
+      params: { workspace_path: workspacePath, limit },
+    });
+    return response.data;
+  },
+
+  getRunDetail: async (workspacePath: string, runId: string): Promise<ProofRunDetail> => {
+    const response = await api.get<ProofRunDetail>(
+      `/api/v2/proof/runs/${encodeURIComponent(runId)}/evidence`,
+      { params: { workspace_path: workspacePath } }
+    );
+    return response.data;
+  },
 };
 
 // PR API methods
diff --git a/web-ui/src/types/index.ts b/web-ui/src/types/index.ts
index 8a15dfe0..7ceb95f2 100644
--- a/web-ui/src/types/index.ts
+++ b/web-ui/src/types/index.ts
@@ -363,6 +363,24 @@ export interface RunStatusResponse {
   message: string;
 }
 
+// Proof run history types (mirrors proof_v2.py ProofRunSummaryResponse / ProofRunDetailResponse)
+export interface ProofRunSummary {
+  run_id: string;
+  started_at: string;
+  completed_at: string | null;
+  triggered_by: string;
+  overall_passed: boolean;
+  duration_ms: number | null;
+}
+
+export interface ProofEvidenceWithContent extends ProofEvidence {
+  artifact_text: string | null;
+}
+
+export interface ProofRunDetail extends ProofRunSummary {
+  evidence: ProofEvidenceWithContent[];
+}
+
 // UI-only types for per-gate display in the Run Gates panel
 export type GateRunStatus = 'pending' | 'running' | 'passed' | 'failed';
 

From 6f76a301568efa21c679b37b350459e2910eaadd Mon Sep 17 00:00:00 2001
From: Test User <test@example.com>
Date: Thu, 9 Apr 2026 11:49:59 -0700
Subject: [PATCH 2/3] fix: address CodeRabbit feedback on PR #575
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- ledger.py: add get_run() for O(1) run lookup by run_id (avoids scan)
- runner.py: persist run record even when no open requirements exist
- runner.py: fix overall_passed logic (empty runs → True, not False)
- proof_v2.py: truncate artifact text to 200 lines server-side
- proof_v2.py: use get_run() instead of list_runs() scan in evidence endpoint
- [req_id]/page.tsx: filter latestEvidence by req_id (was returning all gates)
- proof/page.tsx: fetch and render selected run evidence via GateEvidencePanel
---
 codeframe/core/proof/ledger.py         | 26 +++++++++++++++++++++++++
 codeframe/core/proof/runner.py         | 18 ++++++++++++++---
 codeframe/ui/routers/proof_v2.py       | 18 ++++++++++-------
 web-ui/src/app/proof/[req_id]/page.tsx |  4 ++--
 web-ui/src/app/proof/page.tsx          | 27 ++++++++++++++++++++++++--
 5 files changed, 79 insertions(+), 14 deletions(-)

diff --git a/codeframe/core/proof/ledger.py b/codeframe/core/proof/ledger.py
index 3116bc80..28ce332b 100644
--- a/codeframe/core/proof/ledger.py
+++ b/codeframe/core/proof/ledger.py
@@ -382,6 +382,32 @@ def save_run(workspace: Workspace, run: ProofRun) -> None:
     conn.close()
 
 
+def get_run(workspace: Workspace, run_id: str) -> Optional[ProofRun]:
+    """Fetch one proof run by run_id, scoped to this workspace; None when no row matches."""
+    _ensure_tables(workspace)
+    conn = get_db_connection(workspace)
+    try:  # release the connection even when the query raises
+        row = conn.execute(
+            """SELECT run_id, workspace_id, started_at, completed_at, triggered_by,
+                      overall_passed, duration_ms
+               FROM proof_runs WHERE run_id = ? AND workspace_id = ?""",
+            (run_id, workspace.id),
+        ).fetchone()
+    finally:
+        conn.close()
+    if not row:
+        return None
+    return ProofRun(
+        run_id=row[0],
+        workspace_id=row[1],
+        started_at=datetime.fromisoformat(row[2]),
+        completed_at=datetime.fromisoformat(row[3]) if row[3] else None,
+        triggered_by=row[4],
+        overall_passed=bool(row[5]),
+        duration_ms=row[6],
+    )
+
+
 def list_runs(workspace: Workspace, limit: int = 5) -> list[ProofRun]:
     """List the most recent proof runs for this workspace."""
     _ensure_tables(workspace)
diff --git a/codeframe/core/proof/runner.py b/codeframe/core/proof/runner.py
index 12ca5d9b..a6b8e3fd 100644
--- a/codeframe/core/proof/runner.py
+++ b/codeframe/core/proof/runner.py
@@ -79,6 +79,19 @@ def run_proof(
     # Get all open requirements
     reqs = ledger.list_requirements(workspace, status=ReqStatus.OPEN)
     if not reqs:
+        completed_at = datetime.now(timezone.utc)
+        ledger.save_run(
+            workspace,
+            ProofRun(
+                run_id=run_id,
+                workspace_id=workspace.id,
+                started_at=started_at,
+                completed_at=completed_at,
+                triggered_by="human",
+                overall_passed=True,
+                duration_ms=int((completed_at - started_at).total_seconds() * 1000),
+            ),
+        )
         return {}
 
     # Get changed scope (skip if running full)
@@ -132,9 +145,8 @@ def run_proof(
 
     completed_at = datetime.now(timezone.utc)
     duration_ms = int((completed_at - started_at).total_seconds() * 1000)
-    overall_passed = bool(results) and all(
-        passed for gate_results in results.values() for _, passed in gate_results
-    )
+    executed = [passed for gate_results in results.values() for _, passed in gate_results]
+    overall_passed = all(executed) if executed else True
     ledger.save_run(
         workspace,
         ProofRun(
diff --git a/codeframe/ui/routers/proof_v2.py b/codeframe/ui/routers/proof_v2.py
index 6d209a12..5bfb4449 100644
--- a/codeframe/ui/routers/proof_v2.py
+++ b/codeframe/ui/routers/proof_v2.py
@@ -25,6 +25,7 @@
 from codeframe.core.proof.capture import capture_requirement
 from codeframe.core.proof.ledger import (
     get_requirement,
+    get_run,
     get_run_evidence,
     list_evidence,
     list_requirements,
@@ -523,14 +524,18 @@ async def list_runs_endpoint(
     ]
 
 
-def _read_artifact_text(artifact_path: str) -> Optional[str]:
-    """Read artifact file content, returning None if the file is missing."""
+_ARTIFACT_LINE_LIMIT = 200
+
+
+def _read_artifact_text(artifact_path: str, max_lines: int = _ARTIFACT_LINE_LIMIT) -> Optional[str]:
+    """Return the first max_lines lines of the artifact file, or None if it is missing or unreadable."""
     from pathlib import Path
     try:
         p = Path(artifact_path)
-        if p.exists():
-            return p.read_text(errors="replace")
-        return None
+        if not p.exists():
+            return None
+        with p.open(errors="replace") as fh:  # stream: never load a large artifact whole
+            return "".join(line for _, line in zip(range(max_lines), fh))
     except Exception:
         return None
 
@@ -544,8 +549,7 @@ async def get_run_evidence_endpoint(
 ) -> ProofRunDetailResponse:
     """Get per-gate evidence with artifact content for a completed proof run."""
     # Try to get run metadata from DB first; fall back to in-memory cache
-    runs = list_runs(workspace, limit=100)
-    run = next((r for r in runs if r.run_id == run_id), None)
+    run = get_run(workspace, run_id)
 
     if run is None:
         # Fall back to cache for very recent runs not yet in DB
diff --git a/web-ui/src/app/proof/[req_id]/page.tsx b/web-ui/src/app/proof/[req_id]/page.tsx
index 7f26300d..5e942106 100644
--- a/web-ui/src/app/proof/[req_id]/page.tsx
+++ b/web-ui/src/app/proof/[req_id]/page.tsx
@@ -110,8 +110,8 @@ export default function ProofDetailPage() {
   );
 
   const latestEvidence: ProofEvidenceWithContent[] = useMemo(
-    () => latestRunDetail?.evidence ?? [],
-    [latestRunDetail]
+    () => (latestRunDetail?.evidence ?? []).filter((ev) => ev.req_id === reqId),
+    [latestRunDetail, reqId]
   );
 
   const hasActiveFilters = filterGate !== '' || filterResult !== '' || search !== '';
diff --git a/web-ui/src/app/proof/page.tsx b/web-ui/src/app/proof/page.tsx
index 19b2c398..d67a7441 100644
--- a/web-ui/src/app/proof/page.tsx
+++ b/web-ui/src/app/proof/page.tsx
@@ -12,11 +12,11 @@ import {
   TooltipProvider,
 } from '@/components/ui/tooltip';
 import { Button } from '@/components/ui/button';
-import { ProofStatusBadge, WaiveDialog, GateRunPanel, GateRunBanner, RunHistoryPanel } from '@/components/proof';
+import { ProofStatusBadge, WaiveDialog, GateRunPanel, GateRunBanner, RunHistoryPanel, GateEvidencePanel } from '@/components/proof';
 import { proofApi } from '@/lib/api';
 import { useProofRun } from '@/hooks/useProofRun';
 import { getSelectedWorkspacePath } from '@/lib/workspace-storage';
-import type { ProofRequirement, ProofRequirementListResponse, ProofReqStatus, ProofSeverity } from '@/types';
+import type { ProofRequirement, ProofRequirementListResponse, ProofReqStatus, ProofSeverity, ProofRunDetail } from '@/types';
 
 // ── Sort / filter types ────────────────────────────────────────────────────
 
@@ -137,6 +137,14 @@ function ProofPageContent() {
     }
   }, [runState, mutate]);
 
+  // Fetch evidence for a selected historical run
+  const { data: selectedRunDetail } = useSWR<ProofRunDetail>(
+    workspacePath && selectedRunId
+      ? `/api/v2/proof/runs/${selectedRunId}/evidence?path=${workspacePath}`
+      : null,
+    () => proofApi.getRunDetail(workspacePath!, selectedRunId!)
+  );
+
   // Collect unique glitch types from data for the dropdown
   const glitchTypes = useMemo(() => {
     if (!data) return [] as string[];
@@ -275,6 +283,21 @@ function ProofPageContent() {
               </div>
             )}
 
+            {/* Selected historical run evidence */}
+            {selectedRunId && selectedRunDetail && selectedRunDetail.evidence.length > 0 && (
+              <div className="mb-4">
+                <div className="mb-2 flex items-center justify-between">
+                  <p className="text-sm font-medium text-muted-foreground">
+                    Showing evidence for run <span className="font-mono text-xs">{selectedRunId}</span>
+                  </p>
+                  <Button variant="ghost" size="sm" onClick={() => setSelectedRunId(null)} aria-label="Clear selected run">
+                    ✕ Clear
+                  </Button>
+                </div>
+                <GateEvidencePanel evidence={selectedRunDetail.evidence} />
+              </div>
+            )}
+
             <div className="mb-4 flex flex-wrap items-center gap-4 text-sm text-muted-foreground">
               <span>{data.by_status?.open ?? 0} open</span>
               <span>{data.by_status?.satisfied ?? 0} satisfied</span>

From 70accfbcee5de4fdad656448e83beea4cfbe042c Mon Sep 17 00:00:00 2001
From: Test User <test@example.com>
Date: Thu, 9 Apr 2026 12:02:21 -0700
Subject: [PATCH 3/3] =?UTF-8?q?docs:=20sync=20documentation=20for=20Phase?=
 =?UTF-8?q?=203.5B=20=E2=80=94=20evidence=20display=20and=20run=20history?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 CLAUDE.md                       | 10 +++---
 docs/PHASE_2_CLI_API_MAPPING.md | 14 ++++++++
 docs/PHASE_3_UI_ARCHITECTURE.md | 57 ++++++++++++++++++++++++++++++++-
 docs/PRODUCT_ROADMAP.md         | 19 +++--------
 4 files changed, 80 insertions(+), 20 deletions(-)

diff --git a/CLAUDE.md b/CLAUDE.md
index 59af38d4..f92bf0b4 100644
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -18,7 +18,7 @@ SHIP:   cf pr create → cf pr merge
 LOOP:   Glitch → cf proof capture → New REQ → Enforced forever
 ```
 
-**Status: CLI ✅ | Server ✅ | ReAct agent ✅ | Web UI ✅ | Agent adapters ✅ | Multi-provider LLM ✅ | Next: Phase 3.5B** — See `docs/PRODUCT_ROADMAP.md`.
+**Status: CLI ✅ | Server ✅ | ReAct agent ✅ | Web UI ✅ | Agent adapters ✅ | Multi-provider LLM ✅ | Next: Phase 3.5C** — See `docs/PRODUCT_ROADMAP.md`.
 
 If you are an agent working in this repo: **do not improvise architecture**. Follow the documents listed below.
 
@@ -34,11 +34,11 @@ If you are an agent working in this repo: **do not improvise architecture**. Fol
 
 **Rule 0:** If a change does not directly support the Think → Build → Prove → Ship pipeline, do not implement it.
 
-### Current Focus: Phase 3.5B
+### Current Focus: Phase 3.5C
 
-**Run quality gates from the web UI** — `POST /api/v2/proof/run` backend is ready; the proof page needs a [Run Gates] button, gate progress view, per-gate evidence display, and run history panel.
+**Phase 3.5B is complete** — `[Run Gates]` button, live gate progress, per-gate evidence display (`GateEvidencePanel`), and run history panel (`RunHistoryPanel`) are all shipped. New backend endpoints: `GET /api/v2/proof/runs` and `GET /api/v2/proof/runs/{run_id}/evidence`.
 
-After that, in order:
+Next, in order:
 - **3.5C**: Glitch capture web UI
 - **4A**: PR status tracking + PROOF9 merge gate
 - **4B**: Post-merge glitch capture loop
@@ -90,7 +90,7 @@ Shipped pages: `/`, `/prd`, `/tasks`, `/execution`, `/execution/[taskId]`, `/blo
 Testing: `cd web-ui && npm test` must pass; `npm run build` must succeed. The `frontend-tests` CI job enforces this on every PR.
 
 ### What's implemented
-Full feature list in `docs/PRODUCT_ROADMAP.md`. Key capabilities: ReAct agent execution, batch execution (serial/parallel/auto), task dependencies, stall detection, self-correction, GitHub PR workflow, SSE streaming, API auth, rate limiting, OpenAPI docs, multi-provider LLM (Anthropic/OpenAI-compatible), agent adapters (ClaudeCode/Codex/OpenCode/Kilocode), worktree isolation, E2B cloud execution, interactive agent sessions (WebSocket chat + XTerm.js terminal), PROOF9 quality system.
+Full feature list in `docs/PRODUCT_ROADMAP.md`. Key capabilities: ReAct agent execution, batch execution (serial/parallel/auto), task dependencies, stall detection, self-correction, GitHub PR workflow, SSE streaming, API auth, rate limiting, OpenAPI docs, multi-provider LLM (Anthropic/OpenAI-compatible), agent adapters (ClaudeCode/Codex/OpenCode/Kilocode), worktree isolation, E2B cloud execution, interactive agent sessions (WebSocket chat + XTerm.js terminal), PROOF9 quality system (gate runs, per-gate evidence, run history).
 
 ---
 
diff --git a/docs/PHASE_2_CLI_API_MAPPING.md b/docs/PHASE_2_CLI_API_MAPPING.md
index 1677dd5f..713654b6 100644
--- a/docs/PHASE_2_CLI_API_MAPPING.md
+++ b/docs/PHASE_2_CLI_API_MAPPING.md
@@ -154,6 +154,20 @@ These support the Golden Path but aren't in the critical path.
 |-------------|-------------|---------------|----------|--------|--------|
 | `cf gates run` | `core.gates` | `run_gate()` | `/api/v2/gates/run` | POST | ⚠️ Missing |
 
+### PROOF9 Commands
+
+| CLI Command | Core Module | Core Function | V2 Route | Method | Status |
+|-------------|-------------|---------------|----------|--------|--------|
+| `cf proof run` | `core.proof.runner` | `run_proof()` | `/api/v2/proof/run` | POST | ✅ Present |
+| `cf proof capture` | `core.proof.capture` | `capture_requirement()` | `/api/v2/proof/requirements` | POST | ✅ Present |
+| `cf proof list` | `core.proof.ledger` | `list_requirements()` | `/api/v2/proof/requirements` | GET | ✅ Present |
+| `cf proof show <id>` | `core.proof.ledger` | `get_requirement()` | `/api/v2/proof/requirements/{req_id}` | GET | ✅ Present |
+| `cf proof waive <id>` | `core.proof.ledger` | `waive_requirement()` | `/api/v2/proof/requirements/{req_id}/waive` | POST | ✅ Present |
+| `cf proof status` | `core.proof.ledger` | `list_requirements()` | `/api/v2/proof/status` | GET | ✅ Present |
+| (evidence for req) | `core.proof.ledger` | `list_evidence()` | `/api/v2/proof/requirements/{req_id}/evidence` | GET | ✅ Present |
+| (run history) | `core.proof.ledger` | `list_runs()` | `/api/v2/proof/runs` | GET | ✅ Present |
+| (run evidence) | `core.proof.ledger` | `get_run_evidence()` | `/api/v2/proof/runs/{run_id}/evidence` | GET | ✅ Present |
+
 ---
 
 ## 3. Gap Summary
diff --git a/docs/PHASE_3_UI_ARCHITECTURE.md b/docs/PHASE_3_UI_ARCHITECTURE.md
index a1fd0ab3..bdae6aa0 100644
--- a/docs/PHASE_3_UI_ARCHITECTURE.md
+++ b/docs/PHASE_3_UI_ARCHITECTURE.md
@@ -333,6 +333,60 @@ ReviewCommitView
 
 ---
 
+### 3.7 PROOF9 View
+**Purpose:** Trigger gate runs, inspect per-gate evidence, and review run history.
+
+**Key Actions:**
+- Run quality gates via `[Run Gates]` button (calls `POST /api/v2/proof/run`)
+- View live gate progress (pending → running → passed/failed) per gate
+- Inspect per-gate evidence artifacts (test output, coverage report, etc.)
+- Browse run history for the last 5 gate runs
+- Waive requirements with a reason and optional expiry
+
+**Data Displayed:**
+- Requirements table with status badges (open / satisfied / waived)
+- Gate run progress (per-gate status, triggered after [Run Gates] click)
+- Evidence artifacts for each gate in a run
+- Run history panel with outcome and duration
+
+**Component Hierarchy:**
+```
+ProofPage (/proof)
+├── ProofHeader
+│   ├── RunGatesButton → POST /api/v2/proof/run
+│   └── ProofStatusSummary (open / satisfied / waived counts)
+├── RequirementsTable
+│   └── RequirementRow[]
+│       ├── StatusBadge
+│       └── WaiveButton
+├── GateEvidencePanel          ← new (Phase 3.5B)
+│   └── GateEvidenceRow[]      (gate name, pass/fail badge, expand toggle)
+│       └── artifact text      (scrollable <pre>, "Show full output" past 200 lines)
+└── RunHistoryPanel            ← new (Phase 3.5B)
+    └── RunSummaryRow[]        (run_id, started_at, duration, overall_passed)
+        └── (click → loads GateEvidencePanel for that run)
+
+ProofRequirementPage (/proof/[req_id])
+├── RequirementDetail
+│   ├── ObligationsList
+│   └── EvidenceHistory
+└── WaiveForm
+```
+
+**API Endpoints Used:**
+- `POST /api/v2/proof/run` — trigger gate run
+- `GET /api/v2/proof/runs` — list run history (limit=5)
+- `GET /api/v2/proof/runs/{run_id}/evidence` — per-gate evidence with artifact text
+- `GET /api/v2/proof/requirements` — list requirements
+- `GET /api/v2/proof/status` — aggregated counts
+
+**Modals:** None
+**Panels:**
+- `GateEvidencePanel` (shown on `/proof` when a run is selected in the history panel, and on `/proof/[req_id]` for the latest run's evidence)
+- `RunHistoryPanel` (bottom panel, always visible on `/proof` page)
+
+---
+
 ## 4. Real-time Patterns
 
 ### SSE Event Handling Strategy
@@ -559,7 +613,7 @@ ReviewCommitView
 
 ## 7. Summary
 
-### The 6 Core Views
+### The 7 Core Views
 
 | View | Purpose | Key Component | Real-time? |
 |------|---------|---------------|------------|
@@ -569,6 +623,7 @@ ReviewCommitView
 | **Execution** | Monitor AI agent work | EventStream | SSE (execution events) |
 | **Blockers** | Answer agent questions | BlockerCard with inline form | Poll on nav |
 | **Review** | Inspect & commit changes | DiffViewer + CommitPanel | Static |
+| **PROOF9** | Run gates, view evidence, run history | GateEvidencePanel + RunHistoryPanel | Poll after run |
 
 ### Design Philosophy
 - **Navigation:** Left sidebar (persistent), URL-driven, auto-navigate on execution start
diff --git a/docs/PRODUCT_ROADMAP.md b/docs/PRODUCT_ROADMAP.md
index 52b32b7b..adc21570 100644
--- a/docs/PRODUCT_ROADMAP.md
+++ b/docs/PRODUCT_ROADMAP.md
@@ -20,7 +20,7 @@ The golden path works end-to-end in the browser for a single developer on a sing
 
 ---
 
-## Phase 3.5 — Close the Interaction Gap ✅ PARTIAL
+## Phase 3.5 — Close the Interaction Gap ✅ PARTIAL (A+B complete, C pending)
 
 **The issue**: The web UI is read-heavy. Users watch agents run, view requirements, inspect diffs. But they cannot run quality gates from the browser or capture a glitch and watch it become a permanent proof obligation.
 
@@ -30,18 +30,9 @@ Fully shipped: `/sessions` page, `/sessions/[id]` detail with `SplitPane`, `Agen
 
 ---
 
-### Milestone B: Run Quality Gates from the Web UI ❌ NOT STARTED
+### Milestone B: Run Quality Gates from the Web UI ✅ COMPLETE (#566, #567, #574, #575)
 
-**Current state**: The PROOF9 page lists requirements and lets users waive them. It does not let users trigger a gate run. The backend endpoint `POST /api/v2/proof/run` exists and is ready. The frontend has zero run-gate UI (verified 2026-04-06).
-
-**What to build**:
-
-- A **[Run Gates]** button on the PROOF9 page (and optionally on the task detail modal) that calls the existing `POST /api/v2/proof/run` endpoint
-- A **gate run progress view** showing each gate as it executes: pending → running → passed / failed
-- Per-gate **evidence display**: show the artifact (test output, coverage report, lighthouse score, etc.) that was produced as evidence
-- A **run history** panel showing the last 5 gate runs with their outcomes
-
-**Why it matters for the vision**: PROOF9 is described as "nine categories of evidence that code must produce." Without the ability to produce that evidence from the UI, the PROVE phase is inspection-only. Gate runs are the core action of the PROVE phase.
+Fully shipped: `[Run Gates]` button on the PROOF9 page, live gate progress view (pending → running → passed/failed), per-gate evidence display (`GateEvidencePanel`), and run history panel (`RunHistoryPanel`) showing the last 5 gate runs. Backend endpoints `GET /api/v2/proof/runs` and `GET /api/v2/proof/runs/{run_id}/evidence` added. Core: `proof_runs` table in `ledger.py`, `ProofRun` dataclass in `models.py`, `runner.py` populates runs on every execution.
 
 ---
 
@@ -209,7 +200,7 @@ These are items that were considered and excluded because they do not serve the
 | Phase | Focus | Status | Issues |
 |---|---|---|---|
 | 3.5A | Bidirectional agent chat | ✅ Complete | #500–509 |
-| 3.5B | Run gates from the web UI | ❌ Not started | — |
+| 3.5B | Run gates from the web UI | ✅ Complete | #566, #567, #574, #575 |
 | 3.5C | Glitch capture UI | ❌ Not started | — |
 | 4A | PR status + PROOF9 merge gate | ❌ Not started | — |
 | 4B | Post-merge glitch capture loop | ❌ Not started | — |
@@ -219,6 +210,6 @@ These are items that were considered and excluded because they do not serve the
 | 5.4 | PRD stress-test web UI | ❌ Not started | #561–562 |
 | 5.5 | GitHub Issues import | ❌ Not started | #563–565 |
 
-**Current focus**: Phase 3.5B — Run quality gates from the web UI (backend ready, frontend missing).
+**Current focus**: Phase 3.5C — Glitch capture web UI.
 
 The ordering within Phase 5 is by onboarding impact. Settings (5.1) and cost (5.2) block new users earliest.