From 88653462c0de48a89ccf1323f41bd72b294aa7d4 Mon Sep 17 00:00:00 2001 From: Peter Bruinsma Date: Fri, 1 May 2026 19:37:33 +0000 Subject: [PATCH 01/50] chore(devcontainer): install Go + aiwf, enable rituals plugins for v3 migration - install Go 1.22.10 via tarball in init.sh (devcontainer Go feature fails on the .NET base image's stale yarn apt source) - install aiwf via go install + branch-tip SHA resolved through git ls-remote (go install rejects slash-named branches like poc/aiwf-v3) - hardcode user home in remoteEnv PATH; ${containerEnv:HOME} doesn't resolve for new entries in this devcontainer setup - pin aiwf-extensions and wf-rituals plugins via .claude/settings.json (Project scope; manual edit because /plugin install confused itself across per-project bookkeeping) - commit auto-generated devcontainer-lock.json for reproducible feature pins Co-Authored-By: Claude Opus 4.7 (1M context) --- .claude/settings.json | 12 ++++++++---- .devcontainer/devcontainer-lock.json | 14 ++++++++++++++ .devcontainer/devcontainer.json | 2 +- .devcontainer/init.sh | 24 ++++++++++++++++++++++++ 4 files changed, 47 insertions(+), 5 deletions(-) create mode 100644 .devcontainer/devcontainer-lock.json diff --git a/.claude/settings.json b/.claude/settings.json index 5e348272..01e301be 100644 --- a/.claude/settings.json +++ b/.claude/settings.json @@ -1,8 +1,4 @@ { - "statusLine": { - "type": "command", - "command": "bash .claude/statusline.sh" - }, "hooks": { "SessionStart": [ { @@ -19,5 +15,13 @@ ] } ] + }, + "statusLine": { + "type": "command", + "command": "bash .claude/statusline.sh" + }, + "enabledPlugins": { + "aiwf-extensions@ai-workflow-rituals": true, + "wf-rituals@ai-workflow-rituals": true } } diff --git a/.devcontainer/devcontainer-lock.json b/.devcontainer/devcontainer-lock.json new file mode 100644 index 00000000..0227e8ee --- /dev/null +++ b/.devcontainer/devcontainer-lock.json @@ -0,0 +1,14 @@ +{ + "features": { + "ghcr.io/devcontainers/features/github-cli:1": { + 
"version": "1.1.0", + "resolved": "ghcr.io/devcontainers/features/github-cli@sha256:d22f50b70ed75339b4eed1ba9ecde3a1791f90e88d37936517e3bace0bbad671", + "integrity": "sha256:d22f50b70ed75339b4eed1ba9ecde3a1791f90e88d37936517e3bace0bbad671" + }, + "ghcr.io/devcontainers/features/node:1": { + "version": "1.7.1", + "resolved": "ghcr.io/devcontainers/features/node@sha256:8c0de46939b61958041700ee89e3493f3b2e4131a06dc46b4d9423427d06e5f6", + "integrity": "sha256:8c0de46939b61958041700ee89e3493f3b2e4131a06dc46b4d9423427d06e5f6" + } + } +} \ No newline at end of file diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json index ef320808..05eaa041 100644 --- a/.devcontainer/devcontainer.json +++ b/.devcontainer/devcontainer.json @@ -92,6 +92,6 @@ }, "remoteEnv": { "NODE_OPTIONS": "--dns-result-order=ipv4first", - "PATH": "${containerEnv:HOME}/.cargo/bin:${containerEnv:HOME}/.dotnet/tools:${containerEnv:HOME}/.local/bin:${containerEnv:PATH}" + "PATH": "/home/vscode/.cargo/bin:/home/vscode/.dotnet/tools:/home/vscode/.local/bin:/home/vscode/go/bin:/usr/local/go/bin:${containerEnv:PATH}" } } diff --git a/.devcontainer/init.sh b/.devcontainer/init.sh index 187e85bc..27c5cbf1 100755 --- a/.devcontainer/init.sh +++ b/.devcontainer/init.sh @@ -37,6 +37,30 @@ if ! command -v roslynator >/dev/null 2>&1; then export PATH="$HOME/.dotnet/tools:$PATH" fi +# Install Go (avoids the devcontainer Go feature, which fails on the .NET base +# image's stale yarn apt source — NO_PUBKEY 62D54FD4003F6525) +if ! command -v go >/dev/null 2>&1; then + echo "Installing Go..." + GO_VERSION=1.22.10 + curl -fsSL "https://go.dev/dl/go${GO_VERSION}.linux-amd64.tar.gz" \ + | sudo tar -C /usr/local -xz + export PATH="/usr/local/go/bin:$PATH" +fi + +# Install aiwf (AI workflow framework v3, branch-tip pin during PoC). +# `go install` rejects branch names containing slashes, so resolve the branch +# tip to a commit SHA via git ls-remote and install that. +if ! 
command -v aiwf >/dev/null 2>&1; then + echo "Installing aiwf..." + export PATH="$HOME/go/bin:/usr/local/go/bin:$PATH" + aiwf_sha=$(git ls-remote https://github.com/23min/ai-workflow-v2.git refs/heads/poc/aiwf-v3 | awk '{print $1}') + if [ -z "$aiwf_sha" ]; then + echo "Failed to resolve aiwf branch tip" >&2 + exit 1 + fi + go install "github.com/23min/ai-workflow-v2/tools/cmd/aiwf@${aiwf_sha}" +fi + echo "Restoring solution..." dotnet restore >/dev/null From 2d08da8012be34cff69bb9191e4d5cc106114d3d Mon Sep 17 00:00:00 2001 From: Peter Bruinsma Date: Fri, 1 May 2026 19:37:44 +0000 Subject: [PATCH 02/50] =?UTF-8?q?docs(migration):=20aiwf=20v3=20migration?= =?UTF-8?q?=20plan=20with=20Phase=200=E2=80=931=20record?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - living plan for re-platforming from v1 (.ai submodule + sync.sh + generated adapters) to v3 (aiwf Go binary + ai-workflow-rituals plugin marketplace) - Phase 0 closed: 6 open questions settled (history scope, ID stability, ADR path, hook timing, plugin scope, devcontainer Go install) - Phase 1 closed: aiwf installed, doctor self-check 22/22 green, rituals plugins enabled, skill audit done from cache (3 acceptable gaps, no blockers) - migration log appended; archives to work/migration/completed/ on cutover - Q4 (hook timing) closed by upstream change to aiwf core: self-guarding pre-push hook landed on poc/aiwf-v3 (commit ≤53393ed) plus an Options.SkipHook flag for husky/lefthook composition Co-Authored-By: Claude Opus 4.7 (1M context) --- work/migration/aiwf-v3-plan.md | 294 +++++++++++++++++++++++++++++++++ 1 file changed, 294 insertions(+) create mode 100644 work/migration/aiwf-v3-plan.md diff --git a/work/migration/aiwf-v3-plan.md b/work/migration/aiwf-v3-plan.md new file mode 100644 index 00000000..32a40f58 --- /dev/null +++ b/work/migration/aiwf-v3-plan.md @@ -0,0 +1,294 @@ +# aiwf v3 migration plan + +Living plan for re-platforming this repo from the v1 AI 
framework (`.ai/` submodule + `bash sync.sh` + generated `.claude/`/`.github/` adapters) to v3 (`aiwf` Go binary + `ai-workflow-rituals` Claude Code plugin marketplace). + +Drafted **2026-05-01** on branch `migration/aiwf-v3`. Source-of-truth for migration sequencing while v1 is being torn down. Append findings as we work; finalize and archive to `work/migration/completed/` once main has cut over. + +--- + +## Why this is a re-platform, not an upgrade + +v1 ships rituals, validators, agents, skills as a single bundle synced via bash. v3 splits the layer: + +- **`aiwf` core** — single Go binary (`go install github.com/23min/ai-workflow-v2/tools/cmd/aiwf@poc/aiwf-v3`). Owns 6 entity kinds (epic, milestone, ADR, gap, decision, contract), validators, pre-push hook, structured commit trailers, `aiwf history` from `git log`, `aiwf import` for bulk creation. Ships 6 embedded `aiwf-*` skills materialized to `.claude/skills/aiwf-*/` (gitignored cache). +- **`ai-workflow-rituals` plugin marketplace** — companion repo, two Claude Code plugins: + - `aiwf-extensions` — milestone-lifecycle skills (`aiwfx-*`) + 4 role agents (planner/builder/reviewer/deployer) + templates. Coupled to aiwf vocabulary. + - `wf-rituals` — generic engineering rituals (`wf-patch`, `wf-tdd-cycle`, `wf-review-code`, `wf-doc-lint`). Aiwf-agnostic by discipline. +- **Repo-private skills** — live directly in `.claude/skills//SKILL.md`, committed normally. No prefix. v1's `.ai-repo/skills/` workaround disappears. + +Conceptual identity (epics in `work/epics/E-NN/`, ADRs in `docs/`, commits track lifecycle) is preserved. Mechanism is completely different. 
+ +--- + +## Decision log (settled during planning) + +| # | Decision | Settled | +|---|---|---| +| 1 | Re-platform via v3, not stay on v1 | yes | +| 2 | Repo-private skills land at `.claude/skills//` (committed); no `.ai-repo/skills/` carry-forward | yes | +| 3 | `dead-code-audit`, `devcontainer`, `ui-debug` ported as repo-private skills at migration time | yes | +| 4 | `verify-contracts` ported when first aiwf `contract` entity lands (not at migration); `design-contract` deferred until schema-evolution work begins | yes | +| 5 | `quality-score`, `doc-garden` deferred until real friction (likely never) | yes | +| 6 | Zero contract entities migrated — repo has none today | yes | +| 7 | Migration runs on long-lived `migration/aiwf-v3` branch off `main`; pre-push hook deferred until cutover so `main` keeps pushing | yes | +| 8 | Plan lives at `work/migration/aiwf-v3-plan.md` (not an epic); archive on completion | yes | + +--- + +## Open questions (must settle before Phase 2 projector code lands) + +1. **History scope.** ✅ **Settled — hybrid: project everything with an E-NN id, best-effort on completed.** + - **Active:** 6 epics in `work/epics/E-*` (E-11, E-13, E-14, E-15, E-18, E-22) — full projection, all milestones, frontmatter reconstructed properly. + - **Completed (id'd):** 9 epics in `work/epics/completed/E-*` (E-10, E-12, E-16, E-17, E-19, E-20, E-21, E-23, E-24) — best-effort projection. Missing/ambiguous fields get sensible defaults; failures logged in `skip-log.md`, don't block import. + - **Completed (no id):** 16 dirs without E-NN prefix (`ai/`, `core-foundations/`, `time-travel/`, etc.) — **excluded** and **relocated** to `work/archived-epics/` during Phase 3 pre-processing. Pre-date the E-NN convention; aiwf doesn't model them. Kept on disk for grep/git-blame; out of aiwf's walked roots so `aiwf check` ignores them. 
+ - **Unplanned:** `work/epics/unplanned/` — only project subdirs that have an E-NN prefix (none currently — all 13 are slug-only, so all excluded). + - **Best-effort discipline.** Projector flags rather than fails on: missing status, missing parent on milestone, milestone IDs that don't fit `m-EXX-NN-...` shape, dangling depends-on refs. Each becomes a `skip-log.md` entry; we triage in Phase 4 dry-run loop and decide per-finding whether to fix the source or accept the projection. + +2. **ID stability.** ✅ **Settled — best-effort per-epic preprocessing + chronological renumber for D/G.** + + **Strategy.** Pin epic ids (already match v3); produce explicit, human-meaningful ids for milestones, decisions, and gaps via per-kind deterministic rules; rewrite body cross-references via id-map.csv (committed permanently to `work/migration/id-map.csv`). + + **Per-epic preprocessing config.** Each id'd epic gets a small per-epic config block in the projector. Most are 3-line generic ("epic spec at `spec.md`, milestones match `m-E{NN}-NN-.md`"); 3 outliers need custom rules. + + | Epic | Epic spec | Milestone shape | Notes | + |---|---|---|---| + | E-13, E-14, E-15, E-22 | `spec.md` | n/a (no milestones yet) or `m-E{NN}-NN-...` | generic | + | E-16, E-17, E-19, E-20, E-21, E-23, E-24, E-18 | `spec.md` | `m-E{NN}-NN-.md` | generic; tracking/log files siblings | + | **E-10** | check; uses `m-ec-pN-.md` | custom milestone-id rule (`m-ec-pN-...`) | special | + | **E-11** | no `spec.md`; loose layout | milestones use `m-svui-NN-...` | special; pick representative file or synthesize body | + | **E-12** | `spec.md` (verify) | `M-10.NN-.md` (capitalized + dotted) | special | + + **Milestone ids.** Use `id: auto` in the manifest; aiwf allocates `M-001..M-NNN` in deterministic manifest order (epic ascending, then milestone ascending within each epic). Old `m-E18-13-session-evaluator` → new `M-053` (or whatever falls out). Mapping captured in `id-map.csv`. 
Body cross-references rewritten via the map. + + **Decision ids.** 54 entries in `work/decisions.md`, all explicit `D-YYYY-MM-DD-NNN:` headings. Sort tuple `(date, NNN)` → assign `D-001..D-054` chronologically. Body references in specs/tracking-docs/`CLAUDE.md`/memory get rewritten via id-map. + + **Gap ids.** 157 entries in `work/gaps.md`, only 8 with explicit dates (and those are *resolution* dates). Use **`git blame`** on each H2 line to recover the entry's creation date (the commit that introduced it). Sort by (blame-date, blame-commit-time) → assign `G-001..G-157`. Verified by sampling: blame produces clean per-entry dates ranging 2026-03-24..2026-04-28; doc order is unreliable (recent entries inserted at top). + + **Body rewriting.** Projector substitutes old → new ids in body text. Identifier classes to handle: `m-E\d+-\d+-[a-z-]+`, `M-\d+\.\d+-[a-z-]+` (E-12 dotted), `m-ec-p\d-[a-z-]+` (E-10), `m-svui-\d+-[a-z-]+` (E-11), `D-\d{4}-\d{2}-\d{2}-\d+` (decisions), `ADR-m-E\d+-\d+-\d+` (a few epic-local ADRs noted in catalog). Plus prose like "the session-evaluator milestone" — left as-is; aiwf doesn't parse prose. + + **Source untouched until Phase 5.** Projector reads `work/` read-only; the rewrites land only in the manifest body strings. Source is rewritten en-masse only at cutover (Phase 5) when v1 layout teardown happens anyway. + +3. **ADR path.** ✅ **Settled — adopt v3 default `docs/adr/`.** + - First ADR lands at `docs/adr/ADR-0001-.md`. No `aiwf.yaml` override. + - Empty `docs/decisions/` directory deletes in Phase 5 v1 teardown (one `git rm -r`). + - Going forward, `aiwfx-record-decision` skill picks ADR vs. D-NNN in-flow per the kind's conventions; path is then mechanical. + +4. 
**Pre-push hook timing.** ✅ **Settled — gap closed upstream; install whenever convenient.** + - Upstream landed a self-guarding hook on `poc/aiwf-v3` (commit on or before 53393ed): the embedded pre-push script's first content line is `[ -f "$(git rev-parse --show-toplevel)/aiwf.yaml" ] || exit 0`, plus a bonus `Options.SkipHook` flag for husky/lefthook composition. Pushes from any branch lacking `aiwf.yaml` silently no-op. + - **Implication for our sequence.** We can run `aiwf init` as early as Phase 1 (right after `go install`) on the migration branch without breaking `main` pushes. The hook simply doesn't fire on `main` until `aiwf.yaml` arrives via merge. + - **Practical choice.** Run `aiwf init` near the end of Phase 4 anyway — after the import lands and the tree validates — so the first time the hook fires it's against a known-clean projection. Earlier-install gives no extra signal since `aiwf check` is also available as a CLI we can run manually during dry-run iteration. + +5. **`.claude/settings.json` plugin pin scope.** ✅ **Settled — Project scope.** + - `/plugin install aiwf-extensions@ai-workflow-rituals` and `/plugin install wf-rituals@ai-workflow-rituals` both pin into the committed `.claude/settings.json`. Migration commit includes the diff; collaborators get the same plugins after a one-time `/plugin marketplace add` on first clone. + - Aligns with existing usage of `.claude/settings.json` (statusline + team-wide hooks). + - **Side-note flagged for Phase 5.** Existing SessionStart hooks in `.claude/settings.json` reference v1 paths (`.ai/tools/scratch-audit.sh`, `.ai-repo/bin/wf-graph`) — these break at v1 teardown and need to be removed or replaced. Added to the Phase 5 checklist below. + +6. 
**Devcontainer Go install.** ✅ **Settled — devcontainer feature + `postCreateCommand`, branch-tip pin.** + - `.devcontainer/devcontainer.json` gets: + ```jsonc + "features": { + "ghcr.io/devcontainers/features/go:1": { "version": "1.22" } + }, + "postCreateCommand": "go install github.com/23min/ai-workflow-v2/tools/cmd/aiwf@poc/aiwf-v3" + ``` + - Branch-tip pin (`@poc/aiwf-v3`) is deliberate during the migration window: PoC is actively iterating, and we want upstream fixes (like the Q4 brownfield hook fix) automatically on rebuild. + - When v3 stabilizes (PoC → tagged release), bump to a fixed version in one line. + - **One-time pain.** Existing devs need to rebuild the container to pick up Go + aiwf. Acceptable; happens at start of Phase 1. + +--- + +## Skills inventory and disposition + +### Upstream (no porting needed) + +| Our v1 skill | v3 destination | Notes | +|---|---|---| +| `wf-plan-epic` | `aiwfx-plan-epic` | shipped in `aiwf-extensions` plugin | +| `wf-plan-milestones` | `aiwfx-plan-milestones` | shipped | +| `wf-start-milestone` | `aiwfx-start-milestone` | shipped | +| `wf-wrap-milestone` | `aiwfx-wrap-milestone` | shipped | +| `wf-wrap-epic` | `aiwfx-wrap-epic` | shipped | +| `wf-release` | `aiwfx-release` | shipped | +| `wf-architect` | `aiwfx-record-decision` | shipped, replaces v1 architect | +| `wf-draft-spec` | folded into `aiwfx-plan-milestones` | confirm during Phase 1 audit | +| `wf-patch` | `wf-patch` (wf-rituals) | shipped | +| `wf-tdd-cycle` | `wf-tdd-cycle` (wf-rituals) | shipped | +| `wf-review-code` | `wf-review-code` (wf-rituals) | shipped | +| `wf-doc-lint` | `wf-doc-lint` (wf-rituals) | shipped | + +### Repo-private at migration time + +Port from `.ai/skills/` (or `.ai-repo/skills/`) into `.claude/skills//SKILL.md`, committed. 
+ +| Skill | Source | Lands at | +|---|---|---| +| `dead-code-audit` | `.ai/skills/dead-code-audit.md` | `.claude/skills/dead-code-audit/SKILL.md` + `recipes/dead-code-{dotnet,rust,typescript}.md` | +| `devcontainer` | `.ai-repo/skills/devcontainer.md` | `.claude/skills/devcontainer/SKILL.md` | +| `ui-debug` | `.ai-repo/skills/ui-debug.md` | `.claude/skills/ui-debug/SKILL.md` | + +### Repo-private later + +| Skill | When | +|---|---| +| `verify-contracts` | when first aiwf contract entity is registered (post-migration) | +| `design-contract` | when schema-evolution work begins (post-migration) | +| `quality-score`, `doc-garden` | only if real friction shows up | + +### Dropped entirely + +| v1 skill | Replaced by | +|---|---| +| `wf-workflow-graph` | aiwf core (validators + history) | +| `wf-workflow-audit` | `aiwf check` + `aiwf doctor` | +| `wf-update-framework` | `aiwf update` + `/plugin update` | +| `wf-devcontainer` | repo-private `devcontainer` already covers this | + +### Agents + +All four (`planner`, `builder`, `reviewer`, `deployer`) ship in `aiwf-extensions` plugin. Local `.claude/agents/*.md` files delete at cutover. 
+ +--- + +## Branching strategy + +``` +main ────────────────────────────────────────► + │ + └── migration/aiwf-v3 ────► + │ + └── feature branches if useful + (projector, gaps-split, decisions-split) +``` + +Side-effect scopes: + +| Side effect | Scope | Handling | +|---|---|---| +| `go install` | global to machine (`~/go/bin/aiwf`) | install once via devcontainer feature | +| `/plugin install` rituals plugin | global to Claude Code user (cache) + branch-scoped `.claude/settings.json` | settings.json normal git; cache is harmless | +| `aiwf init` writes `aiwf.yaml`, `.gitignore` | working tree — branch-scoped | run on migration branch only | +| `aiwf init` writes `.git/hooks/pre-push` | **NOT branch-scoped** — `.git/hooks/` applies to every branch in this clone | **defer until late Phase 4** so `main` keeps pushing | +| Plugin marketplace add | per-machine Claude Code config | one-time, harmless | + +The pre-push hook is the only real footgun. We use the binary's read-only verbs (`aiwf check`, `aiwf import --dry-run`) without ever running `aiwf init`, until we're ready to lock in. 
+ +--- + +## Phased checklist + +Status legend: `[ ]` not started · `[~]` in progress · `[x]` done · `[-]` skipped + +### Phase 0 — planning and inventory + +- [x] Read v3 docs (`README`, `poc-design-decisions`, `poc-import-format`, `poc-migrating-from-prior-systems`, `rituals-plugin-plan`, `design-lessons`) +- [x] Inventory v1 skills × v3 destinations (above) +- [x] Confirm zero existing aiwf contract entities to migrate +- [x] Decide repo-private skill list and timing (`dead-code-audit` + `devcontainer` + `ui-debug` at migration; contracts later) +- [x] Branching strategy settled +- [x] Plan doc drafted (this file) +- [x] Settle 6 open questions above (Q1–Q6 ✅ 2026-05-01) + +### Phase 1 — sandbox install + +- [x] Add Go to devcontainer (feature `ghcr.io/devcontainers/features/go:1`); rebuild container — Go 1.22.10 in +- [x] Verify `go version` and `~/go/bin` on `$PATH` — Go OK; PATH needed a fix (hardcode `/home/vscode/...` instead of `${containerEnv:HOME}` which didn't resolve) +- [x] Install aiwf — landed via `init.sh` (resolves branch tip via `git ls-remote` then `go install @`, since `go install @poc/aiwf-v3` rejects slash-named branches) +- [x] Verify `aiwf --version` (`dev`) and `aiwf doctor --self-check` — **22 steps green** +- [x] `aiwf doctor` against current repo — confirms brownfield state cleanly: no `aiwf.yaml`, 8 skills not yet materialized, hook + pre-commit missing, plugin not detected. All expected. 
+- [x] `/plugin marketplace add 23min/ai-workflow-rituals` (User scope; cache populated at `~/.claude/plugins/cache/ai-workflow-rituals/`) +- [x] `/plugin install aiwf-extensions@ai-workflow-rituals` — landed via manual `enabledPlugins` edit in `.claude/settings.json` (Path C; `/plugin install` failed due to plugin manager state-confusion across projects, see migration log) +- [x] `/plugin install wf-rituals@ai-workflow-rituals` — same mechanism, same edit +- [x] Confirmed via `aiwf doctor` — reports "rituals plugin detected (aiwf-extensions in .claude/settings)" +- [x] Audit each shipped `aiwfx-*` and `wf-*` skill body for coverage gaps — done from cache, see findings below +- [ ] **Do NOT run `aiwf init` yet** — defer to end of Phase 4 against a clean tree + +**Skill-audit findings (read from plugin cache, before install):** + +Shipped surface verified present and feature-complete: +- `aiwf-extensions`: 8 skills (`aiwfx-{plan-epic, plan-milestones, start-milestone, wrap-milestone, wrap-epic, release, record-decision, track}`), 4 agents (`builder, planner, reviewer, deployer`), 5 templates (`epic-spec, milestone-spec, tracking-doc, adr, decision`). +- `wf-rituals`: 4 skills (`wf-{patch, tdd-cycle, review-code, doc-lint}`). +- Plugin commit pinned: `e556ec9215c5` (cache subdir). + +Coverage gaps and mitigations: + +| Gap | Detail | Mitigation | +|---|---|---| +| **`aiwfx-wrap-milestone` does not chain `dead-code-audit`** | Shipped wrap-milestone has step 3 doc-lint but no extension hook for additional audits. Our v1 wrap-milestone invoked `wf-dead-code-audit` as a non-blocking step. | Document in `CLAUDE.md` that wrap also invokes the repo-private `dead-code-audit` skill (one-line addition to builder/reviewer agent guidance). Acceptable. | +| **`wf-doc-lint` is a minimal port** | Drops v1's `metrics.json` / `docs/log.md` / `docs/index.md` primitives, mode flag (scoped vs full), uncovered-contract-surface check, badges. 
Keeps the 4 mechanical checks (code-ref drift, removed-feature docs, orphan docs, doc TODOs). | Acceptable — we don't actively use the dropped primitives in any wired flow; `contractSurfaces` is unconfigured. | +| **Tracking-doc path convention** | v3 default `work/tracking/M-NNN-.md` (centralized). Our v1 convention is epic-local `work/epics/.../-tracking.md`. The shipped `aiwfx-track` skill explicitly allows project-override. | Phase 5 decision (record at projector design): keep epic-local layout, override the framework default. Lower churn; matches our existing artifact-layout. | +| **No shipped equivalents for** `workflow-audit`, `workflow-graph`, `update-framework`, `verify-contracts`, `design-contract`, `quality-score`, `doc-garden`, `dead-code-audit` | All accounted for in plan: first three replaced by aiwf core (`check`/`history`/`update` + `/plugin update`); last five are repo-private (port now: `dead-code-audit`; later: `verify-contracts`; deferred: rest). | No new gap; matches Q1-Q6 settled state. | + +**No blocking gaps.** Plugins ship feature-complete relative to our actual flow. Repo-private port list is unchanged. + +**Phase 1 findings recorded:** +- aiwf core ships 8 embedded skills now (was 6 in earlier docs): aiwf-{add, check, contract, history, promote, reallocate, rename, status}. +- New since earlier docs: a pre-commit hook that auto-regenerates `STATUS.md` (installed by `aiwf update` not `aiwf init`; toggleable via `status_md.auto_update` config). +- Filesystem case-insensitive at `/workspaces/flowtime-vnext` (devcontainer bind mount over macOS host APFS). v3's `case-paths` validator may surface findings during Phase 4 dry-run; not a blocker, but track. + +### Phase 2 — projector + +- [ ] Decide projector home (e.g. 
`tools/migration/aiwf-v3-projector/` — temporary; deletes at end of Phase 5) +- [ ] Implement reading: `work/epics/`, `work/decisions.md`, `work/gaps.md`, `CLAUDE.md` Current Work +- [ ] Implement ID remap: epics keep ids; milestones get `auto`; produce `id-map.csv` side-output +- [ ] Implement body rewriting: substitute old `m-E18-13-...` references with new `M-NNN` per the id-map +- [ ] Implement frontmatter synthesis from prose `**ID:**` / `**Status:**` markers +- [ ] Emit `manifest.yaml` per kind (epics, milestones, decisions, gaps) — five files or one combined +- [ ] Emit `skip-log.md` listing what didn't migrate (completed-epic detail if archived; agent-history; etc.) +- [ ] Spot-check projector output on one epic (E-22) before running across the whole tree + +### Phase 3 — pre-process source + +- [ ] Branch off migration/aiwf-v3 if useful (`migration/aiwf-v3-preprocess`) +- [ ] Fill missing required fields in active epic specs (status:, parent:, etc.) so projector reads them mechanically +- [ ] Resolve any source ambiguities flagged by projector +- [ ] Decide and apply: archive `work/epics/completed/` to `archive-prior-planning/` (lean: yes, per question 1) or leave in place if projecting full history + +### Phase 4 — dry-run loop + +- [ ] `aiwf import --dry-run manifest.yaml` from a clean state +- [ ] Iterate: each finding either fixes the projector (Phase 2) or the source (Phase 3); halve findings each pass +- [ ] When dry-run is clean, run `aiwf import manifest.yaml` for real → atomic commit on migration branch +- [ ] Run `aiwf init` (installs pre-push hook + materializes 6 `aiwf-*` skills) — separate commit +- [ ] Test push from migration branch; pre-push hook should pass since import succeeded +- [ ] Spot-check: `aiwf history E-22`, `aiwf check`, `aiwf render roadmap` + +### Phase 5 — teardown and cutover + +- [ ] Port repo-private skills (`dead-code-audit`, `devcontainer`, `ui-debug`) to `.claude/skills//SKILL.md`; commit +- [ ] Move 
`.ai-repo/recipes/dead-code-*.md` under `.claude/skills/dead-code-audit/recipes/` +- [ ] Fold `.ai-repo/rules/project.md` content into `CLAUDE.md` +- [ ] Delete `.ai/` submodule (`.gitmodules` edit + `git rm`) +- [ ] Delete `.ai-repo/` (entire directory) +- [ ] Delete generated `.github/skills/` adapter files (if any) +- [ ] Delete generated `.claude/skills/wf-*/` (v1 generated copies; replaced by plugin install) +- [ ] Delete `.claude/agents/*.md` (replaced by `aiwf-extensions` plugin agents) +- [ ] Update `CLAUDE.md`: replace v1 framework references with v3; rewrite "Resolved Artifact Layout" section against `aiwf.yaml`; rewrite "Agent Routing" against plugin agents; remove "Framework Sources" v1 table +- [ ] Update `.claude/settings.json`: remove or replace the v1-path SessionStart hooks (`.ai/tools/scratch-audit.sh`, `.ai-repo/bin/wf-graph`) — either delete the hook entries entirely or substitute v3 equivalents (e.g. `aiwf doctor --quiet` on session start) if useful +- [ ] Update `CLAUDE.md` Current Work to reference new milestone ids +- [ ] Update memory files: `feedback_audit_mute_archived.md` rewrites to target `aiwf check` rather than `wf-workflow-audit` +- [ ] Run `aiwf doctor`, `aiwf check`, `aiwf doctor --self-check` — all green +- [ ] Open PR `migration/aiwf-v3` → `main`, request review, merge + +### Phase 6 — post-merge + +- [ ] Archive this plan to `work/migration/completed/aiwf-v3-plan.md` +- [ ] First real `aiwf` work: pick first contract entity (likely `model.schema.yaml` → `C-001`); port `verify-contracts` skill at that point + +--- + +## Migration log + +Append-only record of dry-run iterations, decisions taken mid-flight, and findings. Format: `YYYY-MM-DD — phase — note`. + +- 2026-05-01 — phase 0 — branch created, plan drafted, open questions captured. +- 2026-05-01 — phase 0 — Q1 settled: hybrid history scope. Project all E-NN-prefixed epics (6 active + 9 completed-id'd = 15 epics). 
Best-effort on completed; non-id'd dirs (16) relocate to `work/archived-epics/`. +- 2026-05-01 — phase 0 — Q2 settled: per-epic preprocessing + chronological renumber. Epic ids verbatim. Milestones `auto`-allocated under deterministic order. Decisions sorted by (date, seq) → D-001..D-054. Gaps sorted by git-blame date → G-001..G-157. id-map.csv captures all mappings. Outlier epics (E-10, E-11, E-12) get custom per-epic projector rules. +- 2026-05-01 — phase 0 — Q3 settled: adopt v3 default `docs/adr/`. No config override. Empty `docs/decisions/` deletes in Phase 5 teardown. +- 2026-05-01 — phase 0 — Q4 settled: brownfield-migration gap closed upstream (self-guarding pre-push hook + `Options.SkipHook` flag landed on poc/aiwf-v3 commit ≤53393ed). `aiwf init` is safe to run on the migration branch any time without breaking `main` pushes. Practical choice: install at end of Phase 4 against a known-clean tree. +- 2026-05-01 — phase 0 — Q5 settled: Project scope for plugin pins — committed to `.claude/settings.json`. SessionStart hooks pointing at v1 paths flagged for Phase 5 cleanup (remove or replace). +- 2026-05-01 — phase 0 — Q6 settled: devcontainer feature for Go + `postCreateCommand` for aiwf. Branch-tip pin during migration window; bump to tagged release once PoC stabilizes. +- 2026-05-01 — phase 0 — **all 6 open questions settled.** Plan ready for Phase 1. +- 2026-05-01 — phase 1 — devcontainer rebuilt; Go 1.22.10 + aiwf installed (init.sh resolves branch tip via `git ls-remote` to a SHA, since `go install @poc/aiwf-v3` rejects slash-named branches). PATH hardcoded to `/home/vscode/...` (containerEnv:HOME doesn't resolve in this devcontainer setup). `aiwf doctor --self-check` 22/22 green. `aiwf doctor` against current repo confirms brownfield state cleanly. Filesystem flagged case-insensitive — track for Phase 4 dry-run findings. 
+- 2026-05-01 — phase 1 — marketplace `23min/ai-workflow-rituals` registered (User scope; cache populated under `~/.claude/plugins/cache/`). Plugin install (Project scope) awaiting user action. +- 2026-05-01 — phase 1 — skill-audit done from plugin cache: 8 aiwfx-* skills + 4 wf-* skills + 4 agents + 5 templates verified present. Three real gaps identified, all with acceptable mitigations: (1) `aiwfx-wrap-milestone` doesn't chain `dead-code-audit` — document in CLAUDE.md; (2) `wf-doc-lint` is minimal port — acceptable since we don't use dropped primitives; (3) tracking-doc path convention shifted to centralized `work/tracking/` — keep our epic-local layout via project override. No blocking gaps. +- 2026-05-01 — phase 1 — `/plugin install` failed with "Source path does not exist" despite source being present; root cause was plugin-manager bookkeeping confusion (plugins were already installed for `/Users/peterbru/Projects/proliminal.net` and Claude Code wouldn't add a second per-project install record). Recovered via manual edit of `.claude/settings.json` — added `enabledPlugins: { "aiwf-extensions@ai-workflow-rituals": true, "wf-rituals@ai-workflow-rituals": true }`. `aiwf doctor` now confirms "rituals plugin detected." 
**Phase 1 closed.** From 409e682e75c76f01be7b7f9eb2fdb822447e2fec Mon Sep 17 00:00:00 2001 From: Peter Bruinsma Date: Fri, 1 May 2026 19:59:28 +0000 Subject: [PATCH 03/50] =?UTF-8?q?chore(migration):=20Pass=20A=20=E2=80=94?= =?UTF-8?q?=20E-22=20projector=20spike=20with=20green=20aiwf=20dry-run?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - project_e22.py: uv PEP-723 script-mode projector (ruamel.yaml, LiteralScalarString for body); reads E-22 spec, synthesizes frontmatter from prose **ID:** / **Status:**, strips H1+ID+Status from body, emits single-entity aiwf import manifest - e22-spike.yaml: generated single-entity manifest (regenerable) - aiwf import --dry-run reports zero error findings, exit 0 - finding logged: aiwf derives destination dir slug from title, not source dir name; settle slug-preservation decision before Pass B mass projection Phase 2 Pass A complete. Plan updated with passes A–G, status-mapping table, authoritative aiwf status sets per kind. Co-Authored-By: Claude Opus 4.7 (1M context) --- work/migration/aiwf-v3-plan.md | 72 +++++++++++-- work/migration/manifests/e22-spike.yaml | 127 +++++++++++++++++++++++ work/migration/scripts/project_e22.py | 130 ++++++++++++++++++++++++ 3 files changed, 321 insertions(+), 8 deletions(-) create mode 100644 work/migration/manifests/e22-spike.yaml create mode 100644 work/migration/scripts/project_e22.py diff --git a/work/migration/aiwf-v3-plan.md b/work/migration/aiwf-v3-plan.md index 32a40f58..5355a018 100644 --- a/work/migration/aiwf-v3-plan.md +++ b/work/migration/aiwf-v3-plan.md @@ -227,14 +227,63 @@ Coverage gaps and mitigations: ### Phase 2 — projector -- [ ] Decide projector home (e.g. 
`tools/migration/aiwf-v3-projector/` — temporary; deletes at end of Phase 5) -- [ ] Implement reading: `work/epics/`, `work/decisions.md`, `work/gaps.md`, `CLAUDE.md` Current Work -- [ ] Implement ID remap: epics keep ids; milestones get `auto`; produce `id-map.csv` side-output -- [ ] Implement body rewriting: substitute old `m-E18-13-...` references with new `M-NNN` per the id-map -- [ ] Implement frontmatter synthesis from prose `**ID:**` / `**Status:**` markers -- [ ] Emit `manifest.yaml` per kind (epics, milestones, decisions, gaps) — five files or one combined -- [ ] Emit `skip-log.md` listing what didn't migrate (completed-epic detail if archived; agent-history; etc.) -- [ ] Spot-check projector output on one epic (E-22) before running across the whole tree +**Design (Phase 2 Q&A settled 2026-05-01):** +- Home: `work/migration/scripts/` +- Manifests output: `work/migration/manifests/` (separate from scripts; data ≠ code) +- Language: Python 3 + `uv` script mode (PEP 723 inline deps; `uv run script.py`); no `pyproject.toml` +- Incremental scope: E-22 first, then extend in successive passes; decisions + gaps last + +**Authoritative aiwf status sets (from `aiwf schema`):** + +| Kind | Statuses | Required fields | Notes | +|---|---|---|---| +| epic | `proposed, active, done, cancelled` | id, title, status | no parent | +| milestone | `draft, in_progress, done, cancelled` | id, title, status, parent | parent → epic | +| adr | `proposed, accepted, superseded, rejected` | id, title, status | optional supersedes/superseded_by | +| gap | `open, addressed, wontfix` | id, title, status | optional discovered_in/addressed_by | +| decision | `proposed, accepted, superseded, rejected` | id, title, status | optional relates_to | +| contract | `proposed, accepted, deprecated, retired, rejected` | id, title, status | optional linked_adrs | + +**v1 → v3 status-mapping table (filled as passes encounter source statuses):** + +| v1 source status | v1 kind | v3 status | Settled | 
+|---|---|---|---|
+| `planning` | epic | `proposed` | ✅ Pass A (E-22 spec) |
+| `in-progress` | epic / milestone | (epic) `active` / (milestone) `in_progress` | TBD when encountered |
+| `complete` / `completed` | epic / milestone | `done` | TBD when encountered |
+| `pending` | milestone | `draft` | TBD |
+| `paused` | epic / milestone | TBD — possibly `proposed` (no aiwf "paused"); flag as projector finding | TBD |
+| `active` | decision | `accepted` | TBD |
+| `superseded` | decision | `superseded` | TBD |
+| `withdrawn` | decision | `rejected` | TBD |
+| `open` | gap | `open` | TBD |
+| `resolved` (with date suffix) | gap | `addressed` | TBD |
+
+**Successive-pass plan:**
+
+| Pass | Scope | Goal |
+|---|---|---|
+| **A (spike)** | E-22 only | debug shared projector logic against minimal surface |
+| **B** | E-13 + E-14 + E-15 (no-milestone active epics) | confirm shared logic generalizes |
+| **C** | E-18 (multi-milestone generic `m-EXX-NN-...`) | exercise milestone-emit + body-rewrite at scale |
+| **D** | completed-id'd generic-shape epics (E-16, E-17, E-19, E-20, E-21, E-23, E-24) | best-effort projection |
+| **E** | outliers (E-10 `m-ec-pN`; E-11 no-spec.md + `m-svui-NN`; E-12 `M-10.NN`) | per-epic custom rules |
+| **F** | decisions (54, chronological) + gaps (157, git-blame sort) | mechanical projection on stable code |
+| **G** | body-rewrite cross-pass | substitute old ids → new ids per id-map.csv across manifest body strings |
+
+**Implementation checklist:**
+- [x] Decide projector home — `work/migration/scripts/`
+- [x] Decide language — Python 3 + `uv` script mode
+- [x] Decide incremental scope — E-22 first; successive passes A–G
+- [x] Pass A: spike on E-22 — `work/migration/scripts/project_e22.py` (uv script-mode, ruamel.yaml). Generates `work/migration/manifests/e22-spike.yaml`. **Dry-run green:** `aiwf import --dry-run` zero findings, exit 0.
12,902-byte `epic.md` would land at `work/epics/E-22-time-machine-model-fit-chunked-evaluation/epic.md` +- [ ] Pass B: extend to E-13/E-14/E-15 +- [ ] Pass C: extend to E-18 (milestones) +- [ ] Pass D: extend to completed-id'd epics +- [ ] Pass E: outlier per-epic rules (E-10, E-11, E-12) +- [ ] Pass F: decisions + gaps +- [ ] Pass G: body-rewrite cross-pass with id-map.csv +- [ ] Emit `skip-log.md` accumulated across passes +- [ ] Final: produce single combined manifest for Phase 4 dry-run loop ### Phase 3 — pre-process source @@ -292,3 +341,10 @@ Append-only record of dry-run iterations, decisions taken mid-flight, and findin - 2026-05-01 — phase 1 — marketplace `23min/ai-workflow-rituals` registered (User scope; cache populated under `~/.claude/plugins/cache/`). Plugin install (Project scope) awaiting user action. - 2026-05-01 — phase 1 — skill-audit done from plugin cache: 8 aiwfx-* skills + 4 wf-* skills + 4 agents + 5 templates verified present. Three real gaps identified, all with acceptable mitigations: (1) `aiwfx-wrap-milestone` doesn't chain `dead-code-audit` — document in CLAUDE.md; (2) `wf-doc-lint` is minimal port — acceptable since we don't use dropped primitives; (3) tracking-doc path convention shifted to centralized `work/tracking/` — keep our epic-local layout via project override. No blocking gaps. - 2026-05-01 — phase 1 — `/plugin install` failed with "Source path does not exist" despite source being present; root cause was plugin-manager bookkeeping confusion (plugins were already installed for `/Users/peterbru/Projects/proliminal.net` and Claude Code wouldn't add a second per-project install record). Recovered via manual edit of `.claude/settings.json` — added `enabledPlugins: { "aiwf-extensions@ai-workflow-rituals": true, "wf-rituals@ai-workflow-rituals": true }`. `aiwf doctor` now confirms "rituals plugin detected." **Phase 1 closed.** +- 2026-05-01 — phase 1 — committed (8865346 + 2d08da8): devcontainer infra + plan doc. 
+- 2026-05-01 — phase 2 — Q1 settled: projector home = `work/migration/scripts/`. Co-locates with plan + id-map.csv + skip-log.md; deletes as one dir at Phase 5. +- 2026-05-01 — phase 2 — Q2 settled: Python 3 with `uv`. Use uv script mode (PEP 723 inline `# /// script` metadata declaring deps like `ruamel.yaml`); `uv run script.py`. No `pyproject.toml`/project skeleton — each script is self-contained and disposable. uv already in devcontainer (init.sh installs it). +- 2026-05-01 — phase 2 — Q3 settled: incremental scope = E-22 first (Pass A spike), extend in passes B–G; decisions + gaps last on stable projector code. +- 2026-05-01 — phase 2 — micro-decisions for Pass A: status mapping `planning → proposed` (epic kind); manifests output dir `work/migration/manifests/` (separate from scripts; data ≠ code). Authoritative status sets pulled from `aiwf schema` and recorded in plan. **Phase 2 design closed; ready to implement Pass A.** +- 2026-05-01 — phase 2 — Pass A landed. `project_e22.py` (uv PEP-723 script mode, ruamel.yaml LiteralScalarString for body). Single-entity manifest validates with `aiwf import --dry-run` zero findings; would write `work/epics/E-22-time-machine-model-fit-chunked-evaluation/epic.md` (12,902 bytes). +- 2026-05-01 — phase 2 — **Pass A finding (slug derivation):** aiwf derives destination dir slug from `title`, not source dir name. Source `E-22-model-fit-chunked-evaluation` ≠ aiwf-generated `E-22-time-machine-model-fit-chunked-evaluation`. Phase 3/5 decision pending: preserve v1 short slugs (via `aiwf rename` or trimmed manifest titles) vs. accept title-derived slugs (id is stable ref; path is incidental). Lean: accept; settle before mass import in Pass B. 
diff --git a/work/migration/manifests/e22-spike.yaml b/work/migration/manifests/e22-spike.yaml new file mode 100644 index 00000000..4bfdd257 --- /dev/null +++ b/work/migration/manifests/e22-spike.yaml @@ -0,0 +1,127 @@ +version: 1 +commit: + mode: single + message: 'import(spike): E-22 — Pass A projector dry-run' +entities: + - kind: epic + id: E-22 + frontmatter: + title: Time Machine — Model Fit & Chunked Evaluation + status: proposed + body: | + ## Goal + + Close out the remaining Time Machine analysis modes — **model fitting** against real telemetry and **chunked evaluation** for feedback simulation — and crystallize the resulting surface as a clean embeddable **`FlowTime.Pipeline` SDK**. These are the last two analysis modes in the E-18 Time Machine architecture; delivering them completes the "FlowTime as a callable function" arc. + + ## Context + + E-18 delivered 11 milestones covering parameterized evaluation, engine session protocol, tiered validation, parameter sweep, sensitivity, goal seek, N-parameter optimization, `SessionModelEvaluator`, and the .NET Time Machine CLI. All are on `main`. The analysis layer works end-to-end for synthetic runs. + + Two scope items remained in E-18 and are explicitly carried into E-22: + + - **Model Fit** (`FitSpec`/`FitRunner`/`POST /v1/fit`) — composes `ITelemetrySource` + `Optimizer` with residual as the objective. Infrastructure exists; the composition is the new work. **Blocked on** E-15 Telemetry Ingestion (first repeatable dataset path) and the Telemetry Loop & Parity epic (validated drift bounds — fitting against real telemetry without measured drift would produce falsely precise results). + + - **Chunked Evaluation** (Mode 6) — bin-chunk evaluation for feedback simulation with external controllers. The Rust engine session (m-E18-02) is the seam, but the chunk-step protocol on top of it is not designed. 
+ + A third scope item was decided (2026-04-20) to land alongside Fit and Chunked so the completed surface is exposed as a clean embeddable API: + + - **`FlowTime.Pipeline` SDK wrapper** — thin project exposing `Sweep`, `Sensitivity`, `GoalSeek`, `Optimize`, `Fit`, `ChunkedEvaluate` as a programmatic embedding surface over the Time Machine internals. Callers today reach into `FlowTime.TimeMachine.Sweep.*` directly; the SDK crystallizes the external contract. + + E-22 is sequenced per D-2026-04-15-032 (Option A): `E-21 (active) → E-15 Telemetry Ingestion → Telemetry Loop & Parity → E-22`. + + ### Supersedes + + Closes out E-18 placeholder `m-E18-XX Model Fit` and `m-E18-05 Chunked Evaluation`. E-18 itself remains the archived parent epic (11 delivered milestones recorded there as "complete"); E-22 is the forward-looking epic that completes its scope. + + ### Related + + - **E-15 Telemetry Ingestion** (`work/epics/E-15-telemetry-ingestion/`) — hard prerequisite for Fit. Provides the first repeatable dataset path and replayable canonical bundle via Gold Builder → Graph Builder. + - **Telemetry Loop & Parity** (`work/epics/unplanned/telemetry-loop-parity/spec.md`) — hard prerequisite for Fit. Provides measured drift bounds between synthetic and replayed runs; without those bounds, fit quality cannot be meaningfully reported. Currently unnumbered; will take its own epic slot when scheduled. + - **E-21 Svelte Workbench & Analysis Surfaces** — builds UI for fit results once Fit's API contract is stable. Not in E-22 scope; E-21 milestone allocation is independent. 
+ + ## Scope + + ### In Scope + + - `FitSpec` + `FitRunner` + `POST /v1/fit` composing `ITelemetrySource` + `Optimizer` with residual objective (RMSE / MAE / configurable) against a user-selected series + - `flowtime fit` .NET CLI command, pipeable JSON-in/JSON-out, byte-compatible with `POST /v1/fit` + - Chunked evaluation protocol: stateful `chunk_step` session command on the Rust engine that advances a compiled plan by N bins, yielding partial results + - `ChunkSpec` + `ChunkRunner` + `POST /v1/chunked-eval` (final name TBD) driving the chunk-step protocol from .NET with external-controller integration + - `flowtime chunked-eval` .NET CLI command matching the API contract + - `FlowTime.Pipeline` project — thin embeddable SDK exposing `Sweep`, `Sensitivity`, `GoalSeek`, `Optimize`, `Fit`, `ChunkedEvaluate` as a clean programmatic API over the Time Machine internals; surfaces no HTTP, no CLI parsing, no artifact layout + - Migration of existing internal callers (the API endpoints, the .NET CLI commands) to the `FlowTime.Pipeline` SDK so the SDK is dogfooded from day one + - Test coverage matching E-18 standard: 100% branch coverage on pure runners; integration tests for Rust chunk-step protocol; API contract tests for `/v1/fit` and `/v1/chunked-eval` + - Documentation: `docs/architecture/time-machine-analysis-modes.md` extended with Fit and Chunked sections; `FlowTime.Pipeline` SDK embedding guide + + ### Out of Scope + + - **Optimization constraints** (`--constraint "max(node.queue.utilization) < 0.8"`) — tracked in `work/gaps.md`; candidate for a later analysis-layer patch against `OptimizeSpec`/`GoalSeekSpec`/`FitSpec` + - **Monte Carlo** (Mode 5) — sampling parameters from distributions, characterizing output distribution. 
Lower priority than Fit; tracked in gaps + - **`FlowTime.Telemetry.*` direct-source adapters** (Prometheus, OpenTelemetry, BPI event logs) — E-15 Gold Builder covers the general batch path; adapters are narrower shortcuts to build only when a concrete client asks + - **Tiered validation parity across Sim UI / Blazor UI / Svelte UI / MCP / external agents** — validation surface work; closer to E-21 m-E21-06 (Validation Surface) and future Blazor maintenance + - **Canonical bundle round-trip parity AC** (E-18 unchecked AC: capture baseline → replay bundle = same outputs modulo drift) — owned by Telemetry Loop & Parity, not E-22 + - **Fit-result UI** — lands in E-21 (or a later Svelte milestone) once Fit's API is stable; E-22 delivers only the contract + - **Fitting algorithms beyond the existing optimizer** — Fit uses the existing Nelder-Mead `Optimizer` with a residual objective. Gradient-based or Bayesian fitters are future work + - **Time grid alignment across heterogeneous sources** — assumes E-15 Gold Builder presents series on a common grid; grid alignment belongs to E-15 + - **Streaming / push UI for chunked evaluation** — chunked delivers a pull contract (caller drives chunk_step); WebSocket push for live simulation is a separate E-17-style track + + ## Constraints + + - Builds on the existing Rust engine session protocol (m-E18-02). No changes to the MessagePack framing; `chunk_step` is a new command alongside `compile`, `eval`, `patch`, `get_params`, `get_series`, `validate_schema` + - `FlowTime.Pipeline` is a pure SDK: no HTTP server, no CLI parsing, no artifact writing. It may compose services from other projects but its public API is in-process method calls returning strongly-typed results + - Fit against real telemetry is gated by the Telemetry Loop & Parity harness reporting drift within documented tolerance. 
A failing parity harness blocks Fit acceptance — reported fit residuals without parity validation are not trustworthy + - Byte-for-byte API ↔ CLI parity per the E-18 CLI convention: `flowtime fit < request.json` produces the same JSON as `POST /v1/fit` + - .NET 9 / C# 13; invariant culture; camelCase JSON payloads (project-rule: `project.md:43`) + - No reintroduction of the deprecated `FlowTime.Generator` project (D-2026-04-07-019 Path B remains authoritative) + + ## Success Criteria + + - [ ] `POST /v1/fit` accepts `(model YAML, telemetry source spec, target series, parameter overrides to fit, residual metric)` and returns a fitted parameter set plus fit quality (residual, iterations, convergence flag) + - [ ] `flowtime fit` CLI command produces byte-identical output to `POST /v1/fit` for equivalent inputs + - [ ] Fit results against a parity-validated telemetry fixture produce residuals within documented tolerance; the tolerance bound is set by the Telemetry Loop & Parity harness output + - [ ] Rust engine session responds to `chunk_step { bins: N }` command by advancing the compiled plan N bins and returning partial series; state persists across calls within a session + - [ ] `POST /v1/chunked-eval` drives the chunk-step protocol from .NET, exposing either a polling pull or a request/response-per-chunk model (choice is a design task for the milestone) + - [ ] External controller integration demonstration: a fixture controller consumes chunk outputs, writes back parameter patches, and the next chunk reflects the patch — end-to-end test, not just unit coverage + - [ ] `FlowTime.Pipeline` project compiles as a standalone library; its public API exposes `Sweep`, `Sensitivity`, `GoalSeek`, `Optimize`, `Fit`, `ChunkedEvaluate` as strongly-typed methods + - [ ] All `/v1/*` API endpoints added by E-18 are rewritten to call through `FlowTime.Pipeline` rather than `FlowTime.TimeMachine.*` internals directly; the SDK is dogfooded from day one + - [ ] All Time Machine CLI 
commands (`flowtime validate/sweep/sensitivity/goal-seek/optimize/fit/chunked-eval`) are rewritten to call through `FlowTime.Pipeline` + - [ ] 100% branch coverage on new pure runners (`FitRunner`, `ChunkRunner`); integration tests cover the Rust chunk-step protocol; API contract tests cover `/v1/fit` and `/v1/chunked-eval` + - [ ] `docs/architecture/time-machine-analysis-modes.md` updated with Fit and Chunked Evaluation sections reflecting the shipped behavior + - [ ] `FlowTime.Pipeline` embedding guide documents a concrete use case (e.g., hosted in a notebook, in an Azure Function, in a script) with a complete working example + - [ ] On epic completion, E-18's `m-E18-05 Chunked Evaluation` and `m-E18-XX Model Fit` placeholder rows are struck through; E-18 epic status flips to `complete` and the epic is archived under `work/epics/completed/` + + ## Risks & Open Questions + + | Risk / Question | Impact | Mitigation | + |-----------------|--------|------------| + | Telemetry Loop & Parity epic has not been scheduled; without it Fit cannot be meaningfully validated | **Hard block** | Telemetry Loop & Parity must be scheduled and completed before Fit AC can be closed. E-22 does not start Fit until TLP ships | + | E-15 first-dataset-path timeline is uncertain | High | E-15 is on the critical path per Option A. E-22 planning proceeds; E-22 implementation is gated on E-15 delivering at least one end-to-end canonical bundle dataset | + | Chunked evaluation semantics for stateful nodes (e.g., queues carrying over bin boundaries) need design; the current `IStatefulNode` stubs may not suffice | Medium | First milestone of chunked scope is a design spike: document chunk-boundary state-transfer semantics before implementation | + | Residual metric choice (RMSE vs. MAE vs. 
weighted) may not generalize across queueing topologies | Medium | Start with RMSE as default; make the residual a configurable strategy in `FitSpec`; document tradeoffs | + | `FlowTime.Pipeline` SDK surface may be over-scoped if designed before external embedders give feedback | Medium | Keep the SDK minimal: expose only the methods that existing API/CLI callers need. Defer Monte Carlo, direct-source adapters, and any speculative surface | + | Rust `chunk_step` may reveal gaps in the stateful execution seam that require refactoring inside the engine core, not just protocol work | Medium | Scope a Rust-side design spike first; if the seam needs reshaping, split into a foundation milestone before the .NET chunk runner | + | Pipeline SDK rewrite of existing API/CLI callers may introduce regressions | Medium | Dogfood migration is done one endpoint at a time with full existing test coverage asserting behavior identity before and after | + + ## Milestones + + Sequencing: Fit first (unblocks the largest downstream set — E-15 dataset path leads directly into fit validation). Chunked second (independent Rust protocol work; runs after Fit only to avoid Rust-engine contention on a single milestone branch). Pipeline SDK third so it crystallizes against the final surface including Fit and Chunked. 
+ + | ID | Title | Status | Depends on | + |----|-------|--------|-----------| + | m-E22-01-model-fit | Model Fit | not started | E-15 first dataset path complete; Telemetry Loop & Parity harness validated | + | m-E22-02-chunked-evaluation | Chunked Evaluation | not started | m-E22-01 (sequencing only; no hard code dependency); m-E18-02 session protocol (already delivered) | + | m-E22-03-pipeline-sdk | `FlowTime.Pipeline` SDK Wrapper | not started | m-E22-01 + m-E22-02 complete (SDK reflects the final surface) | + + ## ADRs + + - (none yet — ADRs will be captured under `work/decisions.md` as they arise during milestone planning) + + ## References + + - E-18 epic spec: `work/epics/E-18-headless-pipeline-and-optimization/spec.md` + - E-18 gap analysis: `work/epics/E-18-headless-pipeline-and-optimization/e18-gap-analysis.md` + - Analysis modes architecture: `docs/architecture/time-machine-analysis-modes.md` + - Telemetry Loop & Parity: `work/epics/unplanned/telemetry-loop-parity/spec.md` + - E-15 Telemetry Ingestion: `work/epics/E-15-telemetry-ingestion/` + - Option A delivery sequence: `work/decisions.md` → D-2026-04-15-032 + - Headless engine architecture: `docs/architecture/headless-engine-architecture.md` diff --git a/work/migration/scripts/project_e22.py b/work/migration/scripts/project_e22.py new file mode 100644 index 00000000..444a40f4 --- /dev/null +++ b/work/migration/scripts/project_e22.py @@ -0,0 +1,130 @@ +# /// script +# requires-python = ">=3.10" +# dependencies = ["ruamel.yaml>=0.18"] +# /// +"""Pass A spike — project E-22 epic into a single-entity aiwf import manifest. 
+ +Reads: work/epics/E-22-model-fit-chunked-evaluation/spec.md +Emits: work/migration/manifests/e22-spike.yaml + +Validate downstream with: aiwf import --dry-run work/migration/manifests/e22-spike.yaml +""" + +import re +import sys +from pathlib import Path + +from ruamel.yaml import YAML +from ruamel.yaml.scalarstring import LiteralScalarString + + +REPO_ROOT = Path(__file__).resolve().parents[3] +SPEC_PATH = REPO_ROOT / "work/epics/E-22-model-fit-chunked-evaluation/spec.md" +OUT_PATH = REPO_ROOT / "work/migration/manifests/e22-spike.yaml" + +V1_TO_V3_EPIC_STATUS = { + "planning": "proposed", +} + + +def parse_epic_spec(text: str) -> dict: + lines = text.splitlines() + if not lines or not lines[0].startswith("# "): + raise ValueError("spec missing H1 title") + raw_title = lines[0][2:].strip() + title = re.sub(r"^Epic:\s*", "", raw_title) + + id_match = re.search(r"^\*\*ID:\*\*\s*(\S+)\s*$", text, re.MULTILINE) + status_match = re.search(r"^\*\*Status:\*\*\s*(\S+)\s*$", text, re.MULTILINE) + if not id_match: + raise ValueError("spec missing **ID:** line") + if not status_match: + raise ValueError("spec missing **Status:** line") + + epic_id = id_match.group(1) + v1_status = status_match.group(1) + if v1_status not in V1_TO_V3_EPIC_STATUS: + raise ValueError( + f"unmapped epic status {v1_status!r} — add to V1_TO_V3_EPIC_STATUS" + ) + status = V1_TO_V3_EPIC_STATUS[v1_status] + + body = strip_frontmatter_prose(text) + + return {"id": epic_id, "title": title, "status": status, "body": body} + + +def strip_frontmatter_prose(text: str) -> str: + """Remove the H1 title, **ID:**, and **Status:** lines plus surrounding blanks. + + Everything else (## Goal, ## Context, etc.) is body content for aiwf. 
+ """ + out_lines: list[str] = [] + skip_next_blank = False + for line in text.splitlines(): + if line.startswith("# "): + skip_next_blank = True + continue + if re.match(r"^\*\*(ID|Status):\*\*", line): + skip_next_blank = True + continue + if skip_next_blank and line.strip() == "": + skip_next_blank = False + continue + skip_next_blank = False + out_lines.append(line) + while out_lines and out_lines[0].strip() == "": + out_lines.pop(0) + while out_lines and out_lines[-1].strip() == "": + out_lines.pop() + return "\n".join(out_lines) + "\n" + + +def build_manifest(epic: dict) -> dict: + return { + "version": 1, + "commit": { + "mode": "single", + "message": f"import(spike): {epic['id']} — Pass A projector dry-run", + }, + "entities": [ + { + "kind": "epic", + "id": epic["id"], + "frontmatter": { + "title": epic["title"], + "status": epic["status"], + }, + "body": LiteralScalarString(epic["body"]), + } + ], + } + + +def main() -> int: + spec_text = SPEC_PATH.read_text(encoding="utf-8") + epic = parse_epic_spec(spec_text) + manifest = build_manifest(epic) + + OUT_PATH.parent.mkdir(parents=True, exist_ok=True) + yaml = YAML() + yaml.indent(mapping=2, sequence=4, offset=2) + yaml.width = 120 + with OUT_PATH.open("w", encoding="utf-8") as f: + yaml.dump(manifest, f) + + print(f"wrote {OUT_PATH.relative_to(REPO_ROOT)}") + print(f" kind: epic") + print(f" id: {epic['id']}") + print(f" title: {epic['title']}") + print(f" status: {epic['status']} (v1: {V1_TO_V3_EPIC_STATUS_REVERSED().get(epic['status'], '?')})") + print(f" body: {len(epic['body'].splitlines())} lines") + return 0 + + +def V1_TO_V3_EPIC_STATUS_REVERSED() -> dict: + return {v: k for k, v in V1_TO_V3_EPIC_STATUS.items()} + + +if __name__ == "__main__": + sys.exit(main()) From 78f05e9da6b2aa3497f4ee8714ef747c1a62214f Mon Sep 17 00:00:00 2001 From: Peter Bruinsma Date: Fri, 1 May 2026 20:51:53 +0000 Subject: [PATCH 04/50] =?UTF-8?q?chore(migration):=20Pass=20B=20=E2=80=94?= 
=?UTF-8?q?=20extend=20projector=20to=20active-set=20epics=20with=20green?= =?UTF-8?q?=20dry-run?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - project_epics.py replaces project_e22.py: accepts list of E-NN ids (Pass B = E-13/E-14/E-15/E-22), per-epic loop, accumulator manifest, skip-log.md emission - status mapping covers planning/proposed/paused/in-progress/superseded/ absorbed/active/cancelled/complete/done; case-insensitive lookup - handles three new input shapes: missing **Status:** (default proposed + skip-log), `superseded by ...` prose (maps to cancelled), parenthesized qualifier on **Status:** line (preserved as `> **Status note:** ...` blockquote prepended to body) - emits skip-log.md only when findings exist; cleans up stale log otherwise - delete e22-spike.yaml + project_e22.py (Pass A artifacts subsumed) - aiwf import --dry-run on epics-active.yaml: 4 plans, 0 findings, exit 0 - plan updated with Pass B Q&A settlements (E-14 cancelled, E-18 active, paused→active, in-progress→active, superseded→cancelled) Co-Authored-By: Claude Opus 4.7 (1M context) --- work/migration/aiwf-v3-plan.md | 22 +- work/migration/manifests/e22-spike.yaml | 127 ------ work/migration/manifests/epics-active.yaml | 474 +++++++++++++++++++++ work/migration/manifests/skip-log.md | 5 + work/migration/scripts/project_e22.py | 130 ------ work/migration/scripts/project_epics.py | 186 ++++++++ 6 files changed, 684 insertions(+), 260 deletions(-) delete mode 100644 work/migration/manifests/e22-spike.yaml create mode 100644 work/migration/manifests/epics-active.yaml create mode 100644 work/migration/manifests/skip-log.md delete mode 100644 work/migration/scripts/project_e22.py create mode 100644 work/migration/scripts/project_epics.py diff --git a/work/migration/aiwf-v3-plan.md b/work/migration/aiwf-v3-plan.md index 5355a018..54053a4a 100644 --- a/work/migration/aiwf-v3-plan.md +++ b/work/migration/aiwf-v3-plan.md @@ -244,15 +244,24 @@ Coverage 
gaps and mitigations: | decision | `proposed, accepted, superseded, rejected` | id, title, status | optional relates_to | | contract | `proposed, accepted, deprecated, retired, rejected` | id, title, status | optional linked_adrs | +**Status-derivation rule (precedence):** + +1. **Dir location wins for terminal states.** Any epic under `work/epics/completed/` → `done`, regardless of what the source spec's `**Status:**` says. (Pass D applies this to E-10, E-12, E-16, E-17, E-19, E-20, E-21, E-23, E-24.) +2. **Otherwise use the source `**Status:**` line, mapped via the table below** (also cross-check `work/epics/epic-roadmap.md` when source spec is silent or ambiguous). +3. **If neither source signals nor roadmap clarify** → default `proposed`, log finding to `skip-log.md`. + + *Rationale:* most active-dir epics in this repo are "we plan to do this; nothing's started" → `proposed`. Epics with explicit "paused" signal map to aiwf `active` (paused work has real commitments / branches / prior milestones; closer to active than proposed). Default for missing-status is `proposed` since the absence of a paused signal in source means we shouldn't infer one. 
+
 **v1 → v3 status-mapping table (filled as passes encounter source statuses):**
 
 | v1 source status | v1 kind | v3 status | Settled |
 |---|---|---|---|
 | `planning` | epic | `proposed` | ✅ Pass A (E-22 spec) |
-| `in-progress` | epic / milestone | (epic) `active` / (milestone) `in_progress` | TBD when encountered |
+| `in-progress` | epic / milestone | (epic) `active` / (milestone) `in_progress` | ✅ Pass B (E-18 spec + roadmap + ROADMAP + CLAUDE.md all agree) |
+| `superseded` / `absorbed` | epic | `cancelled` (work moved permanently to a different epic; original plan no longer governs) | ✅ Pass B (E-14: absorbed into ui-analytical-views which is out of migration scope per Q1) |
 | `complete` / `completed` | epic / milestone | `done` | TBD when encountered |
 | `pending` | milestone | `draft` | TBD |
-| `paused` | epic / milestone | TBD — possibly `proposed` (no aiwf "paused"); flag as projector finding | TBD |
+| `paused` | epic / milestone | `active` (epic) — paused work has real commitments / branches / prior milestones; closer to active than proposed | ✅ Pass B (user override) |
 | `active` | decision | `accepted` | TBD |
 | `superseded` | decision | `superseded` | TBD |
 | `withdrawn` | decision | `rejected` | TBD |
@@ -276,7 +285,7 @@ Coverage gaps and mitigations:
 - [x] Decide language — Python 3 + `uv` script mode
 - [x] Decide incremental scope — E-22 first; successive passes A–G
 - [x] Pass A: spike on E-22 — `work/migration/scripts/project_e22.py` (uv script-mode, ruamel.yaml). Generates `work/migration/manifests/e22-spike.yaml`. **Dry-run green:** `aiwf import --dry-run` zero findings, exit 0. 12,902-byte `epic.md` would land at `work/epics/E-22-time-machine-model-fit-chunked-evaluation/epic.md`
-- [ ] Pass B: extend to E-13/E-14/E-15
+- [x] Pass B: extend to E-13/E-14/E-15 (+ E-22 carried from A) — `project_epics.py` replaces `project_e22.py`. **Dry-run green:** 4 epics, 0 findings, exit 0. skip-log.md emitted (1 entry: E-13 default-status).
Status mapping table proven on missing/superseded/parenthesized-prose/clean inputs. - [ ] Pass C: extend to E-18 (milestones) - [ ] Pass D: extend to completed-id'd epics - [ ] Pass E: outlier per-epic rules (E-10, E-11, E-12) @@ -348,3 +357,10 @@ Append-only record of dry-run iterations, decisions taken mid-flight, and findin - 2026-05-01 — phase 2 — micro-decisions for Pass A: status mapping `planning → proposed` (epic kind); manifests output dir `work/migration/manifests/` (separate from scripts; data ≠ code). Authoritative status sets pulled from `aiwf schema` and recorded in plan. **Phase 2 design closed; ready to implement Pass A.** - 2026-05-01 — phase 2 — Pass A landed. `project_e22.py` (uv PEP-723 script mode, ruamel.yaml LiteralScalarString for body). Single-entity manifest validates with `aiwf import --dry-run` zero findings; would write `work/epics/E-22-time-machine-model-fit-chunked-evaluation/epic.md` (12,902 bytes). - 2026-05-01 — phase 2 — **Pass A finding (slug derivation):** aiwf derives destination dir slug from `title`, not source dir name. Source `E-22-model-fit-chunked-evaluation` ≠ aiwf-generated `E-22-time-machine-model-fit-chunked-evaluation`. Phase 3/5 decision pending: preserve v1 short slugs (via `aiwf rename` or trimmed manifest titles) vs. accept title-derived slugs (id is stable ref; path is incidental). Lean: accept; settle before mass import in Pass B. +- 2026-05-01 — phase 2 — slug-preservation settled: **accept aiwf title-derived slugs.** Identity = id; path = incidental (per upstream design-lessons.md principle 1 "identity is not location"). Projector emits titles verbatim; aiwf decides slugs. Phase 5 cleanup deletes old v1 dirs whole; memory files get one-pass review to fix path references that matter. No `aiwf rename` post-import; no manifest title trimming. +- 2026-05-01 — phase 2 — Pass B finding 1 settled: missing `**Status:**` in active-dir epic spec → default `proposed` + skip-log finding. 
Status-derivation rule expanded: roadmap is a secondary cross-check; missing status defaults to `proposed`. +- 2026-05-01 — phase 2 — mapping override (user): `paused → active` (epic kind). Paused work has real commitments / branches / prior milestones; closer to aiwf `active` than `proposed`. Affects E-11 (paused after M6 per roadmap) → `active`. Default for missing-status is still `proposed` (absence of explicit paused signal means we don't infer one). +- 2026-05-01 — phase 2 — Pass B finding 2 settled: `superseded` / `absorbed` → `cancelled` (epic). Affects E-14 (absorbed into ui-analytical-views, which is out of migration scope per Q1). Body retains supersession prose verbatim. +- 2026-05-01 — phase 2 — Pass B mapping resolved (user) for E-18: `in-progress → active`. All four surfaces agree (spec, epic-roadmap, ROADMAP, CLAUDE.md) plus the live `epic/E-18-time-machine` branch hasn't been closed. E-18 → `active`. +- 2026-05-01 — phase 2 — Pass B finding 3 settled: parenthesized qualifier on `**Status:**` line is preserved by prepending a `> **Status note:** ` blockquote to the top of the body. Information not lost; body retains the "capture is shipped; ingestion pipeline is not" nuance from E-15 inline. +- 2026-05-01 — phase 2 — Pass B landed. `project_epics.py` (replaces `project_e22.py`); `epics-active.yaml` (4 epics: E-13/E-14/E-15/E-22); `skip-log.md` (1 finding: E-13 default-status). Status-mapping table exercised on all four input shapes (missing / superseded / parenthesized-prose / clean). `aiwf import --dry-run` zero error findings, exit 0. Plans 4 writes at title-derived slugs as expected. 
diff --git a/work/migration/manifests/e22-spike.yaml b/work/migration/manifests/e22-spike.yaml deleted file mode 100644 index 4bfdd257..00000000 --- a/work/migration/manifests/e22-spike.yaml +++ /dev/null @@ -1,127 +0,0 @@ -version: 1 -commit: - mode: single - message: 'import(spike): E-22 — Pass A projector dry-run' -entities: - - kind: epic - id: E-22 - frontmatter: - title: Time Machine — Model Fit & Chunked Evaluation - status: proposed - body: | - ## Goal - - Close out the remaining Time Machine analysis modes — **model fitting** against real telemetry and **chunked evaluation** for feedback simulation — and crystallize the resulting surface as a clean embeddable **`FlowTime.Pipeline` SDK**. These are the last two analysis modes in the E-18 Time Machine architecture; delivering them completes the "FlowTime as a callable function" arc. - - ## Context - - E-18 delivered 11 milestones covering parameterized evaluation, engine session protocol, tiered validation, parameter sweep, sensitivity, goal seek, N-parameter optimization, `SessionModelEvaluator`, and the .NET Time Machine CLI. All are on `main`. The analysis layer works end-to-end for synthetic runs. - - Two scope items remained in E-18 and are explicitly carried into E-22: - - - **Model Fit** (`FitSpec`/`FitRunner`/`POST /v1/fit`) — composes `ITelemetrySource` + `Optimizer` with residual as the objective. Infrastructure exists; the composition is the new work. **Blocked on** E-15 Telemetry Ingestion (first repeatable dataset path) and the Telemetry Loop & Parity epic (validated drift bounds — fitting against real telemetry without measured drift would produce falsely precise results). - - - **Chunked Evaluation** (Mode 6) — bin-chunk evaluation for feedback simulation with external controllers. The Rust engine session (m-E18-02) is the seam, but the chunk-step protocol on top of it is not designed. 
- - A third scope item was decided (2026-04-20) to land alongside Fit and Chunked so the completed surface is exposed as a clean embeddable API: - - - **`FlowTime.Pipeline` SDK wrapper** — thin project exposing `Sweep`, `Sensitivity`, `GoalSeek`, `Optimize`, `Fit`, `ChunkedEvaluate` as a programmatic embedding surface over the Time Machine internals. Callers today reach into `FlowTime.TimeMachine.Sweep.*` directly; the SDK crystallizes the external contract. - - E-22 is sequenced per D-2026-04-15-032 (Option A): `E-21 (active) → E-15 Telemetry Ingestion → Telemetry Loop & Parity → E-22`. - - ### Supersedes - - Closes out E-18 placeholder `m-E18-XX Model Fit` and `m-E18-05 Chunked Evaluation`. E-18 itself remains the archived parent epic (11 delivered milestones recorded there as "complete"); E-22 is the forward-looking epic that completes its scope. - - ### Related - - - **E-15 Telemetry Ingestion** (`work/epics/E-15-telemetry-ingestion/`) — hard prerequisite for Fit. Provides the first repeatable dataset path and replayable canonical bundle via Gold Builder → Graph Builder. - - **Telemetry Loop & Parity** (`work/epics/unplanned/telemetry-loop-parity/spec.md`) — hard prerequisite for Fit. Provides measured drift bounds between synthetic and replayed runs; without those bounds, fit quality cannot be meaningfully reported. Currently unnumbered; will take its own epic slot when scheduled. - - **E-21 Svelte Workbench & Analysis Surfaces** — builds UI for fit results once Fit's API contract is stable. Not in E-22 scope; E-21 milestone allocation is independent. 
- - ## Scope - - ### In Scope - - - `FitSpec` + `FitRunner` + `POST /v1/fit` composing `ITelemetrySource` + `Optimizer` with residual objective (RMSE / MAE / configurable) against a user-selected series - - `flowtime fit` .NET CLI command, pipeable JSON-in/JSON-out, byte-compatible with `POST /v1/fit` - - Chunked evaluation protocol: stateful `chunk_step` session command on the Rust engine that advances a compiled plan by N bins, yielding partial results - - `ChunkSpec` + `ChunkRunner` + `POST /v1/chunked-eval` (final name TBD) driving the chunk-step protocol from .NET with external-controller integration - - `flowtime chunked-eval` .NET CLI command matching the API contract - - `FlowTime.Pipeline` project — thin embeddable SDK exposing `Sweep`, `Sensitivity`, `GoalSeek`, `Optimize`, `Fit`, `ChunkedEvaluate` as a clean programmatic API over the Time Machine internals; surfaces no HTTP, no CLI parsing, no artifact layout - - Migration of existing internal callers (the API endpoints, the .NET CLI commands) to the `FlowTime.Pipeline` SDK so the SDK is dogfooded from day one - - Test coverage matching E-18 standard: 100% branch coverage on pure runners; integration tests for Rust chunk-step protocol; API contract tests for `/v1/fit` and `/v1/chunked-eval` - - Documentation: `docs/architecture/time-machine-analysis-modes.md` extended with Fit and Chunked sections; `FlowTime.Pipeline` SDK embedding guide - - ### Out of Scope - - - **Optimization constraints** (`--constraint "max(node.queue.utilization) < 0.8"`) — tracked in `work/gaps.md`; candidate for a later analysis-layer patch against `OptimizeSpec`/`GoalSeekSpec`/`FitSpec` - - **Monte Carlo** (Mode 5) — sampling parameters from distributions, characterizing output distribution. 
Lower priority than Fit; tracked in gaps - - **`FlowTime.Telemetry.*` direct-source adapters** (Prometheus, OpenTelemetry, BPI event logs) — E-15 Gold Builder covers the general batch path; adapters are narrower shortcuts to build only when a concrete client asks - - **Tiered validation parity across Sim UI / Blazor UI / Svelte UI / MCP / external agents** — validation surface work; closer to E-21 m-E21-06 (Validation Surface) and future Blazor maintenance - - **Canonical bundle round-trip parity AC** (E-18 unchecked AC: capture baseline → replay bundle = same outputs modulo drift) — owned by Telemetry Loop & Parity, not E-22 - - **Fit-result UI** — lands in E-21 (or a later Svelte milestone) once Fit's API is stable; E-22 delivers only the contract - - **Fitting algorithms beyond the existing optimizer** — Fit uses the existing Nelder-Mead `Optimizer` with a residual objective. Gradient-based or Bayesian fitters are future work - - **Time grid alignment across heterogeneous sources** — assumes E-15 Gold Builder presents series on a common grid; grid alignment belongs to E-15 - - **Streaming / push UI for chunked evaluation** — chunked delivers a pull contract (caller drives chunk_step); WebSocket push for live simulation is a separate E-17-style track - - ## Constraints - - - Builds on the existing Rust engine session protocol (m-E18-02). No changes to the MessagePack framing; `chunk_step` is a new command alongside `compile`, `eval`, `patch`, `get_params`, `get_series`, `validate_schema` - - `FlowTime.Pipeline` is a pure SDK: no HTTP server, no CLI parsing, no artifact writing. It may compose services from other projects but its public API is in-process method calls returning strongly-typed results - - Fit against real telemetry is gated by the Telemetry Loop & Parity harness reporting drift within documented tolerance. 
A failing parity harness blocks Fit acceptance — reported fit residuals without parity validation are not trustworthy - - Byte-for-byte API ↔ CLI parity per the E-18 CLI convention: `flowtime fit < request.json` produces the same JSON as `POST /v1/fit` - - .NET 9 / C# 13; invariant culture; camelCase JSON payloads (project-rule: `project.md:43`) - - No reintroduction of the deprecated `FlowTime.Generator` project (D-2026-04-07-019 Path B remains authoritative) - - ## Success Criteria - - - [ ] `POST /v1/fit` accepts `(model YAML, telemetry source spec, target series, parameter overrides to fit, residual metric)` and returns a fitted parameter set plus fit quality (residual, iterations, convergence flag) - - [ ] `flowtime fit` CLI command produces byte-identical output to `POST /v1/fit` for equivalent inputs - - [ ] Fit results against a parity-validated telemetry fixture produce residuals within documented tolerance; the tolerance bound is set by the Telemetry Loop & Parity harness output - - [ ] Rust engine session responds to `chunk_step { bins: N }` command by advancing the compiled plan N bins and returning partial series; state persists across calls within a session - - [ ] `POST /v1/chunked-eval` drives the chunk-step protocol from .NET, exposing either a polling pull or a request/response-per-chunk model (choice is a design task for the milestone) - - [ ] External controller integration demonstration: a fixture controller consumes chunk outputs, writes back parameter patches, and the next chunk reflects the patch — end-to-end test, not just unit coverage - - [ ] `FlowTime.Pipeline` project compiles as a standalone library; its public API exposes `Sweep`, `Sensitivity`, `GoalSeek`, `Optimize`, `Fit`, `ChunkedEvaluate` as strongly-typed methods - - [ ] All `/v1/*` API endpoints added by E-18 are rewritten to call through `FlowTime.Pipeline` rather than `FlowTime.TimeMachine.*` internals directly; the SDK is dogfooded from day one - - [ ] All Time Machine CLI 
commands (`flowtime validate/sweep/sensitivity/goal-seek/optimize/fit/chunked-eval`) are rewritten to call through `FlowTime.Pipeline` - - [ ] 100% branch coverage on new pure runners (`FitRunner`, `ChunkRunner`); integration tests cover the Rust chunk-step protocol; API contract tests cover `/v1/fit` and `/v1/chunked-eval` - - [ ] `docs/architecture/time-machine-analysis-modes.md` updated with Fit and Chunked Evaluation sections reflecting the shipped behavior - - [ ] `FlowTime.Pipeline` embedding guide documents a concrete use case (e.g., hosted in a notebook, in an Azure Function, in a script) with a complete working example - - [ ] On epic completion, E-18's `m-E18-05 Chunked Evaluation` and `m-E18-XX Model Fit` placeholder rows are struck through; E-18 epic status flips to `complete` and the epic is archived under `work/epics/completed/` - - ## Risks & Open Questions - - | Risk / Question | Impact | Mitigation | - |-----------------|--------|------------| - | Telemetry Loop & Parity epic has not been scheduled; without it Fit cannot be meaningfully validated | **Hard block** | Telemetry Loop & Parity must be scheduled and completed before Fit AC can be closed. E-22 does not start Fit until TLP ships | - | E-15 first-dataset-path timeline is uncertain | High | E-15 is on the critical path per Option A. E-22 planning proceeds; E-22 implementation is gated on E-15 delivering at least one end-to-end canonical bundle dataset | - | Chunked evaluation semantics for stateful nodes (e.g., queues carrying over bin boundaries) need design; the current `IStatefulNode` stubs may not suffice | Medium | First milestone of chunked scope is a design spike: document chunk-boundary state-transfer semantics before implementation | - | Residual metric choice (RMSE vs. MAE vs. 
weighted) may not generalize across queueing topologies | Medium | Start with RMSE as default; make the residual a configurable strategy in `FitSpec`; document tradeoffs | - | `FlowTime.Pipeline` SDK surface may be over-scoped if designed before external embedders give feedback | Medium | Keep the SDK minimal: expose only the methods that existing API/CLI callers need. Defer Monte Carlo, direct-source adapters, and any speculative surface | - | Rust `chunk_step` may reveal gaps in the stateful execution seam that require refactoring inside the engine core, not just protocol work | Medium | Scope a Rust-side design spike first; if the seam needs reshaping, split into a foundation milestone before the .NET chunk runner | - | Pipeline SDK rewrite of existing API/CLI callers may introduce regressions | Medium | Dogfood migration is done one endpoint at a time with full existing test coverage asserting behavior identity before and after | - - ## Milestones - - Sequencing: Fit first (unblocks the largest downstream set — E-15 dataset path leads directly into fit validation). Chunked second (independent Rust protocol work; runs after Fit only to avoid Rust-engine contention on a single milestone branch). Pipeline SDK third so it crystallizes against the final surface including Fit and Chunked. 
- - | ID | Title | Status | Depends on | - |----|-------|--------|-----------| - | m-E22-01-model-fit | Model Fit | not started | E-15 first dataset path complete; Telemetry Loop & Parity harness validated | - | m-E22-02-chunked-evaluation | Chunked Evaluation | not started | m-E22-01 (sequencing only; no hard code dependency); m-E18-02 session protocol (already delivered) | - | m-E22-03-pipeline-sdk | `FlowTime.Pipeline` SDK Wrapper | not started | m-E22-01 + m-E22-02 complete (SDK reflects the final surface) | - - ## ADRs - - - (none yet — ADRs will be captured under `work/decisions.md` as they arise during milestone planning) - - ## References - - - E-18 epic spec: `work/epics/E-18-headless-pipeline-and-optimization/spec.md` - - E-18 gap analysis: `work/epics/E-18-headless-pipeline-and-optimization/e18-gap-analysis.md` - - Analysis modes architecture: `docs/architecture/time-machine-analysis-modes.md` - - Telemetry Loop & Parity: `work/epics/unplanned/telemetry-loop-parity/spec.md` - - E-15 Telemetry Ingestion: `work/epics/E-15-telemetry-ingestion/` - - Option A delivery sequence: `work/decisions.md` → D-2026-04-15-032 - - Headless engine architecture: `docs/architecture/headless-engine-architecture.md` diff --git a/work/migration/manifests/epics-active.yaml b/work/migration/manifests/epics-active.yaml new file mode 100644 index 00000000..9f7ed1c2 --- /dev/null +++ b/work/migration/manifests/epics-active.yaml @@ -0,0 +1,474 @@ +version: 1 +commit: + mode: single + message: 'import(spike): Pass B — 4 active-set epics' +entities: + - kind: epic + id: E-13 + frontmatter: + title: Path Analysis & Subgraph Queries + status: proposed + body: | + ## 1. Summary + + This epic defines **path analysis** as a first‑class capability in FlowTime: answering end‑to‑end journey questions across one or more routes in the DAG, over time, using FlowTime’s per‑bin node and edge artifacts. 
+ + Path analysis is built on EdgeTimeBin, Classes‑as‑Flows, and the stable post-E-16 fact surfaces exposed by the server. It is consumed by both the UI and MCP server. It is not a replacement for edge overlays; it is a higher‑order analysis layer. + + --- + + ## 2. Motivation + + Edge time bins enable truth‑based edge overlays and conservation checks, but they do not answer questions like: + + - “For the **Order** flow, what was the dominant end‑to‑end path from `Orders.Create → Billing.Settle` at 10:00–14:00, and where did it bottleneck?” + - “What fraction of volume went via path A vs path B during a surge window?” + - “Which path contributed most to backlog‑hours and SLA misses?” + - “If we reroute 15% around `Auth.Validate`, what happens to end‑to‑end latency on the Order journey?” + + Path analysis introduces formal query semantics and derived outputs so these questions are answerable in a consistent, explainable way. + + --- + + ## 3. Definitions + + ### 3.1 Path + + A **path** is a sequence of nodes/edges in the DAG. Path queries can be: + + - **Explicit**: caller supplies a sequence of edges or nodes. + - **Set‑based**: “all paths from A to B,” optionally constrained (e.g., must pass through X). + - **Policy‑based**: “top K paths by volume” within a window. + + ### 3.2 Path Filters vs Path Analysis + + - **Path Filters** are *subgraph extraction*: return only the nodes/edges that participate in a selected path or set of paths. + - **Path Analysis** includes *metrics and attribution* (dominant paths, bottlenecks, path pain, latency estimates). + + Path filters are a subset of path analysis but still require defined semantics (thresholds, class handling, missing edge data behavior). + + --- + + ## 4. Scope + + ### In Scope + + - **Path query object** definition for explicit, set‑based, and policy‑based paths. + - **Derived path metrics** that are honest and explainable using existing per‑bin artifacts. 
+ - **Subgraph responses** suitable for UI overlays and MCP consumption. + - **Provenance metadata** for derived path metrics (origin/aggregation). + + ### Out of Scope (v1) + + - Full process‑mining algorithms or conformance checking. + - Probabilistic end‑to‑end latency distributions. + - Cross‑run path inference at scale. + + --- + + ## 5. Path Metrics (v1) + + These metrics are designed to be **derived without pretending**: + + ### A. Volume Split + + - Per‑path volume can be computed using edge time bins. + - For explicit paths, **path flow** can be approximated by the tightest edge (min‑cut) or by the entry edge, with explicit provenance. + + ### B. Bottleneck Attribution + + Per bin, define a binding score for each node in the path: + + - `binding = 1(Q[t-1] > 0)` OR `utilization[t] ≈ 1` + - or `shortfall = max(0, arrivals[t] - capacity[t])` + + Path bottleneck per bin = argmax(binding/shortfall) among nodes on the path. + + ### C. Path “Pain” + + Sum backlog‑hours along the path: + + ``` + pathPain = Σ_nodes Σ_t Q_node[t] * Δt + ``` + + This is interpretable and ties directly to incident analysis. + + ### D. End‑to‑End Latency Estimate (v1) + + A pragmatic latency estimate per bin: + + ``` + W_path[t] ≈ Σ_i W_i[t] + W_i[t] ≈ Q_i[t] / max(ε, served_i[t]) * bin_minutes + ``` + + This is directional and explainable. v2+ can introduce delay kernels / convolution across edges. + + --- + + ## 6. API & Data Contracts + + Path analysis requires a **server‑side contract** so clients don’t compute their own semantics: + + - Path query input (`from`, `to`, `classId`, constraints, window). + - Path analysis output (path list, metrics, subgraph). + - Clear provenance metadata for derived metrics. + + This should be exposed as a **dedicated analysis endpoint** (preferred), not overloaded onto `/state_window`. + + --- + + ## 7. UI Integration + + Path analysis enables a distinctive UI mode: + + - Highlight chosen path(s). + - Edge width = flow. 
+ - Node color = bottleneck score / SLA risk. + - Scrubber shows dominant path changes over time. + + --- + + ## 8. MCP Integration + + MCP should consume **server‑provided path outputs** (authoritative), not compute path metrics itself. + + --- + + ## 9. Dependencies + + - Stable post-E-16 server-provided state/graph facts and contracts + - EdgeTimeBin (edge series + quality + warnings) + - Classes as Flows (per-class edge series) + - Derived sink/path latency (v1 signal for end-to-end latency) + - Resumed Phase 3 p3c + p3b for richer diagnostics and what-if path work + + --- + + ## 10. Roadmap / Milestones (TBD) + + This epic will be broken into milestones once the query contract and minimal metrics are approved. + + Suggested phases: + 1. Path query contract + subgraph responses on stable post-E-16 fact surfaces. + 2. Volume split + bottleneck attribution (v1) once edge facts are authoritative. + 3. Path pain + latency estimate (v1), then richer comparison/what-if path work after p3c + p3b. + 4. UI + MCP integration; overlay-aware comparisons can layer on later. + + --- + + ## 11. Open Questions + + - How should path volume be defined (min‑cut vs entry edge vs explicit normalization)? + - How to handle missing edge data or approximate edge quality? + - Where should path outputs be stored (derived on demand vs persisted in run artifacts)? + - What is the minimal API footprint that still enables UI/MCP consumption? + - kind: epic + id: E-14 + frontmatter: + title: Visualizations (Chart Gallery / Demo Lab) + status: cancelled + body: | + This spec is retained as historical planning context only. Live ownership for analytical visualization work now sits with the UI Analytical Views epic. + + ## Intent + + Provide a dedicated UI page where we can prototype role-focused charts (exec, SRE, support) using FlowTime-derived metrics, and clearly show which insights come from FlowTime output versus raw telemetry. 
+ + ## Goals + + - Create a single "Visualization Lab" page with chart panels grouped by role. + - Support horizon/stacked charts for volume, queue depth, SLA, latency, retry volume, and utilization. + - Use bespoke SVG or canvas charts (no charting library dependencies) for full control. + - Allow quick toggles for flow/class selection and time window. + - Contrast FlowTime-derived views with raw telemetry views where available. + - Keep chart definitions and data wiring deterministic and repeatable for demos. + - Keep charting code isolated from other UI modules and fed only by API output (to validate the engine semantics surface). + + ## Non-Goals + + - No new analytics pipeline; reuse existing engine and API outputs. + - No third-party charting library dependencies. + + ## Data Sources + + - `/state_window` for per-bin node and edge metrics. + - Edge metrics for flow/attempt/retry volumes. + - Warnings/quality flags for contextual overlays. + - Raw telemetry comparisons only after Telemetry Ingestion is available. + + ## UI Concept (v1) + + - Role tabs (Exec, SRE, Support) each with curated chart bundles. + - A simple control strip: window, flow/class, metric set. + - Chart cards with consistent titles, units, and provenance text. + + ## Milestones + + - To be defined when the epic is scheduled. + - kind: epic + id: E-15 + frontmatter: + title: Telemetry Ingestion, Topology Inference, and Canonical Bundles + status: proposed + body: | + > **Status note:** capture is shipped; ingestion pipeline is not + + ## Goal + + Build the pipeline that takes real-world data — event logs, traces, sensor feeds — and produces the two things FlowTime needs: a `/graph` topology and Gold-format time-binned series. This epic owns ingestion, topology inference, validation, and bundle assembly. + + ## Context + + FlowTime currently assumes topology is hand-built and telemetry fills series for known nodes. This works for synthetic models but blocks two capabilities: + + 1. 
**Domain-agnostic adoption** — FlowTime should work for any process with entities flowing through a system: IT microservices, business workflows, transit networks, logistics. + 2. **Validation against real data** — Synthetic models validate correctness but not usefulness. Real data proves FlowTime's analytical primitives produce answers that matter. + + The March 2026 dataset fitness research (`work/epics/E-15-telemetry-ingestion/reference/dataset-fitness-and-ingestion-research.md`) identifies process mining event logs, distributed systems traces, and physical network telemetry as candidate validation datasets. + + ## Scope + + ### In Scope + + **Gold Builder (raw data → binned facts)** + - Bronze → Silver → Gold pipeline: raw payloads → normalized events → binned facts per node per time window + - Mapping rules from raw telemetry to canonical node series + - Manifest generation and data quality warnings + - Class coverage metadata and gap-fill policy + - Support for multiple source types: event logs, time-series, OD pairs, API feeds + + **Graph Builder (data → topology)** + - Topology inference from case traces (directly-follows graph) + - Topology inference from origin-destination pairs + - Topology join with external structure sources (GTFS, OSM, service registries) + - Edge confidence scoring and provenance metadata + - Human curation: accept/reject edges, pin known edges, merge/split nodes, annotate node kinds + + **Bundle Assembly** + - Deterministic bundle generation (same inputs → same hashes) + - Schema validation against published schemas + - Canonical bundle format: `model.yaml`, `manifest.json`, `series/`, CSV files + + **Validation Dataset Integration** + - Process mining event log ingestion (BPI Challenge format) + - At least one non-IT dataset path (transit, traffic, or logistics) + + ### Out of Scope + - Engine semantic derivations (utilization, latency, retry governance) — owned by engine + - Overlay scenarios and derived runs — owned by overlays 
epic + - UI workflows beyond topology rendering and basic data availability signals + - Real-time / streaming ingestion (batch-first) + + ## Current State (2026-03) + + - **Capture exists**: API `/telemetry/captures` and CLI `flowtime telemetry capture` can generate telemetry bundles from existing runs. + - **Bundle contract exists**: `docs/schemas/telemetry-manifest.schema.json` defines the canonical manifest. + - **Gold schema defined**: Per node, per time bin: timestamp, node, flow, arrivals, served, errors, optional queue_depth/capacity_proxy. + - **TelemetryLoader service** (ADX/KQL or lake ingestion) is not implemented. + - **Graph Builder** does not exist. + - **No external dataset has been ingested** into FlowTime yet. + + ## Design Principles + + - **Deterministic outputs**: same inputs yield same bundle hashes. + - **Schema-first**: ingestion must validate against published schemas. + - **No semantics**: ingestion is responsible for aggregation and alignment, not derived metrics (bottleneck ID, cycle time, etc. are engine-side). + - **Topology as artifact with provenance**: inferred edges carry confidence scores; the graph is versioned and curable, not a fixed truth. + - **Honest about gaps**: missing data, low-confidence edges, and inferred values are explicitly surfaced as warnings — never silently hidden. + + ## Recommended Sequencing + + - Build Gold Builder plus one replayable dataset path first; this epic should not wait on later UI or optimization work. + - Follow the first dataset path immediately with Telemetry Loop & Parity so synthetic-vs-replay drift is measured before fitting, optimization, or anomaly automation. + - Treat resumed Phase 3 primitives as leverage layers that make ingested data more valuable, not as prerequisites for canonical bundle generation. 
+ + ## Canonical Bundle Contract + + Bundles consist of: + - `model.yaml` + - `manifest.json` (window, grid, files, warnings) + - `series/index.json` + - CSV series files + + Ingestion rules ensure: + - Stable series naming and units + - Consistent bin alignment and gap handling + - Explicit warnings for missing or low-quality data + + ## Dependencies + + - **Canonical telemetry contracts and post-E-16 fact surfaces**: ingestion outputs must align with stable series semantics and authoritative graph/category facts. + - **Phase 3 analytical primitives** (in progress): bottleneck ID, cycle time, WIP limits — these make ingested data interesting rather than just displayable, but are not required to produce canonical bundles. + - **dag-map spike**: informs how inferred topologies render in the UI. + + ## Suggested Milestone Decomposition + + ### M1: Gold Builder v1 + - Ingest a single process mining event log (BPI Challenge 2012) into Gold format + - Bronze → Silver → Gold pipeline for event log sources + - Bin alignment, gap handling, data quality warnings + - Output: canonical bundle that FlowTime engine can consume + + ### M2: Graph Builder v1 + - Infer topology from directly-follows relations in event log traces + - Output `/graph` with nodes, edges, confidence scores + - Human curation hooks (accept/reject/pin) + - First end-to-end demo: real dataset → topology + Gold → UI renders graph + + ### M3: External topology join + - Join telemetry with external structure source (GTFS, OSM, or service registry) + - Support OD-pair and physical-adjacency topology construction + - Second dataset path (transit or traffic) + + ### M4: Data quality and operational tooling + - Gap detection, zero-fill policies, class coverage metadata + - CLI/SDK wrappers for ingestion + - Ingestion diagnostics and provenance + + ### M5: TelemetryLoader service + - Service endpoint for batch ingestion from data lake / ADX / parquet sources + - Production-grade validation and bundle assembly + + 
## Validation Datasets (Identified) + + | Dataset | Domain | Why | Milestone | + |---------|--------|-----|-----------| + | BPI Challenge 2012 | Loan applications | Topology from traces, 13K cases, rework loops | M1, M2 | + | Road Traffic Fines | Municipal process | 150K cases, very high volume, clear bottlenecks | M1, M2 | + | PeMS + OpenStreetMap | Road traffic | Physical network, massive vehicle volume | M3 | + | MTA Ridership + GTFS | Transit passengers | Passengers as entities, incidents, schedule backbone | M3 | + | Alibaba Cluster Trace 2018 | Microservices | IT validation, millions of requests, trace-based topology | Future | + + See `work/epics/E-15-telemetry-ingestion/reference/dataset-fitness-and-ingestion-research.md` for the full evaluation and dataset fitness checklist. + + ## References + + - `work/epics/E-15-telemetry-ingestion/reference/dataset-fitness-and-ingestion-research.md` — Dataset fitness research + - `work/epics/completed/time-travel/telemetry-generation-explicit.md` — Existing telemetry capture + - `docs/reference/engine-capabilities.md` — Engine capabilities + - `docs/schemas/telemetry-manifest.schema.json` — Bundle manifest schema + - `work/epics/E-10-engine-correctness-and-analytics/spec.md` — Phase 3 analytical primitives (dependency) + - kind: epic + id: E-22 + frontmatter: + title: Time Machine — Model Fit & Chunked Evaluation + status: proposed + body: | + ## Goal + + Close out the remaining Time Machine analysis modes — **model fitting** against real telemetry and **chunked evaluation** for feedback simulation — and crystallize the resulting surface as a clean embeddable **`FlowTime.Pipeline` SDK**. These are the last two analysis modes in the E-18 Time Machine architecture; delivering them completes the "FlowTime as a callable function" arc. 
+ + ## Context + + E-18 delivered 11 milestones covering parameterized evaluation, engine session protocol, tiered validation, parameter sweep, sensitivity, goal seek, N-parameter optimization, `SessionModelEvaluator`, and the .NET Time Machine CLI. All are on `main`. The analysis layer works end-to-end for synthetic runs. + + Two scope items remained in E-18 and are explicitly carried into E-22: + + - **Model Fit** (`FitSpec`/`FitRunner`/`POST /v1/fit`) — composes `ITelemetrySource` + `Optimizer` with residual as the objective. Infrastructure exists; the composition is the new work. **Blocked on** E-15 Telemetry Ingestion (first repeatable dataset path) and the Telemetry Loop & Parity epic (validated drift bounds — fitting against real telemetry without measured drift would produce falsely precise results). + + - **Chunked Evaluation** (Mode 6) — bin-chunk evaluation for feedback simulation with external controllers. The Rust engine session (m-E18-02) is the seam, but the chunk-step protocol on top of it is not designed. + + A third scope item was decided (2026-04-20) to land alongside Fit and Chunked so the completed surface is exposed as a clean embeddable API: + + - **`FlowTime.Pipeline` SDK wrapper** — thin project exposing `Sweep`, `Sensitivity`, `GoalSeek`, `Optimize`, `Fit`, `ChunkedEvaluate` as a programmatic embedding surface over the Time Machine internals. Callers today reach into `FlowTime.TimeMachine.Sweep.*` directly; the SDK crystallizes the external contract. + + E-22 is sequenced per D-2026-04-15-032 (Option A): `E-21 (active) → E-15 Telemetry Ingestion → Telemetry Loop & Parity → E-22`. + + ### Supersedes + + Closes out E-18 placeholder `m-E18-XX Model Fit` and `m-E18-05 Chunked Evaluation`. E-18 itself remains the archived parent epic (11 delivered milestones recorded there as "complete"); E-22 is the forward-looking epic that completes its scope. 
+ + ### Related + + - **E-15 Telemetry Ingestion** (`work/epics/E-15-telemetry-ingestion/`) — hard prerequisite for Fit. Provides the first repeatable dataset path and replayable canonical bundle via Gold Builder → Graph Builder. + - **Telemetry Loop & Parity** (`work/epics/unplanned/telemetry-loop-parity/spec.md`) — hard prerequisite for Fit. Provides measured drift bounds between synthetic and replayed runs; without those bounds, fit quality cannot be meaningfully reported. Currently unnumbered; will take its own epic slot when scheduled. + - **E-21 Svelte Workbench & Analysis Surfaces** — builds UI for fit results once Fit's API contract is stable. Not in E-22 scope; E-21 milestone allocation is independent. + + ## Scope + + ### In Scope + + - `FitSpec` + `FitRunner` + `POST /v1/fit` composing `ITelemetrySource` + `Optimizer` with residual objective (RMSE / MAE / configurable) against a user-selected series + - `flowtime fit` .NET CLI command, pipeable JSON-in/JSON-out, byte-compatible with `POST /v1/fit` + - Chunked evaluation protocol: stateful `chunk_step` session command on the Rust engine that advances a compiled plan by N bins, yielding partial results + - `ChunkSpec` + `ChunkRunner` + `POST /v1/chunked-eval` (final name TBD) driving the chunk-step protocol from .NET with external-controller integration + - `flowtime chunked-eval` .NET CLI command matching the API contract + - `FlowTime.Pipeline` project — thin embeddable SDK exposing `Sweep`, `Sensitivity`, `GoalSeek`, `Optimize`, `Fit`, `ChunkedEvaluate` as a clean programmatic API over the Time Machine internals; surfaces no HTTP, no CLI parsing, no artifact layout + - Migration of existing internal callers (the API endpoints, the .NET CLI commands) to the `FlowTime.Pipeline` SDK so the SDK is dogfooded from day one + - Test coverage matching E-18 standard: 100% branch coverage on pure runners; integration tests for Rust chunk-step protocol; API contract tests for `/v1/fit` and `/v1/chunked-eval` + - 
Documentation: `docs/architecture/time-machine-analysis-modes.md` extended with Fit and Chunked sections; `FlowTime.Pipeline` SDK embedding guide + + ### Out of Scope + + - **Optimization constraints** (`--constraint "max(node.queue.utilization) < 0.8"`) — tracked in `work/gaps.md`; candidate for a later analysis-layer patch against `OptimizeSpec`/`GoalSeekSpec`/`FitSpec` + - **Monte Carlo** (Mode 5) — sampling parameters from distributions, characterizing output distribution. Lower priority than Fit; tracked in gaps + - **`FlowTime.Telemetry.*` direct-source adapters** (Prometheus, OpenTelemetry, BPI event logs) — E-15 Gold Builder covers the general batch path; adapters are narrower shortcuts to build only when a concrete client asks + - **Tiered validation parity across Sim UI / Blazor UI / Svelte UI / MCP / external agents** — validation surface work; closer to E-21 m-E21-06 (Validation Surface) and future Blazor maintenance + - **Canonical bundle round-trip parity AC** (E-18 unchecked AC: capture baseline → replay bundle = same outputs modulo drift) — owned by Telemetry Loop & Parity, not E-22 + - **Fit-result UI** — lands in E-21 (or a later Svelte milestone) once Fit's API is stable; E-22 delivers only the contract + - **Fitting algorithms beyond the existing optimizer** — Fit uses the existing Nelder-Mead `Optimizer` with a residual objective. Gradient-based or Bayesian fitters are future work + - **Time grid alignment across heterogeneous sources** — assumes E-15 Gold Builder presents series on a common grid; grid alignment belongs to E-15 + - **Streaming / push UI for chunked evaluation** — chunked delivers a pull contract (caller drives chunk_step); WebSocket push for live simulation is a separate E-17-style track + + ## Constraints + + - Builds on the existing Rust engine session protocol (m-E18-02). 
No changes to the MessagePack framing; `chunk_step` is a new command alongside `compile`, `eval`, `patch`, `get_params`, `get_series`, `validate_schema` + - `FlowTime.Pipeline` is a pure SDK: no HTTP server, no CLI parsing, no artifact writing. It may compose services from other projects but its public API is in-process method calls returning strongly-typed results + - Fit against real telemetry is gated by the Telemetry Loop & Parity harness reporting drift within documented tolerance. A failing parity harness blocks Fit acceptance — reported fit residuals without parity validation are not trustworthy + - Byte-for-byte API ↔ CLI parity per the E-18 CLI convention: `flowtime fit < request.json` produces the same JSON as `POST /v1/fit` + - .NET 9 / C# 13; invariant culture; camelCase JSON payloads (project-rule: `project.md:43`) + - No reintroduction of the deprecated `FlowTime.Generator` project (D-2026-04-07-019 Path B remains authoritative) + + ## Success Criteria + + - [ ] `POST /v1/fit` accepts `(model YAML, telemetry source spec, target series, parameter overrides to fit, residual metric)` and returns a fitted parameter set plus fit quality (residual, iterations, convergence flag) + - [ ] `flowtime fit` CLI command produces byte-identical output to `POST /v1/fit` for equivalent inputs + - [ ] Fit results against a parity-validated telemetry fixture produce residuals within documented tolerance; the tolerance bound is set by the Telemetry Loop & Parity harness output + - [ ] Rust engine session responds to `chunk_step { bins: N }` command by advancing the compiled plan N bins and returning partial series; state persists across calls within a session + - [ ] `POST /v1/chunked-eval` drives the chunk-step protocol from .NET, exposing either a polling pull or a request/response-per-chunk model (choice is a design task for the milestone) + - [ ] External controller integration demonstration: a fixture controller consumes chunk outputs, writes back parameter patches, 
and the next chunk reflects the patch — end-to-end test, not just unit coverage + - [ ] `FlowTime.Pipeline` project compiles as a standalone library; its public API exposes `Sweep`, `Sensitivity`, `GoalSeek`, `Optimize`, `Fit`, `ChunkedEvaluate` as strongly-typed methods + - [ ] All `/v1/*` API endpoints added by E-18 are rewritten to call through `FlowTime.Pipeline` rather than `FlowTime.TimeMachine.*` internals directly; the SDK is dogfooded from day one + - [ ] All Time Machine CLI commands (`flowtime validate/sweep/sensitivity/goal-seek/optimize/fit/chunked-eval`) are rewritten to call through `FlowTime.Pipeline` + - [ ] 100% branch coverage on new pure runners (`FitRunner`, `ChunkRunner`); integration tests cover the Rust chunk-step protocol; API contract tests cover `/v1/fit` and `/v1/chunked-eval` + - [ ] `docs/architecture/time-machine-analysis-modes.md` updated with Fit and Chunked Evaluation sections reflecting the shipped behavior + - [ ] `FlowTime.Pipeline` embedding guide documents a concrete use case (e.g., hosted in a notebook, in an Azure Function, in a script) with a complete working example + - [ ] On epic completion, E-18's `m-E18-05 Chunked Evaluation` and `m-E18-XX Model Fit` placeholder rows are struck through; E-18 epic status flips to `complete` and the epic is archived under `work/epics/completed/` + + ## Risks & Open Questions + + | Risk / Question | Impact | Mitigation | + |-----------------|--------|------------| + | Telemetry Loop & Parity epic has not been scheduled; without it Fit cannot be meaningfully validated | **Hard block** | Telemetry Loop & Parity must be scheduled and completed before Fit AC can be closed. E-22 does not start Fit until TLP ships | + | E-15 first-dataset-path timeline is uncertain | High | E-15 is on the critical path per Option A. 
E-22 planning proceeds; E-22 implementation is gated on E-15 delivering at least one end-to-end canonical bundle dataset | + | Chunked evaluation semantics for stateful nodes (e.g., queues carrying over bin boundaries) need design; the current `IStatefulNode` stubs may not suffice | Medium | First milestone of chunked scope is a design spike: document chunk-boundary state-transfer semantics before implementation | + | Residual metric choice (RMSE vs. MAE vs. weighted) may not generalize across queueing topologies | Medium | Start with RMSE as default; make the residual a configurable strategy in `FitSpec`; document tradeoffs | + | `FlowTime.Pipeline` SDK surface may be over-scoped if designed before external embedders give feedback | Medium | Keep the SDK minimal: expose only the methods that existing API/CLI callers need. Defer Monte Carlo, direct-source adapters, and any speculative surface | + | Rust `chunk_step` may reveal gaps in the stateful execution seam that require refactoring inside the engine core, not just protocol work | Medium | Scope a Rust-side design spike first; if the seam needs reshaping, split into a foundation milestone before the .NET chunk runner | + | Pipeline SDK rewrite of existing API/CLI callers may introduce regressions | Medium | Dogfood migration is done one endpoint at a time with full existing test coverage asserting behavior identity before and after | + + ## Milestones + + Sequencing: Fit first (unblocks the largest downstream set — E-15 dataset path leads directly into fit validation). Chunked second (independent Rust protocol work; runs after Fit only to avoid Rust-engine contention on a single milestone branch). Pipeline SDK third so it crystallizes against the final surface including Fit and Chunked. 
+ + | ID | Title | Status | Depends on | + |----|-------|--------|-----------| + | m-E22-01-model-fit | Model Fit | not started | E-15 first dataset path complete; Telemetry Loop & Parity harness validated | + | m-E22-02-chunked-evaluation | Chunked Evaluation | not started | m-E22-01 (sequencing only; no hard code dependency); m-E18-02 session protocol (already delivered) | + | m-E22-03-pipeline-sdk | `FlowTime.Pipeline` SDK Wrapper | not started | m-E22-01 + m-E22-02 complete (SDK reflects the final surface) | + + ## ADRs + + - (none yet — ADRs will be captured under `work/decisions.md` as they arise during milestone planning) + + ## References + + - E-18 epic spec: `work/epics/E-18-headless-pipeline-and-optimization/spec.md` + - E-18 gap analysis: `work/epics/E-18-headless-pipeline-and-optimization/e18-gap-analysis.md` + - Analysis modes architecture: `docs/architecture/time-machine-analysis-modes.md` + - Telemetry Loop & Parity: `work/epics/unplanned/telemetry-loop-parity/spec.md` + - E-15 Telemetry Ingestion: `work/epics/E-15-telemetry-ingestion/` + - Option A delivery sequence: `work/decisions.md` → D-2026-04-15-032 + - Headless engine architecture: `docs/architecture/headless-engine-architecture.md` diff --git a/work/migration/manifests/skip-log.md b/work/migration/manifests/skip-log.md new file mode 100644 index 00000000..224b7c55 --- /dev/null +++ b/work/migration/manifests/skip-log.md @@ -0,0 +1,5 @@ +# Migration skip-log + +Findings accumulated by the projector. Triage in Phase 4 dry-run loop. + +- **E-13**: source spec has no `**Status:**` line; defaulted to `proposed`. Source: `work/epics/E-13-path-analysis/spec.md`. 
diff --git a/work/migration/scripts/project_e22.py b/work/migration/scripts/project_e22.py deleted file mode 100644 index 444a40f4..00000000 --- a/work/migration/scripts/project_e22.py +++ /dev/null @@ -1,130 +0,0 @@ -# /// script -# requires-python = ">=3.10" -# dependencies = ["ruamel.yaml>=0.18"] -# /// -"""Pass A spike — project E-22 epic into a single-entity aiwf import manifest. - -Reads: work/epics/E-22-model-fit-chunked-evaluation/spec.md -Emits: work/migration/manifests/e22-spike.yaml - -Validate downstream with: aiwf import --dry-run work/migration/manifests/e22-spike.yaml -""" - -import re -import sys -from pathlib import Path - -from ruamel.yaml import YAML -from ruamel.yaml.scalarstring import LiteralScalarString - - -REPO_ROOT = Path(__file__).resolve().parents[3] -SPEC_PATH = REPO_ROOT / "work/epics/E-22-model-fit-chunked-evaluation/spec.md" -OUT_PATH = REPO_ROOT / "work/migration/manifests/e22-spike.yaml" - -V1_TO_V3_EPIC_STATUS = { - "planning": "proposed", -} - - -def parse_epic_spec(text: str) -> dict: - lines = text.splitlines() - if not lines or not lines[0].startswith("# "): - raise ValueError("spec missing H1 title") - raw_title = lines[0][2:].strip() - title = re.sub(r"^Epic:\s*", "", raw_title) - - id_match = re.search(r"^\*\*ID:\*\*\s*(\S+)\s*$", text, re.MULTILINE) - status_match = re.search(r"^\*\*Status:\*\*\s*(\S+)\s*$", text, re.MULTILINE) - if not id_match: - raise ValueError("spec missing **ID:** line") - if not status_match: - raise ValueError("spec missing **Status:** line") - - epic_id = id_match.group(1) - v1_status = status_match.group(1) - if v1_status not in V1_TO_V3_EPIC_STATUS: - raise ValueError( - f"unmapped epic status {v1_status!r} — add to V1_TO_V3_EPIC_STATUS" - ) - status = V1_TO_V3_EPIC_STATUS[v1_status] - - body = strip_frontmatter_prose(text) - - return {"id": epic_id, "title": title, "status": status, "body": body} - - -def strip_frontmatter_prose(text: str) -> str: - """Remove the H1 title, **ID:**, and 
**Status:** lines plus surrounding blanks. - - Everything else (## Goal, ## Context, etc.) is body content for aiwf. - """ - out_lines: list[str] = [] - skip_next_blank = False - for line in text.splitlines(): - if line.startswith("# "): - skip_next_blank = True - continue - if re.match(r"^\*\*(ID|Status):\*\*", line): - skip_next_blank = True - continue - if skip_next_blank and line.strip() == "": - skip_next_blank = False - continue - skip_next_blank = False - out_lines.append(line) - while out_lines and out_lines[0].strip() == "": - out_lines.pop(0) - while out_lines and out_lines[-1].strip() == "": - out_lines.pop() - return "\n".join(out_lines) + "\n" - - -def build_manifest(epic: dict) -> dict: - return { - "version": 1, - "commit": { - "mode": "single", - "message": f"import(spike): {epic['id']} — Pass A projector dry-run", - }, - "entities": [ - { - "kind": "epic", - "id": epic["id"], - "frontmatter": { - "title": epic["title"], - "status": epic["status"], - }, - "body": LiteralScalarString(epic["body"]), - } - ], - } - - -def main() -> int: - spec_text = SPEC_PATH.read_text(encoding="utf-8") - epic = parse_epic_spec(spec_text) - manifest = build_manifest(epic) - - OUT_PATH.parent.mkdir(parents=True, exist_ok=True) - yaml = YAML() - yaml.indent(mapping=2, sequence=4, offset=2) - yaml.width = 120 - with OUT_PATH.open("w", encoding="utf-8") as f: - yaml.dump(manifest, f) - - print(f"wrote {OUT_PATH.relative_to(REPO_ROOT)}") - print(f" kind: epic") - print(f" id: {epic['id']}") - print(f" title: {epic['title']}") - print(f" status: {epic['status']} (v1: {V1_TO_V3_EPIC_STATUS_REVERSED().get(epic['status'], '?')})") - print(f" body: {len(epic['body'].splitlines())} lines") - return 0 - - -def V1_TO_V3_EPIC_STATUS_REVERSED() -> dict: - return {v: k for k, v in V1_TO_V3_EPIC_STATUS.items()} - - -if __name__ == "__main__": - sys.exit(main()) diff --git a/work/migration/scripts/project_epics.py b/work/migration/scripts/project_epics.py new file mode 100644 index 
00000000..77d6b21f --- /dev/null +++ b/work/migration/scripts/project_epics.py @@ -0,0 +1,186 @@ +# /// script +# requires-python = ">=3.10" +# dependencies = ["ruamel.yaml>=0.18"] +# /// +"""Project active-set E-NN epics into a combined aiwf import manifest. + +Pass B scope (no milestones): E-13, E-14, E-15, plus E-22 carried from Pass A. +E-11 and E-18 are multi-milestone — handled in later passes (C/E). + +Reads: work/epics/E-NN-*/spec.md +Emits: work/migration/manifests/epics-active.yaml + work/migration/manifests/skip-log.md (accumulated findings, only when non-empty) + +Validate downstream: aiwf import --dry-run work/migration/manifests/epics-active.yaml +""" + +import re +import sys +from pathlib import Path + +from ruamel.yaml import YAML +from ruamel.yaml.scalarstring import LiteralScalarString + + +REPO_ROOT = Path(__file__).resolve().parents[3] +EPICS_DIR = REPO_ROOT / "work/epics" +OUT_PATH = REPO_ROOT / "work/migration/manifests/epics-active.yaml" +SKIP_LOG_PATH = REPO_ROOT / "work/migration/manifests/skip-log.md" + +PASS_B_EPICS = ["E-13", "E-14", "E-15", "E-22"] + +V1_TO_V3_EPIC_STATUS = { + "planning": "proposed", + "proposed": "proposed", + "paused": "active", + "in-progress": "active", + "in_progress": "active", + "active": "active", + "superseded": "cancelled", + "absorbed": "cancelled", + "cancelled": "cancelled", + "complete": "done", + "completed": "done", + "done": "done", +} + + +def find_epic_dir(epic_id: str) -> Path: + matches = sorted(EPICS_DIR.glob(f"{epic_id}-*")) + matches = [m for m in matches if m.is_dir()] + if not matches: + raise FileNotFoundError(f"no dir matching {epic_id}-* under {EPICS_DIR}") + if len(matches) > 1: + raise ValueError(f"multiple dirs match {epic_id}-*: {matches}") + return matches[0] + + +def parse_epic_spec(epic_id: str, spec_path: Path, findings: list[str]) -> dict: + text = spec_path.read_text(encoding="utf-8") + lines = text.splitlines() + if not lines or not lines[0].startswith("# "): + raise 
ValueError(f"{epic_id}: spec missing H1 title") + raw_title = lines[0][2:].strip() + title = re.sub(r"^Epic:\s*", "", raw_title) + title = re.sub(rf"^{epic_id}\s+", "", title) + + id_match = re.search(r"^\*\*ID:\*\*\s*(\S+)\s*$", text, re.MULTILINE) + if not id_match: + raise ValueError(f"{epic_id}: spec missing **ID:** line") + if id_match.group(1) != epic_id: + raise ValueError( + f"{epic_id}: **ID:** says {id_match.group(1)!r}; expected {epic_id!r}" + ) + + status_match = re.search( + r"^\*\*Status:\*\*\s*(\S+)(?:\s*\((.+?)\))?", + text, + re.MULTILINE, + ) + qualifier: str | None = None + if status_match: + v1_status = status_match.group(1).lower() + qualifier = status_match.group(2) + if v1_status not in V1_TO_V3_EPIC_STATUS: + raise ValueError( + f"{epic_id}: unmapped epic status {v1_status!r} — add to V1_TO_V3_EPIC_STATUS" + ) + v3_status = V1_TO_V3_EPIC_STATUS[v1_status] + else: + v3_status = "proposed" + findings.append( + f"- **{epic_id}**: source spec has no `**Status:**` line; " + f"defaulted to `proposed`. Source: `{spec_path.relative_to(REPO_ROOT)}`." 
+ ) + + body = strip_frontmatter_prose(text) + if qualifier: + body = f"> **Status note:** {qualifier}\n\n{body}" + + return {"id": epic_id, "title": title, "status": v3_status, "body": body} + + +def strip_frontmatter_prose(text: str) -> str: + out: list[str] = [] + skip_blank = False + for line in text.splitlines(): + if line.startswith("# "): + skip_blank = True + continue + if re.match(r"^\*\*(ID|Status):\*\*", line): + skip_blank = True + continue + if skip_blank and line.strip() == "": + skip_blank = False + continue + skip_blank = False + out.append(line) + while out and out[0].strip() == "": + out.pop(0) + while out and out[-1].strip() == "": + out.pop() + return "\n".join(out) + "\n" + + +def build_manifest(epics: list[dict]) -> dict: + return { + "version": 1, + "commit": { + "mode": "single", + "message": f"import(spike): Pass B — {len(epics)} active-set epics", + }, + "entities": [ + { + "kind": "epic", + "id": e["id"], + "frontmatter": { + "title": e["title"], + "status": e["status"], + }, + "body": LiteralScalarString(e["body"]), + } + for e in epics + ], + } + + +def main() -> int: + findings: list[str] = [] + epics: list[dict] = [] + for epic_id in PASS_B_EPICS: + spec_dir = find_epic_dir(epic_id) + spec_path = spec_dir / "spec.md" + if not spec_path.exists(): + findings.append( + f"- **{epic_id}**: no `spec.md` in `{spec_dir.relative_to(REPO_ROOT)}`; skipped." + ) + continue + epics.append(parse_epic_spec(epic_id, spec_path, findings)) + + manifest = build_manifest(epics) + OUT_PATH.parent.mkdir(parents=True, exist_ok=True) + yaml = YAML() + yaml.indent(mapping=2, sequence=4, offset=2) + yaml.width = 120 + with OUT_PATH.open("w", encoding="utf-8") as f: + yaml.dump(manifest, f) + + if findings: + header = ( + "# Migration skip-log\n\n" + "Findings accumulated by the projector. 
Triage in Phase 4 dry-run loop.\n\n" + ) + SKIP_LOG_PATH.write_text(header + "\n".join(findings) + "\n", encoding="utf-8") + elif SKIP_LOG_PATH.exists(): + SKIP_LOG_PATH.unlink() + + print(f"wrote {OUT_PATH.relative_to(REPO_ROOT)} ({len(epics)} epics)") + for e in epics: + print(f" {e['id']:6} {e['status']:10} — {e['title']}") + if findings: + print(f"\n{len(findings)} finding(s) in {SKIP_LOG_PATH.relative_to(REPO_ROOT)}") + return 0 + + +if __name__ == "__main__": + sys.exit(main()) From a8661716c86660f8c94919cc3fb448a0efb09edf Mon Sep 17 00:00:00 2001 From: Peter Bruinsma Date: Fri, 1 May 2026 20:57:14 +0000 Subject: [PATCH 05/50] =?UTF-8?q?chore(migration):=20Pass=20C=20=E2=80=94?= =?UTF-8?q?=20extend=20projector=20to=20milestones=20with=20green=20dry-ru?= =?UTF-8?q?n?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - project_epics.py extended for milestones: discover m-EXX-NN-*.md (excluding -tracking and -log siblings), parse two H1 shape variants (prose **ID:** line vs. id-embedded H1 `m-EXX-NN — Title`), map statuses, resolve depends_on within manifest scope, emit id-map.csv - Q2 revised: explicit M-NNN computed by projector (deterministic order: epic-id ascending, milestone-old-id ascending within epic) instead of `id: auto`. Reason: aiwf manifest reference fields (depends_on) can only resolve against ids that are explicitly declared at manifest time - depends_on filtering: milestone-target deps mapped through id-map; epic- target deps (e.g. 
m-E18-01 → E-20) dropped from frontmatter with skip-log finding (body retains the prose) - E-18 (in-progress) → active per status mapping - 11 m-E18-NN milestones → M-001..M-011 (statuses all `done` from `complete`) - aiwf import --dry-run on epics-active.yaml: 16 plans, 0 errors, exit 0 Co-Authored-By: Claude Opus 4.7 (1M context) --- work/migration/aiwf-v3-plan.md | 5 +- work/migration/manifests/epics-active.yaml | 1746 +++++++++++++++++++- work/migration/manifests/id-map.csv | 12 + work/migration/manifests/skip-log.md | 1 + work/migration/scripts/project_epics.py | 360 +++- 5 files changed, 2063 insertions(+), 61 deletions(-) create mode 100644 work/migration/manifests/id-map.csv diff --git a/work/migration/aiwf-v3-plan.md b/work/migration/aiwf-v3-plan.md index 54053a4a..92039efd 100644 --- a/work/migration/aiwf-v3-plan.md +++ b/work/migration/aiwf-v3-plan.md @@ -58,7 +58,7 @@ Conceptual identity (epics in `work/epics/E-NN/`, ADRs in `docs/`, commits track | **E-11** | no `spec.md`; loose layout | milestones use `m-svui-NN-...` | special; pick representative file or synthesize body | | **E-12** | `spec.md` (verify) | `M-10.NN-.md` (capitalized + dotted) | special | - **Milestone ids.** Use `id: auto` in the manifest; aiwf allocates `M-001..M-NNN` in deterministic manifest order (epic ascending, then milestone ascending within each epic). Old `m-E18-13-session-evaluator` → new `M-053` (or whatever falls out). Mapping captured in `id-map.csv`. Body cross-references rewritten via the map. + **Milestone ids.** ~~Use `id: auto`~~ **Use explicit `M-NNN` computed by the projector** in deterministic order (epic-id ascending, then milestone-old-id ascending within epic). Revision rationale (Pass C, 2026-05-01): aiwf manifest reference fields (e.g. `depends_on`) can resolve against existing-tree ids OR manifest-declared ids — but `auto` entries don't declare an id at manifest-time, so intra-manifest cross-references like m-E18-02's `depends_on: m-E18-01` cannot resolve. 
Switching to projector-computed explicit M-NNN sidesteps this and makes id-map.csv a direct projector emit (no parsing of aiwf allocation output needed). Mapping captured in `id-map.csv`. Body cross-references rewritten via the map. **Decision ids.** 54 entries in `work/decisions.md`, all explicit `D-YYYY-MM-DD-NNN:` headings. Sort tuple `(date, NNN)` → assign `D-001..D-054` chronologically. Body references in specs/tracking-docs/`CLAUDE.md`/memory get rewritten via id-map. @@ -286,7 +286,7 @@ Coverage gaps and mitigations: - [x] Decide incremental scope — E-22 first; successive passes A–G - [x] Pass A: spike on E-22 — `work/migration/scripts/project_e22.py` (uv script-mode, ruamel.yaml). Generates `work/migration/manifests/e22-spike.yaml`. **Dry-run green:** `aiwf import --dry-run` zero findings, exit 0. 12,902-byte `epic.md` would land at `work/epics/E-22-time-machine-model-fit-chunked-evaluation/epic.md` - [x] Pass B: extend to E-13/E-14/E-15 (+ E-22 carried from A) — `project_epics.py` replaces `project_e22.py`. **Dry-run green:** 4 epics, 0 findings, exit 0. skip-log.md emitted (1 entry: E-13 default-status). Status mapping table proven on missing/superseded/parenthesized-prose/clean inputs. -- [ ] Pass C: extend to E-18 (milestones) +- [x] Pass C: extend to E-18 + 11 milestones. Q2 revised: explicit `M-NNN` (computed by projector) instead of `auto` because intra-manifest `depends_on` can't reference auto entries. **Dry-run green:** 5 epics + 11 milestones, 0 errors, exit 0. id-map.csv emitted (11 entries). M-002 depends_on M-001 resolves correctly within manifest. m-E18-01 epic-target dep (E-20) dropped + skip-logged. - [ ] Pass D: extend to completed-id'd epics - [ ] Pass E: outlier per-epic rules (E-10, E-11, E-12) - [ ] Pass F: decisions + gaps @@ -364,3 +364,4 @@ Append-only record of dry-run iterations, decisions taken mid-flight, and findin - 2026-05-01 — phase 2 — Pass B mapping resolved (user) for E-18: `in-progress → active`. 
All four surfaces agree (spec, epic-roadmap, ROADMAP, CLAUDE.md) plus the live `epic/E-18-time-machine` branch hasn't been closed. E-18 → `active`. - 2026-05-01 — phase 2 — Pass B finding 3 settled: parenthesized qualifier on `**Status:**` line is preserved by prepending a `> **Status note:** ` blockquote to the top of the body. Information not lost; body retains the "capture is shipped; ingestion pipeline is not" nuance from E-15 inline. - 2026-05-01 — phase 2 — Pass B landed. `project_epics.py` (replaces `project_e22.py`); `epics-active.yaml` (4 epics: E-13/E-14/E-15/E-22); `skip-log.md` (1 finding: E-13 default-status). Status-mapping table exercised on all four input shapes (missing / superseded / parenthesized-prose / clean). `aiwf import --dry-run` zero error findings, exit 0. Plans 4 writes at title-derived slugs as expected. +- 2026-05-01 — phase 2 — Pass C landed. Projector extended to milestones. Q2 **revised** (recorded in plan): explicit `M-NNN` ids computed by projector instead of `auto`, because aiwf manifest reference fields like `depends_on` can't resolve against `auto` entries (no id declared at manifest time). E-18 + 11 milestones (M-001..M-011) projected; M-002 depends_on M-001 resolves cleanly. id-map.csv emitted. m-E18-01's `**Depends on:** E-20` (epic-target) correctly dropped from frontmatter (body retains prose) and skip-logged. Two milestone H1 shapes handled (Variant A: prose `**ID:**` line; Variant B: id embedded in H1). Two milestone status mappings: `complete` → `done`. `aiwf import --dry-run` zero errors, exit 0. 16 writes planned. 
diff --git a/work/migration/manifests/epics-active.yaml b/work/migration/manifests/epics-active.yaml index 9f7ed1c2..66f7e018 100644 --- a/work/migration/manifests/epics-active.yaml +++ b/work/migration/manifests/epics-active.yaml @@ -1,7 +1,7 @@ version: 1 commit: mode: single - message: 'import(spike): Pass B — 4 active-set epics' + message: 'import(spike): 5 epics + 11 milestones' entities: - kind: epic id: E-13 @@ -350,6 +350,424 @@ entities: - `docs/reference/engine-capabilities.md` — Engine capabilities - `docs/schemas/telemetry-manifest.schema.json` — Bundle manifest schema - `work/epics/E-10-engine-correctness-and-analytics/spec.md` — Phase 3 analytical primitives (dependency) + - kind: epic + id: E-18 + frontmatter: + title: Time Machine + status: active + body: | + > **Naming note.** This epic was originally filed as "Headless Pipeline and Optimization." The component is now named `FlowTime.TimeMachine` (the Time Machine). The directory path `work/epics/E-18-headless-pipeline-and-optimization/` is preserved for historical stability; cross-doc references use that path. The decision is recorded in `work/decisions.md` and in `work/epics/E-19-surface-alignment-and-compatibility-cleanup/m-E19-01-supported-surface-inventory.md` (A6 + shared framing). + + ## Goal + + Make FlowTime usable as a pure callable function — embeddable in pipelines, optimization loops, model discovery workflows, and digital twin architectures. The **Time Machine** (`FlowTime.TimeMachine`) is a new first-class execution component that scripts, UIs, MCP servers, and AI agents can drive programmatically. It owns compile, tiered validation, evaluate, reevaluate with parameter overrides, and canonical artifact write. + + FlowTime's execution component is an abstract machine in the BEAM / JVM sense: instructions (the compiled graph), state (the time grid plus accumulating series), deterministic topological stepping through time. 
"Time Machine" also aligns with the existing Blazor "Time Travel" UI feature — the Time Travel UI navigates runs the Time Machine produces — and the reevaluation semantics (rewind a compiled model, run it forward with different parameters) are literally time travel. + + ## Context + + FlowTime's engine is deterministic: given a model, parameters, and input series, it produces the same output every time. After E-16 purifies the engine into a compiled, typed, pure evaluation surface, treating it as a callable function becomes natural: + + ``` + f(model, parameters, inputs) → outputs + ``` + + This is the same relationship a circuit simulator (SPICE) has with its netlist: compile once, evaluate many times, vary parameters programmatically. SPICE built an entire ecosystem of analysis modes on this foundation — parameter sweeps, Monte Carlo, optimization, model fitting. FlowTime can do the same for queueing networks. + + This epic owns the shared runtime parameter foundation used by both the programmable Time Machine layer and the interactive UI layer. The parameter model, override surface, and reevaluation API should be built once here and consumed by E-17, not implemented twice. 
+ + ## The core insight + + Every advanced use case is a composition of "call the evaluation function with different inputs": + + | Use case | Loop shape | + |----------|-----------| + | **Parameter sweep** | Evaluate N times with different parameter values, compare outputs | + | **Optimization** | Vary parameters to minimize an objective function over FlowTime outputs | + | **Sensitivity analysis** | Perturb each parameter, measure output change (numerical gradient) | + | **Model discovery** | Fit model parameters to match observed telemetry (inverse modeling) | + | **Monte Carlo** | Sample parameters from distributions, evaluate N times, characterize output distribution | + | **Digital twin** | Continuously calibrate model from production telemetry, use for prediction and what-if | + | **Feedback simulation** | Evaluate chunk by chunk, let a controller (autoscaler, circuit breaker) adjust parameters between chunks | + + ## Scope + + ### In Scope + + - **Time Machine CLI mode** — FlowTime as a pipeline-friendly command: model + params in, results out (JSON/CSV) + - **Shared runtime parameter foundation** — compiled parameter identities, override points, reevaluation API, and optional enrichment contract for template-authored parameter metadata reused by E-17 + - **Iteration protocol** — keep compiled graph alive, accept new parameter sets per iteration without recompile + - **Tiered validation as a first-class operation** — schema / compile / analyse tiers callable from the same Time Machine surface as compile/evaluate/reevaluate. Client-agnostic: Sim UI, Blazor UI, Svelte UI, MCP servers, external AI agents, tests, and CI are all first-class callers on equal footing. Detailed requirement below in *Tiered validation (required scope)*. Originates from E-19 m-E19-01 decision A6. 
+ - **Parameter sweep** — evaluate over a grid of parameter values + - **Optimization** — find parameter values that minimize/maximize an objective subject to constraints + - **Model fitting** — given observed telemetry, calibrate model parameters to match (system identification) + - **Sensitivity analysis** — compute ∂output/∂parameter numerically + - **Pipeline SDK** — `FlowTime.Core` as the embeddable evaluation library, surfaced via `FlowTime.TimeMachine` as the first-class execution component with a clean API + - **Chunked evaluation** — evaluate bins in chunks for feedback simulation (autoscaler, circuit breaker scenarios) + + Real-telemetry fitting/optimization is part of this epic, but it is not the first cut: when E-18 consumes real telemetry rather than synthetic or fixture data, that branch should sit downstream of E-15's first dataset path and Telemetry Loop & Parity so calibration is grounded in measured replay drift. + + ### Out of Scope + + - UI for interactive parameter exploration (E-17) + - WebSocket/SignalR push channel (E-17) + - New analytical primitives + - Rewriting the DAG evaluator foundation + - Chunked/stateful execution semantics in the first Time Machine cut — treat them as a later layer once a dedicated streaming/stateful seam exists + + ## Execution Layers + + To minimize risk, execute this epic in three layers: + + 1. **Foundation layer:** shared runtime parameter foundation + evaluation SDK + tiered validation + Time Machine CLI / sidecar. + 2. **Analysis layer:** parameter sweep, sensitivity, optimization, and fitting on top of the foundation. + 3. **Stateful layer:** chunked evaluation and richer telemetry adapters only after a dedicated streaming/stateful execution seam exists. + + ## Tiered validation (required scope) + + **Origin.** This requirement was decided in E-19 m-E19-01 as decision A6. 
E-19 retires the existing `POST /api/v1/drafts/validate` endpoint (Sim-private, mislabeled, unused by any UI, only exercised by tests) and records a hard dependency that E-18 must expose validation as a first-class, client-agnostic Time Machine operation alongside compile, evaluate, reevaluate, parameter override, and artifact write. + + **Principle.** Validation — answering *"is this YAML a correct FlowTime model?"* — is a first-class, client-agnostic operation. `FlowTime.Core` owns the authoritative answer. The Time Machine surfaces it. No single client is privileged as the validation host. Sim UI, Blazor UI, Svelte UI, MCP servers, external AI agents, tests, and CI are all first-class callers on equal footing. + + **Three tiers.** The Time Machine must expose all three of the following through its in-process SDK, CLI, and sidecar protocol, with consistent request and response shapes: + + - **Tier 1 — schema.** YAML parses, JSON schema validates, class references resolve. Cheap, no compile. Intended for per-keystroke editor feedback and per-iteration AI inner-loop feedback. Backed by `FlowTime.Core.Models.ModelSchemaValidator`. + - **Tier 2 — compile.** Model compiles into a `Graph`: topology resolves, dependencies resolve, expression nodes compile. No execution. Catches structural errors. Backed by `FlowTime.Core.Compiler.ModelCompiler` + `FlowTime.Core.Models.ModelParser`. + - **Tier 3 — analyse.** Full invariant analysis: compile + deterministic evaluation + invariant checks (capacity, conservation, runtime warnings). Catches semantic issues that only emerge after evaluation. Backed by `FlowTime.Sim.Core.Analysis.TemplateInvariantAnalyzer` composed into the Time Machine (the analyzer logic is correct; only its hosting moves). + + **Why tiered is required, not optional.** Without cheap tiers, every client needing "just check this" pays the full evaluate cost. 
That: + - Breaks AI inner-loop performance (agents generating candidate models iterate thousands of times). + - Makes editor-time UX expensive (IDE red-squiggles on every keystroke is unacceptable if each keystroke compiles and evaluates a graph). + - Discourages clients from validating at all, which is worse than cheap validation. + + Validation and compile-only are natural siblings of compile-then-evaluate. They share the front end of the pipeline and differ only in where they stop. + + **Client list (none privileged):** + - **Sim UI** (Blazor, Svelte) — template authoring, editor-time feedback. + - **Blazor UI, Svelte UI** — editor-time feedback on inline YAML, pre-run "check" action. + - **MCP servers** — expose `validate_model` (and friends) as tools for Claude and other models to call. + - **External AI agents** — programmatic inner loop: generate → validate (tier 1 or 2) → refine → validate (tier 3) → run. + - **Tests, CI** — pre-run well-formedness gates, regression checks on model fixtures. + - **The Time Machine itself** — compile operations share the tier-2 path. + + **What this milestone does not design.** Concrete wire format (JSON shape of the validation response, error envelope, line/column mapping) is a Foundation Layer implementation detail. What must be true is that the three tiers exist, are callable via all three surfaces (SDK, CLI, sidecar), and treat every client the same. + + **Library pieces preserved by E-19 for E-18 to compose:** + - `FlowTime.Core.Models.ModelSchemaValidator` (tier 1) + - `FlowTime.Core.Models.ModelValidator` (tier 2 adjacent — schemaVersion/grid/structure + legacy field detection) + - `FlowTime.Core.Compiler.ModelCompiler`, `FlowTime.Core.Models.ModelParser` (tier 2) + - `FlowTime.Sim.Core.Analysis.TemplateInvariantAnalyzer`, `InvariantAnalyzer` (tier 3 logic) + + These stay intact. E-19 only removes the HTTP wrapper on Sim; the validation capability itself moves forward to compose into the Time Machine. 
+ + ## Analysis Modes (SPICE-inspired) + + ### Mode 1: Sweep + + ```bash + flowtime evaluate model.yaml --sweep "parallelism=1,2,4,8,16" --output sweep-results.json + ``` + + Embarrassingly parallel. Produces a table of (parameter_value → key_metrics). + + ### Mode 2: Optimize + + ```bash + flowtime optimize model.yaml \ + --objective "min(avg(node.queue.queueTimeMs))" \ + --constraint "max(node.queue.utilization) < 0.8" \ + --vary "parallelism=1..32" "serviceRate=50..500" \ + --output optimal.json + ``` + + Gradient-free optimization (Nelder-Mead, Bayesian) over FlowTime as the evaluation function. + + ### Mode 3: Fit + + ```bash + flowtime fit model.yaml --observed production-metrics.json \ + --fit-params "serviceRate,routingWeight" \ + --output calibrated-model.yaml + ``` + + System identification: given real telemetry, find the parameter values that make the model match reality. Uses least-squares or similar fitting over the residual between predicted and observed series. + + ### Mode 4: Sensitivity + + ```bash + flowtime sensitivity model.yaml \ + --params "parallelism,serviceRate,arrivalRate" \ + --metric "avg(queueTimeMs)" \ + --perturbation 0.05 \ + --output sensitivity.json + ``` + + Numerical gradient: perturb each parameter by ±5%, measure output change. Answers "which parameter has the most impact on latency?" + + ### Mode 5: Monte Carlo + + ```bash + flowtime montecarlo model.yaml \ + --distribution "serviceRate=normal(100,15)" \ + --distribution "arrivalRate=normal(80,10)" \ + --samples 1000 \ + --output distribution.json + ``` + + Stochastic parameter variation. Answers "given uncertainty in our estimates, what's the 95th percentile of queue time?" + + ### Mode 6: Feedback / chunked evaluation + + ```bash + flowtime simulate model.yaml --chunked --chunk-size 60 \ + --controller autoscaler.py \ + --output trace.json + ``` + + Evaluate 60 bins, pass state to controller script, controller adjusts parameters, evaluate next 60 bins. 
Simulates closed-loop control. + + ## Telemetry integration + + Telemetry is an adapter concern that lives **outside** the Time Machine's pure execution scope, with one exception: writing the canonical bundle format is a Time Machine core capability (see below). Everything else — ingesting Prometheus metrics, OpenTelemetry traces, custom event logs, real-world capture feeds — lives in adapter projects that depend on the Time Machine, not inside it. + + ### Two distinct artifact kinds + + - **Canonical run directory** (`data/runs/<runId>/model/`, `series/`, `run.json`) — the Time Machine's internal operational truth for a run. Always written unconditionally on every run. **Clear-text, debuggable, inspectable.** Used by the Query API, Time Travel UI, run listings, and human debugging. This is FlowTime's authoritative in-place run record. + - **Canonical bundle** (`model.yaml`, `manifest.json`, `series/index.json`, CSV files) — a portable, interchange-oriented artifact defined by the E-15 canonical bundle schema (`docs/schemas/telemetry-manifest.schema.json`). Written on demand, not on every run. **Different shape from the canonical run directory, and intentionally so**: the bundle's purpose (portable telemetry interchange, telemetry loop participation, archival) is distinct from the run directory's purpose (debugging, internal operational truth). The bundle format may evolve independently of the run directory format. + + The canonical run directory and the canonical bundle are **not two representations of the same thing**. They share content (both contain the model and the series) but their shapes, purposes, and lifecycles differ. Both are preserved. + + ### The telemetry loop (established vocabulary) + + See the `telemetry-loop-parity` epic at `work/epics/telemetry-loop-parity/spec.md` for the authoritative definition. In brief: + + > 1. Model and Sim produce a **baseline run** (canonical run directory on disk). + > 2.
**Telemetry capture** generates a canonical bundle from that baseline (Time Machine core capability). + > 3. **Telemetry replay** creates a new run from the bundle (`ITelemetrySource` implementation for the canonical bundle, fed into the Time Machine as input). + > 4. Outputs of the baseline run and the replay run are compared for **parity** (owned by the Telemetry Loop & Parity epic). + + The loop has three distinct primary use cases: + + 1. **Specification / bootstrap.** You have a model but no real telemetry yet. Capture generates telemetry in the canonical format; the generated telemetry becomes the instrumentation specification for the real system — "these are the series at this cadence that the real system must emit so a replay produces the same result." + 2. **Self-consistency testing.** You have a model and a capture+replay pair. Round-trip the model through the loop and verify parity. Drift is a bug in capture, replay, or the Time Machine's determinism. + 3. **AI iteration / model fitting.** An AI agent proposes a candidate model. Capture generates telemetry from the candidate. Compare the generated telemetry to **real observed telemetry** from production. Adjust the model and iterate. The Time Machine provides the forward model; the loop provides the comparison surface. + + All three use cases require the capture and replay sides to be **round-trip consistent** for the canonical bundle format, modulo documented tolerances (owned by the Telemetry Loop & Parity epic). + + ### Contract asymmetry: `ITelemetrySource` yes, `ITelemetrySink` deferred + + - **`ITelemetrySource` is introduced by E-18 m-E18-01b** as the input contract. (m-E18-01a extracts the concrete `CanonicalBundleSource` reader from Generator without an interface; 01b lifts it to implement `ITelemetrySource` and adds a second implementation.) Multiple implementations from day one of 01b: + - `CanonicalBundleSource` — reads canonical bundles (the telemetry loop's replay step). 
Concrete class created in 01a, lifted to implement `ITelemetrySource` in 01b. + - `FileCsvSource` — reads `file:`-referenced CSV data (already what Core does today for model inputs). Extracted to implement `ITelemetrySource` in 01b. + - Future: `FlowTime.Telemetry.Prometheus`, `FlowTime.Telemetry.Otel`, `FlowTime.Telemetry.BpiEventLog`, and other real-world ingestion adapters — **direct-source bypasses** of the E-15 Gold Builder pipeline. They implement `ITelemetrySource` against a live source without writing a canonical bundle to disk, and are alternatives to E-15's general path (raw → Gold → `CanonicalBundleSource`), not part of E-15 scope. Deferred until a concrete client need surfaces. + - The contract must carry enough metadata to round-trip the canonical bundle format losslessly (modulo documented precision/format drift). This is a non-trivial design task in m-E18-01b, not a throwaway tiny interface. + - **`ITelemetrySink` is explicitly deferred.** Only one sink format exists — the canonical bundle — and writing it is a Time Machine core capability, not a pluggable adapter. Canonical bundle writing is always done via a concrete `CanonicalBundleWriter` inside the Time Machine, not behind an interface. An `ITelemetrySink` interface will be introduced only when a second sink format is required (speculative — Prometheus push, OTEL emit, custom external system push). Per the "don't create abstractions for one-time operations" principle, no sink interface is built on speculation. + + ### Determinism boundary is at the source + + Live telemetry adapters (Prometheus queries, OTEL streams, Kafka consumers) are non-deterministic by nature. FlowTime's determinism story requires the Time Machine to see deterministic inputs. Resolution: **adapters snapshot live data at a well-defined point and expose the snapshot through `ITelemetrySource`**. The Time Machine sees only the snapshot, not the live feed. 
Snapshot provenance (source, query, timestamp, hash) is recorded in the run artifacts so the run can be reproduced exactly. + + This matches today's capture-directory flow, generalised: the adapter can produce the snapshot however it wants. + + ### Milestone ownership + + - **m-E18-01a** (Path B core cut) extracts the concrete canonical bundle writer and concrete `CanonicalBundleSource` reader from Generator into the Time Machine, alongside the execution-pipeline extraction. No `ITelemetrySource` interface yet — the reader is a concrete class. This is enough to enable the telemetry loop end-to-end over the canonical bundle format using today's existing capture and replay code, just rehosted. + - **m-E18-01b** (Tiered Validation & Telemetry Source Contract) introduces `ITelemetrySource` as the formal interface. Lifts `CanonicalBundleSource` to implement it. Adds `FileCsvSource` as a second implementation. Tiered validation lands in this milestone. + - **m-E18-06** (reshaped from "Telemetry I/O" to "Telemetry Ingestion Source Adapters") delivers source-only adapters for real-world formats. Depends on the `ITelemetrySource` contract from 01b. No Time Machine changes. No sinks. Specific formats (Prometheus, OTEL, BPI event logs, GTFS, …) chosen when the milestone is scheduled, not now. + - **m-E18-04** (Optimization & Fitting) depends on the Telemetry Loop & Parity epic being complete — optimization against real telemetry requires measured drift bounds. This is a hard prerequisite, not a soft one. + + ### Non-goals for E-18 + + - **No real-world format sinks.** FlowTime does not generate Prometheus-format or OTEL-format telemetry from runs. If ever needed, add later as optional downstream adapters behind `ITelemetrySink` when it exists. + - **No parity harness.** Drift measurement, tolerance rules, CI gating, regression reporting all belong to the Telemetry Loop & Parity epic. + - **No topology inference from ingested telemetry.** Owned by E-15's Graph Builder. 
+ - **No Gold Builder pipeline.** Owned by E-15. + + ## Architecture + + ``` + FlowTime.Core (E-16: pure compiled engine; unchanged by E-18) + ├── ModelSchemaValidator.Validate(yaml) → ValidationResult (tier 1) + ├── ModelCompiler.Compile(model) → CompiledModel (tier 2 front) + ├── ModelParser.ParseModel(compiled) → (TimeGrid, Graph) (tier 2 back) + ├── Graph.Evaluate(grid) → EvaluatedState + ├── Analyze(state) → AnalyticalFacts + │ + │ E-17/E-18 shared foundation: + ├── IdentifyParameters(graph) → Parameter[] + ├── Reevaluate(graph, param_overrides) → EvaluatedState + │ + │ E-18 specific: + ├── EvaluateChunk(graph, state_at_t, bins[t..t+n]) → state_at_t+n + └── ComputeObjective(state, objective_expr) → double + + FlowTime.TimeMachine (NEW — execution component) + ├── ValidateSchema(yaml) → Result (tier 1, from Core) + ├── ValidateCompile(yaml) → Result (tier 2, from Core) + ├── Analyse(yaml) → Result (tier 3, composes TemplateInvariantAnalyzer) + ├── Compile(yaml) → CompiledGraphHandle + ├── Evaluate(handle, params, seed) → Run + ├── Reevaluate(handle, overrides) → Run + ├── WriteRunDirectory(run, path) → RunId (canonical run dir, always written, clear-text) + ├── WriteCanonicalBundle(run) → BundlePath (canonical bundle, on demand, for telemetry loop) + ├── ITelemetrySource interface (input contract, multiple implementations) + ├── CanonicalBundleSource : ITelemetrySource (replay side of the telemetry loop) + ├── FileCsvSource : ITelemetrySource (file: references in models) + ├── CLI with pipeline-friendly I/O + ├── Iteration protocol (keep graph alive across many evaluate/reevaluate calls) + ├── Sidecar protocol (optional) — long-lived process driven over a wire protocol + └── Analysis mode dispatch (sweep, optimize, fit, sensitivity, montecarlo, chunked) + + FlowTime.Pipeline (NEW — embeddable SDK; thin layer over the Time Machine) + ├── Sweep(graph, param_grid) → results[] + ├── Optimize(graph, objective, constraints, ranges) → optimal + ├── Fit(graph, 
observed, fit_params) → calibrated + ├── Sensitivity(graph, params, perturbation) → gradients + ├── MonteCarlo(graph, distributions, N) → distribution[] + └── ChunkedEvaluate(graph, chunk_size, controller_fn) → trace + + FlowTime.Telemetry.* (NEW — adapter projects, source-only, real-world ingestion) + ├── FlowTime.Telemetry.Prometheus : ITelemetrySource (future, m-E18-06 / E-15) + ├── FlowTime.Telemetry.Otel : ITelemetrySource (future, m-E18-06 / E-15) + └── FlowTime.Telemetry.BpiEventLog : ITelemetrySource (future, m-E18-06 / E-15) + + FlowTime.Generator (DELETED in E-18) + └── Execution code → FlowTime.TimeMachine + └── Telemetry-generation code → canonical bundle writer + CanonicalBundleSource (in TimeMachine or its own adapter) + └── See "Generator migration" below + + ITelemetrySink (DEFERRED — not introduced until a second sink format exists) + └── Canonical bundle writing is a concrete Time Machine capability, not a pluggable adapter. + ``` + + ## Core and Time Machine relationship + + Core and Time Machine are strictly layered. The dependency direction is **Time Machine → Core, never reverse.** + + - **`FlowTime.Core` is the library of pure deterministic operations.** In BEAM/JVM terms it is the instruction set and execution kernel as a library: `ModelSchemaValidator` (tier 1), `ModelCompiler` + `ModelParser` (tier 2), `Graph.Evaluate` (the execution kernel), expression compilation, analytical facts, and invariant analyzer rules. No HTTP, no orchestration, no storage, no client awareness. Core does not know what a "client" is. + - **`FlowTime.TimeMachine` is the hosted machine.** It loads programs (YAML models → compiled graphs via Core), drives them through time (via Core's `Graph.Evaluate`), manages iteration and reevaluation protocols, handles parameter identity and override, writes canonical artifacts, and exposes the whole thing as a client-agnostic API with three surfaces (SDK, CLI, sidecar). 
The Time Machine is where the abstract machine's hosting concerns live: state lifetimes, run identity, RNG seeding, artifact layout, and multi-client API shapes. + - **The Time Machine composes; it never reimplements.** If the Time Machine needs a pure computational primitive that Core is missing, the primitive is added to Core as a pure library function, not to the Time Machine as a parallel implementation. This preserves Core's invariants and prevents two sources of truth for any given computation. + - **Core remains pure and stable.** Nothing added by E-18 gives Core HTTP, orchestration, storage, or client awareness. Core stays UI-less, host-less, and I/O-less beyond YAML parsing. + + ## Generator migration (Path B: extraction and deletion) + + **Origin.** This is recorded as decision D-2026-04-07-019 and referenced from E-19 m-E19-01's shared framing (item 3). The forward fate of `FlowTime.Generator` was previously left implicit. + + **Current state (pre-E-18).** `FlowTime.Generator` is the shared orchestration layer used by both `FlowTime.Sim.Service` and `FlowTime.API`. It owns `RunOrchestrationService`, `RunArtifactWriter`, deterministic run ID logic, RNG seeding, and dry-run/plan mode. Sim.Service does not reference `FlowTime.Core` directly — only via Generator. API references both Core and Generator. + + **Problem.** Generator's responsibilities — compile, evaluate, artifact write, run IDs, RNG seeding, dry-run — overlap the Time Machine's scope almost completely. Keeping Generator alive alongside the Time Machine would create two shared orchestration layers doing the same pipeline, which violates the no-coexistence discipline established in E-16 and E-19. + + **Decision (Path B — extraction and deletion).** In E-18 m-E18-01a (the dedicated Path B cut), the following is done in a single milestone: + + 1. 
**Extract execution-pipeline code into `FlowTime.TimeMachine`** (new project): + - `RunOrchestrationService` → Time Machine's Compile + Evaluate + ArtifactWrite operations (split along the tier boundary) + - `RunArtifactWriter` → Time Machine's canonical run directory writer (always written, unchanged from today's clear-text layout — preserved for debugging value) + - `RunDirectoryUtilities`, `RunOrchestrationContractMapper` → Time Machine supporting infrastructure + - Deterministic run ID logic → Time Machine's run identity service + - RNG seeding → Time Machine's parameter and run configuration + - Dry-run / plan mode → Time Machine's tier 2 validation (compile-only) + - Simulation-mode code → Time Machine's evaluate path + + 2. **Extract telemetry-generation code into the canonical bundle adapter** (new project `FlowTime.Telemetry.Bundle`, or as part of the Time Machine if design review prefers): + - `TelemetryBundleBuilder`, `TelemetryBundleOptions` → canonical bundle writer. Not behind `ITelemetrySink` (which is deferred) — a concrete writer. Called when the Time Machine is asked to produce a bundle. + - `TelemetryCapture`, `TelemetryGenerationService` → canonical bundle writing orchestration, combined with the writer. + - `CaptureManifestWriter` → canonical bundle manifest production (part of the writer). + - `RunArtifactReader` (under today's Generator `Capture/`) → `CanonicalBundleSource`, the replay-side `ITelemetrySource` implementation for the canonical bundle format. + - `GapInjector`, `GapInjectorOptions` → realism transform applied inside the canonical bundle adapter (or deferred to the Telemetry Loop & Parity epic if the transform is about driving parity tolerance tests rather than realistic bundle generation). The final location is an m-E18-01a decision. + + 3. **Update callers**: + - `FlowTime.Sim.Service` replaces its `FlowTime.Generator` reference with a `FlowTime.TimeMachine` reference. 
Sim.Service now depends on Time Machine for execution and keeps its own authoring/template responsibilities. + - `FlowTime.API` replaces its `FlowTime.Generator` reference with `FlowTime.TimeMachine` (or drops the dependency entirely if API only needs Core-level reads). + - The existing `POST /telemetry/captures` API endpoint and `flowtime telemetry capture` CLI surface are re-wired to call the extracted capture capability at its new home. The public surface does not change as part of Path B; only the implementation moves. + + 4. **Delete `FlowTime.Generator`** in the same milestone. No stranded empty project. No "Generator and Time Machine coexist" window. + + **No coexistence window.** Path B deliberately rejects a transition state in which both `FlowTime.Generator` and `FlowTime.TimeMachine` exist in the tree. The extraction and deletion happen in one milestone cut, matching the E-16 no-coexistence pattern. If a milestone cannot extract cleanly in one pass, the correct response is to resize the milestone, not to introduce a coexistence shim. + + **Relationship with Sim's transitional execution host.** During E-19 (before E-18 ships), `FlowTime.Sim.Service`'s `/api/v1/orchestration/runs` endpoint remains the transitional execution host and routes through `FlowTime.Generator`. When E-18 completes the Path B migration, that endpoint is deleted in favour of the UI calling the Time Machine directly by default. A temporary thin facade is allowed only if a concrete technical migration constraint is documented in the owning E-18 milestone, with explicit removal criteria. That choice is made in the E-18 milestone that deletes Generator, not in E-19. + + **What Path B does not do.** Path B does not require any change to `FlowTime.Core`, the canonical run directory layout (`data/runs/<runId>/model/`, `series/`, `run.json`), the canonical run.json contract, the canonical bundle schema (`docs/schemas/telemetry-manifest.schema.json`), or the analytical surfaces purified by E-16.
Those all stay unchanged. Path B is a project-boundary refactor, not a contract or data refactor. Canonical run directories stay clear-text and debuggable. Canonical bundles stay in their E-15-defined shape (which may evolve independently of the run directory if a future milestone decides the bundle needs a different shape for interchange purposes, but that is not Path B's concern). + + ## Success Criteria + + - [ ] The Time Machine can be called as a CLI in a shell pipeline: `cat model.yaml | flowtime evaluate --params '{"parallelism":4}' | jq '.nodes.queue.derived.queueTimeMs'` + - [ ] The Time Machine exposes tiered validation (schema / compile / analyse) via SDK, CLI, and sidecar, callable identically from Sim UI, Blazor UI, Svelte UI, MCP servers, external AI agents, tests, and CI. No client is privileged. + - [ ] Tier 1 (schema) returns validation results without compiling the model. Tier 2 (compile) returns without executing. Tier 3 (analyse) returns full invariant analysis. Each tier has a consistent request/response shape. + - [ ] Parameter sweeps produce correct comparative results without recompilation per evaluation + - [ ] An optimization loop can find parameter values that satisfy an objective + constraints + - [ ] Model fitting against parity-validated real telemetry produces a calibrated model that predicts within tolerance + - [ ] Chunked evaluation enables feedback simulation with external controller logic + - [ ] All evaluation modes use the pure Core engine through the Time Machine — no adapter-side analytical computation, no Sim-private execution path + - [ ] `FlowTime.Generator` is deleted. Its responsibilities are extracted into `FlowTime.TimeMachine` (and the canonical bundle adapter project) in a single milestone cut with no coexistence window. Sim.Service and API reference `FlowTime.TimeMachine` (or Core directly) instead of Generator. `rg "FlowTime\.Generator" src/ tests/` returns zero matches. 
+ - [ ] The canonical run directory (`data/runs/<runId>/model/`, `series/`, `run.json`) is preserved unchanged. It remains clear-text and debuggable as today. + - [ ] The canonical bundle format (`model.yaml`, `manifest.json`, `series/`, CSV files) is produced by the Time Machine's canonical bundle writer and consumed by `CanonicalBundleSource`, and the two are round-trip consistent: capturing a baseline run and replaying its bundle produces the same outputs modulo documented drift tolerances owned by the Telemetry Loop & Parity epic. + - [ ] `ITelemetrySource` is defined and has at least two implementations at milestone completion: `CanonicalBundleSource` and `FileCsvSource` (for `file:`-referenced CSV model inputs). + - [ ] `ITelemetrySink` is **not** introduced on speculation. It is documented as deferred until a second sink format is required. + - [ ] `FlowTime.TimeMachine` contains no external-telemetry-format-specific code (no Prometheus, OTEL, BPI-format parsing or emission). All external format knowledge lives in adapter projects under `FlowTime.Telemetry.*`. `rg -i "prometheus|opentelemetry|otel" src/FlowTime.TimeMachine/` returns zero matches. + - [ ] The existing `POST /telemetry/captures` API endpoint and `flowtime telemetry capture` CLI command continue to work after Generator deletion, backed by the extracted canonical bundle writer. Their public request/response contracts are unchanged. + + ## Milestones + + Plan v2 (2026-04-10): once the Rust engine (E-20) became the evaluation path, the original C#-Core-centric milestone plan was reshaped. The current milestone structure lives here. `milestone-plan-v2.md` documents the remapping from v1 → v2. + + | ID | Title | Status | Summary | + |----|-------|--------|---------| + | m-E18-01 | Parameterized Evaluation (Rust) | **complete** (merged to main 2026-04-10) | `ParamTable` in Plan. Compiler extracts tweakable parameters from const nodes, traffic arrivals, WIP limits.
`evaluate_with_params(plan, overrides)` pure function. Parameter metadata (id, kind, default, bounds). Foundation for everything that follows. | + | m-E18-02 | Engine Session + Streaming Protocol (Rust) | **complete** (merged to main 2026-04-10) | `flowtime-engine session` persistent CLI mode. Length-prefixed MessagePack over stdin/stdout. Commands: `compile`, `eval`, `patch`, `get_params`, `get_series`, `validate_schema`. Session holds compiled Plan + current state. | + | m-E18-06 | Tiered Validation | **complete** (merged to main) | `TimeMachineValidator` (schema / compile / analyse tiers); `POST /v1/validate`; Rust `validate_schema` session command. Satisfies E-19 m-E19-01 A6 (D-2026-04-07-017). | + | m-E18-07 | FlowTime.TimeMachine Extraction (Path B) | **complete** (merged to main) | `FlowTime.TimeMachine` project created; `FlowTime.Generator` deleted outright. Path B, no coexistence window. Per D-2026-04-07-019. | + | m-E18-08 | Telemetry Source Contract | **complete** (merged to main) | `ITelemetrySource` interface + `CanonicalBundleSource` + `FileCsvSource`. 23 tests. `ITelemetrySink` explicitly **not** introduced — see D-2026-04-07-020. | + | m-E18-09 | Parameter Sweep | **complete** (merged to main) | `SweepSpec`/`SweepRunner`/`ConstNodePatcher`; `IModelEvaluator` / `RustModelEvaluator`; `POST /v1/sweep`. 35 tests. | + | m-E18-10 | Sensitivity Analysis | **complete** (merged to main) | `ConstNodeReader`; `SensitivitySpec`/`SensitivityRunner` (central difference); `POST /v1/sensitivity`. 39 tests. | + | m-E18-11 | Goal Seeking | **complete** (merged to main) | `GoalSeekSpec`/`GoalSeeker` (bisection); `POST /v1/goal-seek`. 33 tests. (Added 2026-04; not in original plan.) | + | m-E18-12 | Multi-parameter Optimization | **complete** (merged to main) | `OptimizeSpec`/`Optimizer` (Nelder-Mead, N parameters); `POST /v1/optimize`. 29 unit + 10 API tests. 
| + | m-E18-13 | SessionModelEvaluator | **complete** (merged to epic 2026-04-15) | Persistent `flowtime-engine session` subprocess; MessagePack over stdin/stdout; compile-once/eval-many. `RustEngine:UseSession` config switch (default true); `RustModelEvaluator` retained as fallback. 44 new tests. | + | m-E18-14 | .NET Time Machine CLI | **complete** (merged to epic 2026-04-15) | `flowtime validate/sweep/sensitivity/goal-seek/optimize` as pipeable JSON-over-stdio commands byte-compatible with `/v1/` endpoints. `--no-session` fallback. 72 CLI unit + 10 integration tests. | + | m-E18-XX | Model Fit | **planned** — blocked on E-15 + Telemetry Loop & Parity | `FitSpec`/`FitRunner`/`POST /v1/fit` composing `ITelemetrySource` + `Optimizer`. Infrastructure exists; assembly requires telemetry ingestion (E-15) and parity harness first. | + | m-E18-05 | Chunked Evaluation (Mode 6) | **deferred** — after discovery pipeline works end-to-end | Bin-chunk evaluation for feedback simulation with external controllers. Requires a real stateful execution seam. Sequenced after Model Fit per Option A (D-2026-04-15-032). | + + ### Deferred from v1 (not on current critical path) + + These v1 milestones were superseded or deferred when the Rust engine became the evaluation path. Some have since been re-admitted under different IDs (m-E18-06, m-E18-07, m-E18-08 above). + + - **m-E18-01a** Generator extraction — superseded by **m-E18-07** (same outcome, different entry point). + - **m-E18-01b** Tiered validation & telemetry source contract — split across **m-E18-06** (validation) and **m-E18-08** (telemetry source contract). + - **m-E18-01c** Runtime parameter foundation — replaced by **m-E18-01** (Rust-native, not C#). + - **m-E18-04** Optimization & Fitting as a single milestone — split into **m-E18-11** (goal seek), **m-E18-12** (N-parameter optimize), and **m-E18-XX** (model fit). 
+ - **Telemetry Ingestion Source Adapters** (v1 m-E18-06 idea) — moved to **E-15** scope; not an E-18 milestone. + + ## Risks & Open Questions + + | Risk / Question | Impact | Mitigation | + |----------------|--------|------------| + | Optimization solver choice (Nelder-Mead vs Bayesian vs gradient-free) | Medium | Start with Nelder-Mead (simple, derivative-free), add Bayesian later | + | Model fitting convergence for complex topologies | High | Start with small models, add diagnostics for fit quality | + | Chunked evaluation requires a real stateful execution seam, not just the current `IStatefulNode` stubs | High | Defer chunked evaluation to the epic's stateful layer; do not block foundation or analysis layers on it | + | Objective expression language design | Medium | Start with simple predefined metrics, add expression support later | + | Telemetry format proliferation | Low | Start with CSV (already supported) and JSON, add OTEL later | + + ## Dependencies + + - **E-16 Formula-First Core Purification** — must complete first. Provides the pure compiled engine that the Time Machine hosts. + - **E-19 m-E19-01 Supported Surface Inventory** — provides the A6 tiered-validation requirement, the Path B Generator extraction commitment (D-2026-04-07-019), the telemetry-as-adapter framing (D-2026-04-07-020), and the Time Machine naming decision (D-2026-04-07-018) that this epic builds on. + - **E-15 Telemetry Ingestion** — provides the canonical bundle schema (`docs/schemas/telemetry-manifest.schema.json`) that this epic's `CanonicalBundleSource` and canonical bundle writer must conform to. The schema already exists; this is an alignment dependency rather than a sequencing dependency. + - **E-17 Interactive What-If Mode** consumes the shared runtime parameter foundation built here; it should not duplicate the runtime parameter model or reevaluation API. 
+ - **Telemetry Loop & Parity** (`work/epics/telemetry-loop-parity/spec.md`, currently unnumbered) — **hard prerequisite for m-E18-04 (Optimization & Fitting)**. Optimization and fitting against real telemetry require measured drift bounds, which only the parity harness can provide. Soft dependency for m-E18-01a through m-E18-03 (those milestones can ship without parity automation, but the loop's existence shapes the contract design in 01b). + + ## Analogies + + FlowTime's relationship to these analysis modes is the same as SPICE's relationship to circuit analysis: + - The engine is the forward model (netlist → simulation) + - Every analysis mode is a different way of calling the forward model + - The engine doesn't need to know about optimization — it just evaluates purely + - The analysis framework wraps the engine with different calling patterns + + ## References + + - SPICE analysis modes (.DC, .AC, .TRAN, .STEP, .MC, .OPTIM) as architectural precedent + - Control theory system identification (Ljung, "System Identification: Theory for the User") + - [work/epics/E-16-formula-first-core-purification/reference/formula-first-engine-refactor-plan.md](../E-16-formula-first-core-purification/reference/formula-first-engine-refactor-plan.md) + - [docs/research/flowtime-headless-integration.md](../../../docs/research/flowtime-headless-integration.md) - kind: epic id: E-22 frontmatter: @@ -472,3 +890,1329 @@ entities: - E-15 Telemetry Ingestion: `work/epics/E-15-telemetry-ingestion/` - Option A delivery sequence: `work/decisions.md` → D-2026-04-15-032 - Headless engine architecture: `docs/architecture/headless-engine-architecture.md` + - kind: milestone + id: M-001 + frontmatter: + title: Parameterized Evaluation + status: done + parent: E-18 + body: | + ## Goal + + The Rust engine can compile a model once and re-evaluate it many times with different parameter values without recompiling. 
This is the critical primitive that every downstream use case builds on — interactive what-if, parameter sweeps, optimization, sensitivity analysis. The Plan becomes a reusable program; parameters are its inputs. + + ## Context + + The current `compile(model) → Plan` bakes all constants into `Op::Const { out, values }` at compile time. To change an arrival rate from 10 to 15, you must recompile the entire model. Compilation is O(nodes) with topological sorting, expression parsing, and constraint resolution — unnecessary work when only a scalar value changed. + + After this milestone, the Plan carries a `ParamTable` that lists every user-visible constant. `evaluate_with_params(plan, overrides)` writes overrides into the state matrix before the eval loop, then runs the same bin-major evaluation. The Plan is immutable and shareable; only the parameter values change. + + ### Where constants come from in the compiler + + The compiler creates `Op::Const` from seven sources: + + | Source | Example | Parameter? | + |--------|---------|-----------| + | `kind: const` node values | `values: [10, 20, 30]` | Yes — primary user input | + | Traffic arrival `ratePerBin` | `ratePerBin: 20` | Yes — class arrival rate | + | PMF expected value | `pmf: { values, probabilities }` | Yes — derived from PMF definition | + | WIP limit scalar | `wipLimit: 50` | Yes — topology constraint | + | Queue initial condition | `initialCondition: { queueDepth: 5 }` | Yes — initial state | + | Expression literal | `8` in `MIN(arrivals, 8)` | Yes — inline constant in formula | + | Compiler-generated temps | Internal proportional alloc, router weight columns | No — derived, not user-visible | + + The distinction: a parameter is a constant that traces back to a user-authored value in the model YAML. Compiler-generated intermediate constants (temp columns, normalized weights) are NOT parameters. + + ## Acceptance Criteria + + 1. **AC-1: ParamTable struct.** `Plan` gains a `params: ParamTable` field. 
`ParamTable` contains a `Vec` where each entry has:
+      - `id: String` — stable identifier matching the model YAML source (e.g., `"arrivals"` for a const node, `"arrivals.Order"` for a traffic class rate, `"Queue.wipLimit"` for a topology WIP limit)
+      - `column: usize` — the column index in the state matrix this parameter fills
+      - `default: ParamValue` — original value from the model (`Scalar(f64)` for uniform, `Vector(Vec<f64>)` for per-bin)
+      - `kind: ParamKind` — `ConstNode`, `ArrivalRate`, `WipLimit`, `InitialCondition`, `ExprLiteral`
+
+   2. **AC-2: Compiler populates ParamTable.** The compiler registers parameters for:
+      - Every `kind: const` node (id = node id, value from `values` field)
+      - Every `traffic.arrivals` entry with `ratePerBin` (id = `"{nodeId}.{classId}"`)
+      - Every topology node with scalar `wipLimit` (id = `"{topoNodeId}.wipLimit"`)
+      - Every topology node with `initialCondition.queueDepth` (id = `"{topoNodeId}.init"`)
+      - Expression literals are NOT parameters (they're inline formula constants, not model inputs)
+
+   3. **AC-3: `evaluate_with_params` function.** New public function:
+      ```rust
+      pub fn evaluate_with_params(plan: &Plan, overrides: &[(String, ParamValue)]) -> Vec
+      ```
+      - Applies overrides to matching param IDs before the eval loop
+      - `Scalar(v)` fills all bins with `v`; `Vector(vs)` writes per-bin values
+      - Unmatched override IDs are ignored (forward-compatible)
+      - Unknown param IDs do not cause errors
+      - Returns the filled state matrix (same shape as `evaluate`)
+
+   4. **AC-4: Equivalence.** `evaluate_with_params(plan, &[])` (no overrides) produces identical results to `evaluate(plan)`. A Rust test asserts bitwise equality.
+
+   5. **AC-5: Full post-eval pipeline.** `eval_model` is refactored to accept optional overrides. 
When overrides are provided, it calls `evaluate_with_params` instead of `evaluate`, then runs the same post-eval pipeline: class decomposition normalization, proportional allocation propagation, edge series computation, analysis warnings. A new public entry point: + ```rust + pub fn eval_model_with_params( + model: &ModelDefinition, + overrides: &[(String, ParamValue)] + ) -> Result + ``` + + 6. **AC-6: Parameter override affects downstream.** Overriding a const node's value propagates through all downstream expressions, queue recurrences, per-class decomposition, and edge series. Test: override `arrivals` from 10 to 20 → verify `served`, `queue_depth`, per-class series, and edge flow all change correctly. + + 7. **AC-7: Class arrival rate override.** Overriding a class arrival rate (e.g., `"arrivals.Order"` from 6 to 12) changes the class fraction and propagates through normalization and downstream decomposition. Test: change one class rate, verify normalization invariant still holds. + + 8. **AC-8: WIP limit override.** Overriding `"{topoNodeId}.wipLimit"` changes the queue's WIP limit and affects overflow. Test: lower WIP limit → verify overflow increases. + + 9. **AC-9: Parameter schema extraction.** New public function: + ```rust + pub fn extract_params(plan: &Plan) -> &ParamTable + ``` + Returns the plan's parameter table. Clients use this to discover what can be tweaked, with IDs, kinds, and defaults. This is what the UI will use to auto-generate controls. + + 10. **AC-10: Compile-once, eval-many pattern.** Demonstrate the pattern with a Rust test that compiles once, evaluates 10 times with different arrival rates, and verifies each result is independent (no state leakage between evaluations). Measure that subsequent evals are faster than the first (no recompilation). 
+ + ## Out of Scope + + - Session management or persistent process (m-E18-02) + - Streaming protocol or MessagePack framing (m-E18-02) + - CLI interface changes (m-E18-02) + - UI parameter controls (m-E17-02) + - Parameter bounds, display names, or template metadata enrichment (future — the parameter table carries IDs and defaults only) + - Expression literal parameterization (inline `8` in `MIN(arrivals, 8)` stays baked — parameterizing expression constants requires expression-tree rewriting, which is a different problem) + - Structural model changes (adding/removing nodes requires recompilation — by design) + + ## Key References + + - `engine/core/src/plan.rs` — Plan struct, Op enum, ColumnMap + - `engine/core/src/eval.rs` — `evaluate()` function, bin-major loop + - `engine/core/src/compiler.rs` — `compile()`, `eval_model()`, all `Op::Const` emission sites + - `docs/architecture/headless-engine-architecture.md` — overall architecture + - `work/epics/E-18-headless-pipeline-and-optimization/milestone-plan-v2.md` — milestone sequence + - kind: milestone + id: M-002 + frontmatter: + title: Engine Session + Streaming Protocol + status: done + parent: E-18 + depends_on: + - M-001 + body: | + ## Goal + + The Rust engine runs as a persistent process that accepts commands and streams results. `flowtime-engine session` reads length-prefixed MessagePack messages from stdin, holds a compiled Plan in memory, and writes responses to stdout. This is the headless pipeline component — the same protocol works over stdin/stdout (CLI pipes) and WebSocket (UI, via m-E17-01 proxy). + + ## Context + + After m-E18-01, the engine can compile once and evaluate many times with different parameters via `evaluate_with_params(plan, overrides)`. But every invocation is still a batch subprocess: spawn → parse YAML → compile → evaluate → write files → exit. The overhead of process spawn + file I/O dominates latency (100-500ms). 
For interactive use, we need a persistent process that holds the compiled Plan and responds to parameter changes in microseconds. + + The session is a stateful loop: + + ``` + stdin → [compile] → hold Plan → [eval overrides] → stdout + → [eval overrides] → stdout + → [eval overrides] → stdout + → EOF → exit + ``` + + ### Why MessagePack + + - **Binary f64 arrays.** A 1,000-bin series is 8KB as binary vs ~8KB+ as JSON text (with formatting overhead and parse cost). MessagePack encodes `Vec` as a binary ext type — zero parsing, memcpy-fast. + - **Length-prefixed framing.** 4-byte big-endian length prefix before each message. No newline ambiguity, no incomplete-line bugs. + - **Cross-language.** Native libraries: Rust (`rmp-serde`), JavaScript (`@msgpack/msgpack`), C# (`MessagePack-CSharp`), Python (`msgpack`). + - **Pipe-friendly.** Works over stdin/stdout for CLI composition, over WebSocket for UI. + + ## Acceptance Criteria + + 1. **AC-1: `session` CLI command.** `flowtime-engine session` enters a persistent loop reading from stdin and writing to stdout. No file arguments required. Exits cleanly on stdin EOF or SIGTERM. + + 2. **AC-2: Length-prefixed MessagePack framing.** Each message is `[4-byte big-endian length][MessagePack payload]`. Both requests (stdin) and responses (stdout) use this framing. Stderr is reserved for human-readable log messages (not protocol). + + 3. **AC-3: `compile` command.** Request: `{ method: "compile", params: { yaml: "" } }`. Response: `{ result: { params: [{ id, kind, default }], series: [{ id, bins, values }], bins, grid } }`. Compiles the model, holds the Plan in session state, evaluates with defaults, returns the parameter schema and initial series. + + 4. **AC-4: `eval` command.** Request: `{ method: "eval", params: { overrides: { "arrivals": 15.0, "Queue.wipLimit": 30.0 } } }`. Response: `{ result: { series: { "arrivals": , "served": , ... }, elapsed_us } }`. Re-evaluates with overrides, returns updated series. 
Must not recompile. Series values are MessagePack binary arrays (not JSON text arrays). + + 5. **AC-5: `get_params` command.** Request: `{ method: "get_params" }`. Response: `{ result: { params: [{ id, kind, default }] } }`. Returns the current parameter table from the compiled Plan. + + 6. **AC-6: `get_series` command.** Request: `{ method: "get_series", params: { names: ["arrivals", "served"] } }`. Response: `{ result: { series: { "arrivals": , "served": } } }`. Returns specific series from the current evaluation state. If no names provided, returns all non-internal series. + + 7. **AC-7: Error handling.** Invalid requests return `{ error: { code, message } }`. Specific errors: `not_compiled` (eval before compile), `compile_error` (bad YAML), `unknown_method`. The session continues after errors — it does not exit. + + 8. **AC-8: Session state.** The session holds: compiled Plan, current parameter overrides, current state matrix (from most recent eval). `compile` replaces the entire session state. `eval` updates overrides and state. Multiple `eval` calls are independent (no accumulation). + + 9. **AC-9: Performance.** For a model with 8 bins and ~10 series, `eval` with scalar overrides completes in under 1ms (excluding I/O). A Rust benchmark test evaluates 1,000 times in a loop and asserts total < 1 second. + + 10. **AC-10: Integration test.** A Rust integration test spawns `flowtime-engine session` as a subprocess, sends compile + eval + eval (with different overrides) + get_params via the MessagePack protocol over stdin/stdout, and verifies all responses are correct. 
+ + ## Technical Notes + + ### Dependencies to add + + - `rmp-serde` (MessagePack serialization for Rust) — workspace dependency + - `serde` derive on request/response types + + ### Module structure + + - `engine/core/src/session.rs` — Session struct, state management, command dispatch + - `engine/core/src/protocol.rs` — Request/Response types, MessagePack framing (read/write) + - `engine/cli/src/main.rs` — `cmd_session()` entry point + + ### Message envelope + + ```rust + #[derive(Serialize, Deserialize)] + struct Request { + method: String, + #[serde(default)] + params: serde_json::Value, // flexible params per method + } + + #[derive(Serialize)] + struct Response { + #[serde(skip_serializing_if = "Option::is_none")] + result: Option, + #[serde(skip_serializing_if = "Option::is_none")] + error: Option, + } + ``` + + Note: We use `serde_json::Value` as the flexible inner type even though the wire format is MessagePack. MessagePack and JSON share the same data model (maps, arrays, strings, numbers, bools, null). `rmp-serde` serializes/deserializes `serde_json::Value` correctly. + + ### Series encoding + + Series data (`Vec`) serializes naturally as MessagePack arrays of floats. For very large series, a future optimization could use MessagePack binary ext type for raw f64 bytes, but the standard array encoding is correct and sufficient for this milestone. + + ### Post-eval pipeline + + After `evaluate_with_params`, the session must also run: + - Class decomposition normalization + proportional allocation + - Edge series computation + - Analysis warnings + + This means the session calls the same post-eval pipeline as `eval_model_with_params`. The simplest approach: the session stores the compiled Plan and the ModelDefinition, and each `eval` call runs `eval_model_with_params` reusing the model but with the new overrides. + + For the compile-once optimization (skip recompilation), a future milestone can cache the Plan separately. 
For now, recompiling per eval is acceptable if latency is under the AC-9 target. + + ## Out of Scope + + - WebSocket transport (m-E17-01) + - .NET bridge for session mode (m-E17-01) + - UI parameter controls (m-E17-02) + - Parameter sweep batch mode (m-E18-03) + - Request IDs / multiplexing (single-client, sequential for now) + - Authentication or access control + - TLS/encryption + + ## Key References + + - `engine/core/src/compiler.rs` — `compile()`, `eval_model_with_params()` + - `engine/core/src/plan.rs` — `ParamTable`, `ParamValue` + - `engine/core/src/eval.rs` — `evaluate_with_params()` + - `engine/cli/src/main.rs` — existing CLI command dispatch + - `docs/architecture/headless-engine-architecture.md` — protocol design + - [rmp-serde crate](https://crates.io/crates/rmp-serde) — MessagePack for Rust + - [MessagePack spec](https://msgpack.org/) — wire format + - kind: milestone + id: M-003 + frontmatter: + title: Tiered Validation + status: done + parent: E-18 + body: | + ## Goal + + Expose model validation as a first-class, client-agnostic Time Machine operation. + Three tiers callable from the .NET SDK and from a new `POST /v1/validate` HTTP + endpoint. Tier 1 (schema) is also added to the Rust engine session protocol so + the Svelte What-If UI can get cheap per-edit feedback without a full compile. + + ## Scope + + **Tier 1 — Schema:** YAML parses + JSON schema validates + class references resolve. + Backed by `ModelSchemaValidator.Validate` + `ModelValidator.Validate` in Core. + Cheap: no compile, no eval. + + **Tier 2 — Compile:** Schema (tier 1) + model compiles into a Graph. + Backed by `ModelCompiler.Compile` + `ModelParser.ParseModel` in Core. + Catches structural errors (unresolved references, expression errors). + + **Tier 3 — Analyse:** Compile (tier 2) + deterministic evaluation + invariant + checks. Backed by `TemplateInvariantAnalyzer.Analyze` in Sim.Core. + Catches semantic issues (conservation violations, capacity breaches). 
+ + **In scope:** + - `src/FlowTime.TimeMachine/Validation/` — `TimeMachineValidator` (static service), + `ValidationResult`, `ValidationError`, `ValidationWarning`, `ValidationTier` enum + - `src/FlowTime.API/Endpoints/ValidationEndpoints.cs` — `POST /v1/validate` + - Rust engine session — new `validate_schema` command (tier 1 via session protocol) + - Unit tests: `tests/FlowTime.TimeMachine.Tests/Validation/` + - API tests: `tests/FlowTime.Api.Tests/ValidationEndpointsTests.cs` + - Rust integration tests: session `validate_schema` command + + **Out of scope:** + - Line/column mapping in error messages + - Editor LSP integration + - Svelte UI changes (validate button) — separate UI milestone + + ## Contract + + ### HTTP Endpoint + + ``` + POST /v1/validate + Content-Type: application/json + + { + "yaml": "...", + "tier": "schema" | "compile" | "analyse" + } + ``` + + Response (200 always, errors in body): + + ```json + { + "tier": "schema", + "isValid": false, + "errors": [ + { "message": "Unknown class reference: 'premium'" } + ], + "warnings": [] + } + ``` + + Tier 3 analyse response includes warnings in addition to errors: + + ```json + { + "tier": "analyse", + "isValid": true, + "errors": [], + "warnings": [ + { "nodeId": "Queue", "code": "high_utilization", "message": "..." } + ] + } + ``` + + ### Session Protocol Command (`validate_schema`) + + ``` + request: { method: "validate_schema", params: { yaml: "..." } } + response (valid): { result: { is_valid: true, errors: [] } } + response (invalid): { result: { is_valid: false, errors: ["..."] } } + ``` + + Tier 2 (compile) is already served by the existing `compile` command, which + returns `error: { code: "compile_error", ... }` on failure. 
+ + ## Acceptance Criteria + + - [x] `TimeMachineValidator.Validate(yaml, ValidationTier.Schema)` returns errors for invalid YAML + - [x] `TimeMachineValidator.Validate(yaml, ValidationTier.Compile)` catches structural errors (bad node refs, bad expressions) + - [x] `TimeMachineValidator.Validate(yaml, ValidationTier.Analyse)` returns warnings from invariant analyzer + - [x] `POST /v1/validate` responds 200 with `{ isValid, tier, errors, warnings }` for all three tiers + - [x] Invalid tier value → 400 Bad Request + - [x] Empty/null yaml → 400 Bad Request + - [x] Rust session `validate_schema` returns `{ is_valid, errors }` without full compile + - [x] `rg "FlowTime\.Generator" src/ tests/` still zero (no regressions) + - [x] `dotnet test FlowTime.sln` all green; Rust `cargo test` all green + - kind: milestone + id: M-004 + frontmatter: + title: Generator Extraction → TimeMachine + status: done + parent: E-18 + body: | + ## Goal + + Rename `FlowTime.Generator` → `FlowTime.TimeMachine`. Move all classes, update all + references in consumers (src + tests), remove `FlowTime.Generator` from the solution. + Pure structural refactor — no behavior change, all tests green, no coexistence window + (per D-2026-04-07-019 Path B). 
+ + ## Scope + + **In scope:** + - Create `src/FlowTime.TimeMachine/FlowTime.TimeMachine.csproj` with identical dependencies + - Move all Generator source files; update `FlowTime.Generator.*` namespaces → `FlowTime.TimeMachine.*` + - Rename `tests/FlowTime.Generator.Tests/` → `tests/FlowTime.TimeMachine.Tests/`; update its csproj + - Update project references in: FlowTime.Cli, FlowTime.Sim.Service, FlowTime.API, FlowTime.Api.Tests, FlowTime.Cli.Tests, FlowTime.Integration.Tests + - Update `using FlowTime.Generator.*` → `using FlowTime.TimeMachine.*` across all source files + - Register TimeMachine in FlowTime.sln; remove Generator entry + - Delete `src/FlowTime.Generator/` entirely + + **Out of scope:** + - Tiered validation (m-E18-06) + - Any behavior changes whatsoever + + ## Acceptance Criteria + + - [x] `src/FlowTime.TimeMachine/` exists; `src/FlowTime.Generator/` is gone + - [x] `tests/FlowTime.TimeMachine.Tests/` exists; `tests/FlowTime.Generator.Tests/` is gone + - [x] `dotnet build FlowTime.sln` succeeds with zero errors + - [x] `dotnet test FlowTime.sln` passes with the same test count + - [x] `rg "FlowTime\.Generator" src/ tests/ --include="*.cs" --include="*.csproj"` returns zero matches + - [x] Solution file contains TimeMachine entry; Generator entry is absent + + ## Namespace Mapping + + | Old | New | + |-----|-----| + | `FlowTime.Generator` | `FlowTime.TimeMachine` | + | `FlowTime.Generator.Artifacts` | `FlowTime.TimeMachine.Artifacts` | + | `FlowTime.Generator.Capture` | `FlowTime.TimeMachine.Capture` | + | `FlowTime.Generator.Models` | `FlowTime.TimeMachine.Models` | + | `FlowTime.Generator.Orchestration` | `FlowTime.TimeMachine.Orchestration` | + | `FlowTime.Generator.Processing` | `FlowTime.TimeMachine.Processing` | + - kind: milestone + id: M-005 + frontmatter: + title: ITelemetrySource Contract + status: done + parent: E-18 + body: | + ## Goal + + Define `ITelemetrySource` as the formal input contract for the Time Machine's external data + 
surface, with two concrete implementations from day one. Satisfies the deferred portion of + the spec's m-E18-01b scope (the tiered-validation half shipped as m-E18-06; this delivers + the source-contract half). + + ## Scope + + **`ITelemetrySource` interface** — in `src/FlowTime.TimeMachine/Telemetry/`: + - `ITelemetrySource` — single method: `Task ReadAsync(CancellationToken)` + - `TelemetryData` — typed payload: grid, series dictionary, optional provenance metadata + + **`CanonicalBundleSource : ITelemetrySource`** — reads the canonical bundle format + (`manifest.json` + CSV series files) written by the existing `TelemetryBundleBuilder` in + `FlowTime.Core`. Concrete class (not behind a second interface). + + **`FileCsvSource : ITelemetrySource`** — reads `file:`-referenced CSV inputs, extracting + the existing file-read logic already in `FlowTime.Core` into a named, injectable + implementation. + + **In scope:** + - `src/FlowTime.TimeMachine/Telemetry/ITelemetrySource.cs` + - `src/FlowTime.TimeMachine/Telemetry/TelemetryData.cs` + - `src/FlowTime.TimeMachine/Telemetry/CanonicalBundleSource.cs` + - `src/FlowTime.TimeMachine/Telemetry/FileCsvSource.cs` + - Unit tests: `tests/FlowTime.TimeMachine.Tests/Telemetry/` + + **Out of scope:** + - `ITelemetrySink` — explicitly deferred per D-2026-04-07-020 + - Real-world format adapters (Prometheus, OTEL, BPI) — m-E18 telemetry adapters milestone + - Time Machine `Evaluate` / `Reevaluate` consuming the source — separate milestone + - HTTP endpoint changes + + ## Contract + + ### `ITelemetrySource` + + ```csharp + namespace FlowTime.TimeMachine.Telemetry; + + /// + /// Input contract for external data fed into the Time Machine. + /// Each implementation snapshots data from its source at ReadAsync time, + /// returning a deterministic TelemetryData payload the Time Machine can consume. 
+ /// 
+ public interface ITelemetrySource
+ {
+     Task<TelemetryData> ReadAsync(CancellationToken cancellationToken = default);
+ }
+ ```
+ 
+ ### `TelemetryData`
+ 
+ ```csharp
+ public sealed class TelemetryData
+ {
+     /// <summary>Grid definition (bins, binSize, binUnit).</summary>
+     public required GridDefinition Grid { get; init; }
+ 
+     /// <summary>Node-id → double[] series values (one per bin).</summary>
+     public required IReadOnlyDictionary<string, double[]> Series { get; init; }
+ 
+     /// <summary>Optional provenance: source path, captured-at timestamp, content hash.</summary>
+     public TelemetryProvenance? Provenance { get; init; }
+ }
+ 
+ public sealed class TelemetryProvenance
+ {
+     public string? SourcePath { get; init; }
+     public DateTimeOffset? CapturedAt { get; init; }
+     public string? ContentHash { get; init; }
+ }
+ ```
+ 
+ ### `CanonicalBundleSource`
+ 
+ Reads a canonical bundle directory (containing `manifest.json` and `series/*.csv`).
+ 
+ ```csharp
+ public sealed class CanonicalBundleSource : ITelemetrySource
+ {
+     public CanonicalBundleSource(string bundleDirectory) { ... }
+     public Task<TelemetryData> ReadAsync(CancellationToken cancellationToken = default) { ... }
+ }
+ ```
+ 
+ ### `FileCsvSource`
+ 
+ Reads a single CSV file as a named series.
+ 
+ ```csharp
+ public sealed class FileCsvSource : ITelemetrySource
+ {
+     /// <param name="filePath">Path to the CSV file.</param>
+     /// <param name="seriesId">Node ID to assign the series to.</param>
+     /// <param name="grid">Grid definition to validate series length against.</param>
+     public FileCsvSource(string filePath, string seriesId, GridDefinition grid) { ... }
+     public Task<TelemetryData> ReadAsync(CancellationToken cancellationToken = default) { ... 
} + } + ``` + + ## Acceptance Criteria + + - [x] `ITelemetrySource` interface exists in `FlowTime.TimeMachine.Telemetry` + - [x] `TelemetryData` carries Grid + Series + optional Provenance + - [x] `CanonicalBundleSource.ReadAsync` reads a bundle directory and returns correct series values + - [x] `FileCsvSource.ReadAsync` reads a single CSV and returns the series under the specified ID + - [x] Both implementations compile and have passing unit tests (23 tests across 2 suites) + - [x] `ITelemetrySink` is **not** introduced (explicitly documented as deferred) + - [x] `rg "FlowTime\.Generator" src/ tests/` still zero (no regressions) + - [x] `dotnet test FlowTime.sln` all green (72 TimeMachine tests, 0 failures) + - kind: milestone + id: M-006 + frontmatter: + title: Parameter Sweep + status: done + parent: E-18 + body: | + ## Goal + + Implement parameter sweep as a first-class Time Machine operation: given a model YAML, a + const-node ID, and an array of values, evaluate the model once per value and return a + structured table of (param_value → series outputs). 
+ + Builds on: + - m-E18-01 `evaluate_with_params` in the Rust engine (compile-once foundation) + - m-E18-07 `FlowTime.TimeMachine` project (host for the sweep domain model) + - m-E18-08 `ITelemetrySource` (pattern for injectable evaluation contracts) + + ## Scope + + **`FlowTime.TimeMachine.Sweep` namespace** — in `src/FlowTime.TimeMachine/Sweep/`: + - `IModelEvaluator` — injectable evaluation contract; decouples SweepRunner from the Rust binary in tests + - `SweepSpec` — validated input: ModelYaml, ParamId, Values[], optional CaptureSeriesIds + - `SweepPoint` — single evaluation result: ParamValue + Series dictionary + - `SweepResult` — full sweep result: ParamId + SweepPoint[] + - `ConstNodePatcher` — internal YAML DOM manipulation; patches a named const node's values array + - `SweepRunner` — orchestrates N evaluations via injected `IModelEvaluator` + - `RustModelEvaluator : IModelEvaluator` — wraps `RustEngineRunner`, maps series list to dictionary + + **`POST /v1/sweep`** — in `src/FlowTime.API/Endpoints/SweepEndpoints.cs`: + - Request: `{ yaml, paramId, values: [double...], captureSeriesIds?: [string...] 
}` + - Response (200): `{ paramId, points: [{ paramValue, series: { seriesId: double[] } }] }` + - 400: missing yaml / paramId / values + - 503: engine not enabled (RustEngine:Enabled=false) + + **In scope:** + - `src/FlowTime.TimeMachine/Sweep/IModelEvaluator.cs` + - `src/FlowTime.TimeMachine/Sweep/SweepSpec.cs` + - `src/FlowTime.TimeMachine/Sweep/SweepResult.cs` + - `src/FlowTime.TimeMachine/Sweep/ConstNodePatcher.cs` + - `src/FlowTime.TimeMachine/Sweep/SweepRunner.cs` + - `src/FlowTime.TimeMachine/Sweep/RustModelEvaluator.cs` + - `src/FlowTime.API/Endpoints/SweepEndpoints.cs` + - DI registration in `Program.cs` + - Unit tests: `tests/FlowTime.TimeMachine.Tests/Sweep/` + - API tests: `tests/FlowTime.Api.Tests/SweepEndpointsTests.cs` + + **Out of scope:** + - Sensitivity analysis (numerical gradient) — follow-on + - Multi-parameter sweeps (grid sweeps) — follow-on + - Session-based compile-once optimization — follow-on (each sweep point uses subprocess eval) + - Optimization / fitting — m-E18-10+ + - Sweep result persistence / artifact writing — follow-on + + ## Design Notes + + ### Implementation approach + + Each sweep point calls `RustEngineRunner.EvaluateAsync(patchedYaml)` independently (one + subprocess per point). The YAML is patched in-memory before each call via `ConstNodePatcher`, + which uses YamlDotNet's representation model to substitute the const node's values array. + + This deliberately trades compile-once efficiency for implementation simplicity: the Rust + session protocol requires a MessagePack NuGet dependency not yet in the tree, while the + subprocess approach reuses existing infrastructure with no new dependencies. + + The `IModelEvaluator` abstraction isolates this choice from `SweepRunner`, so a future + session-based evaluator can be dropped in without changing the sweep domain model or tests. 
+ + ### ConstNodePatcher behaviour + + - Finds the first `nodes` entry where `id == nodeId` AND `kind == "const"` + - Replaces its `values` sequence with `[value, value, ..., value]` (same bin count) + - Returns the original YAML unchanged if the node is not found or is not a const node + - Uses `InvariantCulture` formatting for decimal precision + + ## Acceptance Criteria + + - [x] `IModelEvaluator` interface exists in `FlowTime.TimeMachine.Sweep` + - [x] `SweepSpec` validates: non-null/whitespace ModelYaml, non-null/whitespace ParamId, non-null/non-empty Values + - [x] `ConstNodePatcher.Patch` correctly replaces const node values; returns original YAML for unknown/non-const nodes + - [x] `SweepRunner.RunAsync` returns one `SweepPoint` per input value, with correct ParamValue and Series + - [x] `SweepRunner` respects `CaptureSeriesIds` filter (null = all series) + - [x] `SweepRunner` respects `CancellationToken` between evaluation points + - [x] `RustModelEvaluator` wraps `RustEngineRunner` and maps series list to dictionary + - [x] `POST /v1/sweep` returns 400 for missing yaml / paramId / empty values + - [x] `POST /v1/sweep` returns 503 when Rust engine not enabled + - [x] Unit tests pass: 28 sweep unit tests (SweepSpec ×9, ConstNodePatcher ×7, SweepRunner ×12) + - [x] API validation tests pass: 7 tests (6×400, 1×503) + - [x] `dotnet test FlowTime.sln` all green (105 TimeMachine, 235 API — pre-existing integration failures unrelated) + - kind: milestone + id: M-007 + frontmatter: + title: Sensitivity Analysis + status: done + parent: E-18 + body: | + ## Goal + + Add numerical sensitivity analysis as a Time Machine operation: given a model YAML, a set + of const-node parameters, and a target metric series, compute ∂metric_mean/∂param for each + parameter using a central-difference approximation. Answers "which parameter has the most + impact on this metric?" 
+ + Builds on: + - m-E18-09 `SweepRunner` + `ConstNodePatcher` — two-point sweep per parameter reuses the + sweep infrastructure directly + - `ConstNodePatcher` — YAML DOM manipulation already in place + + ## Scope + + **`FlowTime.TimeMachine.Sweep` namespace** (extending m-E18-09's namespace): + - `ConstNodeReader` — companion to `ConstNodePatcher`; reads the current scalar value of a + named const node's first bin. Returns `null` if the node is not found or not a const node. + - `SensitivitySpec` — validated input: ModelYaml, ParamIds[], MetricSeriesId, Perturbation (default 5%) + - `SensitivityPoint` — single result: ParamId, BaseValue, Gradient (∂metric_mean/∂param) + - `SensitivityResult` — `SensitivityPoint[]` sorted by `|Gradient|` descending + - `SensitivityRunner` — composes `SweepRunner`; for each param: read base, 2-point sweep, + central difference + + **`POST /v1/sensitivity`** — in `src/FlowTime.API/Endpoints/SensitivityEndpoints.cs` + - Request: `{ yaml, paramIds: [string...], metricSeriesId, perturbation?: double }` + - Response (200): `{ metricSeriesId, points: [{ paramId, baseValue, gradient }] }` + - 400: missing yaml / paramIds (null or empty) / metricSeriesId + - 503: engine not enabled + + **In scope:** + - `src/FlowTime.TimeMachine/Sweep/ConstNodeReader.cs` + - `src/FlowTime.TimeMachine/Sweep/SensitivitySpec.cs` + - `src/FlowTime.TimeMachine/Sweep/SensitivityResult.cs` + - `src/FlowTime.TimeMachine/Sweep/SensitivityRunner.cs` + - `src/FlowTime.API/Endpoints/SensitivityEndpoints.cs` + - DI registration in `Program.cs` + - Unit tests: `tests/FlowTime.TimeMachine.Tests/Sweep/` + - API tests: `tests/FlowTime.Api.Tests/SensitivityEndpointsTests.cs` + + **Out of scope:** + - Multi-metric sensitivity — single metric per call + - Distribution-based sensitivity (Morris method, Sobol indices) — follow-on + - Forward-difference vs central-difference choice — central difference only + - Optimization / fitting — m-E18-11+ + + ## Design Notes + + ### 
Gradient formula (central difference) + + For each parameter `p` with base value `b` and perturbation fraction `ε`: + + ``` + hi = b × (1 + ε) + lo = b × (1 - ε) + gradient = (mean(metric_series_at_hi) − mean(metric_series_at_lo)) / (hi − lo) + = (mean_hi − mean_lo) / (2 × b × ε) + ``` + + **Zero-base edge case:** when `b == 0`, `hi == lo == 0` and the gradient is indeterminate. + Gradient is set to `0.0` and a note is included in the point. The parameter is still included + in the result so callers can see it was processed. + + **Missing metric series:** if the evaluator returns series that do not include `MetricSeriesId`, + `SensitivityRunner` throws `InvalidOperationException` with a clear message. This is a caller + error (wrong series ID), not a graceful skip. + + **Unknown param:** if `ConstNodeReader.ReadValue` returns `null` for a param ID (node not + found or not a const node), that param is skipped (omitted from result). Callers can detect + skipped params by comparing `spec.ParamIds.Length` vs `result.Points.Length`. + + ### `SensitivityRunner` composes `SweepRunner` + + `SensitivityRunner(SweepRunner sweepRunner)` — takes the full `SweepRunner` including its + injected `IModelEvaluator`. Tests pass a `SweepRunner(fakeEvaluator)` — no additional + test doubles needed. 
+ + ## Acceptance Criteria + + - [x] `ConstNodeReader.ReadValue(yaml, nodeId)` returns the first-bin value for known const + nodes; returns `null` for unknown nodes, non-const nodes, and missing `nodes` section + - [x] `SensitivitySpec` validates: non-null/whitespace ModelYaml, non-null/non-empty ParamIds, + non-null/whitespace MetricSeriesId, Perturbation in (0, 1) exclusive + - [x] `SensitivityRunner.RunAsync` returns one `SensitivityPoint` per found param, sorted by + `|Gradient|` descending + - [x] Gradient computed correctly via central difference + - [x] Zero-base param produces Gradient = 0.0 (no crash) + - [x] Unknown param ID silently skipped (omitted from result) + - [x] Missing metric series throws `InvalidOperationException` + - [x] `SensitivityRunner` respects `CancellationToken` + - [x] `POST /v1/sensitivity` returns 400 for missing yaml / paramIds / metricSeriesId + - [x] `POST /v1/sensitivity` returns 503 when Rust engine not enabled + - [x] Unit tests pass: 32 tests (ConstNodeReader ×8, SensitivitySpec ×12, SensitivityRunner ×12) + - [x] API tests pass: 7 tests (6×400, 1×503) + - [x] `dotnet test FlowTime.sln` all green (137 TimeMachine, 242 API) + - kind: milestone + id: M-008 + frontmatter: + title: Goal Seeking + status: done + parent: E-18 + body: | + ## Goal + + Add 1D goal seeking: given a model YAML, a const-node parameter, a metric series, and a + target value, find the parameter value that drives the metric mean to the target via bisection. + Answers "what arrival rate gives 80% utilization?" without a full parameter sweep. 
+ + Builds on: + - m-E18-09 `SweepRunner` + `ConstNodePatcher` / `ConstNodeReader` (m-E18-10) + - Same `IModelEvaluator` seam + + ## Scope + + **`FlowTime.TimeMachine.Sweep` namespace:** + - `GoalSeekSpec` — validated input: ModelYaml, ParamId, MetricSeriesId, Target, SearchLo, + SearchHi, Tolerance (default 1e-6), MaxIterations (default 50) + - `GoalSeekResult` — output: ParamValue, AchievedMetricMean, Converged, Iterations + - `GoalSeeker` — bisection over `SweepRunner`; handles non-bracketed case gracefully + + **`POST /v1/goal-seek`** — in `src/FlowTime.API/Endpoints/GoalSeekEndpoints.cs` + - Request: `{ yaml, paramId, metricSeriesId, target, searchLo, searchHi, tolerance?, maxIterations? }` + - Response (200): `{ paramValue, achievedMetricMean, converged, iterations }` + - 400: missing/invalid required fields (searchLo ≥ searchHi is invalid) + - 503: engine not enabled + + **In scope:** + - `src/FlowTime.TimeMachine/Sweep/GoalSeekSpec.cs` + - `src/FlowTime.TimeMachine/Sweep/GoalSeekResult.cs` + - `src/FlowTime.TimeMachine/Sweep/GoalSeeker.cs` + - `src/FlowTime.API/Endpoints/GoalSeekEndpoints.cs` + - DI registration in `Program.cs` + - Unit tests: `tests/FlowTime.TimeMachine.Tests/Sweep/` + - API tests: `tests/FlowTime.Api.Tests/GoalSeekEndpointsTests.cs` + - Architecture doc: `docs/architecture/time-machine-analysis-modes.md` (written alongside) + + **Out of scope:** + - Multi-dimensional optimization (Nelder-Mead) — m-E18-12+ + - Constraint handling beyond the `[searchLo, searchHi]` range + - Non-monotonic functions (bisection is undefined; `Converged=false` returned) + + ## Algorithm + + Bisection on the metric mean: + + ``` + 1. Evaluate at searchLo → meanLo = mean(metric at searchLo) + 2. Evaluate at searchHi → meanHi = mean(metric at searchHi) + 3. If target not in [min(meanLo,meanHi), max(meanLo,meanHi)]: + return best endpoint, Converged=false + 4. 
While iterations < maxIterations: + mid = (lo + hi) / 2 + midMean = mean(metric at mid) + if |midMean - target| < tolerance: return mid, Converged=true + if (midMean - target) same sign as (meanLo - target): lo = mid, meanLo = midMean + else: hi = mid, meanHi = midMean + 5. Return mid, Converged=false (max iterations reached) + ``` + + ## Acceptance Criteria + + - [x] `GoalSeekSpec` validates: non-null/whitespace ModelYaml/ParamId/MetricSeriesId; + SearchLo < SearchHi; Tolerance > 0; MaxIterations ≥ 1 + - [x] `GoalSeeker.SeekAsync` converges on a linear model to within tolerance + - [x] `GoalSeeker` returns `Converged=false` when target is not bracketed + - [x] `GoalSeeker` returns `Converged=false` (best guess) when max iterations exhausted + - [x] `GoalSeeker` respects `CancellationToken` + - [x] `POST /v1/goal-seek` returns 400 for missing/invalid required fields + - [x] `POST /v1/goal-seek` returns 503 when engine not enabled + - [x] Unit tests pass: 26 tests (GoalSeekSpec ×14, GoalSeeker ×12) + - [x] API tests pass: 8 tests (7×400, 1×503) + - [x] `dotnet test FlowTime.sln` all green (163 TimeMachine, 250 API) + - kind: milestone + id: M-009 + frontmatter: + title: Multi-parameter Optimization + status: done + parent: E-18 + body: | + ## Goal + + Add multi-parameter optimization: given a model, a set of const-node parameters with search + ranges, a metric series, and an objective (minimize or maximize), find the parameter values that + drive the metric mean to its optimum using Nelder-Mead simplex — a derivative-free method that + works for any number of parameters without needing gradients. + + Answers "what combination of arrival rate and capacity minimizes queue depth?" without a full + multi-dimensional grid search. 
+
+    Builds on:
+    - `IModelEvaluator` seam (m-E18-09)
+    - `ConstNodePatcher` for multi-parameter YAML mutation (m-E18-09)
+    - `ConstNodeReader` (m-E18-10) — used in tests to read patched values
+
+    ## Scope
+
+    **`FlowTime.TimeMachine.Sweep` namespace:**
+    - `OptimizeObjective` — `Minimize | Maximize` enum
+    - `SearchRange` — `record(double Lo, double Hi)` with `Lo < Hi` invariant
+    - `OptimizeSpec` — validated input: ModelYaml, ParamIds, MetricSeriesId, Objective,
+      SearchRanges (one entry per ParamId), Tolerance (default 1e-4), MaxIterations (default 200)
+    - `OptimizeResult` — output: ParamValues, AchievedMetricMean, Converged, Iterations
+    - `Optimizer` — Nelder-Mead simplex over `IModelEvaluator`; patches all parameters
+      simultaneously per evaluation; respects CancellationToken
+
+    **`POST /v1/optimize`** — in `src/FlowTime.API/Endpoints/OptimizeEndpoints.cs`
+    - Request: `{ yaml, paramIds, metricSeriesId, objective, searchRanges, tolerance?, maxIterations? }`
+      where `searchRanges` is `{ "<paramId>": { "lo": N, "hi": N }, ... }`
+      and `objective` is `"minimize"` or `"maximize"` (case-insensitive)
+    - Response (200): `{ paramValues, achievedMetricMean, converged, iterations }`
+    - 400: missing/invalid required fields, searchRange lo >= hi, unknown objective string
+    - 503: engine not enabled
+
+    **In scope:**
+    - `src/FlowTime.TimeMachine/Sweep/OptimizeObjective.cs`
+    - `src/FlowTime.TimeMachine/Sweep/SearchRange.cs`
+    - `src/FlowTime.TimeMachine/Sweep/OptimizeSpec.cs`
+    - `src/FlowTime.TimeMachine/Sweep/OptimizeResult.cs`
+    - `src/FlowTime.TimeMachine/Sweep/Optimizer.cs`
+    - `src/FlowTime.API/Endpoints/OptimizeEndpoints.cs`
+    - DI registration in `Program.cs`
+    - Unit tests: `tests/FlowTime.TimeMachine.Tests/Sweep/`
+    - API tests: `tests/FlowTime.Api.Tests/OptimizeEndpointsTests.cs`
+    - Architecture doc update: `docs/architecture/time-machine-analysis-modes.md`
+
+    **Out of scope:**
+    - Constraint handling (utilization < 0.8 etc.)
 — future milestone
+    - Bayesian optimization — future milestone
+    - Parallel evaluation of simplex vertices
+    - Gradient-based methods (sensitivity-driven descent)
+
+    ## Algorithm
+
+    Nelder-Mead simplex (N parameters → N+1 vertices):
+
+    ```
+    Coefficients: α=1.0 (reflect), γ=2.0 (expand), ρ=0.5 (contract), σ=0.5 (shrink)
+    Objective f(v) = metricMean(v)   for Minimize
+              f(v) = -metricMean(v)  for Maximize (internally always minimize f)
+
+    1. Build initial N+1 simplex:
+       v[0] = midpoint of all search ranges
+       v[i] = v[0] with param[i-1] shifted +5% of its range (clamped)
+
+    2. Evaluate f at each vertex.
+
+    3. Sort vertices so v[0] is best (lowest f) and v[N] is worst.
+
+    4. Check pre-loop convergence: if |f[N] - f[0]| < tolerance → Converged (0 iterations)
+
+    5. For iteration = 1 to MaxIterations:
+       a. Compute centroid c of best N vertices (v[0]..v[N-1]).
+       b. Reflect: xr = c + α*(c - v[N]); clamp; fr = f(xr)
+       c. if fr < f[0]: expand: xe = c + γ*(xr - c); clamp; fe = f(xe)
+          replace v[N] with (fe < fr ? xe : xr)
+       d. else if fr < f[N-1]: replace v[N] with xr
+       e. else: contract: xc = c + ρ*(v[N] - c); clamp; fc = f(xc)
+          if fc < f[N]: replace v[N] with xc
+          else: shrink all non-best vertices toward v[0] by σ; re-evaluate
+       f. Re-sort; if |f[N] - f[0]| < tolerance → Converged
+
+    6. Return best vertex; Converged=false if MaxIterations exhausted.
+    ```
+
+    ## Acceptance Criteria
+
+    - [x] `OptimizeSpec` validates: non-null/whitespace ModelYaml/MetricSeriesId; non-empty
+          ParamIds; one SearchRange per ParamId with Lo < Hi; Tolerance > 0; MaxIterations ≥ 1
+    - [x] `Optimizer.OptimizeAsync` converges on a 1D bowl function to within tolerance
+    - [x] `Optimizer.OptimizeAsync` converges on a 2D bowl function to within tolerance
+    - [x] `Optimizer.OptimizeAsync` supports Maximize objective (maximizes a linear metric)
+    - [x] `Optimizer` returns `Converged=false` when MaxIterations exhausted before convergence
+    - [x] `Optimizer` respects `CancellationToken`
+    - [x] `POST /v1/optimize` returns 400 for missing/invalid required fields
+    - [x] `POST /v1/optimize` returns 503 when engine not enabled
+    - [x] Unit tests pass: 29 tests (OptimizeSpec ×17, Optimizer ×12)
+    - [x] API tests pass: 10 tests (9×400, 1×503)
+    - [x] `dotnet test FlowTime.sln` all green (192 TimeMachine, 260 API)
+  - kind: milestone
+    id: M-010
+    frontmatter:
+      title: SessionModelEvaluator
+      status: done
+      parent: E-18
+    body: |
+      ## Goal
+
+      Replace per-point subprocess compile overhead with a single persistent engine session.
+ Today, every `IModelEvaluator.EvaluateAsync` call spawns `flowtime-engine eval` as a fresh + subprocess that re-parses YAML and re-compiles the Plan. For a sweep of 200 points this is + 200 compiles; for an optimization run it can be 100–1000 compiles. Each spawn is + ~100–500 ms of pure compile overhead. + + `SessionModelEvaluator` uses the m-E18-02 session protocol (MessagePack over stdin/stdout): + compile once on the first call, then send `eval` with parameter overrides for every + subsequent call. The expected speedup for large batches is ~10–50×. + + Also makes model fitting practical — fitting typically runs 100–1000 evaluations with + the optimizer as the inner loop, which is not viable with per-point subprocess compile. + + ## Scope + + **Namespace:** `FlowTime.TimeMachine.Sweep` + + - `SessionModelEvaluator : IModelEvaluator, IAsyncDisposable` — persistent session bridge: + - Lazy-spawns `flowtime-engine session` subprocess on first `EvaluateAsync` + - First call: sends `compile` request with the (already-patched) YAML; captures the list + of parameter IDs from the response; returns the series from the compile result + - Subsequent calls: uses `ConstNodeReader` to read the current value of each captured + parameter ID from the patched YAML; sends `eval { overrides: { ... } }`; returns series + from the response + - Serializes protocol I/O with `SemaphoreSlim` (one request at a time per instance) + - MessagePack via the `MessagePack` package (already used in integration tests); encodes + requests with `ContractlessStandardResolver` as `Dictionary` + - Wire framing: 4-byte big-endian length prefix + MessagePack payload (matches + `engine/cli/src/protocol.rs`) + - `DisposeAsync`: closes stdin, waits briefly for the subprocess to exit, kills the + process tree if still alive after the timeout + + **DI registration** (`src/FlowTime.API/Program.cs`): + - New config key `RustEngine:UseSession` (default `true`). 
Selects which `IModelEvaluator` + implementation is registered: + - `true` → `SessionModelEvaluator` (persistent session, compile-once) + - `false` → `RustModelEvaluator` (stateless subprocess per eval — retained as fallback) + - `IModelEvaluator`, `SweepRunner`, `SensitivityRunner`, `GoalSeeker`, `Optimizer` change + from `AddSingleton` → `AddScoped`. Session lifetime must match the analysis run; Scoped + gives one evaluator per HTTP request with automatic disposal. Even when `UseSession=false` + the Scoped lifetime is harmless — runners are stateless wrappers. + - `RustEngineRunner` remains `Singleton` (still used by E-20 bridge/parity tests and by + `RustModelEvaluator`). + + **Why keep `RustModelEvaluator`:** + - Fallback switch if the session protocol surfaces bugs in the wild (30 lines of code; negligible maintenance). + - Diagnostic comparison path — "does the non-session path agree?" is a cheap bug-triage question. + - Process isolation per eval is genuinely different behavior from a stateful session; both have legitimate deployment shapes (see cloud-deployment notes in `ROADMAP.md`). + - Two production impls make `IModelEvaluator` a real seam, not a testing-only interface. 
+ + **Package reference:** + - Add `MessagePack` 3.1.4 to `src/FlowTime.TimeMachine/FlowTime.TimeMachine.csproj` + (same version already used in `FlowTime.Integration.Tests`) + + **In scope:** + - `src/FlowTime.TimeMachine/Sweep/SessionModelEvaluator.cs` + - `src/FlowTime.TimeMachine/FlowTime.TimeMachine.csproj` (MessagePack package) + - `src/FlowTime.API/Program.cs` (config switch + DI scope changes) + - Unit tests: `tests/FlowTime.TimeMachine.Tests/Sweep/SessionModelEvaluatorTests.cs` + (covers override extraction logic and error paths that do not require the subprocess) + - Integration tests: `tests/FlowTime.Integration.Tests/SessionModelEvaluatorIntegrationTests.cs` + (requires the Rust binary; skipped if not present, following the existing + `EngineSessionWebSocketTests` pattern) + - Milestone tracking doc + - Update `docs/architecture/time-machine-analysis-modes.md` to note the new evaluator and config switch + + **Out of scope:** + - Session pooling / reuse across HTTP requests — each request gets its own session + - Auto-reconnect on session crash — if the subprocess dies, the next call surfaces the error + - Chunked evaluation (Mode 6) — separate later milestone + - Model-change detection (session always compiles the YAML it sees first; further calls + assume the same base model, which holds for all current analysis runners) + + ## Design + + ### Lifecycle + + ``` + T=0 SessionModelEvaluator ctor (no I/O yet) + T=1 SweepRunner calls EvaluateAsync(patchedYaml1) + → lazy spawn subprocess + → send compile { yaml: patchedYaml1 } + → receive compile result { params: [...], series: {...} } + → store paramIds from the response + → return series + T=2 SweepRunner calls EvaluateAsync(patchedYaml2) + → use ConstNodeReader to read each paramId value from patchedYaml2 + → send eval { overrides: { ... } } + → receive eval result { series: {...} } + → return series + ... 
+
+      T=N   HTTP request ends → DI scope disposes the evaluator
+            → DisposeAsync: close stdin, wait 1s, kill if still alive
+      ```
+
+      ### Why the `overrides` approach works
+
+      The compile result captures the initial parameter defaults from the YAML (including whatever
+      values the first patch applied). On every subsequent call the evaluator sends an explicit
+      override for every tracked parameter, so the session always evaluates with the current
+      patched values. The compile-time defaults only matter for the very first call's return.
+
+      ### Request/response shapes
+
+      Requests are plain `Dictionary<string, object>` (contractless MessagePack). Responses
+      are `Dictionary<object, object>` navigated by key. Matching the Rust protocol:
+
+      | Method | Request params | Response `result` |
+      |--------|----------------|-------------------|
+      | `compile` | `{ yaml: string }` | `{ params: [...], series: { id: [double,...] }, bins, grid, graph, warnings }` |
+      | `eval` | `{ overrides: { paramId: double } }` | `{ series: { id: [double,...] }, elapsed_us, warnings }` |
+
+      Errors arrive as `{ error: { code, message } }` with no `result` key. The evaluator
+      raises `InvalidOperationException` with the error code + message.
+ + ## Acceptance Criteria + + - [x] `SessionModelEvaluator` exists, implements `IModelEvaluator` and `IAsyncDisposable` + - [x] Constructor validates engine path (non-null, non-whitespace) + - [x] First `EvaluateAsync` call spawns the subprocess exactly once; subsequent calls reuse it + - [x] First call sends `compile`; subsequent calls send `eval` with overrides extracted via `ConstNodeReader` + - [x] Returned series dictionary uses case-insensitive keys (matches `RustModelEvaluator`) + - [x] Error responses (`error` key present) raise `InvalidOperationException` with code + message + - [x] `DisposeAsync` closes stdin, waits for exit, kills the process tree on timeout + - [x] `DisposeAsync` is idempotent (safe to call multiple times) + - [x] Calling `EvaluateAsync` after `DisposeAsync` throws `ObjectDisposedException` + - [x] `CancellationToken` is observed during I/O + - [x] Concurrent `EvaluateAsync` calls on one instance are serialized (no interleaved frames) + - [x] DI: `IModelEvaluator`, `SweepRunner`, `SensitivityRunner`, `GoalSeeker`, `Optimizer` all registered as `Scoped` + - [x] DI: `RustEngine:UseSession` config (default `true`) selects `SessionModelEvaluator`; `false` selects `RustModelEvaluator` + - [x] `RustModelEvaluator.cs` retained as fallback; covered by an API test that flips the config switch + - [x] Unit tests pass: 32 tests total + - 6 constructor + disposal (SessionModelEvaluatorTests) + - 3 BuildOverrides (empty / all-found / some-missing) + - 5 ExtractResult (success / error-with-code-msg / error-missing-subfields / neither / malformed-result) + - 4 ExtractParamIds (missing-key / not-array / valid / malformed-items) + - 6 ExtractSeries (missing-key / not-dict / valid / case-insensitive / non-string-key / non-array-value) + - 1 WriteFrameAsync (length-prefixed MessagePack) + - 5 ReadFrameAsync (valid / zero / negative / excessive / truncated) + - 2 ReadExactAsync (full-read / EOF-mid-read) + - [x] Integration tests pass with the Rust binary 
present: 8 tests (SessionModelEvaluatorIntegrationTests) + - [x] Compile-once / eval-many returns correct series after parameter override + - [x] Parity on numeric values against per-eval path (keys differ by design — documented in `work/gaps.md`) + - [x] `SweepRunner` drives `SessionModelEvaluator` end-to-end over a 5-point sweep + - [x] Session subprocess does not leak after disposal + - [x] Invalid model raises `InvalidOperationException` with engine error code + - [x] Concurrent calls on one instance are serialized + - [x] API DI tests pass: 4 tests (ModelEvaluatorRegistrationTests — default/true/false/scope lifetime) + - [x] `dotnet build FlowTime.sln` green + - [x] `dotnet test FlowTime.sln` all green (1,620 passed / 9 skipped) + - [x] `docs/architecture/time-machine-analysis-modes.md` updated — now documents both evaluator paths, config switch, and scoped lifetime + + ## Coverage notes + + **Covered:** every reachable branch in the production implementation — 44 dedicated tests (32 unit + 8 integration + 4 DI). The unit tests deliberately exercise every parsing helper with hand-crafted protocol payloads that the real Rust engine would not produce (missing fields, malformed types, non-string keys, out-of-range frame lengths), because those are defense-in-depth paths against protocol corruption and must not fail silently. + + **Explicitly not covered (defensive paths, acceptable gaps):** + + | Path | Why untested | + |------|--------------| + | `DisposeAsync` graceful-timeout → `Process.Kill` (line ~380) | Requires simulating a stuck subprocess; no deterministic way in unit tests. Behavior is symmetrically correct with the kill-succeeds case which IS covered by `Dispose_TerminatesSubprocess`. | + | `DisposeAsync` generic exception while waiting for exit (line ~385) | Defense-in-depth catch — unreachable in practice. `WaitForExitAsync` only throws `OperationCanceledException` (covered) or completes normally. 
| + | `SpawnProcess` `Process.Start` returns null | Only happens on platform-level process creation failure with an executable path that exists. Not reproducible in test. | + | `ExchangeAsync` `stdin`/`stdout` null guard | Defensive — caller always invokes after `SpawnProcess` has assigned both streams. Unreachable in practice. | + | `EvaluateAsync` inner-after-mutex disposed check | Race between `DisposeAsync` and an in-flight `EvaluateAsync`. Hard to trigger deterministically. The outer check + mutex make this extremely narrow. | + | `EvalAsync` error response | The Rust session only errors on `eval` when no model has been compiled (covered by compile-error path instead) or on a programmer bug that isn't otherwise reachable. | + + These six branches remain in the code as defense-in-depth and would be removed only with explicit evidence that they cannot occur under any future refactor. + + ## Dependencies + + - m-E18-02 (engine session protocol) — delivered + - m-E18-08 (ITelemetrySource) — independent + - m-E18-09 (`IModelEvaluator` seam, `ConstNodePatcher`) — delivered + - m-E18-10 (`ConstNodeReader`) — delivered + - `MessagePack` 3.1.4 — already in integration tests, add to TimeMachine + + ## Risks / notes + + - **Scope lifetime change.** Moving the four runners from `Singleton` → `Scoped` is a DI + semantics change. Runners are stateless wrappers over `IModelEvaluator`, so the risk is + low, but verify the minimal API endpoints still resolve them correctly. + - **Test flakiness from subprocess I/O.** Integration tests must guard against slow spawn + on first call; use a 5 s initial-compile timeout and skip cleanly if the binary is absent. + - **Process leak on abnormal termination.** `DisposeAsync` kills the process tree; CI must + not accumulate stray `flowtime-engine` processes between tests. + - **MessagePack dependency surface.** Adding `MessagePack` to `FlowTime.TimeMachine` pulls + it into the runtime surface. 
Acceptable — it is already transitively available through + `FlowTime.Integration.Tests` and matches the wire format owned by the Rust engine. + - kind: milestone + id: M-011 + frontmatter: + title: .NET Time Machine CLI + status: done + parent: E-18 + body: | + ## Goal + + Expose the Time Machine analysis modes (validate / sweep / sensitivity / goal-seek / + optimize) through the `FlowTime.Cli` binary as pipeable JSON-over-stdio commands. + The CLI becomes the canonical pipeline entry point for Azure Functions custom handlers, + Container Apps jobs, scripted regression suites, shell composition, and AI-assistant + iteration — without requiring the ASP.NET API to be running. + + Spec success criterion from E-18: + ``` + cat model.yaml | flowtime validate + cat sweep-spec.json | flowtime sweep | jq '.points[].metricMean' + ``` + + ## Scope + + Five new commands under `src/FlowTime.Cli/Commands/` mirroring the `/v1/` API surface: + + | CLI command | Spec type | Runner | Matches API endpoint | + |-------------|-----------|--------|----------------------| + | `flowtime validate` | `TimeMachineValidator` (no wrapping spec) | `TimeMachineValidator` | `POST /v1/validate` | + | `flowtime sweep` | `SweepSpec` | `SweepRunner` | `POST /v1/sweep` | + | `flowtime sensitivity` | `SensitivitySpec` | `SensitivityRunner` | `POST /v1/sensitivity` | + | `flowtime goal-seek` | `GoalSeekSpec` | `GoalSeeker` | `POST /v1/goal-seek` | + | `flowtime optimize` | `OptimizeSpec` | `Optimizer` | `POST /v1/optimize` | + + ### JSON I/O contract + + Each command reads a JSON request on stdin (or via `--spec `), runs the analysis, + and writes a JSON response on stdout. The request/response shapes are **identical to the + corresponding `/v1/` endpoint bodies** — byte-for-byte compatible, so `cat spec.json | + flowtime sweep` produces the same payload as `curl -d @spec.json /v1/sweep`. + + `validate` is the exception: its input is raw YAML (on stdin or `--model `), + not JSON. 
Output is the same JSON response shape as `POST /v1/validate`.
+
+      ### Unified options across commands
+
+      - `--spec <path>` — read JSON request from a file instead of stdin (analysis commands)
+      - `--model <path>` — read model YAML from a file (validate only)
+      - `--output <path>` / `-o` — write JSON response to a file instead of stdout
+      - `--no-session` — use `RustModelEvaluator` (stateless, subprocess-per-eval) instead of
+        `SessionModelEvaluator` (default). Matches the `RustEngine:UseSession=false` config.
+      - `--engine <path>` — override engine binary path (default: `FLOWTIME_RUST_BINARY` env
+        var, then `<solution root>/engine/target/release/flowtime-engine`)
+      - `-h` / `--help` — command-specific help
+
+      ### Exit codes
+
+      - **0** — success
+      - **1** — analysis produced an explicit failure (e.g., validate returned invalid,
+        optimize didn't converge and exited cleanly). The JSON response is still written to
+        stdout and describes the failure — stderr is clean.
+      - **2** — input error: missing required args, invalid JSON, engine binary not found,
+        spec failed validation. Error message on stderr; nothing on stdout.
+      - **3** — engine/runtime error: session subprocess crashed, protocol error,
+        `InvalidOperationException` from evaluator. Error message on stderr; nothing on stdout.
+
+      ### Engine binary resolution
+
+      Same precedence as the API, extracted to a shared helper:
+      1. `--engine <path>` command-line flag
+      2. `FLOWTIME_RUST_BINARY` environment variable
+      3. `<solution root>/engine/target/release/flowtime-engine` (found via `DirectoryProvider.FindSolutionRoot`)
+      4. `flowtime-engine` on `$PATH` (fallback)
+
+      Fail with exit 2 and clear stderr message if binary is not found or not executable.
+
+      ### Shared infrastructure
+
+      Extract two helpers to `src/FlowTime.Cli/Commands/`:
+
+      - `CliEngineSetup` — resolves engine binary path, constructs the chosen `IModelEvaluator`
+        as `IAsyncDisposable` (so callers can `await using`).
Also exposes a factory for
+        `RustEngineRunner` (needed by `RustModelEvaluator` fallback).
+      - `CliJsonIO` — reads JSON from stdin-or-file, writes JSON to stdout-or-file, common
+        serialization options matching the API (camelCase, web defaults).
+
+      ### In scope
+
+      - `src/FlowTime.Cli/Commands/ValidateCommand.cs`
+      - `src/FlowTime.Cli/Commands/SweepCommand.cs`
+      - `src/FlowTime.Cli/Commands/SensitivityCommand.cs`
+      - `src/FlowTime.Cli/Commands/GoalSeekCommand.cs`
+      - `src/FlowTime.Cli/Commands/OptimizeCommand.cs`
+      - `src/FlowTime.Cli/Commands/CliEngineSetup.cs` (helper)
+      - `src/FlowTime.Cli/Commands/CliJsonIO.cs` (helper)
+      - `src/FlowTime.Cli/Program.cs` — command routing + `PrintUsage` updates
+      - Unit tests per command: `tests/FlowTime.Cli.Tests/Commands/{Validate,Sweep,Sensitivity,GoalSeek,Optimize}CommandTests.cs`
+      - Integration tests that exercise end-to-end with the Rust binary:
+        `tests/FlowTime.Integration.Tests/TimeMachineCliIntegrationTests.cs`
+      - Update `docs/architecture/time-machine-analysis-modes.md` — new "CLI surface" section
+      - Update `CLAUDE.md` Current Work
+
+      ### Out of scope
+
+      - `fit` command (blocked on Telemetry Loop & Parity epic — not yet started)
+      - `chunked-eval` command (explicitly deferred by spec)
+      - `monte-carlo` command (explicitly deferred by spec)
+      - `System.CommandLine` framework migration — keep the existing minimal-args convention
+        used by the `run` and `artifacts` commands; introducing a library would be a separate
+        refactor
+      - YAML / CSV / table output formats — JSON only
+      - Interactive REPL mode
+      - Progress reporting beyond stderr status lines
+
+      ## Design
+
+      ### Command shape (every analysis command)
+
+      ```csharp
+      public static class SweepCommand
+      {
+          public static async Task<int> ExecuteAsync(string[] args)
+          {
+              var parsed = ParseArgs(args);  // spec path, output, no-session, engine
+              if (parsed.ShowHelp) { PrintHelp(); return 0; }
+
+              SweepSpec spec;
+              try { spec = CliJsonIO.Read<SweepSpec>(parsed.SpecPath); }
catch (JsonException ex) { Console.Error.WriteLine($"Invalid JSON: {ex.Message}"); return 2; } + + await using var evaluator = CliEngineSetup.CreateEvaluator(parsed); + var runner = new SweepRunner(evaluator); + + try + { + var result = await runner.RunAsync(spec); + CliJsonIO.Write(parsed.OutputPath, result); + return 0; + } + catch (InvalidOperationException ex) { Console.Error.WriteLine(ex.Message); return 3; } + } + } + ``` + + Each command is ~30-40 lines — parsing, spec deserialization, runner invocation, output. + + ### Why not `System.CommandLine`? + + The existing `FlowTime.Cli` uses hand-rolled arg parsing (see `Program.cs` — `for` loop + over args). Adding `System.CommandLine` would be a larger refactor that touches the + `run` command too. Keeping consistency is more important than getting the nicer library + for this milestone. A future cleanup milestone can migrate all commands together. + + ### Why JSON-over-stdio and not flag-driven? + + Specs like `OptimizeSpec` carry search ranges (one entry per param with lo/hi), objective + enums, tolerance, max iterations. Representing them as CLI flags is unergonomic: + + ``` + flowtime optimize --param arrivals --range-arrivals 0:100 --param capacity \ + --range-capacity 1:20 --metric util --objective minimize \ + --tolerance 1e-4 --max-iters 200 + ``` + + vs. + + ``` + cat optimize-spec.json | flowtime optimize + ``` + + The JSON path is pipeline-native: compose with `jq`, store specs as fixtures, invoke + from Azure Functions custom handlers, share spec files with the API. 
+ + ## Acceptance Criteria + + - [x] Five CLI commands (`validate`, `sweep`, `sensitivity`, `goal-seek`, `optimize`) wired into `Program.cs` router + - [x] Each command parses `--spec` / stdin, `--output` / stdout, `--no-session`, `--engine`, `--help` + - [x] `validate` reads YAML (not JSON) via `--model` / stdin; outputs `ValidationResult` as JSON + - [x] Each analysis command reads its matching `*Spec` as JSON and writes its matching result as JSON, byte-compatible with the corresponding `/v1/` endpoint + - [x] `CliEngineSetup` helper resolves binary path via `--engine` → `FLOWTIME_RUST_BINARY` → solution-relative default → `$PATH` + - [x] `CliEngineSetup` constructs `SessionModelEvaluator` by default; `--no-session` selects `RustModelEvaluator` + - [x] `CliJsonIO` helper reads JSON from stdin-or-file and writes JSON to stdout-or-file with camelCase / web defaults matching the API; `JsonStringEnumConverter` added so `objective: "minimize"` etc. deserialize correctly + - [x] Exit codes follow the 0/1/2/3 contract (success / analysis-failed / input-error / engine-error) + - [x] Missing engine binary produces exit 2 with a readable stderr message + - [x] Invalid JSON produces exit 2 with a stderr message; no partial stdout + - [x] `--help` on any command prints command-specific usage and exits 0 + - [x] Unit tests pass: 72 new CLI unit tests + - 15 CliJsonIO (read/write, file/stdin, camelCase, null literal, errors) + - 14 CliCommonArgs (all flag variants, missing values, unknown flag, positional, dash-as-positional) + - 8 CliEngineSetup (path precedence, evaluator selection, disposal idempotency) + - 13 ValidateCommand (help, arg errors, tier, valid/invalid YAML, output) + - 18 AnalysisCommandTests (help for each of 4 commands, shared error paths, IsOnPath, BarePath) + - 4 deferred (covered by integration tests instead — see below) + - [x] Integration tests pass with the Rust binary present: 10 tests (TimeMachineCliIntegrationTests) + - [x] `flowtime validate` with 
valid and invalid YAML + - [x] `flowtime sweep` end-to-end producing correct series (arrivals=10,20,30 → served=5,10,15) + - [x] `flowtime sensitivity` end-to-end (∂served/∂arrivals = 0.5) + - [x] `flowtime goal-seek` end-to-end (target served=25 → arrivals≈50) + - [x] `flowtime optimize` converging on a `MAX(x-7,7-x)` bowl around arrivals=14 + - [x] Session vs. per-eval flag (`--no-session`) both work + - [x] Output to file (`-o`) matches output to stdout + - [x] Engine compile error (unknown function) produces exit 3 + - [x] Every reachable path in the new command classes and helpers is covered (line-by-line audited) + - [x] `docs/architecture/time-machine-analysis-modes.md` — new "CLI surface" section documents the five commands, JSON I/O contract, exit codes, evaluator selection, engine resolution, and pipeline composition example + - [x] `Program.cs` `PrintUsage` updated with the five new commands + - [x] `dotnet build FlowTime.sln` green + - [x] `dotnet test FlowTime.sln` all green — 1,702 passed / 9 skipped + + ## Coverage notes + + **Covered:** every reachable branch in the command classes, helpers, and the `AnalysisCliRunner` shared path. 
The 72 CLI unit tests explicitly exercise:
+
+ - Help paths for all 5 commands
+ - All `CliCommonArgs` flag variants (spec/model/output/no-session/engine/help) and their error paths (missing value, unknown flag)
+ - Positional spec path AND `-` as a positional
+ - JSON I/O to/from file and stdin/stdout; invalid JSON; null JSON literal; missing file
+ - Engine path precedence (explicit/env/default); empty explicit falls through
+ - Evaluator construction for both session and no-session; disposal idempotency for both
+ - Input-error paths (exit 2): unknown flag, missing spec file, invalid JSON, invalid spec (ArgumentException), missing engine binary
+ - `IsOnPath` branches (absolute / relative-with-separator / bare name)
+ - Bare-name engine path bypasses file-existence check and reaches the spawn step
+
+ Integration tests (10) cover success paths and the exit-3 engine-error path.
+
+ **Explicitly not covered:**
+
+ | Path | Why untested |
+ |------|--------------|
+ | `CliEngineSetup.ResolveEnginePath` fallback to bare `"flowtime-engine"` when `DirectoryProvider.FindSolutionRoot()` returns null | Would require environment manipulation to move outside any .git-rooted directory tree. The env-var and explicit paths are covered; the default-path branch is covered when run inside the repo. |
+
+ This is a single acceptable gap for platform-edge behavior.
+
+ ## Dependencies
+
+ - m-E18-06 (TimeMachineValidator) — delivered
+ - m-E18-09 (`IModelEvaluator`, `SweepRunner`, `ConstNodePatcher`) — delivered
+ - m-E18-10 (`SensitivityRunner`, `ConstNodeReader`) — delivered
+ - m-E18-11 (`GoalSeeker`) — delivered
+ - m-E18-12 (`Optimizer`, `OptimizeSpec`) — delivered
+ - m-E18-13 (`SessionModelEvaluator`, evaluator config switch) — delivered
+
+ ## Risks / notes
+
+ - **Argument parser consistency.** The existing CLI uses manual `for`-loop parsing. New
+ commands should follow the same convention; don't introduce a parsing library in this
+ milestone. 
A future cleanup epic can migrate all commands to `System.CommandLine`. + - **Test isolation.** Integration tests spawn the Rust engine subprocess per test. Same + skip-if-missing pattern as m-E18-13 integration tests. + - **Stdin handling in tests.** Tests should not actually redirect Console.In — use the + `--spec ` flag with a temp file for test inputs. Reserve stdin testing for a + single smoke test that sets `Console.SetIn`. + - **Binary resolution on Windows.** Paths use `Path.Combine`; binary name on Windows is + `flowtime-engine.exe`. The existing `DirectoryProvider` / `EngineSessionBridge` handle + this correctly — reuse their logic. diff --git a/work/migration/manifests/id-map.csv b/work/migration/manifests/id-map.csv new file mode 100644 index 00000000..21d380b9 --- /dev/null +++ b/work/migration/manifests/id-map.csv @@ -0,0 +1,12 @@ +old_id,new_id,kind +m-E18-01,M-001,milestone +m-E18-02,M-002,milestone +m-E18-06,M-003,milestone +m-E18-07,M-004,milestone +m-E18-08,M-005,milestone +m-E18-09,M-006,milestone +m-E18-10,M-007,milestone +m-E18-11,M-008,milestone +m-E18-12,M-009,milestone +m-E18-13,M-010,milestone +m-E18-14,M-011,milestone diff --git a/work/migration/manifests/skip-log.md b/work/migration/manifests/skip-log.md index 224b7c55..5772cf6c 100644 --- a/work/migration/manifests/skip-log.md +++ b/work/migration/manifests/skip-log.md @@ -3,3 +3,4 @@ Findings accumulated by the projector. Triage in Phase 4 dry-run loop. - **E-13**: source spec has no `**Status:**` line; defaulted to `proposed`. Source: `work/epics/E-13-path-analysis/spec.md`. +- **m-E18-01**: `**Depends on:**` references epic `E-20` — aiwf milestone.depends_on requires milestone targets only; dropped from frontmatter (body retains the prose). 
diff --git a/work/migration/scripts/project_epics.py b/work/migration/scripts/project_epics.py index 77d6b21f..ce7b8340 100644 --- a/work/migration/scripts/project_epics.py +++ b/work/migration/scripts/project_epics.py @@ -2,20 +2,28 @@ # requires-python = ">=3.10" # dependencies = ["ruamel.yaml>=0.18"] # /// -"""Project active-set E-NN epics into a combined aiwf import manifest. +"""Project E-NN epics (and their milestones, where applicable) into a combined +aiwf import manifest. -Pass B scope (no milestones): E-13, E-14, E-15, plus E-22 carried from Pass A. -E-11 and E-18 are multi-milestone — handled in later passes (C/E). +Pass A: E-22 only (epic spike). +Pass B: E-13, E-14, E-15 added (no milestones). +Pass C: E-18 added with 11 milestones (first multi-milestone epic). Reads: work/epics/E-NN-*/spec.md + work/epics/E-NN-*/m-EXX-NN-*.md (milestone files; tracking/log siblings excluded) Emits: work/migration/manifests/epics-active.yaml - work/migration/manifests/skip-log.md (accumulated findings, only when non-empty) + work/migration/manifests/skip-log.md (only if findings) + work/migration/manifests/id-map.csv (only if any milestones projected) Validate downstream: aiwf import --dry-run work/migration/manifests/epics-active.yaml """ +from __future__ import annotations + +import csv import re import sys +from dataclasses import dataclass, field from pathlib import Path from ruamel.yaml import YAML @@ -26,8 +34,10 @@ EPICS_DIR = REPO_ROOT / "work/epics" OUT_PATH = REPO_ROOT / "work/migration/manifests/epics-active.yaml" SKIP_LOG_PATH = REPO_ROOT / "work/migration/manifests/skip-log.md" +ID_MAP_PATH = REPO_ROOT / "work/migration/manifests/id-map.csv" -PASS_B_EPICS = ["E-13", "E-14", "E-15", "E-22"] +# Epics to project, in manifest order. Milestones are auto-discovered per epic. 
+SCOPE_EPICS = ["E-13", "E-14", "E-15", "E-18", "E-22"] V1_TO_V3_EPIC_STATUS = { "planning": "proposed", @@ -44,10 +54,43 @@ "done": "done", } +V1_TO_V3_MILESTONE_STATUS = { + "draft": "draft", + "pending": "draft", + "proposed": "draft", + "in-progress": "in_progress", + "in_progress": "in_progress", + "active": "in_progress", + "complete": "done", + "completed": "done", + "done": "done", + "cancelled": "cancelled", +} + + +@dataclass +class EpicEntity: + epic_id: str + title: str + status: str + body: str + + +@dataclass +class MilestoneEntity: + old_id: str # m-E18-01 + new_id: str # M-001 + parent_epic: str # E-18 + title: str + status: str + depends_on_old_ids: list[str] = field(default_factory=list) + body: str = "" + + +# ----- shared ----------------------------------------------------------------- def find_epic_dir(epic_id: str) -> Path: - matches = sorted(EPICS_DIR.glob(f"{epic_id}-*")) - matches = [m for m in matches if m.is_dir()] + matches = sorted(p for p in EPICS_DIR.glob(f"{epic_id}-*") if p.is_dir()) if not matches: raise FileNotFoundError(f"no dir matching {epic_id}-* under {EPICS_DIR}") if len(matches) > 1: @@ -55,7 +98,33 @@ def find_epic_dir(epic_id: str) -> Path: return matches[0] -def parse_epic_spec(epic_id: str, spec_path: Path, findings: list[str]) -> dict: +def strip_frontmatter_prose(text: str, prose_keys: tuple[str, ...]) -> str: + """Drop the H1 line and any **:** prose lines (and their trailing blank).""" + pattern = re.compile(rf"^\*\*({'|'.join(prose_keys)}):\*\*", re.IGNORECASE) + out: list[str] = [] + skip_blank = False + for line in text.splitlines(): + if line.startswith("# "): + skip_blank = True + continue + if pattern.match(line): + skip_blank = True + continue + if skip_blank and line.strip() == "": + skip_blank = False + continue + skip_blank = False + out.append(line) + while out and out[0].strip() == "": + out.pop(0) + while out and out[-1].strip() == "": + out.pop() + return "\n".join(out) + "\n" + + +# ----- epics 
------------------------------------------------------------------ + +def parse_epic_spec(epic_id: str, spec_path: Path, findings: list[str]) -> EpicEntity: text = spec_path.read_text(encoding="utf-8") lines = text.splitlines() if not lines or not lines[0].startswith("# "): @@ -68,14 +137,10 @@ def parse_epic_spec(epic_id: str, spec_path: Path, findings: list[str]) -> dict: if not id_match: raise ValueError(f"{epic_id}: spec missing **ID:** line") if id_match.group(1) != epic_id: - raise ValueError( - f"{epic_id}: **ID:** says {id_match.group(1)!r}; expected {epic_id!r}" - ) + raise ValueError(f"{epic_id}: **ID:** says {id_match.group(1)!r}; expected {epic_id!r}") status_match = re.search( - r"^\*\*Status:\*\*\s*(\S+)(?:\s*\((.+?)\))?", - text, - re.MULTILINE, + r"^\*\*Status:\*\*\s*(\S+)(?:\s*\((.+?)\))?", text, re.MULTILINE ) qualifier: str | None = None if status_match: @@ -93,71 +158,231 @@ def parse_epic_spec(epic_id: str, spec_path: Path, findings: list[str]) -> dict: f"defaulted to `proposed`. Source: `{spec_path.relative_to(REPO_ROOT)}`." 
) - body = strip_frontmatter_prose(text) + body = strip_frontmatter_prose(text, ("ID", "Status")) if qualifier: body = f"> **Status note:** {qualifier}\n\n{body}" + return EpicEntity(epic_id=epic_id, title=title, status=v3_status, body=body) - return {"id": epic_id, "title": title, "status": v3_status, "body": body} +# ----- milestones ------------------------------------------------------------- -def strip_frontmatter_prose(text: str) -> str: - out: list[str] = [] - skip_blank = False - for line in text.splitlines(): - if line.startswith("# "): - skip_blank = True - continue - if re.match(r"^\*\*(ID|Status):\*\*", line): - skip_blank = True - continue - if skip_blank and line.strip() == "": - skip_blank = False +MILESTONE_SPEC_RE = re.compile(r"^m-E\d+-\d+-[a-z0-9-]+\.md$") + + +def discover_milestone_specs(epic_dir: Path) -> list[Path]: + """Return milestone spec files (excluding tracking/log siblings).""" + return sorted( + p + for p in epic_dir.iterdir() + if p.is_file() + and MILESTONE_SPEC_RE.match(p.name) + and "-tracking" not in p.name + and "-log" not in p.name + ) + + +MILESTONE_OLD_ID_RE = re.compile(r"m-E\d+-\d+") + + +def parse_milestone_spec( + spec_path: Path, + expected_parent: str, + findings: list[str], +) -> tuple[str, str, str, list[str], str]: + """Return (old_id, title, v3_status, depends_on_old_ids, body).""" + text = spec_path.read_text(encoding="utf-8") + lines = text.splitlines() + if not lines or not lines[0].startswith("# "): + raise ValueError(f"{spec_path.name}: spec missing H1 title") + h1 = lines[0][2:].strip() + + # Variant A: prose **ID:** line. Variant B: id embedded in H1 (`m-EXX-NN — Title`). 
+ old_id: str | None = None + title: str | None = None + id_match = re.search(r"^\*\*ID:\*\*\s*(\S+)\s*$", text, re.MULTILINE) + if id_match: + old_id = id_match.group(1) + title = re.sub(r"^Milestone:\s*", "", h1) + else: + m = re.match(r"^(m-E\d+-\d+)\s*[—-]\s*(.+)$", h1) + if not m: + raise ValueError( + f"{spec_path.name}: cannot derive id — no **ID:** line and H1 doesn't match `m-EXX-NN — Title`" + ) + old_id = m.group(1) + title = m.group(2).strip() + + # Epic field (informational only; we trust dir-derived parent) + epic_match = re.search(r"^\*\*Epic:\*\*\s*(\S+)", text, re.MULTILINE) + if epic_match and epic_match.group(1) != expected_parent: + findings.append( + f"- **{old_id}**: `**Epic:**` says `{epic_match.group(1)}` but file lives " + f"under `{expected_parent}`'s dir; trusting dir-derived parent." + ) + + # Status — first whitespace token, lowercase, mapped + status_match = re.search(r"^\*\*Status:\*\*\s*(\S+)", text, re.MULTILINE) + if status_match: + v1 = status_match.group(1).lower() + if v1 not in V1_TO_V3_MILESTONE_STATUS: + raise ValueError( + f"{old_id}: unmapped milestone status {v1!r} — add to V1_TO_V3_MILESTONE_STATUS" + ) + v3_status = V1_TO_V3_MILESTONE_STATUS[v1] + else: + v3_status = "draft" + findings.append( + f"- **{old_id}**: no `**Status:**` line; defaulted to `draft`." 
+ ) + + # Depends on — only milestone targets are projectable; epic targets dropped + depends_on_old: list[str] = [] + deps_match = re.search(r"^\*\*Depends on:\*\*\s*(.+)$", text, re.MULTILINE) + if deps_match: + deps_raw = deps_match.group(1) + for tok in MILESTONE_OLD_ID_RE.findall(deps_raw): + depends_on_old.append(tok) + # Detect epic targets in the same field (E-NN tokens that aren't m-EXX-NN) + for ep in re.findall(r"\bE-\d+\b", deps_raw): + if not re.search(rf"m-{ep[:0]}{ep}", deps_raw): # always true; epic match + findings.append( + f"- **{old_id}**: `**Depends on:**` references epic `{ep}` — " + f"aiwf milestone.depends_on requires milestone targets only; dropped from frontmatter (body retains the prose)." + ) + break # one finding per milestone is enough + + body = strip_frontmatter_prose( + text, ("ID", "Epic", "Status", "Branch", "Depends on") + ) + return old_id, title, v3_status, depends_on_old, body + + +def allocate_milestone_ids( + discovered: list[tuple[str, Path]], # [(epic_id, milestone_path)] +) -> dict[str, str]: + """Compute deterministic old_id → new_id mapping. + + Order: epic-id ascending (per SCOPE_EPICS order), then milestone old-id ascending. + """ + sorted_pairs: list[tuple[str, Path]] = [] + for epic_id in SCOPE_EPICS: + for ep, path in discovered: + if ep == epic_id: + sorted_pairs.append((ep, path)) + # Within each epic, sort by old-id (which is embedded in filename or extractable). + # Filename pattern: m-EXX-NN-slug.md — sorts naturally by NN since EXX is constant per epic. 
+ sorted_pairs.sort(key=lambda p: (SCOPE_EPICS.index(p[0]), p[1].name)) + + id_map: dict[str, str] = {} + counter = 1 + for ep, path in sorted_pairs: + # extract old_id from filename + m = re.match(r"^(m-E\d+-\d+)-", path.name) + if not m: continue - skip_blank = False - out.append(line) - while out and out[0].strip() == "": - out.pop(0) - while out and out[-1].strip() == "": - out.pop() - return "\n".join(out) + "\n" + old_id = m.group(1) + id_map[old_id] = f"M-{counter:03d}" + counter += 1 + return id_map -def build_manifest(epics: list[dict]) -> dict: +# ----- manifest --------------------------------------------------------------- + +def build_manifest( + epics: list[EpicEntity], milestones: list[MilestoneEntity] +) -> dict: + epic_entries = [ + { + "kind": "epic", + "id": e.epic_id, + "frontmatter": {"title": e.title, "status": e.status}, + "body": LiteralScalarString(e.body), + } + for e in epics + ] + milestone_entries = [] + for m in milestones: + fm: dict[str, object] = { + "title": m.title, + "status": m.status, + "parent": m.parent_epic, + } + if m.depends_on_old_ids: + fm["depends_on"] = list(m.depends_on_old_ids) # placeholder; replaced after id-map + milestone_entries.append( + { + "kind": "milestone", + "id": m.new_id, + "frontmatter": fm, + "body": LiteralScalarString(m.body), + } + ) return { "version": 1, "commit": { "mode": "single", - "message": f"import(spike): Pass B — {len(epics)} active-set epics", + "message": f"import(spike): {len(epics)} epics + {len(milestones)} milestones", }, - "entities": [ - { - "kind": "epic", - "id": e["id"], - "frontmatter": { - "title": e["title"], - "status": e["status"], - }, - "body": LiteralScalarString(e["body"]), - } - for e in epics - ], + "entities": epic_entries + milestone_entries, } def main() -> int: findings: list[str] = [] - epics: list[dict] = [] - for epic_id in PASS_B_EPICS: - spec_dir = find_epic_dir(epic_id) - spec_path = spec_dir / "spec.md" + epics: list[EpicEntity] = [] + + # Pass 1: epics + 
for epic_id in SCOPE_EPICS: + epic_dir = find_epic_dir(epic_id) + spec_path = epic_dir / "spec.md" if not spec_path.exists(): findings.append( - f"- **{epic_id}**: no `spec.md` in `{spec_dir.relative_to(REPO_ROOT)}`; skipped." + f"- **{epic_id}**: no `spec.md` in `{epic_dir.relative_to(REPO_ROOT)}`; epic skipped." ) continue epics.append(parse_epic_spec(epic_id, spec_path, findings)) - manifest = build_manifest(epics) + # Pass 2: discover all milestone files across in-scope epics + discovered: list[tuple[str, Path]] = [] # [(epic_id, milestone_path)] + for epic_id in SCOPE_EPICS: + try: + epic_dir = find_epic_dir(epic_id) + except FileNotFoundError: + continue + for ms_path in discover_milestone_specs(epic_dir): + discovered.append((epic_id, ms_path)) + + id_map = allocate_milestone_ids(discovered) + + # Pass 3: parse milestones; resolve depends_on via id_map + milestones: list[MilestoneEntity] = [] + for epic_id, ms_path in discovered: + old_id, title, v3_status, depends_on_old, body = parse_milestone_spec( + ms_path, epic_id, findings + ) + new_id = id_map[old_id] + # Resolve depends_on: keep only targets in id_map (in-scope milestones) + deps_resolved = [] + for old_dep in depends_on_old: + if old_dep in id_map: + deps_resolved.append(id_map[old_dep]) + else: + findings.append( + f"- **{old_id}**: `depends_on: {old_dep}` is out of migration scope; dropped from frontmatter." 
+ ) + milestones.append( + MilestoneEntity( + old_id=old_id, + new_id=new_id, + parent_epic=epic_id, + title=title, + status=v3_status, + depends_on_old_ids=deps_resolved, # already mapped to new ids + body=body, + ) + ) + + manifest = build_manifest(epics, milestones) OUT_PATH.parent.mkdir(parents=True, exist_ok=True) yaml = YAML() yaml.indent(mapping=2, sequence=4, offset=2) @@ -165,6 +390,17 @@ def main() -> int: with OUT_PATH.open("w", encoding="utf-8") as f: yaml.dump(manifest, f) + # id-map.csv + if id_map: + with ID_MAP_PATH.open("w", encoding="utf-8", newline="") as f: + w = csv.writer(f) + w.writerow(["old_id", "new_id", "kind"]) + for old, new in sorted(id_map.items()): + w.writerow([old, new, "milestone"]) + elif ID_MAP_PATH.exists(): + ID_MAP_PATH.unlink() + + # skip-log if findings: header = ( "# Migration skip-log\n\n" @@ -174,11 +410,19 @@ def main() -> int: elif SKIP_LOG_PATH.exists(): SKIP_LOG_PATH.unlink() - print(f"wrote {OUT_PATH.relative_to(REPO_ROOT)} ({len(epics)} epics)") + print(f"wrote {OUT_PATH.relative_to(REPO_ROOT)}") + print(f" epics: {len(epics)}") for e in epics: - print(f" {e['id']:6} {e['status']:10} — {e['title']}") + ms_count = sum(1 for m in milestones if m.parent_epic == e.epic_id) + print(f" {e.epic_id:6} {e.status:10} — {e.title} [{ms_count} milestones]") + print(f" milestones: {len(milestones)}") + for m in milestones: + deps = f" ← {','.join(m.depends_on_old_ids)}" if m.depends_on_old_ids else "" + print(f" {m.new_id} ({m.old_id}) {m.status:12} parent={m.parent_epic} — {m.title}{deps}") + if id_map: + print(f"\nid-map: {ID_MAP_PATH.relative_to(REPO_ROOT)} ({len(id_map)} entries)") if findings: - print(f"\n{len(findings)} finding(s) in {SKIP_LOG_PATH.relative_to(REPO_ROOT)}") + print(f"findings: {len(findings)} in {SKIP_LOG_PATH.relative_to(REPO_ROOT)}") return 0 From e4da0fc2985d5ad6ad759bd95703532a8bcb4052 Mon Sep 17 00:00:00 2001 From: Peter Bruinsma Date: Fri, 1 May 2026 21:01:19 +0000 Subject: [PATCH 06/50] 
=?UTF-8?q?chore(migration):=20Pass=20D=20=E2=80=94?= =?UTF-8?q?=20extend=20projector=20to=20completed-id'd=20generic=20epics?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - find_epic_dir walks both work/epics/ and work/epics/completed/ - is_completed_dir helper detects completed/ parents - dir-location override extended to milestones: completed/ epic dir → all child milestones forced to status `done` regardless of source - two new shape variants handled: - YAML frontmatter (`--- ... ---`) for E-23, E-24 epics + milestones - full-slug `**ID:**` values (m-E16-NN-rest-of-slug) normalized via MILESTONE_OLD_ID_RE - H1 separator broadened from `[—-]` to `[:—-]` (E-23/E-24 use `:`) - milestone status falls back to YAML frontmatter `status:` when prose `**Status:**` line absent - yaml_id matched against expected epic prefix (E-23-slug, E-24-slug) - aiwf import --dry-run epics-active.yaml: 12 epics + 53 milestones = 65 writes, 0 errors, exit 0 - id-map.csv now 53 entries (M-001..M-053) - skip-log: 12 findings (10 noise from `**Epic:**` field carrying title instead of id; 2 real: E-13 default-status, m-E18-01 epic-dep) Co-Authored-By: Claude Opus 4.7 (1M context) --- work/migration/aiwf-v3-plan.md | 3 +- work/migration/manifests/epics-active.yaml | 8799 +++++++++++++++++--- work/migration/manifests/id-map.csv | 42 + work/migration/manifests/skip-log.md | 10 + work/migration/scripts/project_epics.py | 143 +- 5 files changed, 7960 insertions(+), 1037 deletions(-) diff --git a/work/migration/aiwf-v3-plan.md b/work/migration/aiwf-v3-plan.md index 92039efd..babc5cba 100644 --- a/work/migration/aiwf-v3-plan.md +++ b/work/migration/aiwf-v3-plan.md @@ -287,7 +287,7 @@ Coverage gaps and mitigations: - [x] Pass A: spike on E-22 — `work/migration/scripts/project_e22.py` (uv script-mode, ruamel.yaml). Generates `work/migration/manifests/e22-spike.yaml`. **Dry-run green:** `aiwf import --dry-run` zero findings, exit 0. 
12,902-byte `epic.md` would land at `work/epics/E-22-time-machine-model-fit-chunked-evaluation/epic.md` - [x] Pass B: extend to E-13/E-14/E-15 (+ E-22 carried from A) — `project_epics.py` replaces `project_e22.py`. **Dry-run green:** 4 epics, 0 findings, exit 0. skip-log.md emitted (1 entry: E-13 default-status). Status mapping table proven on missing/superseded/parenthesized-prose/clean inputs. - [x] Pass C: extend to E-18 + 11 milestones. Q2 revised: explicit `M-NNN` (computed by projector) instead of `auto` because intra-manifest `depends_on` can't reference auto entries. **Dry-run green:** 5 epics + 11 milestones, 0 errors, exit 0. id-map.csv emitted (11 entries). M-002 depends_on M-001 resolves correctly within manifest. m-E18-01 epic-target dep (E-20) dropped + skip-logged. -- [ ] Pass D: extend to completed-id'd epics +- [x] Pass D: extend to completed-id'd generic-shape epics (E-16, E-17, E-19, E-20, E-21, E-23, E-24). Dir-location override (`completed/` → `done`) extended to milestones too — milestones inside completed/ epic dirs forced to `done` regardless of source status. Two new shape variants handled: YAML frontmatter (E-23, E-24 epics + their milestones) and full-slug `**ID:**` lines (m-E16-NN). H1 separator broadened to accept `:` (`# m-E23-01: Title`). **Dry-run green:** 12 epics + 53 milestones = 65 writes, 0 errors, exit 0. id-map.csv now 53 entries. 12 findings (10 are noise: `**Epic:**` field carries title instead of id; 2 are real: E-13 default-status, m-E18-01 epic-dep). - [ ] Pass E: outlier per-epic rules (E-10, E-11, E-12) - [ ] Pass F: decisions + gaps - [ ] Pass G: body-rewrite cross-pass with id-map.csv @@ -365,3 +365,4 @@ Append-only record of dry-run iterations, decisions taken mid-flight, and findin - 2026-05-01 — phase 2 — Pass B finding 3 settled: parenthesized qualifier on `**Status:**` line is preserved by prepending a `> **Status note:** ` blockquote to the top of the body. 
Information not lost; body retains the "capture is shipped; ingestion pipeline is not" nuance from E-15 inline. - 2026-05-01 — phase 2 — Pass B landed. `project_epics.py` (replaces `project_e22.py`); `epics-active.yaml` (4 epics: E-13/E-14/E-15/E-22); `skip-log.md` (1 finding: E-13 default-status). Status-mapping table exercised on all four input shapes (missing / superseded / parenthesized-prose / clean). `aiwf import --dry-run` zero error findings, exit 0. Plans 4 writes at title-derived slugs as expected. - 2026-05-01 — phase 2 — Pass C landed. Projector extended to milestones. Q2 **revised** (recorded in plan): explicit `M-NNN` ids computed by projector instead of `auto`, because aiwf manifest reference fields like `depends_on` can't resolve against `auto` entries (no id declared at manifest time). E-18 + 11 milestones (M-001..M-011) projected; M-002 depends_on M-001 resolves cleanly. id-map.csv emitted. m-E18-01's `**Depends on:** E-20` (epic-target) correctly dropped from frontmatter (body retains prose) and skip-logged. Two milestone H1 shapes handled (Variant A: prose `**ID:**` line; Variant B: id embedded in H1). Two milestone status mappings: `complete` → `done`. `aiwf import --dry-run` zero errors, exit 0. 16 writes planned. +- 2026-05-01 — phase 2 — Pass D landed. Projector extended to completed-id'd generic-shape epics (E-16, E-17, E-19, E-20, E-21, E-23, E-24). Dir-location override extended to milestones (completed/ parent → milestone forced `done`). Two new shape variants handled: (a) YAML frontmatter `--- ... ---` blocks (E-23, E-24 epics + milestones); (b) full-slug `**ID:**` lines (m-E16-NN normalized via MILESTONE_OLD_ID_RE). H1 separator broadened to `[:—-]`. Final: 12 epics + 53 milestones = 65 writes, 0 errors, exit 0. id-map.csv 53 entries. 12 findings (10 noise from `**Epic:**` field-carries-title; 2 real). 
diff --git a/work/migration/manifests/epics-active.yaml b/work/migration/manifests/epics-active.yaml index 66f7e018..a392a210 100644 --- a/work/migration/manifests/epics-active.yaml +++ b/work/migration/manifests/epics-active.yaml @@ -1,7 +1,7 @@ version: 1 commit: mode: single - message: 'import(spike): 5 epics + 11 milestones' + message: 'import(spike): 12 epics + 53 milestones' entities: - kind: epic id: E-13 @@ -890,847 +890,1922 @@ entities: - E-15 Telemetry Ingestion: `work/epics/E-15-telemetry-ingestion/` - Option A delivery sequence: `work/decisions.md` → D-2026-04-15-032 - Headless engine architecture: `docs/architecture/headless-engine-architecture.md` - - kind: milestone - id: M-001 + - kind: epic + id: E-16 frontmatter: - title: Parameterized Evaluation + title: Formula-First Core Purification status: done - parent: E-18 body: | ## Goal - The Rust engine can compile a model once and re-evaluate it many times with different parameter values without recompiling. This is the critical primitive that every downstream use case builds on — interactive what-if, parameter sweeps, optimization, sensitivity analysis. The Plan becomes a reusable program; parameters are its inputs. + Purify FlowTime's execution boundary so semantic meaning and analytical truth are compiled into Core once and consumed as facts everywhere else. This epic turns the existing "spreadsheet for flows" mental model into an enforceable architecture: parser/compiler resolve references, the core evaluates pure vector formulas, and adapters and clients stop reconstructing domain meaning from strings. ## Context - The current `compile(model) → Plan` bakes all constants into `Op::Const { out, values }` at compile time. To change an arrival rate from 10 to 15, you must recompile the entire model. Compilation is O(nodes) with topological sorting, expression parsing, and constraint resolution — unnecessary work when only a scalar value changed. 
+ E-10 established the right direction for engine correctness and analytical primitives, and `m-ec-p3a1` correctly identified that duplicated analytical logic in `StateQueryService` was a design problem rather than just a missing helper. We are now wrapping `m-ec-p3a1` as a bridge milestone: it moved the current analytical capability/computation surface into Core, but the review and plan pressure test confirmed that the full purification work is larger and should be owned entirely by E-16. - After this milestone, the Plan carries a `ParamTable` that lists every user-visible constant. `evaluate_with_params(plan, overrides)` writes overrides into the state matrix before the eval loop, then runs the same bin-major evaluation. The Plan is immutable and shareable; only the parameter values change. + The deeper structural issue remains: - ### Where constants come from in the compiler + - runtime analytical identity is still inferred late from raw semantic strings + - `StateQueryService` still carries parser-style context so it can reconstruct meaning + - API contracts still expose hints (`kind`, `nodeLogicalType`) instead of authoritative analytical facts + - UI/client code still re-implements queue/service classification and node category heuristics - The compiler creates `Op::Const` from seven sources: + That means the current architecture is still vulnerable to the same class of drift that p3a1 was meant to fix. If we continue directly into the remaining E-10 Phase 3 work on top of that boundary, we will multiply the cleanup cost and likely accumulate more duct tape. - | Source | Example | Parameter? 
| - |--------|---------|-----------| - | `kind: const` node values | `values: [10, 20, 30]` | Yes — primary user input | - | Traffic arrival `ratePerBin` | `ratePerBin: 20` | Yes — class arrival rate | - | PMF expected value | `pmf: { values, probabilities }` | Yes — derived from PMF definition | - | WIP limit scalar | `wipLimit: 50` | Yes — topology constraint | - | Queue initial condition | `initialCondition: { queueDepth: 5 }` | Yes — initial state | - | Expression literal | `8` in `MIN(arrivals, 8)` | Yes — inline constant in formula | - | Compiler-generated temps | Internal proportional alloc, router weight columns | No — derived, not user-visible | + This work should therefore be treated as a dedicated epic, not hidden inside another correctness milestone. The point is not a rewrite. The point is to move semantic truth earlier into the compile/evaluate pipeline, make deletion of the current heuristics an explicit deliverable, and do it in a forward-only cut rather than layering on compatibility shims. - The distinction: a parameter is a constant that traces back to a user-authored value in the model YAML. Compiler-generated intermediate constants (temp columns, normalized weights) are NOT parameters. + ## Scope - ## Acceptance Criteria + ### In Scope - 1. **AC-1: ParamTable struct.** `Plan` gains a `params: ParamTable` field. 
`ParamTable` contains a `Vec` where each entry has: - - `id: String` — stable identifier matching the model YAML source (e.g., `"arrivals"` for a const node, `"arrivals.Order"` for a traffic class rate, `"Queue.wipLimit"` for a topology WIP limit) - - `column: usize` — the column index in the state matrix this parameter fills - - `default: ParamValue` — original value from the model (`Scalar(f64)` for uniform, `Vector(Vec)` for per-bin) - - `kind: ParamKind` — `ConstNode`, `ArrivalRate`, `WipLimit`, `InitialCondition`, `ExprLiteral` + - Typed semantic references in the compiled runtime model + - A compiled analytical descriptor on runtime nodes + - A pure Core analytical evaluator that owns emitted derived metrics and warning facts + - Purification of `/state`, `/state_window`, and `/graph` contracts so clients can consume authoritative analytical and categorical facts + - Removal of analytical identity reconstruction from `StateQueryService` + - Separation of real by-class truth from wildcard fallback projection + - Removal of raw-model-text metadata recovery and duplicate analytical fallback paths once regenerated runtime metadata and evaluator surfaces exist + - Deletion audits and review gates that prevent reintroduction of heuristics - 2. **AC-2: Compiler populates ParamTable.** The compiler registers parameters for: - - Every `kind: const` node (id = node id, value from `values` field) - - Every `traffic.arrivals` entry with `ratePerBin` (id = `"{nodeId}.{classId}"`) - - Every topology node with scalar `wipLimit` (id = `"{topoNodeId}.wipLimit"`) - - Every topology node with `initialCondition.queueDepth` (id = `"{topoNodeId}.init"`) - - Expression literals are NOT parameters (they're inline formula constants, not model inputs) + ### Out of Scope - 3. 
**AC-3: `evaluate_with_params` function.** New public function: - ```rust - pub fn evaluate_with_params(plan: &Plan, overrides: &[(String, ParamValue)]) -> Vec - ``` - - Applies overrides to matching param IDs before the eval loop - - `Scalar(v)` fills all bins with `v`; `Vector(vs)` writes per-bin values - - Unmatched override IDs are ignored (forward-compatible) - - Unknown param IDs do not cause errors - - Returns the filled state matrix (same shape as `evaluate`) + - New analytical primitives such as WIP limits, variability, or constraint enforcement themselves + - SIM/orchestration extraction and run packaging boundary work + - A rewrite of the DAG evaluator or expression language foundation + - Broad UI redesign unrelated to analytical truth and contract purity + - Changes to deterministic artifact semantics unless explicitly versioned - 4. **AC-4: Equivalence.** `evaluate_with_params(plan, &[])` (no overrides) produces identical results to `evaluate(plan)`. A Rust test asserts bitwise equality. + ## Constraints - 5. **AC-5: Full post-eval pipeline.** `eval_model` is refactored to accept optional overrides. When overrides are provided, it calls `evaluate_with_params` instead of `evaluate`, then runs the same post-eval pipeline: class decomposition normalization, proportional allocation propagation, edge series computation, analysis warnings. A new public entry point: - ```rust - pub fn eval_model_with_params( - model: &ModelDefinition, - overrides: &[(String, ParamValue)] - ) -> Result - ``` + - No rewrite-from-scratch. This must be a strangler refactor around the existing deterministic DAG/compiler foundation. + - Forward-only migration. Existing runs, generated fixtures, and approved snapshots that depend on the old analytical/runtime boundary may be deleted and regenerated; no backward-compatibility shim is required. + - No new runtime behavior may depend on reparsing raw semantic strings after compile. 
+ - Each milestone must delete an old heuristic path; do not preserve old inference paths once the replacement exists. + - Contract changes may remove or replace old hint fields in the same forward-only milestone when the named consumers for that milestone are migrated and tested. - 6. **AC-6: Parameter override affects downstream.** Overriding a const node's value propagates through all downstream expressions, queue recurrences, per-class decomposition, and edge series. Test: override `arrivals` from 10 to 20 → verify `served`, `queue_depth`, per-class series, and edge flow all change correctly. + ## Success Criteria - 7. **AC-7: Class arrival rate override.** Overriding a class arrival rate (e.g., `"arrivals.Order"` from 6 to 12) changes the class fraction and propagates through normalization and downstream decomposition. Test: change one class rate, verify normalization invariant still holds. + - [x] Runtime nodes carry compiled semantic references and an authoritative analytical descriptor. + - [x] `StateQueryService` no longer parses raw semantic references or reconstructs analytical identity for runtime behavior. + - [x] Core owns analytical evaluation, emitted derived keys, and warning eligibility facts for snapshot, window, and by-class outputs. + - [x] API contracts publish authoritative analytical and node-category facts across current state and graph surfaces so first-party consumers stop classifying behavior from `kind + logicalType`. + - [x] Fallback wildcard class data is explicit and distinguishable from real by-class truth. + - [x] Runtime metadata readers and analytical query surfaces no longer recover required facts from raw model text or maintain duplicate model-evaluation fallback paths for analytical behavior. + - [x] Remaining E-10 Phase 3 milestones can build on compiled facts instead of adapter heuristics. 
+ - [x] End-to-end pipeline validation proves Sim → Compiler → Runtime → API → Consumer works correctly: Sim-produced YAML (unchanged authoring surface) compiles through the new typed-reference compiler, evaluates correctly, and projects through purified contracts to consumers. - 8. **AC-8: WIP limit override.** Overriding `"{topoNodeId}.wipLimit"` changes the queue's WIP limit and affects overflow. Test: lower WIP limit → verify overflow increases. + ## End-to-End Validation Strategy - 9. **AC-9: Parameter schema extraction.** New public function: - ```rust - pub fn extract_params(plan: &Plan) -> &ParamTable - ``` - Returns the plan's parameter table. Clients use this to discover what can be tweaked, with IDs, kinds, and defaults. This is what the UI will use to auto-generate controls. + E-16 changes Core/Compiler/API but not Sim's authoring surface. The full pipeline must be validated end-to-end even though Sim code is not modified: - 10. **AC-10: Compile-once, eval-many pattern.** Demonstrate the pattern with a Rust test that compiles once, evaluates 10 times with different arrival rates, and verifies each result is independent (no state leakage between evaluations). Measure that subsequent evals are faster than the first (no recompilation). + - **Sim → Compiler boundary:** Sim-produced YAML with raw string references compiles correctly through the new typed-reference compiler. At minimum, run all existing Sim templates through the new compiler and verify no regressions. + - **Compiler → Runtime boundary:** Compiled typed references produce the same evaluated series as the old raw-string path. Parity tests per milestone. + - **Runtime → API boundary:** Purified state projection (snapshot, window, by-class) produces the same analytical outputs. End-to-end API tests with `WebApplicationFactory`. 
+ - **Runtime → Graph/API boundary:** Graph projection and current-state projection both consume compiled facts instead of re-deriving category or analytical identity from strings. + - **API → Consumer boundary:** First-party Blazor/JS consumers read from the new fact surface and produce the same behavior as the old `kind + logicalType` heuristic path. + - **Model / template boundary:** Typed parallelism and reference cleanup must validate through model DTOs, template substitution, and graph projection surfaces; E-16 is not treated as a Core/API-only refactor in implementation planning. + - **Integration test suite:** `tests/FlowTime.Integration.Tests` should include at least one scenario that exercises the full Sim-template → engine-run → state-query → contract-assertion path to guard against boundary drift. - ## Out of Scope + Each milestone is individually shippable, but the final milestone (m-E16-06) must include a cross-cutting integration pass before declaring the epic complete. - - Session management or persistent process (m-E18-02) - - Streaming protocol or MessagePack framing (m-E18-02) - - CLI interface changes (m-E18-02) - - UI parameter controls (m-E17-02) - - Parameter bounds, display names, or template metadata enrichment (future — the parameter table carries IDs and defaults only) - - Expression literal parameterization (inline `8` in `MIN(arrivals, 8)` stays baked — parameterizing expression constants requires expression-tree rewriting, which is a different problem) - - Structural model changes (adding/removing nodes requires recompilation — by design) + ## Risks & Open Questions - ## Key References + | Risk / Question | Impact | Mitigation | + |----------------|--------|------------| + | Compiler/runtime model refactor touches many files across Core and API | High | Sequence compiler-first slices, keep each milestone shippable, and use deletion lists to constrain scope | + | Contract purification may ripple into UI/client code, graph consumers, and 
golden tests | High | Keep the first-party consumer scope explicit and migrate all current state + topology consumers in the contract milestone | + | Existing templates and fixtures rely on loose reference shapes | Medium | Regenerate runs, fixtures, and approved snapshots forward-only rather than carrying compatibility fallbacks | + | Should the public contract expose the analytical descriptor directly or a smaller fact surface? | Medium | Decide in the contract milestone and ship one forward-only fact surface for the named current-state consumers | + | How much warning policy belongs in the analytical evaluator vs a separate analyzer package? | Medium | Resolve explicitly during the evaluator milestone and document the ownership boundary | - - `engine/core/src/plan.rs` — Plan struct, Op enum, ColumnMap - - `engine/core/src/eval.rs` — `evaluate()` function, bin-major loop - - `engine/core/src/compiler.rs` — `compile()`, `eval_model()`, all `Op::Const` emission sites - - `docs/architecture/headless-engine-architecture.md` — overall architecture - - `work/epics/E-18-headless-pipeline-and-optimization/milestone-plan-v2.md` — milestone sequence - - kind: milestone - id: M-002 + ## Milestones + + **Sequencing note:** `m-E16-01` through `m-E16-05` are the architecture gate between wrapped `m-ec-p3a1` and the rest of E-10 Phase 3. `m-E16-01` introduces typed references and parallelism typing; `m-E16-03` is the descriptor-driven deletion point for analytical-identity heuristics; `m-E16-06` publishes the final contract/consumer cut across state and graph surfaces. + + | ID | Title | Summary | Depends On | Status | + |----|-------|---------|------------|--------| + | [m-E16-01-compiled-semantic-references](m-E16-01-compiled-semantic-references.md) | Compiled Semantic References | Replace raw runtime semantic strings with typed references and regenerate dependent runs/fixtures forward-only. 
| none | completed | + | [m-E16-02-class-truth-boundary](m-E16-02-class-truth-boundary.md) | Class Truth Boundary | Separate real by-class truth from wildcard fallback before descriptor and evaluator work depend on it. | m-E16-01 | completed | + | [m-E16-03-runtime-analytical-descriptor](m-E16-03-runtime-analytical-descriptor.md) | Runtime Analytical Descriptor | Compile authoritative analytical identity onto runtime nodes and delete adapter-side logical-type reconstruction. | m-E16-02 | completed | + | [m-E16-04-core-analytical-evaluation](m-E16-04-core-analytical-evaluation.md) | Core Analytical Evaluation | Move analytical values and emitted-series truth into a pure Core evaluator for snapshot, window, and by-class outputs. | m-E16-03 | completed | + | [m-E16-05-analytical-warning-facts-and-primitive-cleanup](m-E16-05-analytical-warning-facts-and-primitive-cleanup.md) | Analytical Warning Facts & Primitive Cleanup | Move warning facts into Core analyzers and finish analytical primitive ownership cleanup. | m-E16-04 | completed | + | [m-E16-06-analytical-contract-and-consumer-purification](m-E16-06-analytical-contract-and-consumer-purification.md) | Analytical Contract & Consumer Purification | Publish authoritative analytical facts and delete named current-state consumer heuristics in one forward-only cut. | m-E16-05 | completed | + + **Forward-only rule:** old run directories, generated fixtures, and approved golden snapshots are not compatibility obligations for this epic. When the runtime boundary changes, regenerate them. + + ## Why This Is a Separate Epic + + E-10 is still the right umbrella for correctness and analytical primitives, but it did not explicitly own the deeper architectural purification that p3a1 exposed. Treating this as a separate epic does three useful things: + + 1. It gives boundary cleanup explicit success criteria rather than burying it in feature work. + 2. It makes deletion of heuristics and duplicate policy a first-class deliverable. 
+ 3. It prevents the remaining Phase 3 work (`p3d` -> `p3c` -> `p3b`) from normalizing an impure adapter/client boundary. + + In short: E-10 found the wound, `m-ec-p3a1` stabilized the current bridge, and E-16 now owns closing it properly. + + ## References + + - [reference/formula-first-engine-refactor-plan.md](reference/formula-first-engine-refactor-plan.md) + - [docs/concepts/nodes-and-expressions.md](../../../docs/concepts/nodes-and-expressions.md) + - [docs/architecture/expression-language-design.md](../../../docs/architecture/expression-language-design.md) + - [work/epics/E-10-engine-correctness-and-analytics/m-ec-p3a1-analytical-projection-hardening.md](../E-10-engine-correctness-and-analytics/m-ec-p3a1-analytical-projection-hardening.md) + - [work/epics/E-10-engine-correctness-and-analytics/spec.md](../E-10-engine-correctness-and-analytics/spec.md) + - kind: epic + id: E-17 frontmatter: - title: Engine Session + Streaming Protocol + title: Interactive What-If Mode status: done - parent: E-18 - depends_on: - - M-001 body: | + **Completed:** 2026-04-12 + **Branch merged:** `epic/E-17-interactive-what-if-mode` → `main` + ## Goal - The Rust engine runs as a persistent process that accepts commands and streams results. `flowtime-engine session` reads length-prefixed MessagePack messages from stdin, holds a compiled Plan in memory, and writes responses to stdout. This is the headless pipeline component — the same protocol works over stdin/stdout (CLI pipes) and WebSocket (UI, via m-E17-01 proxy). + Enable live, interactive recalculation in FlowTime — change a parameter and see results update instantly across the entire model, like a spreadsheet. ## Context - After m-E18-01, the engine can compile once and evaluate many times with different parameters via `evaluate_with_params(plan, overrides)`. But every invocation is still a batch subprocess: spawn → parse YAML → compile → evaluate → write files → exit. 
The overhead of process spawn + file I/O dominates latency (100-500ms). For interactive use, we need a persistent process that holds the compiled Plan and responds to parameter changes in microseconds. + After E-16, the engine will be a pure compiled evaluation surface: typed references, compiled descriptors, pure evaluators. The evaluation itself is already fast enough for live interaction. What's missing is one shared runtime parameter foundation: parameter identity and override points in compiled graphs, reevaluation APIs, and optional enrichment from authored template parameter metadata. Sessions, push delivery, and UI controls build on that foundation. - The session is a stateful loop: + The circuit simulator analogy: SPICE compiles a netlist once, then allows parameter sweeps and interactive probing without re-reading the schematic. FlowTime should do the same. - ``` - stdin → [compile] → hold Plan → [eval overrides] → stdout - → [eval overrides] → stdout - → [eval overrides] → stdout - → EOF → exit - ``` + ## Scope - ### Why MessagePack + ### In Scope - - **Binary f64 arrays.** A 1,000-bin series is 8KB as binary vs ~8KB+ as JSON text (with formatting overhead and parse cost). MessagePack encodes `Vec` as a binary ext type — zero parsing, memcpy-fast. - - **Length-prefixed framing.** 4-byte big-endian length prefix before each message. No newline ambiguity, no incomplete-line bugs. - - **Cross-language.** Native libraries: Rust (`rmp-serde`), JavaScript (`@msgpack/msgpack`), C# (`MessagePack-CSharp`), Python (`msgpack`). - - **Pipe-friendly.** Works over stdin/stdout for CLI composition, over WebSocket for UI. + - Parameter identification in compiled graphs — which nodes are user-editable constants? 
+ - Runtime parameter model — change parameter values without recompilation + - Server-side session management — keep compiled graph alive across requests + - Re-evaluation API — accept parameter changes, return updated results + - Push channel (WebSocket/SignalR) for live UI updates + - UI parameter controls — sliders, numeric inputs bound to model parameters + - Analytical re-evaluation through the pure Core evaluator after parameter change + - Parameter metadata enrichment from authored template parameters when available (titles, ranges, defaults, descriptions) - ## Acceptance Criteria + ### Out of Scope - 1. **AC-1: `session` CLI command.** `flowtime-engine session` enters a persistent loop reading from stdin and writing to stdout. No file arguments required. Exits cleanly on stdin EOF or SIGTERM. + - Optimization loops and automated parameter search (E-18) + - Headless CLI and pipeline embedding (E-18) + - Model topology changes at runtime (requires recompilation — that's fine) + - New analytical primitives + - Bin-by-bin / chunked evaluation (future: feedback simulation) - 2. **AC-2: Length-prefixed MessagePack framing.** Each message is `[4-byte big-endian length][MessagePack payload]`. Both requests (stdin) and responses (stdout) use this framing. Stderr is reserved for human-readable log messages (not protocol). + ## Constraints - 3. **AC-3: `compile` command.** Request: `{ method: "compile", params: { yaml: "" } }`. Response: `{ result: { params: [{ id, kind, default }], series: [{ id, bins, values }], bins, grid } }`. Compiles the model, holds the Plan in session state, evaluates with defaults, returns the parameter schema and initial series. 
+ - Evaluation must feel instantaneous (< 50ms end-to-end for parameter change → UI update) + - No recompilation for parameter value changes — only for structural model changes + - The push channel must not require polling + - Parameter identity, override points, and deterministic reevaluation must be derivable from the compiled graph. Human-facing labels, ranges, defaults, and descriptions may be enriched from template parameter metadata when available. - 4. **AC-4: `eval` command.** Request: `{ method: "eval", params: { overrides: { "arrivals": 15.0, "Queue.wipLimit": 30.0 } } }`. Response: `{ result: { series: { "arrivals": , "served": , ... }, elapsed_us } }`. Re-evaluates with overrides, returns updated series. Must not recompile. Series values are MessagePack binary arrays (not JSON text arrays). + ## Success Criteria - 5. **AC-5: `get_params` command.** Request: `{ method: "get_params" }`. Response: `{ result: { params: [{ id, kind, default }] } }`. Returns the current parameter table from the compiled Plan. + - [x] User can change a model parameter via UI control and see all metrics, charts, and heatmaps update live + - [x] Parameter changes do not trigger recompilation — only re-evaluation + - [x] Compiled graph stays alive in a server-side session; no re-parse/re-compile per interaction + - [x] Analytical results (cycle time, flow efficiency, warnings) update through the pure Core evaluator + - [x] UI parameter controls are generated from model metadata, not hand-coded per template - 6. **AC-6: `get_series` command.** Request: `{ method: "get_series", params: { names: ["arrivals", "served"] } }`. Response: `{ result: { series: { "arrivals": , "served": } } }`. Returns specific series from the current evaluation state. If no names provided, returns all non-internal series. + ## Milestones - 7. **AC-7: Error handling.** Invalid requests return `{ error: { code, message } }`. 
Specific errors: `not_compiled` (eval before compile), `compile_error` (bad YAML), `unknown_method`. The session continues after errors — it does not exit. + | ID | Title | Status | Summary | + |----|-------|--------|---------| + | m-E17-01 | WebSocket Engine Bridge | complete | .NET WebSocket proxy over persistent Rust `flowtime-engine session` subprocess; MessagePack compile/eval/get_series round-trip | + | m-E17-02 | Svelte Parameter Panel | complete | SvelteKit `/what-if` page with live-bound sliders, example model picker, series mini-charts, latency badge | + | m-E17-03 | Live Topology + Charts | complete | Dag-map topology graph with heatmap, per-series charts with hover tooltips, layout stability across tweaks | + | m-E17-04 | Warnings Surface | complete | Engine warnings flow through session protocol into banner, details panel, and topology node badges; capacity-constrained example model drives the demo loop | + | m-E17-05 | Edge Heatmap | complete | Color topology edges by their throughput series mean; wires up the already-present `edgeMetrics` prop in dag-map-view | + | m-E17-06 | Time Scrubber | complete | Bin-position slider switches heatmap (nodes + edges) from mean to per-bin value; vertical crosshair on all charts | - 8. **AC-8: Session state.** The session holds: compiled Plan, current parameter overrides, current state matrix (from most recent eval). `compile` replaces the entire session state. `eval` updates overrides and state. Multiple `eval` calls are independent (no accumulation). + **Final polish (post-m-E17-06):** Advanced demo models (SaaS API platform, e-commerce order pipeline with chained throughput); edge color semantic fixed to destination node load; zero-anchored heatmap normalization; sidebar model picker; warnings as non-shifting overlay with bezier connectors and pulsing animation. 200 vitest tests. - 9. **AC-9: Performance.** For a model with 8 bins and ~10 series, `eval` with scalar overrides completes in under 1ms (excluding I/O). 
A Rust benchmark test evaluates 1,000 times in a loop and asserts total < 1 second. + ## Dependencies - 10. **AC-10: Integration test.** A Rust integration test spawns `flowtime-engine session` as a subprocess, sends compile + eval + eval (with different overrides) + get_params via the MessagePack protocol over stdin/stdout, and verifies all responses are correct. + - E-16 (Formula-First Core Purification) — must complete first + - Shared runtime parameter foundation — owned once and shared with E-18; E-17 consumes it rather than defining a second runtime parameter model - ## Technical Notes + ## References - ### Dependencies to add + - [work/epics/E-16-formula-first-core-purification/reference/formula-first-engine-refactor-plan.md](../E-16-formula-first-core-purification/reference/formula-first-engine-refactor-plan.md) + - SPICE interactive analysis modes as architectural precedent + - kind: epic + id: E-19 + frontmatter: + title: Surface Alignment & Compatibility Cleanup + status: done + body: | + ## Goal - - `rmp-serde` (MessagePack serialization for Rust) — workspace dependency - - `serde` derive on request/response types + Tighten the remaining non-analytical legacy and compatibility surfaces after E-16 so FlowTime exposes current Engine/Sim contracts consistently across first-party UI, Sim, docs, schemas, and examples without carrying stale fallback layers or stripping supported Blazor capability. 
- ### Module structure + ## Context - - `engine/core/src/session.rs` — Session struct, state management, command dispatch - - `engine/core/src/protocol.rs` — Request/Response types, MessagePack framing (read/write) - - `engine/cli/src/main.rs` — `cmd_session()` entry point + E-16 purifies analytical truth and contract facts, but broader repo surfaces still carry compatibility debt that sits outside the formula-first boundary: - ### Message envelope + - first-party Blazor/UI clients still contain endpoint and metrics fallback paths + - parallel Svelte and Blazor surfaces need an explicit shared-contract discipline so one UI does not silently drift from the other + - active UI/template code still carries demo generation and schema-migration residue such as `binMinutes`-based templates + - Sim and UI surfaces still expose transitional endpoint or discovery helpers whose purpose is no longer clearly distinguished from true supported behavior + - Sim still publicly owns template-driven run creation while Engine `/runs` now imports canonical bundles and reads canonical run artifacts + - storage-backed drafts and archived run bundles are active first-party surfaces, but their supported status versus transitional status is not explicit + - catalog-era runtime seeding, endpoints, and UI clients still exist even where active callers say catalogs are no longer used + - template vs draft vs model vs run vs bundle terminology is ambiguous enough that the current Sim orchestration path can be mistaken for the future Time Machine contract + - docs, schemas, and examples still keep some deprecated material on current surfaces instead of moving it to archive/historical space - ```rust - #[derive(Serialize, Deserialize)] - struct Request { - method: String, - #[serde(default)] - params: serde_json::Value, // flexible params per method - } + These are not analytical truth seams, so they should not be folded back into E-16. But they also should not remain unowned. 
If left alone, temporary compatibility layers become a de facto product support promise and the parallel UI surfaces will drift. - #[derive(Serialize)] - struct Response { - #[serde(skip_serializing_if = "Option::is_none")] - result: Option, - #[serde(skip_serializing_if = "Option::is_none")] - error: Option, - } - ``` + This epic creates an explicit post-purification cleanup lane: once E-16 finishes, FlowTime should tighten the rest of its first-party surfaces around one current contract and a small set of explicitly supported user paths. That includes keeping Blazor current as a supported debugging/plan-B surface, not retiring it. - Note: We use `serde_json::Value` as the flexible inner type even though the wire format is MessagePack. MessagePack and JSON share the same data model (maps, arrays, strings, numbers, bools, null). `rmp-serde` serializes/deserializes `serde_json::Value` correctly. + ## Scope - ### Series encoding + ### In Scope - Series data (`Vec`) serializes naturally as MessagePack arrays of floats. For very large series, a future optimization could use MessagePack binary ext type for raw f64 bytes, but the standard array encoding is correct and sufficient for this milestone. 
+ - Inventory of first-party compatibility and legacy seams outside E-16's analytical boundary + - Explicit supported-surface policy across Engine, Sim, Svelte UI, Blazor UI, docs, schemas, and examples + - Explicit boundary between current Sim authoring/orchestration surfaces and the future E-18 Time Machine foundation + - Retirement of first-party UI and Sim runtime fallback paths that are no longer required once canonical endpoints and contracts exist + - Retention/deletion decisions for storage-backed drafts, archived run bundles, bundle import paths, runtime catalogs, and catalog-era caller residue + - Removal or archival of deprecated schema, demo, template, and example material from active current surfaces + - Blazor UI alignment work so `FlowTime.UI` stays current with evolving Engine/Sim contracts without carrying stale compatibility wrappers + - Tests, grep audits, and documentation updates that prevent reintroduction of deleted compatibility layers - ### Post-eval pipeline + ### Out of Scope - After `evaluate_with_params`, the session must also run: - - Class decomposition normalization + proportional allocation - - Edge series computation - - Analysis warnings + - Analytical truth, emitted-series logic, by-class purity, warning fact ownership, or consumer fact publication already owned by E-16 + - New analytical primitives from E-10 Phase 3 (`p3d`, `p3c`, `p3b`) + - Delivering missing Svelte UI product features themselves; E-11 still owns additive Svelte buildout + - Blazor UI retirement or functionality removal as a cleanup goal + - The new Time Machine runtime parameter foundation, reevaluation API, evaluation SDK, CLI/sidecar, or optimization/fitting modes owned by E-18 + - Chunked/stateful execution semantics and streaming/stateful seams owned by later E-18 layers or dedicated streaming work + - Additive backward-compatibility phases for deprecated surfaces + - Generic low-level library fallbacks with no product-contract meaning (for example layout 
placement fallbacks) unless they are later promoted into a first-party compatibility promise - This means the session calls the same post-eval pipeline as `eval_model_with_params`. The simplest approach: the session stores the compiled Plan and the ModelDefinition, and each `eval` call runs `eval_model_with_params` reusing the model but with the new overrides. + ## Constraints - For the compile-once optimization (skip recompilation), a future milestone can cache the Plan separately. For now, recompiling per eval is acceptable if latency is under the AC-9 target. + - Starts after E-16 establishes purified current contracts; runtime/API cleanup milestones should consume those contracts rather than redefine them + - Does not block E-10 Phase 3 resume by default; this runs as a parallel post-purification cleanup lane unless a specific milestone intentionally touches a shared contract gate + - Svelte and Blazor run in parallel; shared client and contract changes must keep both UIs aligned rather than privileging only one surface + - Forward-only: archive or delete deprecated surfaces instead of carrying new compatibility shims + - Remove stale wrappers and fallback logic without deleting supported Blazor debugging/operator workflows unless a separate decision explicitly approves it + - Each milestone must make the supported-vs-historical boundary narrower, not broader - ## Out of Scope + ## Boundary To E-18 - - WebSocket transport (m-E17-01) - - .NET bridge for session mode (m-E17-01) - - UI parameter controls (m-E17-02) - - Parameter sweep batch mode (m-E18-03) - - Request IDs / multiplexing (single-client, sequential for now) - - Authentication or access control - - TLS/encryption + E-19 and E-18 are adjacent but they do different work: - ## Key References + - **E-19** narrows or deletes the current first-party residue around Sim authoring/orchestration, storage-backed drafts, archived bundle refs, runtime catalogs, and ambiguous caller paths. 
It decides what remains supported on today's surfaces. + - **E-18** builds the replacement Time Machine foundation: runtime parameter identity, deterministic overrides, reevaluation APIs, evaluation SDK, and CLI/sidecar over compiled graphs. - - `engine/core/src/compiler.rs` — `compile()`, `eval_model_with_params()` - - `engine/core/src/plan.rs` — `ParamTable`, `ParamValue` - - `engine/core/src/eval.rs` — `evaluate_with_params()` - - `engine/cli/src/main.rs` — existing CLI command dispatch - - `docs/architecture/headless-engine-architecture.md` — protocol design - - [rmp-serde crate](https://crates.io/crates/rmp-serde) — MessagePack for Rust - - [MessagePack spec](https://msgpack.org/) — wire format - - kind: milestone - id: M-003 - frontmatter: - title: Tiered Validation - status: done - parent: E-18 - body: | - ## Goal + The current Sim orchestration path is therefore not the default path forward for Time Machine evaluation. If an E-19 deletion depends on new capability, that dependency should be made explicit against E-18 rather than preserving the current residue indefinitely. - Expose model validation as a first-class, client-agnostic Time Machine operation. - Three tiers callable from the .NET SDK and from a new `POST /v1/validate` HTTP - endpoint. Tier 1 (schema) is also added to the Rust engine session protocol so - the Svelte What-If UI can get cheap per-edit feedback without a full compile. 
+ ## Current Findings To Use - ## Scope + | Surface / Residue | Current Role | Risk If Left Unowned | E-19 Intent | + |------------------|--------------|----------------------|-------------| + | Sim `/api/v1/orchestration/runs` and `/api/v1/drafts/run` | Current first-party template-driven run creation | Transitional Sim execution APIs harden into a support obligation before the Time Machine replacement ships | Inventory, narrow, retain only if explicitly supported as an authoring surface | + | `data/storage/drafts` and draft CRUD endpoints | Saved editable YAML working copies | Hidden product surface with unclear support and lifecycle | Decide whether draft authoring is a supported Sim feature or transitional residue | + | `data/storage/runs` and `bundleRef`-based archive flow | Portable archived run bundles | Duplicates canonical runs and makes the archive path look like the primary run contract | Decide whether bundle refs stay as an interchange/import surface or are retired | + | Engine `/runs` import path via bundle path/archive/ref | Canonical bundle import, not template orchestration | Confused with Time Machine run creation | Keep only if import is explicitly supported; document it as import, not evaluation ownership | + | Runtime catalogs, catalog endpoints, and UI catalog clients | Residue from earlier Sim/catalog direction | Zombie compatibility surface that invites new callers | Retire from active first-party paths unless an explicit support case survives inventory | + | Template/draft/model/run/bundle terminology drift | Ambiguous product language | Wrong architecture hardens by naming accident | Publish one boundary ADR and use it as the inventory baseline | - **Tier 1 — Schema:** YAML parses + JSON schema validates + class references resolve. - Backed by `ModelSchemaValidator.Validate` + `ModelValidator.Validate` in Core. - Cheap: no compile, no eval. + ## Success Criteria - **Tier 2 — Compile:** Schema (tier 1) + model compiles into a Graph. 
- Backed by `ModelCompiler.Compile` + `ModelParser.ParseModel` in Core. - Catches structural errors (unresolved references, expression errors). + - [x] One explicit supported compatibility matrix exists for Engine, Sim, Svelte UI, Blazor UI, docs, schemas, and examples + - [x] One explicit terminology and ownership matrix exists for template, draft, model, run, bundle, and catalog surfaces + - [x] First-party clients no longer maintain duplicate endpoint, metrics, or health fallback logic where the canonical contract exists (m-E19-04: stale `IFlowTimeSimApiClient.RunAsync`/`GetIndexAsync`/`GetSeriesAsync` deleted and callers rewired to supported `CreateRunAsync` + Engine query surface) + - [x] Current Sim orchestration/storage/catalog residue is either explicitly supported with scope boundaries or removed from active first-party paths + - [x] Active UI/template surfaces no longer generate or promote deprecated schema shapes such as `binMinutes`-based demo templates (m-E19-03: Blazor mock template service, sample fixture, CLI verbose label, and UI test keys all rewritten to `binSize`/`binUnit`) + - [x] Legacy examples, docs, and schema references are either archived/historical or deleted; current docs present one canonical surface (m-E19-03: schema-migration example YAMLs moved to `examples/archive/`; stale `docs/ui/template-integration-spec.md` moved to `docs/archive/ui/`; active docs cleaned) + - [x] Blazor UI support policy is explicit: it remains a supported debugging/plan-B surface and consumes current Engine/Sim contracts without stale compatibility wrappers + - [x] E-18 planning remains clean: no current Sim draft/catalog/bundle choreography is treated as the default programmable/Time Machine contract + - [x] Grep and regression audits prove targeted legacy/fallback helpers are removed or isolated to historical/archive surfaces only (m-E19-02: 21 guards; m-E19-03: 11 guards; m-E19-04: 11 guards, all 43 passing) - **Tier 3 — Analyse:** Compile (tier 2) + 
deterministic evaluation + invariant - checks. Backed by `TemplateInvariantAnalyzer.Analyze` in Sim.Core. - Catches semantic issues (conservation violations, capacity breaches). + ## Risks & Open Questions - **In scope:** - - `src/FlowTime.TimeMachine/Validation/` — `TimeMachineValidator` (static service), - `ValidationResult`, `ValidationError`, `ValidationWarning`, `ValidationTier` enum - - `src/FlowTime.API/Endpoints/ValidationEndpoints.cs` — `POST /v1/validate` - - Rust engine session — new `validate_schema` command (tier 1 via session protocol) - - Unit tests: `tests/FlowTime.TimeMachine.Tests/Validation/` - - API tests: `tests/FlowTime.Api.Tests/ValidationEndpointsTests.cs` - - Rust integration tests: session `validate_schema` command + | Risk / Question | Impact | Mitigation | + |----------------|--------|------------| + | Which operational fallbacks are true compatibility seams vs useful local-dev resilience? | Medium | Milestone 1 creates the supported-surface inventory and keeps only explicitly justified operational resilience | + | Parallel Svelte and Blazor work may drift if sync ownership stays implicit | High | Keep milestone 4 about Blazor support alignment, plus shared contract audits, rather than retirement | + | Some docs/examples are still useful as migration references | Low | Move them to archive/historical locations instead of keeping them on current surfaces | + | Consumers outside first-party UI may still read deprecated endpoints/fields | Medium | Make the supported-surface policy explicit before deletion milestones start | - **Out of scope:** - - Line/column mapping in error messages - - Editor LSP integration - - Svelte UI changes (validate button) — separate UI milestone + ## Sequencing - ## Contract + This epic starts immediately after E-16 as a post-purification cleanup lane. It should run in parallel with resumed E-10 Phase 3 work and final E-11 parity work, not silently replace them. 
- ### HTTP Endpoint + - E-16 removes analytical truth and consumer-fact reconstruction seams. + - E-19 removes the remaining non-analytical legacy, fallback, and current-surface debt around those purified contracts. + - E-11 continues as a parallel UI track; E-19 keeps both first-party UIs aligned to the same current Engine/Sim contracts. - ``` - POST /v1/validate - Content-Type: application/json + ## Milestones - { - "yaml": "...", - "tier": "schema" | "compile" | "analyse" - } - ``` + | ID | Title | Summary | Depends On | Status | + |----|-------|---------|------------|--------| + | m-E19-01-supported-surface-inventory | Supported Surface Inventory, Boundary ADR & Exit Criteria | Inventory compatibility seams, define supported vs historical surfaces, publish the terminology/ownership ADR, and pin retention/deletion gates for drafts, bundles, catalogs, and import paths. | E-16 | completed | + | m-E19-02-sim-authoring-and-runtime-boundary-cleanup | Sim Authoring & Runtime Boundary Cleanup | Narrow Sim to explicitly supported authoring/orchestration surfaces, remove transitional catalog/runtime callers, and keep Engine import/query ownership explicit. | m-E19-01 | completed | + | m-E19-03-schema-template-example-retirement | Schema, Template & Example Retirement | Remove or archive deprecated schema, demo template, and example material from active current surfaces. | m-E19-01 | completed | + | m-E19-04-blazor-support-alignment | Blazor Support Alignment | Remove stale `FlowTime.UI` compatibility wrappers, keep Blazor aligned with current Engine/Sim contracts, and define clear supported debugging/operator workflows alongside the parallel Svelte UI. 
| m-E19-02, m-E19-03 | completed | - Response (200 always, errors in body): + ## Candidate Retention / Decision Matrix - ```json - { - "tier": "schema", - "isValid": false, - "errors": [ - { "message": "Unknown class reference: 'premium'" } - ], - "warnings": [] - } - ``` + ### Retain Only If Explicitly Supported - Tier 3 analyse response includes warnings in addition to errors: + - Repo-backed templates and template metadata served by Sim as an authoring surface + - Engine `/v1/runs/*` query/read endpoints over canonical run artifacts + - Blazor and Svelte as parallel first-party UIs consuming current contracts + - Storage-backed drafts only if saved draft authoring remains a supported user workflow - ```json - { - "tier": "analyse", - "isValid": true, - "errors": [], - "warnings": [ - { "nodeId": "Queue", "code": "high_utilization", "message": "..." } - ] - } - ``` + ### Decide In `m-E19-01` - ### Session Protocol Command (`validate_schema`) + - Sim `/api/v1/orchestration/runs` as a supported authoring convenience vs transitional surface + - Sim `/api/v1/drafts/*` lifecycle and whether storage-backed drafts remain a product promise + - `data/storage/runs` and `bundleRef` as an explicit interchange/import surface vs transitional archive residue + - Engine bundle import endpoints as supported import workflows vs temporary migration surfaces + - Runtime catalog seeding, catalog endpoints, and catalog-aware UI clients - ``` - request: { method: "validate_schema", params: { yaml: "..." } } - response (valid): { result: { is_valid: true, errors: [] } } - response (invalid): { result: { is_valid: false, errors: ["..."] } } - ``` + ### Delete / Archive Candidates - Tier 2 (compile) is already served by the existing `compile` command, which - returns `error: { code: "compile_error", ... }` on failure. 
+ - `FlowTime.UI` client fallbacks and legacy endpoint probes that duplicate canonical Engine/Sim contracts + - stale metrics/state reconstruction in first-party Blazor consumers that survive E-16 + - demo/template generation paths that still emit deprecated schema fields on active surfaces + - active catalog selection, mock catalog services, and placeholder `CatalogId = "default"` callers that are no longer part of the supported path + - active docs/examples that present deprecated schema shapes as current guidance + - Sim service compatibility shims whose only purpose is preserving transitional first-party clients that have already been replaced + - docs or examples that imply the current Sim orchestration/storage choreography is the future programmable/Time Machine contract - ## Acceptance Criteria + ## References - - [x] `TimeMachineValidator.Validate(yaml, ValidationTier.Schema)` returns errors for invalid YAML - - [x] `TimeMachineValidator.Validate(yaml, ValidationTier.Compile)` catches structural errors (bad node refs, bad expressions) - - [x] `TimeMachineValidator.Validate(yaml, ValidationTier.Analyse)` returns warnings from invariant analyzer - - [x] `POST /v1/validate` responds 200 with `{ isValid, tier, errors, warnings }` for all three tiers - - [x] Invalid tier value → 400 Bad Request - - [x] Empty/null yaml → 400 Bad Request - - [x] Rust session `validate_schema` returns `{ is_valid, errors }` without full compile - - [x] `rg "FlowTime\.Generator" src/ tests/` still zero (no regressions) - - [x] `dotnet test FlowTime.sln` all green; Rust `cargo test` all green - - kind: milestone - id: M-004 + - `work/epics/E-16-formula-first-core-purification/spec.md` + - `work/epics/E-11-svelte-ui/spec.md` + - `work/epics/E-18-headless-pipeline-and-optimization/spec.md` + - `docs/architecture/template-draft-model-run-bundle-boundary.md` + - `ROADMAP.md` + - `work/gaps.md` + - kind: epic + id: E-20 frontmatter: - title: Generator Extraction → TimeMachine + title: 
Matrix Engine status: done - parent: E-18 body: | + > **Status note:** m-E20-01–10 all complete + + **Owner:** Engine + ## Goal - Rename `FlowTime.Generator` → `FlowTime.TimeMachine`. Move all classes, update all - references in consumers (src + tests), remove `FlowTime.Generator` from the solution. - Pure structural refactor — no behavior change, all tests green, no coexistence window - (per D-2026-04-07-019 Path B). + Replace the C# object-graph evaluation engine with a Rust-based column-store + evaluation-plan engine. The new engine reads the same YAML model files, produces identical output artifacts, and ships as a standalone CLI binary (`flowtime-engine`). This is the foundation for E-17 (Interactive What-If) and E-18 (Time Machine). - ## Scope + ## Context - **In scope:** - - Create `src/FlowTime.TimeMachine/FlowTime.TimeMachine.csproj` with identical dependencies - - Move all Generator source files; update `FlowTime.Generator.*` namespaces → `FlowTime.TimeMachine.*` - - Rename `tests/FlowTime.Generator.Tests/` → `tests/FlowTime.TimeMachine.Tests/`; update its csproj - - Update project references in: FlowTime.Cli, FlowTime.Sim.Service, FlowTime.API, FlowTime.Api.Tests, FlowTime.Cli.Tests, FlowTime.Integration.Tests - - Update `using FlowTime.Generator.*` → `using FlowTime.TimeMachine.*` across all source files - - Register TimeMachine in FlowTime.sln; remove Generator entry - - Delete `src/FlowTime.Generator/` entirely + The current C# engine (`FlowTime.Core`) evaluates models via an object graph: `INode` implementations, `Dictionary` memoization, defensive copies, and class hierarchy dispatch. It works correctly (1287 tests, all passing) but the representation fights the domain: - **Out of scope:** - - Tiered validation (m-E18-06) - - Any behavior changes whatsoever + - **Defensive copying**: `Series` clones on construction AND on `ToArray()` — two allocations per node per evaluation. 
+ - **Hash lookups during evaluation**: `Dictionary` instead of direct array indexing. + - **No incremental re-evaluation**: changing one input requires re-evaluating the entire graph. + - **Feedback subgraphs require special handling**: the bin-by-bin evaluator added in E-10 p3b is correct but is a bridge to the matrix model where bin-sequential evaluation is the default. + - **No plan introspection**: the evaluation is opaque code, not inspectable data. - ## Acceptance Criteria + The research doc (`docs/research/engine-rewrite-language-and-representation.md`) establishes that the engine is fundamentally an array programming problem. All series live in one contiguous matrix (`double[seriesCount × binCount]`), and the evaluation plan is an ordered list of operations that read input columns and write output columns. - - [x] `src/FlowTime.TimeMachine/` exists; `src/FlowTime.Generator/` is gone - - [x] `tests/FlowTime.TimeMachine.Tests/` exists; `tests/FlowTime.Generator.Tests/` is gone - - [x] `dotnet build FlowTime.sln` succeeds with zero errors - - [x] `dotnet test FlowTime.sln` passes with the same test count - - [x] `rg "FlowTime\.Generator" src/ tests/ --include="*.cs" --include="*.csproj"` returns zero matches - - [x] Solution file contains TimeMachine entry; Generator entry is absent + ### Why Rust - ## Namespace Mapping + - Ownership eliminates all defensive copying — `&[f64]` slices are immutable by construction. + - `enum Op` + `match` replaces 6 class files and an interface hierarchy with ~80 lines. + - Compiles to native CLI binary (zero runtime, ~5-10 MB) and to WebAssembly (engine in browser for E-17). + - Single allocation during evaluation regardless of model size. + - SIMD opportunity for element-wise operations (70-80% of typical models). 
- | Old | New | - |-----|-----| - | `FlowTime.Generator` | `FlowTime.TimeMachine` | - | `FlowTime.Generator.Artifacts` | `FlowTime.TimeMachine.Artifacts` | - | `FlowTime.Generator.Capture` | `FlowTime.TimeMachine.Capture` | - | `FlowTime.Generator.Models` | `FlowTime.TimeMachine.Models` | - | `FlowTime.Generator.Orchestration` | `FlowTime.TimeMachine.Orchestration` | - | `FlowTime.Generator.Processing` | `FlowTime.TimeMachine.Processing` | - - kind: milestone - id: M-005 - frontmatter: - title: ITelemetrySource Contract - status: done - parent: E-18 - body: | - ## Goal + ### Why now - Define `ITelemetrySource` as the formal input contract for the Time Machine's external data - surface, with two concrete implementations from day one. Satisfies the deferred portion of - the spec's m-E18-01b scope (the tiered-validation half shipped as m-E18-06; this delivers - the source-contract half). + E-17 (Interactive What-If) needs incremental re-evaluation (<50ms response). E-18 (Time Machine) needs parameter sweeps (1 compile + N partial replays). Both are economically different with the matrix model vs without it. Building E-17/E-18 on the object graph means building them twice. + + The bin-by-bin feedback evaluator from E-10 p3b validates the approach — it IS the matrix model scoped to feedback subgraphs. ## Scope - **`ITelemetrySource` interface** — in `src/FlowTime.TimeMachine/Telemetry/`: - - `ITelemetrySource` — single method: `Task ReadAsync(CancellationToken)` - - `TelemetryData` — typed payload: grid, series dictionary, optional provenance metadata + ### In scope - **`CanonicalBundleSource : ITelemetrySource`** — reads the canonical bundle format - (`manifest.json` + CSV series files) written by the existing `TelemetryBundleBuilder` in - `FlowTime.Core`. Concrete class (not behind a second interface). + 1. 
**Rust crate** (`flowtime-core`) implementing: + - YAML model deserialization (same schema as C# `ModelDefinition`) + - Model compilation: YAML → column map + evaluation plan + - Topology synthesis (same logic as `ModelCompiler`: queue nodes, retry echo, WIP overflow routing, cycle validation) + - Expression parsing and compilation to plan ops + - Evaluation: execute plan against flat matrix + - Derived metrics as additional plan ops (utilization, latency, throughput ratio, cycle time, flow efficiency, Cv, Kingman) + - Invariant analysis (conservation checks, non-negativity, capacity bounds) as matrix column arithmetic + - WIP limit enforcement with overflow routing + - Feedback subgraph evaluation (bin-by-bin — the default mode for sequential ops) + - SHIFT-based backpressure (falls out naturally from bin-sequential evaluation) + - Constraint allocation (proportional allocation when demand > capacity) + - Router flow materialization + - Dispatch schedule gating + - Artifact writing: per-series CSVs, `series/index.json`, `run.json` (warnings, metadata) + - Tiered validation: schema validation → compilation → analysis (dry-run without artifact writing) + + 2. **CLI binary** (`flowtime-engine`) with commands: + - `eval --output ` — full evaluation + artifact writing + - `validate ` — tiered validation (schema → compile → analyse), no artifacts + - `plan ` — print the evaluation plan (for inspection/debugging) + - stdin/stdout support for pipeline composition + + 3. **Parity test harness**: a set of reference models (extracted from the C# test suite) with approved output artifacts. The Rust engine must produce bitwise-identical outputs. If discrepancies are found that reveal C# bugs, fix C# and update the approved outputs. + + 4. **Integration bridge**: the .NET API calls the Rust binary as a subprocess, reads its output artifacts, and serves existing API responses. No API contract changes. 
- **`FileCsvSource : ITelemetrySource`** — reads `file:`-referenced CSV inputs, extracting - the existing file-read logic already in `FlowTime.Core` into a named, injectable - implementation. + ### Out of scope - **In scope:** - - `src/FlowTime.TimeMachine/Telemetry/ITelemetrySource.cs` - - `src/FlowTime.TimeMachine/Telemetry/TelemetryData.cs` - - `src/FlowTime.TimeMachine/Telemetry/CanonicalBundleSource.cs` - - `src/FlowTime.TimeMachine/Telemetry/FileCsvSource.cs` - - Unit tests: `tests/FlowTime.TimeMachine.Tests/Telemetry/` + - API hosting (stays in C#/.NET) + - Topology metadata projection for `/graph` endpoint (stays in C#) + - UI work + - WebAssembly compilation (future — architecture supports it, but not this epic) + - In-process FFI from .NET to Rust (future optimization) + - E-17 interactive features (parameter sessions, push channel) + - E-18 advanced modes (sweep, optimize, fit) — though the plan structure enables them - **Out of scope:** - - `ITelemetrySink` — explicitly deferred per D-2026-04-07-020 - - Real-world format adapters (Prometheus, OTEL, BPI) — m-E18 telemetry adapters milestone - - Time Machine `Evaluate` / `Reevaluate` consuming the source — separate milestone - - HTTP endpoint changes + ## Architecture - ## Contract + ### Data representation - ### `ITelemetrySource` + ``` + State = f64[series_count × bin_count] // flat 1D, row-major + ``` - ```csharp - namespace FlowTime.TimeMachine.Telemetry; + Each row is a series. Each column is a time bin. All series contiguous in memory. - /// - /// Input contract for external data fed into the Time Machine. - /// Each implementation snapshots data from its source at ReadAsync time, - /// returning a deterministic TelemetryData payload the Time Machine can consume. 
- /// 
- public interface ITelemetrySource
- {
- Task<TelemetryData> ReadAsync(CancellationToken cancellationToken = default);
+ ### Evaluation plan
+
+ ```rust
+ enum Op {
+ Const { out: usize, values: Vec<f64> },
+ VecAdd { out: usize, a: usize, b: usize },
+ VecSub { out: usize, a: usize, b: usize },
+ VecMul { out: usize, a: usize, b: usize },
+ VecDiv { out: usize, a: usize, b: usize },
+ VecMin { out: usize, a: usize, b: usize },
+ VecMax { out: usize, a: usize, b: usize },
+ Clamp { out: usize, val: usize, lo: usize, hi: usize },
+ Shift { out: usize, input: usize, lag: usize },
+ Convolve { out: usize, input: usize, kernel: Vec<f64> },
+ QueueRecurrence { out: usize, inflow: usize, outflow: usize, loss: Option<usize>, init: f64, wip_limit: Option<f64>, overflow_out: Option<usize> },
+ DispatchGate { out: usize, input: usize, period: usize, phase: usize, capacity: Option<f64> },
+ ScalarAdd { out: usize, input: usize, k: f64 },
+ ScalarMul { out: usize, input: usize, k: f64 },
+ Floor { out: usize, input: usize },
+ Ceil { out: usize, input: usize },
+ Round { out: usize, input: usize },
+ Mod { out: usize, a: usize, b: usize },
+ Step { out: usize, input: usize, threshold: usize },
+ Pulse { out: usize, period: usize, phase: usize, amplitude: Option<f64> },
+ // Router ops, constraint allocation ops, derived metric ops...
+ }
+ ```

- ### `TelemetryData`
+ ### Evaluator

- ```csharp
- public sealed class TelemetryData
- {
- /// Grid definition (bins, binSize, binUnit).
- public required GridDefinition Grid { get; init; }
+ ```rust
+ fn evaluate(plan: &[Op], bins: usize, series_count: usize) -> Vec<f64> {
+ let mut state = vec![0.0; series_count * bins];
+ for op in plan {
+ execute(op, &mut state, bins);
+ }
+ state
+ }
+ ```

- /// Node-id → double[] series values (one per bin).
- public required IReadOnlyDictionary<string, double[]> Series { get; init; }
+ ### Column map

- /// Optional provenance: source path, captured-at timestamp, content hash.
- public TelemetryProvenance? 
Provenance { get; init; } - } + Bidirectional mapping between human-readable series names and integer column indices. Produced once during compilation. + + ### Pipeline - public sealed class TelemetryProvenance - { - public string? SourcePath { get; init; } - public DateTimeOffset? CapturedAt { get; init; } - public string? ContentHash { get; init; } - } + ``` + YAML model + │ + │ deserialize + ▼ + ModelDefinition (Rust structs) + │ + │ compile: topo-sort + assign column indices + emit ops + ▼ + EvaluationPlan + ColumnMap + │ + │ execute: fill the matrix + ▼ + State Matrix (f64[series_count × bins]) + │ + │ derive: append derived metric ops, re-execute tail + ▼ + Full Matrix (with derived columns) + │ + │ analyse: conservation checks, warnings + ▼ + Warnings + │ + │ write: column map tells you which column is which series + ▼ + Artifacts (CSVs + index.json + run.json) ``` - ### `CanonicalBundleSource` + ## Constraints - Reads a canonical bundle directory (containing `manifest.json` and `series/*.csv`). + - The YAML model schema does not change. Existing models must work unmodified. + - Output artifacts must be bitwise-identical to C# output (modulo discovered C# bugs). + - The Rust crate must compile on Linux x86_64 (devcontainer target). macOS and Windows are nice-to-have. + - No external runtime dependencies (no JVM, no .NET, no Python). Single static binary. + - The .NET API integration must not require changes to API contracts or client code. - ```csharp - public sealed class CanonicalBundleSource : ITelemetrySource - { - public CanonicalBundleSource(string bundleDirectory) { ... } - public Task ReadAsync(CancellationToken cancellationToken = default) { ... } - } - ``` + ## Risks - ### `FileCsvSource` + 1. **Expression parser parity**: The C# expression parser (`FlowTime.Expressions`) supports a specific syntax. The Rust parser must match it exactly. Mitigation: extract expression test cases as a shared fixture. + 2. 
**Floating-point parity**: Different compilers may produce different results for the same math. Mitigation: use IEEE 754 double precision, avoid platform-specific optimizations, test with bitwise comparison. + 3. **YAML deserialization edge cases**: `serde_yaml` may handle edge cases differently from the C# YAML deserializer. Mitigation: test with all existing model fixtures. + 4. **Model compiler complexity**: `ModelCompiler` has significant logic (queue synthesis, retry echo, WIP overflow routing, constraint wiring). Mitigation: port method-by-method with test parity at each step. + 5. **Devcontainer Rust toolchain**: Need to add Rust to the devcontainer. Mitigation: `rustup` + `cargo` install is straightforward. - Reads a single CSV file as a named series. + ## Success Criteria - ```csharp - public sealed class FileCsvSource : ITelemetrySource - { - /// Path to the CSV file. - /// Node ID to assign the series to. - /// Grid definition to validate series length against. - public FileCsvSource(string filePath, string seriesId, GridDefinition grid) { ... } - public Task ReadAsync(CancellationToken cancellationToken = default) { ... 
} - } - ``` + - [ ] `flowtime-engine eval` produces bitwise-identical artifacts to C# engine on all reference models + - [ ] `flowtime-engine validate` reports the same warnings/errors as C# validation + - [ ] `flowtime-engine plan` prints a human-readable evaluation plan + - [ ] All reference models evaluate correctly (parity harness green) + - [ ] .NET API can call the Rust binary and serve identical API responses + - [ ] Single static binary, no runtime dependencies + - [ ] Evaluation performance equal to or faster than C# (not a hard gate, but expected) + - [ ] Feedback subgraphs (SHIFT-based backpressure) work without special-casing — bin-sequential evaluation handles them naturally - ## Acceptance Criteria + ## Dependencies - - [x] `ITelemetrySource` interface exists in `FlowTime.TimeMachine.Telemetry` - - [x] `TelemetryData` carries Grid + Series + optional Provenance - - [x] `CanonicalBundleSource.ReadAsync` reads a bundle directory and returns correct series values - - [x] `FileCsvSource.ReadAsync` reads a single CSV and returns the series under the specified ID - - [x] Both implementations compile and have passing unit tests (23 tests across 2 suites) - - [x] `ITelemetrySink` is **not** introduced (explicitly documented as deferred) - - [x] `rg "FlowTime\.Generator" src/ tests/` still zero (no regressions) - - [x] `dotnet test FlowTime.sln` all green (72 TimeMachine tests, 0 failures) - - kind: milestone - id: M-006 + - E-10 complete (provides the analytical primitives the Rust engine must reproduce) + - E-16 complete (provides the pure evaluation model the Rust engine implements) + + ## References + + - `docs/research/engine-rewrite-language-and-representation.md` — detailed research and design + - `src/FlowTime.Core/` — current C# engine (source of truth for semantics) + - `src/FlowTime.Core/Compiler/ModelCompiler.cs` — compilation logic to port + - `src/FlowTime.Core/Execution/Graph.cs` — evaluation logic to replace + - `src/FlowTime.Core/Expressions/` 
— expression system to port + - `tests/FlowTime.Core.Tests/` — test cases defining expected behavior + + ## Milestones + + | ID | Title | Summary | Status | + |----|-------|---------|--------| + | m-E20-01 | Scaffold, types, and parsers | Rust crate, model types with serde, YAML deserialization, expression parser. Reference model fixtures. Devcontainer Rust toolchain. | complete | + | m-E20-02 | Compiler and core evaluator | Column map, topo sort, plan generation for const/expr nodes. Evaluator loop + element-wise ops. First end-to-end parity on simple models. | complete | + | m-E20-03 | Topology and sequential ops | Queue synthesis, QueueRecurrence, Shift, Convolve, DispatchGate, PMF, WIP limits with overflow. Feedback subgraphs (bin-sequential). | complete | + | m-E20-04 | Routing and constraints | Router flow materialization, constraint allocation, multi-class flow distribution — all as plan ops. | complete | + | m-E20-05 | Derived metrics and analysis | Utilization, latency, cycle time, Cv, Kingman as plan ops. Invariant analysis as column arithmetic. Warnings. | complete | + | m-E20-06 | Artifacts, CLI, and integration | CSV/JSON artifact writer. CLI (eval, validate, plan). | complete | + | m-E20-07 | .NET subprocess bridge | SHA256 hashing + manifest.json. RustEngineRunner subprocess bridge. Config switch. Parity tests. | complete | + | m-E20-08 | Full parity harness | All 21 fixtures tested against C# engine. `outputs:` filtering. Green/red parity matrix. | complete | + | m-E20-09 | Per-class decomposition and edge series | Per-class columns, edge metrics, class assignment. Engine core feature-complete. | complete | + | m-E20-10 | Artifact sink parity | Full directory layout. StateQueryService compatible. RunArtifactWriter replaceable. 
| complete | + + ### Milestone progression + + Each milestone delivers a progressively more capable engine, testable against C# reference output at every stage: + + - After **M1**: parse any model YAML and any expression — data layer complete + - After **M2**: evaluate simple models (const + expr) — first correct series output + - After **M3**: evaluate models with queues, SHIFT, WIP, backpressure — core flow dynamics + - After **M4**: evaluate models with routers, constraints, classes — full flow algebra + - After **M5**: produce derived metrics and warnings — full analytical layer + - After **M6**: produce artifacts, run from CLI — standalone binary + - After **M7**: .NET can call the Rust engine as a subprocess — bridge operational + - After **M8**: parity verified against all fixtures — no surprises before core work + - After **M9**: engine returns complete results (classes, edges) — evaluation-complete + - After **M10**: artifact sink produces full layout — C# RunArtifactWriter replaceable, E-17/E-18 unblocked + - kind: epic + id: E-21 frontmatter: - title: Parameter Sweep + title: Svelte Workbench & Analysis Surfaces status: done - parent: E-18 body: | + **Completed:** 2026-04-28 + ## Goal - Implement parameter sweep as a first-class Time Machine operation: given a model YAML, a - const-node ID, and an array of values, evaluate the model once per value and return a - structured table of (param_value → series outputs). + Transform the Svelte UI from a Blazor-parallel clone into the primary platform for expert flow analysis and Time Machine surfaces, using a workbench paradigm (topology as navigation + inspection panel) instead of the Blazor overlay approach. 
- Builds on: - - m-E18-01 `evaluate_with_params` in the Rust engine (compile-once foundation) - - m-E18-07 `FlowTime.TimeMachine` project (host for the sweep domain model) - - m-E18-08 `ITelemetrySource` (pattern for injectable evaluation contracts) + ## Context + + The fork decision (2026-04-15) declared Svelte the platform for all new surfaces and Blazor maintenance-only. E-11 delivered M1-M4 + M6: scaffold, API layer, topology via dag-map, timeline with heatmap scrubbing, and run orchestration. E-17 added the what-if page with live parameter manipulation (200 vitest + 26 Playwright). + + What exists in Svelte today: + - SvelteKit + shadcn-svelte + Tailwind v4, dark/light theme toggle + - Topology DAG via dag-map with heatmap overlays and timeline scrubbing + - Run orchestration (template cards, bundle reuse) + - What-if page (E-17 — WebSocket parameter panel with real-time topology updates) + - Routes: `/`, `/health`, `/run`, `/what-if`, `/time-travel/topology`, `/time-travel/dashboard`, `/time-travel/artifacts`, `/engine-test` + + What's missing: + - Node/edge inspection (no way to click a node and see its metrics) + - Analysis surfaces for Time Machine APIs (sweep, sensitivity, goal-seek, optimize, validate — all built in E-18 but with no UI) + - Heatmap view (nodes-x-bins temporal pattern grid) + - Compact, information-dense layout (current shadcn defaults are too spacious for a technical workbench) + - Distinctive visual identity (current theme is generic shadcn) + + The UI paradigm proposal (`work/epics/unplanned/ui-workbench/reference/ui-paradigm.md`) established the direction: topology as navigation surface (structure + one color dimension), workbench panel for depth (click-to-pin cards), and layered views (heatmap, decomposition, comparison). E-21 implements that paradigm and adds the first Time Machine analysis surfaces. 
+ + ### Supersedes + + - **E-11 M5** (Inspector & Feature Bar) — evolves into m-E21-01/02 workbench paradigm + - **E-11 M7** (Dashboard) — deferred; workbench + heatmap cover the same ground better + - **E-11 M8** (Polish) — absorbed into m-E21-08 + + E-11 remains paused at M6 as a completed historical track. + + ### Relationship to other unplanned epics + + - **UI Workbench & Topology Refinement** (`work/epics/unplanned/ui-workbench/spec.md`) — m-E21-01 and m-E21-02 implement its goals (G1-G5) + - **UI Analytical Views** (`work/epics/unplanned/ui-analytical-views/spec.md`) — m-E21-06 delivers the heatmap view; decomposition and comparison views are future extensions + - **Expert Authoring Surface** (`work/epics/unplanned/expert-authoring-surface/spec.md`) — not in E-21 scope; depends on E-21's workbench and validation surface being in place ## Scope - **`FlowTime.TimeMachine.Sweep` namespace** — in `src/FlowTime.TimeMachine/Sweep/`: - - `IModelEvaluator` — injectable evaluation contract; decouples SweepRunner from the Rust binary in tests - - `SweepSpec` — validated input: ModelYaml, ParamId, Values[], optional CaptureSeriesIds - - `SweepPoint` — single evaluation result: ParamValue + Series dictionary - - `SweepResult` — full sweep result: ParamId + SweepPoint[] - - `ConstNodePatcher` — internal YAML DOM manipulation; patches a named const node's values array - - `SweepRunner` — orchestrates N evaluations via injected `IModelEvaluator` - - `RustModelEvaluator : IModelEvaluator` — wraps `RustEngineRunner`, maps series list to dictionary + ### In Scope - **`POST /v1/sweep`** — in `src/FlowTime.API/Endpoints/SweepEndpoints.cs`: - - Request: `{ yaml, paramId, values: [double...], captureSeriesIds?: [string...] 
}` - - Response (200): `{ paramId, points: [{ paramValue, series: { seriesId: double[] } }] }` - - 400: missing yaml / paramId / values - - 503: engine not enabled (RustEngine:Enabled=false) + - Compact design system replacing shadcn spacing defaults — information-dense, calm chrome, rich data-viz colors + - Workbench panel with click-to-pin node/edge cards, sparklines, metric display + - Metric selector and class filter replacing Blazor's 15-toggle feature bar + - dag-map library enhancements (click/hover events, selected state) — we own dag-map + - Analysis page with tabbed Time Machine surfaces: sweep, sensitivity, goal-seek, optimize + - Heatmap view (nodes-x-bins grid, first layered view) + - Validation surface with tiered diagnostic display and topology warning badges + - Dark/light theme with distinctive palette (calm chrome, vivid data colors) + - Visual polish pass for demo-ready quality - **In scope:** - - `src/FlowTime.TimeMachine/Sweep/IModelEvaluator.cs` - - `src/FlowTime.TimeMachine/Sweep/SweepSpec.cs` - - `src/FlowTime.TimeMachine/Sweep/SweepResult.cs` - - `src/FlowTime.TimeMachine/Sweep/ConstNodePatcher.cs` - - `src/FlowTime.TimeMachine/Sweep/SweepRunner.cs` - - `src/FlowTime.TimeMachine/Sweep/RustModelEvaluator.cs` - - `src/FlowTime.API/Endpoints/SweepEndpoints.cs` - - DI registration in `Program.cs` - - Unit tests: `tests/FlowTime.TimeMachine.Tests/Sweep/` - - API tests: `tests/FlowTime.Api.Tests/SweepEndpointsTests.cs` + ### Out of Scope - **Out of scope:** - - Sensitivity analysis (numerical gradient) — follow-on - - Multi-parameter sweeps (grid sweeps) — follow-on - - Session-based compile-once optimization — follow-on (each sweep point uses subprocess eval) - - Optimization / fitting — m-E18-10+ - - Sweep result persistence / artifact writing — follow-on + - Expert authoring surface (CodeMirror + inline lenses — separate future epic) + - Executive/BI dashboard (separate surface consuming FlowTime data, not this UI) + - Blazor changes beyond 
maintenance-mode contract alignment + - Model fitting UI (`POST /v1/fit` — blocked on Telemetry Loop & Parity) + - Chunked evaluation / streaming UI + - Backend API changes other than read-only run-adjacent endpoints (see Constraints). Authoring, orchestration, compute, write paths, telemetry sinks — all out of scope. + - Mobile/responsive layout + - E-15 telemetry ingestion UI (parallel track, not E-21 scope) + - dag-map layout engine changes (separate `ui-layout` epic) + - Decomposition view, comparison view, flow balance view (future layered views beyond heatmap) - ## Design Notes + ## Design Direction - ### Implementation approach + ### Audience - Each sweep point calls `RustEngineRunner.EvaluateAsync(patchedYaml)` independently (one - subprocess per point). The YAML is patched in-memory before each call via `ConstNodePatcher`, - which uses YamlDotNet's representation model to substitute the const node's values array. + | Audience | Surface | Relationship to E-21 | + |----------|---------|---------------------| + | Expert modeler | Svelte workbench | Primary user. Compact, data-dense, keyboard-aware. | + | Demo viewer | Same workbench | The demo IS the tool. No separate demo mode. Must look like a serious technical instrument. | + | Executive / BI | Separate surface (future) | Not this UI. Consumes FlowTime data exports/API. Out of scope. | - This deliberately trades compile-once efficiency for implementation simplicity: the Rust - session protocol requires a MessagePack NuGet dependency not yet in the tree, while the - subprocess approach reuses existing infrastructure with no new dependencies. + ### Density - The `IModelEvaluator` abstraction isolates this choice from `SweepRunner`, so a future - session-based evaluator can be dropped in without changing the sweep domain model or tests. + The shadcn-svelte defaults produce a consumer-product aesthetic: generous padding (`p-4`/`p-6`), large text, wide spacing, friendly rounded corners. 
A workbench needs the opposite: tight padding, smaller type, information density. Every pixel of chrome padding is a pixel not showing data. - ### ConstNodePatcher behaviour + Concrete changes from current state: + - Main content padding: `p-6` (24px) → `p-2` (8px) or context-dependent + - Sidebar: 280px → ~200px expanded, ~40px collapsed + - Border radius: `0.5rem` → `0.25rem` or less + - Working text size: `text-xs` (12px) as default body, `text-sm` (14px) for emphasis + - Component padding: tighten all shadcn component internals + - Spacing scale: use 2/4/6/8/12px steps, not 4/8/12/16/24px - - Finds the first `nodes` entry where `id == nodeId` AND `kind == "const"` - - Replaces its `values` sequence with `[value, value, ..., value]` (same bin count) - - Returns the original YAML unchanged if the node is not found or is not a const node - - Uses `InvariantCulture` formatting for decimal precision + ### Color Architecture - ## Acceptance Criteria + Two separate token layers: - - [x] `IModelEvaluator` interface exists in `FlowTime.TimeMachine.Sweep` - - [x] `SweepSpec` validates: non-null/whitespace ModelYaml, non-null/whitespace ParamId, non-null/non-empty Values - - [x] `ConstNodePatcher.Patch` correctly replaces const node values; returns original YAML for unknown/non-const nodes - - [x] `SweepRunner.RunAsync` returns one `SweepPoint` per input value, with correct ParamValue and Series - - [x] `SweepRunner` respects `CaptureSeriesIds` filter (null = all series) - - [x] `SweepRunner` respects `CancellationToken` between evaluation points - - [x] `RustModelEvaluator` wraps `RustEngineRunner` and maps series list to dictionary - - [x] `POST /v1/sweep` returns 400 for missing yaml / paramId / empty values - - [x] `POST /v1/sweep` returns 503 when Rust engine not enabled - - [x] Unit tests pass: 28 sweep unit tests (SweepSpec ×9, ConstNodePatcher ×7, SweepRunner ×12) - - [x] API validation tests pass: 7 tests (6×400, 1×503) - - [x] `dotnet test FlowTime.sln` all 
green (105 TimeMachine, 235 API — pre-existing integration failures unrelated) - - kind: milestone - id: M-007 + **Chrome tokens** (backgrounds, borders, text) — calm, restrained, few colors: + - Dark mode: near-black backgrounds (`hsl(220 10% 4%)`-range), subtle dark grid/border lines, muted light gray text (not bright white) + - Light mode: warm light gray backgrounds, subtle borders, dark text + - Minimal accent color usage in chrome — the frame should recede + + **Data-viz tokens** (heatmaps, charts, sparklines, topology overlays) — where vibrancy lives: + - Hue families: teal/cyan, pink/magenta, coral/red, blue, green, gold/amber + - Designed for legibility against both dark and light backgrounds + - Consistent across all data surfaces (topology heatmap, sparklines, analysis charts, heatmap grid) + - Sequential scales for metric mapping (single-hue ramps) + - Diverging scales for comparison/delta views + + Reference inspiration: o16g.com/data/ dark palette (see `reference-palette.png` alongside this spec). The goal is "quantitative instrument" not "friendly SaaS dashboard." + + ### Theming Iteration + + The token architecture must make major theme changes easy: + - All spacing from semantic tokens, not inline Tailwind values + - Chrome and data-viz as independent palettes + - Theme presets possible by swapping one CSS import + - User will bring visual examples for palette iteration — the system must accommodate this without touching component code + + ## Constraints + + - .NET 9 backend APIs are the source of truth. Svelte UI is primarily a consumer. Two explicit carve-outs are admitted: (1) per **D-2026-04-17-033**, E-21 may add **read-only run-adjacent endpoints** that strictly serve already-persisted run artifacts (e.g. 
`GET /v1/runs/{runId}/model`); (2) per **D-2026-04-21-034**, E-21 may extend existing compute-endpoint response shapes with **additive** fields that expose state the endpoint already computes internally (specifically the `trace` field on `/v1/goal-seek` and `/v1/optimize`). Authoring, orchestration, new compute endpoints, write-path endpoints, and non-additive shape changes to existing compute endpoints remain out of scope and need their own decision record if ever proposed. + - dag-map enhancements must remain general-purpose (not FlowTime-specific API) + - Existing what-if page (E-17) must continue working after workbench changes + - Existing run orchestration (E-11 M6) must continue working + - Playwright tests required for every milestone that ships UI changes + - Vitest for pure logic (helpers, store derivations, protocol encoding) + - Must work in existing devcontainer (Node 20, ports 5173/8081) + + ## Success Criteria + + - [ ] Clicking a node in the topology opens a workbench card with metrics and sparkline + - [ ] All five Time Machine analysis modes are accessible from the UI (sweep, sensitivity, goal-seek, optimize, validate) + - [ ] Heatmap view shows nodes-x-bins temporal patterns with shared color scale + - [ ] Layout is visibly more compact than current shadcn defaults — no "air" + - [ ] Dark and light themes both work with calm chrome and vivid data colors + - [ ] A non-technical viewer watching a demo finds the tool visually serious and information-rich + - [ ] Existing what-if and run orchestration pages still function correctly + - [ ] dag-map click/hover events are implemented in the library, not as wrapper hacks + + ## Risks & Open Questions + + | Risk / Question | Impact | Mitigation | + |----------------|--------|------------| + | Compact density feels cramped or hard to read | Med | Start with defined density tokens; adjust after user testing. Semantic tokens make changes cheap. 
| + | dag-map library changes take longer than expected | Med | dag-map click/hover can be wired externally as a fallback while library work proceeds. Don't block workbench on it. | + | Color palette needs multiple iterations to land | Low | Token architecture makes palette swaps cheap. User will bring examples. | + | Analysis API calls are slow for large sweeps (session evaluator helps) | Med | Show progress/loading state. `SessionModelEvaluator` (m-E18-13) is the default and is fast for moderate sweeps. | + | Heatmap SVG performance with many nodes x bins | Low | 20 nodes x 100 bins = 2000 rects, well within SVG limits. Add row virtualization if measured problems appear. | + | What-if page assumes current spacing/layout | Med | Audit what-if page during density pass. It may need minor layout adjustments. | + + ## Milestones + + | ID | Title | Summary | Status | + |----|-------|---------|--------| + | m-E21-01-workbench-foundation | Workbench Foundation | Density system, dag-map events (library), topology as navigation (one color dimension), workbench panel with click-to-pin node cards | **complete** (merged 2026-04-17) | + | m-E21-02-metric-selector-edge-cards | Metric Selector & Edge Cards | Metric chip bar, edge click-to-pin, edge cards, class filter | **complete** (merged 2026-04-17) | + | m-E21-03-sweep-sensitivity | Sweep & Sensitivity Surfaces | `/analysis` route with tabs, sweep config + results, sensitivity bar chart | **complete** (merged 2026-04-17; ultrareview follow-ups 2026-04-20) | + | m-E21-04-goal-seek | Goal Seek Surface | Goal-seek panel, shared convergence chart + result card, `trace` on `/v1/goal-seek` and `/v1/optimize` (per D-2026-04-21-034) | **complete** (2026-04-22) | + | m-E21-05-optimize | Optimize Surface | N-param Nelder-Mead surface reusing shared convergence chart + result card from m-E21-04; per-param range table | **complete** (2026-04-22) | + | m-E21-06-heatmap-view | Heatmap View | Nodes-x-bins grid, row sorting, click-to-jump, 
view switcher (topology/heatmap), shared full-window color scale, shared node-mode toggle | **complete** (2026-04-24) | + | m-E21-07-validation-surface | Validation Surface (Svelte) | Consumer-side type widening on `state_window` warnings; validation panel as left column inside workbench panel; topology node + edge warning indicators; workbench-card warning surfaces; bidirectional cross-link via shared view-state store; Playwright real-bytes fixture for AC1 round-trip. No backend work. | **complete** (2026-04-28) | + | m-E21-08-polish | Visual Polish & Dark Mode QA | Topology keyboard + ARIA retrofit, full bidirectional cross-link (node + edge), dark mode audit, loading skeletons, transitions rule, elevation normalization | **complete** (2026-04-28) | + + ## ADRs + + - ADR-E21-01: New epic E-21 rather than resuming E-11. E-11 was "clone Blazor in Svelte." E-21 is "build the workbench paradigm + new surfaces." E-11's remaining milestones (M5/M7/M8) are absorbed with different scope. + - ADR-E21-02: dag-map click/hover events implemented in the library (we own dag-map) rather than external wrapper hacks. Cleaner API, reusable for non-FlowTime consumers. + - ADR-E21-03: Analysis surfaces use tabbed interface at `/analysis` route rather than separate routes. Tabs enable comparison and reduce navigation. + - ADR-E21-04: Dashboard (E-11 M7) deferred. Workbench + heatmap view cover per-node overview better than a dedicated dashboard page. + - ADR-E21-05: Density pass is m-E21-01 scope, not polish. Building on spacious defaults and compressing later cascades through every component. + - kind: epic + id: E-23 frontmatter: - title: Sensitivity Analysis + title: Model Validation Consolidation status: done - parent: E-18 body: | ## Goal - Add numerical sensitivity analysis as a Time Machine operation: given a model YAML, a set - of const-node parameters, and a target metric series, compute ∂metric_mean/∂param for each - parameter using a central-difference approximation. 
Answers "which parameter has the most - impact on this metric?" + Make `docs/schemas/model.schema.yaml` the **only declarative source of structural truth** about the post-substitution model, and `ModelSchemaValidator` the **only runtime evaluator**. Eliminate every "embedded schema" — every place outside the canonical schema where model rules are re-encoded. After E-23 closes: - Builds on: - - m-E18-09 `SweepRunner` + `ConstNodePatcher` — two-point sweep per parameter reuses the - sweep infrastructure directly - - `ConstNodePatcher` — YAML DOM manipulation already in place + - One schema. Declared in `model.schema.yaml`. + - One validator. `ModelSchemaValidator.Validate`, with named adjuncts (alongside `ValidateClassReferences`) for any rule JSON Schema draft-07 cannot express. + - Zero parallel imperative validators. `ModelValidator.cs` is deleted. + - Every rule has exactly one canonical home. No silent rules in parsers, emitters, or post-parse orchestration paths. - ## Scope + ## Spirit and history - **`FlowTime.TimeMachine.Sweep` namespace** (extending m-E18-09's namespace): - - `ConstNodeReader` — companion to `ConstNodePatcher`; reads the current scalar value of a - named const node's first bin. Returns `null` if the node is not found or not a const node. - - `SensitivitySpec` — validated input: ModelYaml, ParamIds[], MetricSeriesId, Perturbation (default 5%) - - `SensitivityPoint` — single result: ParamId, BaseValue, Gradient (∂metric_mean/∂param) - - `SensitivityResult` — `SensitivityPoint[]` sorted by `|Gradient|` descending - - `SensitivityRunner` — composes `SweepRunner`; for each param: read base, 2-point sweep, - central difference + The discovery driving E-23 was that FlowTime did not have a central model schema every component agreed on. Different files enforced different — sometimes contradictory — versions of "what is a valid model." 
E-24 Schema Alignment fixed two of those embedments: it unified the **type** (`SimModelArtifact` + `ModelDefinition` collapsed to `ModelDto`/`ProvenanceDto` in `FlowTime.Contracts`) and consolidated the **schema document** (`model.schema.yaml` rewritten top-to-bottom against `ModelDto`, hard-asserted at `val-err == 0` across the twelve templates per `TemplateWarningSurveyTests`). - **`POST /v1/sensitivity`** — in `src/FlowTime.API/Endpoints/SensitivityEndpoints.cs` - - Request: `{ yaml, paramIds: [string...], metricSeriesId, perturbation?: double }` - - Response (200): `{ metricSeriesId, points: [{ paramId, baseValue, gradient }] }` - - 400: missing yaml / paramIds (null or empty) / metricSeriesId - - 503: engine not enabled + E-23 closes the loop. After E-24 there is still a parallel imperative validator on the live `/v1/run` and CLI paths (`src/FlowTime.Core/Models/ModelValidator.cs`, 214 lines, ~25 rules) plus latent rules in the parser, emitter, and orchestration layers that have never been audited against the schema. E-23 audits those embedments, lifts every rule into either the schema or a named `ModelSchemaValidator` adjunct, migrates every call site to `ModelSchemaValidator`, and deletes `ModelValidator` outright. - **In scope:** - - `src/FlowTime.TimeMachine/Sweep/ConstNodeReader.cs` - - `src/FlowTime.TimeMachine/Sweep/SensitivitySpec.cs` - - `src/FlowTime.TimeMachine/Sweep/SensitivityResult.cs` - - `src/FlowTime.TimeMachine/Sweep/SensitivityRunner.cs` - - `src/FlowTime.API/Endpoints/SensitivityEndpoints.cs` - - DI registration in `Program.cs` - - Unit tests: `tests/FlowTime.TimeMachine.Tests/Sweep/` - - API tests: `tests/FlowTime.Api.Tests/SensitivityEndpointsTests.cs` + This is the same Truth Discipline spirit that drove E-24's type unification: the canonical contract owns the rule; nothing else does. 
The 2026-04-23 guard added to `.ai-repo/rules/project.md` (*"'API stability' does not mean 'keep old functions around.'"*) is the explicit prohibition on retaining `ModelValidator` as a dead alternative entry point once `ModelSchemaValidator` covers it. The companion guards (*"Do not restate a canonical contract in many places from memory"*, *"Do not let adapter/UI projection become the only place where semantics exist"*) extend the same discipline to the parser, emitter, and orchestration layers. - **Out of scope:** - - Multi-metric sensitivity — single metric per call - - Distribution-based sensitivity (Morris method, Sobol indices) — follow-on - - Forward-difference vs central-difference choice — central difference only - - Optimization / fitting — m-E18-11+ + ## Context - ## Design Notes + Two validators ship today in `FlowTime.Core`: - ### Gradient formula (central difference) + - **`ModelValidator.Validate`** (`src/FlowTime.Core/Models/ModelValidator.cs`) — hand-rolled, imperative, uses YamlDotNet `IgnoreUnmatchedProperties()`. Does not consult the JSON schema. Used by `POST /v1/run` (`src/FlowTime.API/Program.cs:657`), the Engine CLI (`src/FlowTime.Cli/Program.cs:76`), and `TimeMachineValidator` tier-1 (`src/FlowTime.TimeMachine/Validation/TimeMachineValidator.cs:50`). + - **`ModelSchemaValidator.Validate`** (`src/FlowTime.Core/Models/ModelSchemaValidator.cs`) — reads `docs/schemas/model.schema.yaml` and performs full JSON-schema evaluation plus the `ValidateClassReferences` adjunct. Used inside `TimeMachineValidator.ValidateSchema` (line 46), reachable from `POST /v1/validate` and the `flowtime validate` CLI command. - For each parameter `p` with base value `b` and perturbation fraction `ε`: + Post-E-24 they validate the same C# type (`ModelDto`) against semantically the same input. That is the foundation E-23 builds on. 
What E-23 must still do: - ``` - hi = b × (1 + ε) - lo = b × (1 - ε) - gradient = (mean(metric_series_at_hi) − mean(metric_series_at_lo)) / (hi − lo) - = (mean_hi − mean_lo) / (2 × b × ε) - ``` + 1. **Audit every rule embedment.** `ModelValidator`'s 25 rules are the obvious target, but parser tolerations, silent emission defaults, and post-parse orchestration checks are equally embedments. Each one is a place where model truth lives outside the canonical schema and can therefore drift. + 2. **Lift each rule to its canonical home.** Schema-expressible rules go to `model.schema.yaml`. Cross-reference / runtime-data rules go to `ModelSchemaValidator` adjuncts. A rule that legitimately cannot be a model rule (e.g., scalar-style coercion is YAML representation concern, not model rule — already addressed by E-24 m-E24-04) gets a written justification. + 3. **Migrate every production call site.** `POST /v1/run`, Engine CLI, `TimeMachineValidator` tier-1, plus four test surfaces flip from `ModelValidator.Validate` to `ModelSchemaValidator.Validate`. + 4. **Delete `ModelValidator.cs` outright.** No delegation shim, no dead alternative entry point. - **Zero-base edge case:** when `b == 0`, `hi == lo == 0` and the gradient is indeterminate. - Gradient is set to `0.0` and a note is included in the point. The parameter is still included - in the result so callers can see it was processed. + The historical context — the pause for E-24, the original m-E23-01 "schema-alignment" milestone, the stashed input material on branch `milestone/m-E23-01-schema-alignment` — is captured in `D-2026-04-24-036` (E-23 paused, E-24 created), `D-2026-04-24-037` (Option E unify ratified), and `D-2026-04-25-038` (E-24 closed, E-23 ready to resume). Those decisions stand; this spec rewrite is the post-E-24 reformulation, not a contradiction of them. 
- **Missing metric series:** if the evaluator returns series that do not include `MetricSeriesId`, - `SensitivityRunner` throws `InvalidOperationException` with a clear message. This is a caller - error (wrong series ID), not a graceful skip. + ## Scope - **Unknown param:** if `ConstNodeReader.ReadValue` returns `null` for a param ID (node not - found or not a const node), that param is skipped (omitted from result). Callers can detect - skipped params by comparing `spec.ParamIds.Length` vs `result.Points.Length`. + ### In Scope - ### `SensitivityRunner` composes `SweepRunner` + - **Embedment audit.** Every rule encoded in `ModelValidator.cs`, `ModelParser.cs`, `SimModelBuilder.cs` (post-E-24), `RunOrchestrationService.cs`, and `GraphService.cs` is enumerated and dispositioned. Per-rule outcome: schema-covered, schema-add, adjunct, parser-justified, or drop. + - **Schema additions.** Every rule classified `schema-add` lands in `docs/schemas/model.schema.yaml` with a citation comment so the rule's provenance is traceable. + - **Adjunct additions.** Every rule classified `adjunct` lands as a named method on `ModelSchemaValidator` alongside `ValidateClassReferences`, invoked from `Validate`, with at least one negative-case unit test. + - **Call-site migration.** Production: `POST /v1/run`, Engine CLI, `TimeMachineValidator` tier-1. Tests: `tests/FlowTime.Integration.Tests/SimToEngineWorkflowTests.cs`, `tests/FlowTime.Tests/Schema/{TargetSchemaValidationTests,SchemaVersionTests,SchemaErrorHandlingTests}.cs`. + - **Error-message phrasing audit.** `ModelValidator` returns flat strings (`"Grid must specify bins"`); `ModelSchemaValidator` returns JSON-schema-shaped messages (`/grid/bins: Required properties are missing: [bins]`). Tests asserting on phrasing get updated; UI / CLI consumers that surface raw strings get audited and adjusted (or none, if no consumer regex-parses validator output). 
+ - **`ModelValidator` deletion.** `src/FlowTime.Core/Models/ModelValidator.cs` is removed; `ValidationResult` (currently bottom of that file) is moved to its own file. + - **Negative-case canary.** A new test catalogue feeds a deliberately-invalid model snippet for each non-trivial audited rule and asserts `ModelSchemaValidator.Validate(...).IsValid == false`. Locks in the audit's coverage claim. + - **Survey canary remains green.** `TemplateWarningSurveyTests.Survey_Templates_For_Warnings` continues to report `val-err == 0` across all twelve templates at `ValidationTier.Analyse` — E-24's hard assertion stays in place; nothing in E-23 may break it. - `SensitivityRunner(SweepRunner sweepRunner)` — takes the full `SweepRunner` including its - injected `IModelEvaluator`. Tests pass a `SweepRunner(fakeEvaluator)` — no additional - test doubles needed. + ### Out of Scope - ## Acceptance Criteria + - Any change to `SimModelBuilder`'s emission of `grid.start` from `window.start`. Sim is correct; E-24 already aligned the schema (`grid.start` declared optional). E-23 does not modify Sim emission. + - Any change to Blazor or Svelte UI. Both remain untouched. m-E21-07 Validation Surface (E-21) consumes the consolidated validator after E-23 closes; that is the next epic's concern. + - New validator features: line/column mapping, LSP integration, incremental validation, per-field suggestions, compile-time rule extraction, schema-draft migration. The goal is consolidation, not expansion. + - `Template`-layer validation. `TemplateSchemaValidator` validates pre-substitution authoring templates against `template.schema.json` — that is genuinely a different contract and stays distinct. E-23 only touches post-substitution model validation. + - Reintroducing any deprecated schema fields (`binMinutes`, snake_case provenance, two-type YAML, etc.). Per project rules. + - External-consumer compatibility. No camelCase aliases, no dual-shape acceptance, no migration mode. 
Forward-only per `ADR-E-24-02`. - - [x] `ConstNodeReader.ReadValue(yaml, nodeId)` returns the first-bin value for known const - nodes; returns `null` for unknown nodes, non-const nodes, and missing `nodes` section - - [x] `SensitivitySpec` validates: non-null/whitespace ModelYaml, non-null/non-empty ParamIds, - non-null/whitespace MetricSeriesId, Perturbation in (0, 1) exclusive - - [x] `SensitivityRunner.RunAsync` returns one `SensitivityPoint` per found param, sorted by - `|Gradient|` descending - - [x] Gradient computed correctly via central difference - - [x] Zero-base param produces Gradient = 0.0 (no crash) - - [x] Unknown param ID silently skipped (omitted from result) - - [x] Missing metric series throws `InvalidOperationException` - - [x] `SensitivityRunner` respects `CancellationToken` - - [x] `POST /v1/sensitivity` returns 400 for missing yaml / paramIds / metricSeriesId - - [x] `POST /v1/sensitivity` returns 503 when Rust engine not enabled - - [x] Unit tests pass: 32 tests (ConstNodeReader ×8, SensitivitySpec ×12, SensitivityRunner ×12) - - [x] API tests pass: 7 tests (6×400, 1×503) - - [x] `dotnet test FlowTime.sln` all green (137 TimeMachine, 242 API) - - kind: milestone - id: M-008 - frontmatter: - title: Goal Seeking - status: done - parent: E-18 - body: | - ## Goal + ## Constraints - Add 1D goal seeking: given a model YAML, a const-node parameter, a metric series, and a - target value, find the parameter value that drives the metric mean to the target via bisection. - Answers "what arrival rate gives 80% utilization?" without a full parameter sweep. + - **`ModelValidator` is deleted, not delegated.** No forwarding shim. No "temporary alias." Single consolidation endpoint after E-23: `ModelSchemaValidator.Validate`. + - **Schema-first preference for rule placement.** When a rule is expressible in JSON Schema draft-07, it belongs in the schema, not as an adjunct. 
Adjuncts are the exception (cross-reference checks, runtime-data conditionals), not the default. + - **Parser-justified is a written justification, not a default.** Any rule left in parser/emitter code must answer "why is it not a model rule?" in the m-E23-01 audit doc. "Hard to move" is not a justification. + - **Byte-for-byte parity on the `/v1/run` success path.** Currently-valid models receive byte-identical responses post-migration. Error responses preserve HTTP status (400) and JSON shape (`{ "error": "..." }`); error phrasing may change (covered by phrasing audit in m-E23-02). + - **Survey canary stays green at every milestone close.** `TemplateWarningSurveyTests` continues to report `val-err == 0` at `ValidationTier.Analyse`. A non-zero count fails the build. + - **No reintroduction of `FlowTime.Generator` or any variant of the deleted provenance pipeline.** Per `D-2026-04-07-019` Path B. - Builds on: - - m-E18-09 `SweepRunner` + `ConstNodePatcher` / `ConstNodeReader` (m-E18-10) - - Same `IModelEvaluator` seam + ## Success Criteria - ## Scope + - [ ] Embedment audit recorded in `m-E23-01-rule-coverage-audit-tracking.md`. Every rule in `ModelValidator.cs`, `ModelParser.cs`, `SimModelBuilder.cs`, `RunOrchestrationService.cs`, and `GraphService.cs` has a row with: file:line citation, plain-English rule, current schema status (yes/no/partial with line cite), and final disposition (schema-covered, schema-add, adjunct, parser-justified, drop). + - [ ] Every rule classified `schema-add` is declared in `docs/schemas/model.schema.yaml` with a citation comment. Every rule classified `adjunct` is implemented as a named method on `ModelSchemaValidator` and invoked from `Validate`, with at least one negative-case unit test. + - [ ] Negative-case canary (`tests/FlowTime.Core.Tests/Schema/RuleCoverageRegressionTests.cs` or analogous) covers every non-trivial audited rule. 
Each test feeds a deliberately-invalid model snippet and asserts `ModelSchemaValidator.Validate(...).IsValid == false` plus the expected error contains the rule's identifying substring. + - [ ] `src/FlowTime.Core/Models/ModelValidator.cs` is deleted. `grep -rn "ModelValidator\b" --include="*.cs"` outside `.claude/worktrees/` returns zero hits. + - [ ] `ModelSchemaValidator.Validate` is the single model-YAML validator called from `POST /v1/run`, `POST /v1/validate`, the Engine CLI, the `flowtime validate` Time Machine CLI, `TimeMachineValidator`, and all test paths. + - [ ] `POST /v1/run` produces byte-identical success responses for all currently-valid models post-migration. For currently-invalid models, error responses preserve HTTP 400 and `{ "error": "..." }` shape; error phrasing may differ and is covered by an explicit phrasing audit in m-E23-02. + - [ ] `TemplateWarningSurveyTests.Survey_Templates_For_Warnings` reports `val-err == 0` across all twelve templates at `ValidationTier.Analyse`. (E-24's hard assertion stays green.) + - [ ] Full `.NET` test suite passes: `dotnet test FlowTime.sln` is green. UI vitest and Playwright remain green (Svelte and Blazor surfaces untouched). + - [ ] `ValidationResult` is moved out of the deleted `ModelValidator.cs` into its own file `src/FlowTime.Core/Models/ValidationResult.cs` (pure relocation, no API change), since it remains the shared result type used by `ModelSchemaValidator` and `TimeMachineValidator`. + + ## Open Questions + + | Question | Blocking? | Resolution path | + |----------|-----------|-----------------| + | Does every rule in `ModelValidator` have an equivalent JSON Schema draft-07 expression, or do some land as adjuncts? | Yes — gates m-E23-01 outcome | m-E23-01 rule audit answers per-rule. Audit output is the first deliverable. Default leaning: most rules express cleanly (enums, ranges, const, `additionalProperties: false`). 
Cross-reference rules (e.g., node-id uniqueness within `nodes[]`) land as adjuncts because draft-07's `uniqueItems` compares whole array items for equality, so it cannot express "no two objects share the same `id`". | + | Are there parser tolerations in `ModelParser.cs` that constitute unwritten rules? | Yes — gates m-E23-01 outcome | m-E23-01 reads `ModelParser.cs` (733 lines) for `IgnoreUnmatchedProperties`, `??` defaults, conditional branches that swallow missing data. Each becomes an audit row. | + | Are there orchestration-layer checks in `RunOrchestrationService` / `GraphService` that should be model rules? | Yes — gates m-E23-01 outcome | m-E23-01 surveys the post-parse path. Likely findings: class-reference checks (already in `ValidateClassReferences`), output-id uniqueness, etc. Each becomes an audit row. | + | Should m-E23-02 and m-E23-03 collapse into one milestone, or stay split for rollback safety? | No — sequencing detail | Default: stay split. m-E23-02 leaves `ModelValidator.cs` on disk as a single-revert safety net during the call-site flip; m-E23-03 deletes it once green. If the m-E23-01 audit shows the migration is byte-trivial on every site, the user can collapse before m-E23-02 starts. | + | Do non-test callers of `ModelValidator` exist in sibling repositories (MCP, external tools)? | Unknown — gates the delete | m-E23-01 audit includes a cross-repo grep against sibling checkouts visible to this workspace (treated read-only per project rule). If callers exist, E-23 either absorbs them or coordinates deletion with the sibling. | + + ## Risks (optional) + + | Risk | Impact | Mitigation | + |------|--------|------------| + | A rule embedded in `ModelValidator` (or parser, or emitter) is silently dropped when `ModelValidator.cs` is deleted because it had no schema or adjunct equivalent | High | m-E23-01 audit is the primary defense — every rule must classify into schema-covered, schema-add, adjunct, parser-justified, or drop. 
Negative-case canary in m-E23-01 AC6 is the secondary defense — proves `ModelSchemaValidator` actually catches every rule the audit claims. Tertiary defense: keep `ModelValidator.cs` on disk through m-E23-02 so revert is one commit. | + | Error-phrasing change breaks a downstream consumer (Blazor surfacing validation messages verbatim, CLI scripts grep'ing stdout) | Medium | m-E23-02 includes an error-phrasing audit + UI/CLI consumer scan. Tests asserting exact phrasing get relaxed to semantic assertions (`errors.Should().Contain(e => e.Contains("bins"))`). | + | `ModelSchemaValidator` per-request cost is materially higher than `ModelValidator`'s hand-rolled checks at hot paths like `POST /v1/run` | Low | m-E23-02 records a before/after timing of `POST /v1/run` against a representative template. If latency grows by more than ~2 ms the audit doc records it; if materially more, escalate. No mitigation work planned until we have a number. | + | External consumers (VSCode YAML plugin, docs tooling) read `docs/schemas/model.schema.yaml` directly and break on schema additions | Low | Additions are permissive (new optional rules tightening invalid cases that were already accidentally invalid). External consumers accept more, not less. Document the schema bump in the milestone tracking doc. | + | Post-E-24 line numbers in `Program.cs` / `TimeMachineValidator.cs` shift from what the spec recorded; m-E23-02 references stale lines | Low | m-E23-02 spec instructs starting with a fresh `grep -rn "ModelValidator\.Validate" --include="*.cs"` to enumerate live call sites at start-milestone time, not relying on cached line numbers. 
| - **`FlowTime.TimeMachine.Sweep` namespace:** - - `GoalSeekSpec` — validated input: ModelYaml, ParamId, MetricSeriesId, Target, SearchLo, - SearchHi, Tolerance (default 1e-6), MaxIterations (default 50) - - `GoalSeekResult` — output: ParamValue, AchievedMetricMean, Converged, Iterations - - `GoalSeeker` — bisection over `SweepRunner`; handles non-bracketed case gracefully + ## Milestones - **`POST /v1/goal-seek`** — in `src/FlowTime.API/Endpoints/GoalSeekEndpoints.cs` - - Request: `{ yaml, paramId, metricSeriesId, target, searchLo, searchHi, tolerance?, maxIterations? }` - - Response (200): `{ paramValue, achievedMetricMean, converged, iterations }` - - 400: missing/invalid required fields (searchLo ≥ searchHi is invalid) - - 503: engine not enabled + Sequencing: rule-coverage audit first (doc-only with schema and adjunct additions where the audit shows they are needed), then call-site migration (mechanical with phrasing audit), then `ModelValidator` deletion (cleanup + assertion that nothing calls it anymore). Three milestones. - **In scope:** - - `src/FlowTime.TimeMachine/Sweep/GoalSeekSpec.cs` - - `src/FlowTime.TimeMachine/Sweep/GoalSeekResult.cs` - - `src/FlowTime.TimeMachine/Sweep/GoalSeeker.cs` - - `src/FlowTime.API/Endpoints/GoalSeekEndpoints.cs` - - DI registration in `Program.cs` - - Unit tests: `tests/FlowTime.TimeMachine.Tests/Sweep/` - - API tests: `tests/FlowTime.Api.Tests/GoalSeekEndpointsTests.cs` - - Architecture doc: `docs/architecture/time-machine-analysis-modes.md` (written alongside) + - [m-E23-01-rule-coverage-audit](./m-E23-01-rule-coverage-audit.md) — Audit every embedment of model rules across `ModelValidator.cs`, `ModelParser.cs`, `SimModelBuilder.cs`, and post-parse orchestration. Land schema additions and `ModelSchemaValidator` adjuncts so every rule has a single canonical home. Negative-case canary catalogue locks coverage in. 
· **completed (2026-04-26 on `milestone/m-E23-01-rule-coverage-audit`)** — 94 rules audited, 16 schema-add edits, 12 adjunct methods (+ silent-error fallback), 26-test regression catalogue, AC1-AC9 closed · depends on: — + - [m-E23-02-call-site-migration](./m-E23-02-call-site-migration.md) — Switch every production call site and test from `ModelValidator.Validate` to `ModelSchemaValidator.Validate`. Audit error-message phrasing and update test assertions / UI consumers as needed. `ModelValidator.cs` left on disk as a single-revert safety net. · **completed (2026-04-26 on `milestone/m-E23-02-call-site-migration`)** — 3 production sites + 28 test calls migrated; `TimeMachineValidator` redundant-delegation block removed; scope-expansion fixes for `ProvenanceService.StripProvenance` (real production round-trip bug surfaced by the new strict validator) + 2 stale fixtures + 1 documented-future test flip; +16 net new tests (10 strip branch-coverage + 2 integration regression + 4 strip sub-case). Both canaries green. AC1-AC8 closed (AC9 deferred — optional). · depends on: m-E23-01 + - [m-E23-03-delete-model-validator](./m-E23-03-delete-model-validator.md) — Delete `ModelValidator.cs` and any dedicated `ModelValidator`-only test files that survived m-E23-02. Move `ValidationResult` to its own file. Assert `grep` returns zero callers. Archive E-23. · **completed (2026-04-26 on `milestone/m-E23-03-delete-model-validator`)** — `ModelValidator.cs` deleted; `ValidationResult` (14 lines) relocated to `src/FlowTime.Core/Models/ValidationResult.cs` keeping namespace `FlowTime.Core`; AC3 grep clean (7 historical-comment hits, zero live references); both canaries green; full suite **1862 / 0 / 9** — identical to m-E23-02 tip. Epic-folder archived to `work/epics/completed/E-23-model-validation-consolidation/` on merge to main. 
· depends on: m-E23-02 - **Out of scope:** - - Multi-dimensional optimization (Nelder-Mead) — m-E18-12+ - - Constraint handling beyond the `[searchLo, searchHi]` range - - Non-monotonic functions (bisection is undefined; `Converged=false` returned) + ## ADRs - ## Algorithm + - **ADR-E-23-01 — Delete, do not delegate.** Target state is a single validator (`ModelSchemaValidator`). `ModelValidator` is deleted rather than rewired to forward to `ModelSchemaValidator`, because: (1) a forwarding shim is a new compatibility layer the 2026-04-23 Truth Discipline guard explicitly forbids, (2) there are no external-surface users of `ModelValidator` that justify a shim, (3) a delete is cheaper to maintain than a forward for all future readers. Recorded in `work/decisions.md` before m-E23-01 begins. + - **ADR-E-23-02 — Schema as the single structural contract; adjuncts are the named exception.** `ModelSchemaValidator` reads `model.schema.yaml` and performs JSON-schema evaluation plus named adjunct methods (`ValidateClassReferences` is the prior art). Any structural rule not expressible in JSON Schema draft-07 lands as an adjunct method, not as a second parallel validator and not as silent parser/emitter logic. Adjuncts are named, invoked from `Validate`, and individually unit-tested. Ratified at m-E23-01 audit close, with the enumerated adjunct list as the ratification artefact. + - **ADR-E-23-03 (candidate) — Parser/emitter rules need written justification.** If the m-E23-01 audit classifies any rule as `parser-justified` (left in parser or emitter code rather than lifted to schema or adjunct), this ADR records the rationale framework: the rule is a YAML representation concern, not a model concern, and would not survive a non-YAML serialization of `ModelDto`. Deferred pending m-E23-01 audit findings — recorded only if the audit produces a non-empty `parser-justified` set. - Bisection on the metric mean: + ## References - ``` - 1. 
Evaluate at searchLo → meanLo = mean(metric at searchLo) - 2. Evaluate at searchHi → meanHi = mean(metric at searchHi) - 3. If target not in [min(meanLo,meanHi), max(meanLo,meanHi)]: - return best endpoint, Converged=false - 4. While iterations < maxIterations: - mid = (lo + hi) / 2 - midMean = mean(metric at mid) - if |midMean - target| < tolerance: return mid, Converged=true - if (midMean - target) same sign as (meanLo - target): lo = mid, meanLo = midMean - else: hi = mid, meanHi = midMean - 5. Return mid, Converged=false (max iterations reached) - ``` + ### Source-code pointers - ## Acceptance Criteria + - `src/FlowTime.Core/Models/ModelValidator.cs` — slated for deletion (214 lines, ~25 imperative rules) + - `src/FlowTime.Core/Models/ModelSchemaValidator.cs` — consolidation target (263 lines) + - `src/FlowTime.Core/Models/ModelParser.cs` — parser audited in m-E23-01 (733 lines) + - `src/FlowTime.Sim.Core/Templates/SimModelBuilder.cs` — emitter audited in m-E23-01 (458 lines) + - `src/FlowTime.TimeMachine/Validation/TimeMachineValidator.cs` — tiered validator, drops the `ModelValidator` delegation in m-E23-02 + - `src/FlowTime.API/Program.cs` — `POST /v1/run` call site (re-enumerate at m-E23-02 start) + - `src/FlowTime.Cli/Program.cs` — Engine CLI call site (re-enumerate at m-E23-02 start) + - `docs/schemas/model.schema.yaml` — schema, target for audit-driven additions + - `tests/FlowTime.Integration.Tests/TemplateWarningSurveyTests.cs` — survey canary (committed, hard-asserting per E-24 m-E24-05) - - [x] `GoalSeekSpec` validates: non-null/whitespace ModelYaml/ParamId/MetricSeriesId; - SearchLo < SearchHi; Tolerance > 0; MaxIterations ≥ 1 - - [x] `GoalSeeker.SeekAsync` converges on a linear model to within tolerance - - [x] `GoalSeeker` returns `Converged=false` when target is not bracketed - - [x] `GoalSeeker` returns `Converged=false` (best guess) when max iterations exhausted - - [x] `GoalSeeker` respects `CancellationToken` - - [x] `POST /v1/goal-seek` 
returns 400 for missing/invalid required fields - - [x] `POST /v1/goal-seek` returns 503 when engine not enabled - - [x] Unit tests pass: 26 tests (GoalSeekSpec ×14, GoalSeeker ×12) - - [x] API tests pass: 8 tests (7×400, 1×503) - - [x] `dotnet test FlowTime.sln` all green (163 TimeMachine, 250 API) - - kind: milestone - id: M-009 + ### Decisions and ADRs + + - `D-2026-04-24-035` — E-23 ratification (delete-not-delegate discipline) + - `D-2026-04-24-036` — E-23 paused, E-24 created + - `D-2026-04-24-037` — Option E (unify) ratified within E-24 + - `D-2026-04-25-038` — E-24 closed, E-23 ready to resume + - E-24 ADRs: ADR-E-24-01 Unify · ADR-E-24-02 Forward-only · ADR-E-24-03 Schema declares only consumed fields · ADR-E-24-04 `ScalarStyle.Plain` gates `ParseScalar` · ADR-E-24-05 `QuotedAmbiguousStringEmitter` + + ### Related epics + + - **E-24 Schema Alignment** (closed 2026-04-25): `work/epics/completed/E-24-schema-alignment/spec.md` — unified the type and the schema; E-23 builds on that foundation. + - **E-21 Svelte Workbench** (paused at m-E21-07): `work/epics/E-21-svelte-workbench-and-analysis/spec.md` — m-E21-07 Validation Surface consumes the consolidated `ModelSchemaValidator` once E-23 closes. + + ### Truth Discipline + + - `.ai-repo/rules/project.md` → Truth Discipline Guards — the 2026-04-23 *"'API stability' does not mean 'keep old functions around.'"* guard plus *"Do not restate a canonical contract in many places from memory"* and *"Do not let adapter/UI projection become the only place where semantics exist"*. 
+ - kind: epic + id: E-24 frontmatter: - title: Multi-parameter Optimization + title: Schema Alignment status: done - parent: E-18 body: | ## Goal - Add multi-parameter optimization: given a model, a set of const-node parameters with search - ranges, a metric series, and an objective (minimize or maximize), find the parameter values that - drive the metric mean to its optimum using Nelder-Mead simplex — a derivative-free method that - works for any number of parameters without needing gradients. + Unify FlowTime's post-substitution model representation. One C# type. One YAML schema. One validator. `SimModelArtifact` is **deleted**. Sim builds the unified model type directly; the Engine accepts and parses the same type. Every field has exactly one declaration site. `TemplateWarningSurveyTests` reports `val-err=0` across all twelve templates at `ValidationTier.Analyse`, promoted to a hard build-time assertion. `ModelValidator` deletion (E-23) then becomes a mechanical cleanup. - Answers "what combination of arrival rate and capacity minimizes queue depth?" without a full - multi-dimensional grid search. 
+ ## Context - Builds on: - - `IModelEvaluator` seam (m-E18-09) - - `ConstNodePatcher` for multi-parameter YAML mutation (m-E18-09) - - `ConstNodeReader` (m-E18-10) — used in tests to read patched values + ### The split is accidental drift, not a designed interface - ## Scope + An investigation into `SimModelArtifact`'s purpose (agent `a5aa3dfe26394aff5`) established that the current two-type split was never a deliberate architectural decision: - **`FlowTime.TimeMachine.Sweep` namespace:** - - `OptimizeObjective` — `Minimize | Maximize` enum - - `SearchRange` — `record(double Lo, double Hi)` with `Lo < Hi` invariant - - `OptimizeSpec` — validated input: ModelYaml, ParamIds, MetricSeriesId, Objective, - SearchRanges (one entry per ParamId), Tolerance (default 1e-4), MaxIterations (default 200) - - `OptimizeResult` — output: ParamValues, AchievedMetricMean, Converged, Iterations - - `Optimizer` — Nelder-Mead simplex over `IModelEvaluator`; patches all parameters - simultaneously per evaluation; respects CancellationToken + - **`SimModelArtifact` was born in commit `ce9ec9e` (Oct 2025)** shaped to Sim's authoring-time template schema (`template.schema.json`) — not to `docs/schemas/model.schema.yaml`, which predated it by a month and already described what the Engine consumes. The two types never shared a contract. + - **`docs/schemas/README.md:34` claims parity that the code does not deliver.** The file asserts that `SimModelArtifact` and `ModelDefinition` describe the same shape. Reality: the only boundary between them is a YAML string, and that YAML string fails the Engine's own schema validation in every one of the twelve shipped templates today. + - **No C# type boundary exists.** `SimModelArtifact` is serialized to YAML in Sim; the Engine parses that YAML into `ModelDefinition`. No shared interface, no projection method, no round-trip contract. Drift is the default; alignment has never been enforced. 
+ - **Each feature addition paid a duplication tax.** Commit `51a99b9` added classes to both types in the same commit (+23 lines to `SimModelArtifact`, +28 to `ModelDefinition`). `ModelDefinition` acquired `wipLimit` / `wipLimitSeries` / `wipOverflow` that `SimModelArtifact` lacks. `SimModelArtifact` acquired `window` / `generator` / `mode` / top-level `metadata` that no Engine consumer reads. + - **Only `provenance` has genuine dual concern** — both sides already agree on the keys, they just disagree on casing (snake_case in the schema, camelCase in `SimProvenance` emission and in Sim's Engine `ProvenanceMetadata` reader). - **`POST /v1/optimize`** — in `src/FlowTime.API/Endpoints/OptimizeEndpoints.cs` - - Request: `{ yaml, paramIds, metricSeriesId, objective, searchRanges, tolerance?, maxIterations? }` - where `searchRanges` is `{ "": { "lo": N, "hi": N }, ... }` - and `objective` is `"minimize"` or `"maximize"` (case-insensitive) - - Response (200): `{ paramValues, achievedMetricMean, converged, iterations }` - - 400: missing/invalid required fields, searchRange lo >= hi, unknown objective string - - 503: engine not enabled + The m-E23-01 bisection survey (agent `a07d52c12dcaf3538`, evidence at `work/epics/E-23-model-validation-consolidation/m-E23-01-schema-alignment-tracking.md` → "AC4 canary re-run, full-shape audit (2026-04-24)") surfaced 16 distinct divergence shapes and 726 validator errors across the twelve templates. Most of those divergences dissolve under unification; a few (the `ParseScalar` defect, the `outputs[].as` semantic, the `nodes[].source` forward contract) are substantive decisions that survive. 
- **In scope:** - - `src/FlowTime.TimeMachine/Sweep/OptimizeObjective.cs` - - `src/FlowTime.TimeMachine/Sweep/SearchRange.cs` - - `src/FlowTime.TimeMachine/Sweep/OptimizeSpec.cs` - - `src/FlowTime.TimeMachine/Sweep/OptimizeResult.cs` - - `src/FlowTime.TimeMachine/Sweep/Optimizer.cs` - - `src/FlowTime.API/Endpoints/OptimizeEndpoints.cs` - - DI registration in `Program.cs` - - Unit tests: `tests/FlowTime.TimeMachine.Tests/Sweep/` - - API tests: `tests/FlowTime.Api.Tests/OptimizeEndpointsTests.cs` - - Architecture doc update: `docs/architecture/time-machine-analysis-modes.md` + ### Option A was rejected; Option E was ratified - **Out of scope:** - - Constraint handling (utilization < 0.8 etc.) — future milestone - - Bayesian optimization — future milestone - - Parallel evaluation of simplex vertices - - Gradient-based methods (sensitivity-driven descent) + The initial epic draft proposed **Option A**: keep the two types, introduce a projection layer (`SimModelArtifact.ToEngineSubmission()`), let the schema describe only the submission payload. Option A preserves the accidental split — it makes the leak a designed feature and accepts the ongoing duplication tax for every future field addition. The user rejected Option A in favor of **Option E: unify**. One type. One schema. One validator. `SimModelArtifact` deleted outright. Sim builds the unified type directly; the Engine parses it. - ## Algorithm + Unification is the foundationally-right answer because: - Nelder-Mead simplex (N parameters → N+1 vertices): + 1. **The split has no defender.** No code is protected by `SimModelArtifact`'s existence; no external consumer has a reason to care which type shape is on the wire. The only artefact the split produces is drift. + 2. **Forward-only is cheap.** The biggest historical cost of unification (migration of stored bundles) is removed by the user's forward-only stance: existing bundle YAML is obsolete; Sim regenerates from templates going forward. + 3. 
**`Template` stays distinct.** Authoring-time templates (pre-substitution, parameters, Liquid expressions) are genuinely a different contract. `Template` remains its own type with its own schema (`template.schema.json`). The merge target is `SimModelArtifact → unified model`, not `Template → unified model`. - ``` - Coefficients: α=1.0 (reflect), γ=2.0 (expand), ρ=0.5 (contract), σ=0.5 (shrink) - Objective f(v) = metricMean(v) for Minimize - f(v) = -metricMean(v) for Maximize (internally always minimize f) + ### Truth Discipline precedent - 1. Build initial N+1 simplex: - v[0] = midpoint of all search ranges - v[i] = v[0] with param[i-1] shifted +5% of its range (clamped) + The 2026-04-23 Truth Discipline guard (*"'API stability' does not mean 'keep old functions around.'"*) already rejected delegation shims in E-23. Option E extends the same discipline to types: once `SimModelArtifact` has no callers outside its own graph, it is deleted in the same change. No coexistence window, no "temporary" dual-type state, no projection layer that becomes permanent. - 2. Evaluate f at each vertex. + ## Scope - 3. Sort vertices so v[0] is best (lowest f) and v[N] is worst. + ### In Scope - 4. Check pre-loop convergence: if |f[N] - f[0]| < tolerance → Converged(0 iterations) + - **Unify the post-substitution model type.** A single unified type replaces `SimModelArtifact` entirely. Exact home (`FlowTime.Core`, `FlowTime.Contracts`, or a new `FlowTime.Contracts.Model` namespace) is decided by m-E24-01; the constraint is "one type, discoverable from both Sim and the Engine." + - **`Template` remains distinct.** Authoring templates (pre-substitution) stay at their current shape and schema (`template.schema.json`). Unification applies only to the post-substitution model. + - **Sim builds the unified type directly.** `SimModelBuilder` is rewritten to emit the unified type. 
`SimNode`, `SimOutput`, `SimProvenance`, `SimTraffic`, `SimArrival`, `SimArrivalPattern` are deleted in the same change — Truth Discipline: no callers left, delete. + - **Engine intake reads the unified type.** `ModelDefinition` either becomes the unified type or is replaced by it. Whichever path, exactly one type is used on both sides. + - **One schema: `docs/schemas/model.schema.yaml`.** camelCase throughout. `provenance` is declared as a first-class block inside the unified schema with camelCase keys. Fields emitted by Sim today with no Engine consumer (`window`, `generator`, top-level `metadata`, top-level `mode`) are **dropped from emission** — they are leaked authoring state, not schema pass-through candidates. Any genuinely traceability-worthy field moves into the `provenance` block. + - **One validator: `ModelSchemaValidator`.** Validates both Sim's emitted output and the Engine's intake. Sim's `TemplateSchemaValidator` continues to validate the *authoring template* (pre-substitution) — that is genuinely a different contract — but the Sim→Engine model output uses the same validator the Engine uses. + - **`ParseScalar` scalar-style fix** in both `ModelSchemaValidator` and `TemplateSchemaValidator`. Unification does not eliminate this defect; it is an independent validator bug that lands in the same epic for convergence. Mirrored fix, mirrored tests. The test-side coercion helper in `tests/FlowTime.TimeMachine.Tests/TemplateSchemaValidationTests.cs:134-169` is updated or replaced so tests do not silently mask the fix. + - **Canary promoted to hard assertion** at epic close. `TemplateWarningSurveyTests` fails the build on non-zero `val-err`. + - **Forward-only discipline.** No migration of stored bundles. No compatibility reader for the old two-type YAML shape. Any code path that reads the old shape is identified and either regenerates from template or is deleted if nothing current needs it. 
+ - **Docs alignment.** `docs/schemas/README.md` rewritten to reflect the unified reality. `docs/architecture/` entries that describe the "two schemas" world are corrected or archived. + + ### Out of Scope (firm) + + - **`ModelValidator` deletion.** Stays with E-23 (m-E23-03, post-E-24 resume). E-24 makes the deletion mechanical; it does not perform it. + - **Bundle / run migration.** Forward-only per user direction. Existing stored bundles are obsolete. Sim regenerates from templates. No migration code is written; the forward-only stance is documented in the spec and any residual migration helper that surfaces during m-E24-02 is deleted rather than kept. + - **UI work.** No Blazor changes. No Svelte changes. `nodes[].metadata` remains a load-bearing `GraphService` concern — its schema declaration is already correct on the Engine side and stays. + - **New validator features.** No line/column mapping, no LSP integration, no compile-time rule extraction, no per-field suggestions. + - **`Template` redesign.** Authoring-time template schema and type remain unchanged. + - **E-15 Telemetry Ingestion runtime.** E-24 decides the `nodes[].source` contract under unification so E-15 starts aligned; E-15's actual implementation is not part of this epic. + - **Deprecated schema fields.** No reintroduction of `binMinutes` or other previously-retired shapes. Forward-only. + - **External-consumer compatibility windows.** No camelCase aliases in provenance. No dual-field acceptance. No migration mode. The schema changes in one cut. - 5. For iteration = 1 to MaxIterations: - a. Compute centroid c of best N vertices (v[0]..v[N-1]). + ## Constraints + + - **Exactly one type, one schema, one validator.** Any deviation from "exactly one" needs a written design justification in m-E24-01's tracking doc. + - **Forward-only.** No compatibility reader, no "accept either shape" branch, no version-detection logic. If something changes shape, runs and fixtures are regenerated. 
+ - **"Right, not easy" applied per field.** For every field currently on `SimModelArtifact`, m-E24-01 records: current emission site, whether any Engine consumer reads it, the decision (keep + declare, drop entirely, or move into `provenance`), and the rationale. The "easy option" is named as rejected; the "foundationally-right option" is chosen. + - **Truth Discipline guard at the type layer.** No compatibility shim. No temporary accommodation. When a type or function has no callers after the refactor, it is deleted in the same change — not retained as a dead alternative entry point. + - **Mirrored fixes.** The `ParseScalar` defect has two homes (`ModelSchemaValidator`, `TemplateSchemaValidator`). Fix both in the same milestone or drift re-opens. Same constraint applies to the test-side coercion helper. + - **camelCase everywhere.** No snake_case survives in `docs/schemas/model.schema.yaml` at epic close. `grep -rn "generated_at\|model_id\|template_id\|template_version" docs/schemas/model.schema.yaml` returns zero hits. + - **Byte-identical `POST /v1/run` success** for every currently-valid template (after Sim regenerates its template outputs under the unified type). Error responses may have different phrasing; HTTP status codes and JSON shape remain unchanged. + + ## Success Criteria + + - [ ] `SimModelArtifact` is deleted. `grep -rn "SimModelArtifact" --include="*.cs"` returns zero hits. + - [ ] `SimNode`, `SimOutput`, `SimProvenance`, `SimTraffic`, `SimArrival`, `SimArrivalPattern` are deleted. A sibling `grep` confirms. + - [ ] One C# type represents the post-substitution model. `SimModelBuilder` emits it directly; the Engine parses it directly. `ModelDefinition` either *is* the unified type or has been replaced by it. + - [ ] `docs/schemas/model.schema.yaml` declares exactly one model shape including its `provenance` block. camelCase throughout. 
`grep -rn "generated_at\|model_id\|template_id\|template_version" docs/schemas/model.schema.yaml` returns zero hits. + - [ ] `ModelSchemaValidator` is the single runtime validator for post-substitution model YAML. `TemplateSchemaValidator` is limited to pre-substitution templates (`template.schema.json`). + - [ ] `TemplateWarningSurveyTests.Survey_Templates_For_Warnings` reports `val-err=0` across all twelve templates at `ValidationTier.Analyse`. The test is a hard `Assert` — non-zero `val-err` fails the build. + - [ ] `POST /v1/run` returns byte-identical success responses for every currently-valid template in `templates/*.yaml` with default parameters. Error responses may have different phrasing for currently-invalid models; HTTP status codes and response JSON shape remain unchanged. + - [ ] `ModelSchemaValidator.ParseScalar` and `TemplateSchemaValidator.ParseScalar` both honor `YamlScalarNode.Style`. Dedicated scalar-style tests cover `Plain` / `SingleQuoted` / `DoubleQuoted` variants for scalars that look like bool / int / double. The test-side coercion helper in `tests/FlowTime.TimeMachine.Tests/TemplateSchemaValidationTests.cs` is updated or replaced so tests do not mask the fix. + - [ ] Full `.NET` test suite passes: `dotnet test FlowTime.sln` is green with zero regressions. Any pre-existing flakes remain the same flakes — E-24 introduces no new timing sensitivity. + - [ ] `docs/schemas/README.md` accurately describes one-schema reality. Any `docs/architecture/` entry that described a two-schema world is corrected or archived. + - [ ] E-23 can resume cleanly. `ModelSchemaValidator` and the unified type agree by construction; m-E23-02 (call-site migration) and m-E23-03 (`ModelValidator` delete) become byte-trivial mechanical cleanup. + + ## Open Questions + + The six design questions below are resolved by m-E24-01 as written design decisions before any other milestone starts. Each answer goes into the m-E24-01 tracking doc with its rejected alternative named. 
+ + | Question | Blocking? | Resolution path | + |----------|-----------|-----------------| + | Where does the unified type live? `FlowTime.Core`, `FlowTime.Contracts`, or a new `FlowTime.Contracts.Model` namespace? | Yes — gates m-E24-02 | m-E24-01 decides. Default leaning: `FlowTime.Contracts` — it is the shared-contracts project and Sim already references it. Decision is recorded with the rejected options. | + | Does `ModelDefinition` become the unified type, or is it replaced by a new type? `ModelDefinition` has `wipLimit` fields `SimModelArtifact` lacks; `SimModelArtifact` has Sim-only fields that are dropping. The unified type is their union minus leaked state. | Yes — gates m-E24-02 | m-E24-01 decides. Default leaning: extend `ModelDefinition` in place and rename if the expanded scope warrants. Decision captures the name, the namespace, and any members that need introduction or rename. | + | `outputs[].as` semantics under unification: optional (reflecting that auto-added outputs need no filename) or required (every output declares a filename)? Today `EnsureSemanticsOutputs` auto-adds entries without `as`, producing 366 of 495 non-defect validator errors. | Yes — gates m-E24-02 and m-E24-03 | m-E24-01 decides. Two clean options: (a) `as` is optional and auto-added outputs omit it; (b) `as` is required and the emitter synthesizes a default (e.g. `as = "{id}.csv"`) for auto-added outputs. The decision cites the Engine consumer (if any) that reads `as`. | + | `nodes[].source` forward contract: drop entirely until E-15 lands and defines it, or declare as optional in the unified schema now? Sim currently emits empty-string defaults that YamlDotNet does not auto-omit. | No — scoped to m-E24-01 | m-E24-01 decides. Default leaning: drop — E-24's discipline is "schema declares what has a consumer." E-15 declares the field when it builds the consumer. If dropped, Sim omits emission via `OmitDefaults` or `ShouldSerialize` guard. 
| + | Provenance shape in the unified schema: flat or nested? `SimProvenance` nests `parameters` as a sub-dictionary. Engine's `ProvenanceMetadata` is flat. Unification requires one shape. | Yes — gates m-E24-03 | m-E24-01 decides. Default leaning: nested `parameters` sub-map (matches Sim's current shape and preserves author-intent grouping). Decision cites both sides' current shape and the reader adjustments required. | + | Canary: integration test against live Engine API (current shape) or fast unit-style check (`ModelSchemaValidator.Validate(...)` in-process)? Under unification, the unit-level variant becomes attractive because render-and-validate is fully in-process. | No — scoped to m-E24-05 | m-E24-05 decides. Default: keep the integration test; add a parallel fast unit variant if CI cost becomes visible. The hard-assertion gate in m-E24-05 is whichever variant runs in CI. | + + ## Risks (optional) + + | Risk | Impact | Mitigation | + |------|--------|------------| + | The unified type touches more Engine-side code than anticipated (every consumer of `ModelDefinition` + every producer of `SimModelArtifact`). | Medium | m-E24-01's inventory enumerates every call site before m-E24-02 plans the type-unification change. If the scope is large, m-E24-02 subdivides deliberately — the split plan is written before work begins, not after. | + | Forward-only means existing run fixtures in `tests/` and sample bundles in `docs/samples/` produce the obsolete two-type YAML shape and stop validating. | Medium | Forward-only regeneration: m-E24-02 regenerates every fixture and sample under the unified shape in the same milestone. No compatibility reader survives. Anything that cannot be regenerated is deleted rather than patched. | + | `ParseScalar` fix regresses a test that was accidentally relying on coercion (e.g. a quoted integer). | Low | m-E24-04 runs the full solution suite immediately after the fix. 
Any regression is investigated case-by-case — correct response is to update the test to author the scalar correctly. `tests/FlowTime.TimeMachine.Tests/TemplateSchemaValidationTests.cs` is the known hotspot; its coercion helper is updated in the same milestone as the validator fix. | + | `outputs[].as` decision forces a cascade: if optional, every Engine consumer that reads `as` is audited; if required with synthesized default, the filename convention must be stable and discoverable. | Medium | m-E24-01 decides before any code lands. The chosen side names the convention (if synthesize) or the consumer audit (if drop required). m-E24-03 lands the schema edit; m-E24-02 lands any emitter synthesis. | + | External consumers (VSCode YAML plugin, docs tooling) read `docs/schemas/model.schema.yaml` directly and break on the snake_case → camelCase rename. | Low | The schema is consumed internally. Sibling checkouts are read-only per project rule; a quick `grep` in `/workspaces/flowtime-sim-vnext` for `generated_at` / `model_id` surfaces any drift before m-E24-03 lands the rename. | + + ## Milestones + + Sequencing: design decisions first (m-E24-01 is doc-only), then type unification (m-E24-02 is the largest architectural change), then schema consolidation (m-E24-03 describes what m-E24-02 built), then validator defect (m-E24-04 is independent and mirrored), then canary close (m-E24-05 makes the zero assertion permanent and flips E-23 to ready-to-resume). m-E24-04 can be parallelized with m-E24-02 / m-E24-03 if desired, but the conservative default is serial after m-E24-03 so the canary run in m-E24-05 operates on a fully-converged stack. + + - [m-E24-01-inventory-and-design-decisions](./m-E24-01-inventory-and-design-decisions.md) — **complete (2026-04-25, commit `c43e8c0`).** Doc-only milestone. 
Every field on `SimModelArtifact` and `ModelDefinition` got a recorded decision (keep / drop / move); the six open questions answered (Q1–Q6 + A1–A5); unified type home (`FlowTime.Contracts`), name (`ModelDto`), and provenance shape (nested, 7 fields, camelCase) named. No code, no schema changes. · depends on: — + - [m-E24-02-unify-model-type](./m-E24-02-unify-model-type.md) — **complete (2026-04-25, commits `131dd35` / `c4064bf` / `3806097` / `dece22f`).** All 14 ACs landed. `ModelDto` + `ProvenanceDto` introduced in `FlowTime.Contracts`; `SimModelBuilder` rewritten to emit `ModelDto` directly; Engine intake routed through `ModelDto`; `SimModelArtifact` + 6 satellites deleted; leaked-state fields (`window`, `generator`, top-level `metadata`, top-level `mode`) dropped from emission; fixtures regenerated; YamlDotNet bumped to 17.0.1 across 6 projects. AC8 residual histogram: 587 errors → m-E24-03, 89 → m-E24-04, 0 → m-E24-02 emitter regressions. 1,755 tests passing. Three follow-up gaps filed (`work/gaps.md`): `ProvenanceEmbedder`, `GridDefinition.StartTimeUtc` rename, Template-layer `Legacy*` aliases — none on critical path. · depends on: m-E24-01 + - [m-E24-03-schema-unification](./m-E24-03-schema-unification.md) — **complete (2026-04-25, commit `d6a3263`).** All 14 ACs landed. `docs/schemas/model.schema.yaml` rewritten top-to-bottom (927 → 1059 lines) against the unified `ModelDto`; nested 7-field camelCase `provenance` block; `grid.start` declared optional; `nodes[].metadata` declared; `nodes[].source` absent (Q4); `outputs[].as` made optional (Q3); leaked-state root keys absent; every property carries DTO-source + consumer citations. `docs/schemas/README.md` rewritten. `docs/architecture/run-provenance.md` updated for camelCase + 7-field shape. 
Canary: 676 → 231 errors at Analyse — schema-rewrite shapes 587 → **0**, ParseScalar residuals 89 → 231 (newly-reachable inner errors after `nodes[].metadata` declaration; same defect class — m-E24-04 owns), 0 emitter regressions. 1,750 tests passing. · depends on: m-E24-02 + - [m-E24-04-parser-validator-scalar-style-fix](./m-E24-04-parser-validator-scalar-style-fix.md) — **complete (2026-04-25, commit `a7c984f`).** All 9 ACs landed. `ParseScalar` honors `YamlScalarNode.Style` in both `ModelSchemaValidator` and `TemplateSchemaValidator` (mirrored guard); test-side coercion helper in `TemplateSchemaValidationTests.cs` rewritten to delegate through the same rule. Round-trip closure required the emitter half: sibling `QuotedAmbiguousStringEmitter` plugged into `TemplateService.CreateYamlSerializer` next to `FlowSequenceEventEmitter`, forcing `ScalarStyle.DoubleQuoted` on string source values whose literal would re-resolve as YAML 1.2 null/bool/int/float (D-m-E24-04-03 / ADR-E-24-05). Canary: **231 → 0 errors** at `ValidationTier.Analyse` across 12 templates; histogram empty; step-4 emitter regressions: 0. **+64 new tests** (18 Engine ParseScalar + 17 Sim ParseScalar + 29 emitter round-trip). Full `.NET` suite green. · depends on: m-E24-03 (conservative; could have run parallel to m-E24-02 / m-E24-03) + - [m-E24-05-canary-green-hard-assertion](./m-E24-05-canary-green-hard-assertion.md) — **complete (2026-04-25)**. All 8 ACs landed. Canary `TemplateWarningSurveyTests.Survey_Templates_For_Warnings` rewritten so that the in-process tier-3 (`Analyse`) loop runs unconditionally and asserts `val-err == 0` (and zero render failures); the live-API run-warn collection becomes a strictly evidence-only phase 2 that gracefully skips on `/v1/healthz` probe failure. 
Regression-catching verified by transient un-wire of `QuotedAmbiguousStringEmitter` — the canary fails with 231 errors across all 12 templates exactly matching the m-E24-04 pre-fix histogram, then returns to green on restore. Full `.NET` suite green (1,749 passed / 9 skipped pre-existing / 0 failed). All three AC4 grep audits clean. `docs/schemas/model.schema.md` provenance section rewritten to canonical 7-field nested camelCase form. E-23 status flipped from `paused` to `ready-to-resume`. `D-2026-04-25-038` logs E-24 close. **E-24 epic now closed; all five milestones complete.** · depends on: m-E24-04 + + ## ADRs + + - **ADR-E-24-01 — Unify the post-substitution model type.** `SimModelArtifact` and `ModelDefinition` collapse to one type. `Template` (authoring-time, pre-substitution) stays distinct. Rationale: the split is accidental drift per commit history, has no type boundary, and has compounded a duplication tax since October 2025. Rejected alternative (Option A): keep both types with a projection layer — preserves the accidental leak as a designed feature and accepts the ongoing duplication tax. Ratified in m-E24-01. + - **ADR-E-24-02 — Forward-only regeneration.** No compatibility reader for the old two-type YAML shape. Existing stored bundles are obsolete; Sim regenerates from templates going forward. Rationale: the biggest historical cost of unification (bundle migration) is removed by forward-only; compatibility readers are the shape the Truth Discipline guard explicitly rejects. Rejected alternative: migration window with dual-shape acceptance. Ratified in m-E24-01. + - **ADR-E-24-03 — Schema declares only consumed fields.** Fields emitted by Sim with no Engine consumer are dropped from emission, not declared as schema pass-through. Rationale: Truth Discipline — the schema is the contract; the contract declares what exists, not what is tolerated. 
Rejected alternative: declare `window` / `mode` / `generator` / top-level `metadata` in the schema as optional pass-through. Ratified in m-E24-01. + - **ADR-E-24-04 — `ScalarStyle.Plain` gates numeric / boolean coercion in `ParseScalar`.** YAML 1.2 resolution requires honoring scalar style; quoted strings must short-circuit to string. Rejected alternative: tag-based inference (`!!str`, `!!int`) — YAML's resolver already embeds the distinction in `Style`; a second inference layer is redundant. Ratified in m-E24-04 (2026-04-25, commit `a7c984f`). + - **ADR-E-24-05 — `QuotedAmbiguousStringEmitter` (round-trip symmetry on the emitter side).** Sibling `IEventEmitter` plugged into `TemplateService.CreateYamlSerializer` next to `FlowSequenceEventEmitter`. Activates on `eventInfo.Source.Type == typeof(string)` and forces `ScalarStyle.DoubleQuoted` when the literal text would re-resolve as a YAML 1.2 plain non-string scalar (null forms, bool, int, float). The ambiguity classifier mirrors the validator's plain-scalar coercion attempt-order exactly; the two halves of the round-trip pair are symmetric by construction. Rejected alternatives: per-field `[YamlMember]` annotations (no surface for `Dictionary` values, rots when new DTO fields ship), post-hoc YAML rewriting (brittle regex on serializer output), `coerceQuotedLiterals` opt-in flag (forbidden by spec — "the correct behavior is the only behavior"), wider emitter quoting all strings (corrupts legitimate plain-text wire shape). Prior art: `FlowSequenceEventEmitter` (commit `c57b597`, 2025-10-24) at the sequence-style boundary; this is the symmetric scalar-style sibling. Ratified in m-E24-04 (2026-04-25, commit `a7c984f`). + + ## References + + ### Upstream input + + - **`SimModelArtifact` purpose investigation (agent `a5aa3dfe26394aff5`):** established that the split was accidental, not designed. 
Commit `ce9ec9e` (Oct 2025) introduced `SimModelArtifact` shaped to the template schema, not `model.schema.yaml` which predated it. `docs/schemas/README.md:34` claimed parity the code does not deliver. No C# type boundary exists — the only interface between Sim and Engine is a YAML string, and that YAML string fails the Engine's own schema validation in every shipped template today. + - **Survey agent `a07d52c12dcaf3538`:** produced the 16-row divergence table and the full-shape audit (726 total validator errors). Captured in `work/epics/E-23-model-validation-consolidation/m-E23-01-schema-alignment-tracking.md` → "AC4 canary re-run, full-shape audit (2026-04-24)". + - **m-E23-01 tracking doc:** held on branch `milestone/m-E23-01-schema-alignment` as stashed input material for m-E24-01. Contains the rule audit, the ParseScalar defect prior-art investigation, and the full-shape audit that motivated this epic. + - **Uncommitted schema edits on `milestone/m-E23-01-schema-alignment`:** three additions to `docs/schemas/model.schema.yaml` (`grid.start`, `nodes[].metadata`, `nodes[].source`) — treated as input material for m-E24-01's design-decision review, not as commitments. Under unification, `grid.start` probably stands (Engine consumer confirmed); `nodes[].metadata` stays as a load-bearing `GraphService` concern; `nodes[].source` probably reverts (no consumer until E-15). + + ### Epic and decision context + + - **E-23 Model Validation Consolidation:** `work/epics/E-23-model-validation-consolidation/spec.md` — paused at m-E23-01 pending E-24. m-E23-02 and m-E23-03 become mechanical once E-24 closes. + - **D-2026-04-24-035:** E-23 ratification — established the "delete, do not delegate" discipline that E-24 extends to types. + - **D-2026-04-24-036:** E-23 pause and E-24 creation. Within E-24 planning, **Option E (unify)** was ratified over **Option A (two types with projection)**. 
+ + ### Truth Discipline + + - `.ai-repo/rules/project.md` → Truth Discipline Guards — precedence hierarchy, camelCase rule, "API stability ≠ keep old functions around" (2026-04-23), "Do not restate a canonical contract in many places from memory", "Do not let adapter/UI projection become the only place where semantics exist", "Do not keep 'temporary' compatibility shims without explicit deletion criteria." + + ### Source-code pointers + + - `src/FlowTime.Sim.Core/Templates/SimModelArtifact.cs` — slated for deletion (and its satellite types `SimNode`, `SimOutput`, `SimProvenance`, `SimTraffic`, `SimArrival`, `SimArrivalPattern`) + - `src/FlowTime.Sim.Core/Templates/SimModelBuilder.cs` — the Sim emitter; rewritten to produce the unified type directly + - `src/FlowTime.Core/Models/ModelDefinition.cs` — Engine-side model type; either becomes the unified type or is replaced + - `src/FlowTime.Core/Models/ModelParser.cs` — Engine intake; switches to parse the unified type directly + - `src/FlowTime.Sim.Service/Program.cs:1079` — Sim wire-serialization site; produces the unified type + - `src/FlowTime.TimeMachine/Orchestration/RunOrchestrationService.cs:627` — Engine-side intake; parses the unified type + - `src/FlowTime.Core/Models/ModelSchemaValidator.cs:222-246` — `ParseScalar` defect site + - `src/FlowTime.Sim.Core/Templates/TemplateSchemaValidator.cs:173-197` — mirrored `ParseScalar` defect + - `src/FlowTime.Core/Models/ParallelismReference.cs:97` — prior art for honoring `ScalarStyle.Plain` + - `tests/FlowTime.TimeMachine.Tests/TemplateSchemaValidationTests.cs:134-169` — test-side coercion helper that masks the defect + - `tests/FlowTime.Integration.Tests/TemplateWarningSurveyTests.cs` — the canary (committed to `main` per D-2026-04-24-035) + - `docs/schemas/model.schema.yaml` — the schema under unification + - `docs/schemas/README.md:34` — the parity claim the code does not deliver; rewritten at m-E24-03 + + ### Downstream impact + + - **E-23 resume:** 
`ModelSchemaValidator` and the unified type agree by construction once E-24 closes. m-E23-02 call-site migration becomes a mechanical `sed`-scale change; m-E23-03 `ModelValidator` delete becomes a one-file removal with its dedicated tests. + - **m-E21-07 Validation Surface (E-21):** consumes the single consolidated `ModelSchemaValidator` once both E-24 and E-23 close. + - **E-15 Telemetry Ingestion:** starts from a ratified `nodes[].source` forward contract (m-E24-01's decision) rather than inheriting a forward-declared placeholder with no consumer. + - kind: milestone + id: M-001 + frontmatter: + title: Parameterized Evaluation + status: done + parent: E-18 + body: | + ## Goal + + The Rust engine can compile a model once and re-evaluate it many times with different parameter values without recompiling. This is the critical primitive that every downstream use case builds on — interactive what-if, parameter sweeps, optimization, sensitivity analysis. The Plan becomes a reusable program; parameters are its inputs. + + ## Context + + The current `compile(model) → Plan` bakes all constants into `Op::Const { out, values }` at compile time. To change an arrival rate from 10 to 15, you must recompile the entire model. Compilation is O(nodes) with topological sorting, expression parsing, and constraint resolution — unnecessary work when only a scalar value changed. + + After this milestone, the Plan carries a `ParamTable` that lists every user-visible constant. `evaluate_with_params(plan, overrides)` writes overrides into the state matrix before the eval loop, then runs the same bin-major evaluation. The Plan is immutable and shareable; only the parameter values change. + + ### Where constants come from in the compiler + + The compiler creates `Op::Const` from seven sources: + + | Source | Example | Parameter? 
|
 + |--------|---------|-----------|
 + | `kind: const` node values | `values: [10, 20, 30]` | Yes — primary user input |
 + | Traffic arrival `ratePerBin` | `ratePerBin: 20` | Yes — class arrival rate |
 + | PMF expected value | `pmf: { values, probabilities }` | Yes — derived from PMF definition |
 + | WIP limit scalar | `wipLimit: 50` | Yes — topology constraint |
 + | Queue initial condition | `initialCondition: { queueDepth: 5 }` | Yes — initial state |
 + | Expression literal | `8` in `MIN(arrivals, 8)` | Yes — inline constant in formula |
 + | Compiler-generated temps | Internal proportional alloc, router weight columns | No — derived, not user-visible |
 + 
 + The distinction: a parameter is a constant that traces back to a user-authored value in the model YAML. Compiler-generated intermediate constants (temp columns, normalized weights) are NOT parameters.
 + 
 + ## Acceptance Criteria
 + 
 + 1. **AC-1: ParamTable struct.** `Plan` gains a `params: ParamTable` field. `ParamTable` contains a `Vec` of parameter entries, where each entry has:
 +    - `id: String` — stable identifier matching the model YAML source (e.g., `"arrivals"` for a const node, `"arrivals.Order"` for a traffic class rate, `"Queue.wipLimit"` for a topology WIP limit)
 +    - `column: usize` — the column index in the state matrix this parameter fills
 +    - `default: ParamValue` — original value from the model (`Scalar(f64)` for uniform, `Vector(Vec<f64>)` for per-bin)
 +    - `kind: ParamKind` — `ConstNode`, `ArrivalRate`, `WipLimit`, `InitialCondition`, `ExprLiteral`
 + 
 + 2. 
**AC-2: Compiler populates ParamTable.** The compiler registers parameters for:
 +    - Every `kind: const` node (id = node id, value from `values` field)
 +    - Every `traffic.arrivals` entry with `ratePerBin` (id = `"{nodeId}.{classId}"`)
 +    - Every topology node with scalar `wipLimit` (id = `"{topoNodeId}.wipLimit"`)
 +    - Every topology node with `initialCondition.queueDepth` (id = `"{topoNodeId}.init"`)
 +    - Expression literals are NOT parameters (they're inline formula constants, not model inputs)
 + 
 + 3. **AC-3: `evaluate_with_params` function.** New public function:
 +    ```rust
 +    pub fn evaluate_with_params(plan: &Plan, overrides: &[(String, ParamValue)]) -> Vec<f64>
 +    ```
 +    - Applies overrides to matching param IDs before the eval loop
 +    - `Scalar(v)` fills all bins with `v`; `Vector(vs)` writes per-bin values
 +    - Unmatched override IDs are ignored (forward-compatible)
 +    - Unknown param IDs do not cause errors
 +    - Returns the filled state matrix (same shape as `evaluate`)
 + 
 + 4. **AC-4: Equivalence.** `evaluate_with_params(plan, &[])` (no overrides) produces identical results to `evaluate(plan)`. A Rust test asserts bitwise equality.
 + 
 + 5. **AC-5: Full post-eval pipeline.** `eval_model` is refactored to accept optional overrides. When overrides are provided, it calls `evaluate_with_params` instead of `evaluate`, then runs the same post-eval pipeline: class decomposition normalization, proportional allocation propagation, edge series computation, analysis warnings. A new public entry point:
 +    ```rust
 +    pub fn eval_model_with_params(
 +        model: &ModelDefinition,
 +        overrides: &[(String, ParamValue)]
 +    ) -> Result
 +    ```
 + 
 + 6. **AC-6: Parameter override affects downstream.** Overriding a const node's value propagates through all downstream expressions, queue recurrences, per-class decomposition, and edge series. Test: override `arrivals` from 10 to 20 → verify `served`, `queue_depth`, per-class series, and edge flow all change correctly.
 + 
 + 7. 
**AC-7: Class arrival rate override.** Overriding a class arrival rate (e.g., `"arrivals.Order"` from 6 to 12) changes the class fraction and propagates through normalization and downstream decomposition. Test: change one class rate, verify normalization invariant still holds. + + 8. **AC-8: WIP limit override.** Overriding `"{topoNodeId}.wipLimit"` changes the queue's WIP limit and affects overflow. Test: lower WIP limit → verify overflow increases. + + 9. **AC-9: Parameter schema extraction.** New public function: + ```rust + pub fn extract_params(plan: &Plan) -> &ParamTable + ``` + Returns the plan's parameter table. Clients use this to discover what can be tweaked, with IDs, kinds, and defaults. This is what the UI will use to auto-generate controls. + + 10. **AC-10: Compile-once, eval-many pattern.** Demonstrate the pattern with a Rust test that compiles once, evaluates 10 times with different arrival rates, and verifies each result is independent (no state leakage between evaluations). Measure that subsequent evals are faster than the first (no recompilation). 
+ + ## Out of Scope + + - Session management or persistent process (m-E18-02) + - Streaming protocol or MessagePack framing (m-E18-02) + - CLI interface changes (m-E18-02) + - UI parameter controls (m-E17-02) + - Parameter bounds, display names, or template metadata enrichment (future — the parameter table carries IDs and defaults only) + - Expression literal parameterization (inline `8` in `MIN(arrivals, 8)` stays baked — parameterizing expression constants requires expression-tree rewriting, which is a different problem) + - Structural model changes (adding/removing nodes requires recompilation — by design) + + ## Key References + + - `engine/core/src/plan.rs` — Plan struct, Op enum, ColumnMap + - `engine/core/src/eval.rs` — `evaluate()` function, bin-major loop + - `engine/core/src/compiler.rs` — `compile()`, `eval_model()`, all `Op::Const` emission sites + - `docs/architecture/headless-engine-architecture.md` — overall architecture + - `work/epics/E-18-headless-pipeline-and-optimization/milestone-plan-v2.md` — milestone sequence + - kind: milestone + id: M-002 + frontmatter: + title: Engine Session + Streaming Protocol + status: done + parent: E-18 + depends_on: + - M-001 + body: | + ## Goal + + The Rust engine runs as a persistent process that accepts commands and streams results. `flowtime-engine session` reads length-prefixed MessagePack messages from stdin, holds a compiled Plan in memory, and writes responses to stdout. This is the headless pipeline component — the same protocol works over stdin/stdout (CLI pipes) and WebSocket (UI, via m-E17-01 proxy). + + ## Context + + After m-E18-01, the engine can compile once and evaluate many times with different parameters via `evaluate_with_params(plan, overrides)`. But every invocation is still a batch subprocess: spawn → parse YAML → compile → evaluate → write files → exit. The overhead of process spawn + file I/O dominates latency (100-500ms). 
For interactive use, we need a persistent process that holds the compiled Plan and responds to parameter changes in microseconds.
 + 
 + The session is a stateful loop:
 + 
 + ```
 + stdin → [compile] → hold Plan → [eval overrides] → stdout
 +        → [eval overrides] → stdout
 +        → [eval overrides] → stdout
 +        → EOF → exit
 + ```
 + 
 + ### Why MessagePack
 + 
 + - **Binary f64 arrays.** A 1,000-bin series is 8KB as binary vs ~8KB+ as JSON text (with formatting overhead and parse cost). MessagePack encodes `Vec<f64>` as a binary ext type — zero parsing, memcpy-fast.
 + - **Length-prefixed framing.** 4-byte big-endian length prefix before each message. No newline ambiguity, no incomplete-line bugs.
 + - **Cross-language.** Native libraries: Rust (`rmp-serde`), JavaScript (`@msgpack/msgpack`), C# (`MessagePack-CSharp`), Python (`msgpack`).
 + - **Pipe-friendly.** Works over stdin/stdout for CLI composition, over WebSocket for UI.
 + 
 + ## Acceptance Criteria
 + 
 + 1. **AC-1: `session` CLI command.** `flowtime-engine session` enters a persistent loop reading from stdin and writing to stdout. No file arguments required. Exits cleanly on stdin EOF or SIGTERM.
 + 
 + 2. **AC-2: Length-prefixed MessagePack framing.** Each message is `[4-byte big-endian length][MessagePack payload]`. Both requests (stdin) and responses (stdout) use this framing. Stderr is reserved for human-readable log messages (not protocol).
 + 
 + 3. **AC-3: `compile` command.** Request: `{ method: "compile", params: { yaml: "<model yaml>" } }`. Response: `{ result: { params: [{ id, kind, default }], series: [{ id, bins, values }], bins, grid } }`. Compiles the model, holds the Plan in session state, evaluates with defaults, returns the parameter schema and initial series.
 + 
 + 4. **AC-4: `eval` command.** Request: `{ method: "eval", params: { overrides: { "arrivals": 15.0, "Queue.wipLimit": 30.0 } } }`. Response: `{ result: { series: { "arrivals": <f64[]>, "served": <f64[]>, ... }, elapsed_us } }`. Re-evaluates with overrides, returns updated series. 
Must not recompile. Series values are MessagePack-encoded arrays of floats (binary wire format, not JSON text arrays).
 + 
 + 5. **AC-5: `get_params` command.** Request: `{ method: "get_params" }`. Response: `{ result: { params: [{ id, kind, default }] } }`. Returns the current parameter table from the compiled Plan.
 + 
 + 6. **AC-6: `get_series` command.** Request: `{ method: "get_series", params: { names: ["arrivals", "served"] } }`. Response: `{ result: { series: { "arrivals": <f64[]>, "served": <f64[]> } } }`. Returns specific series from the current evaluation state. If no names provided, returns all non-internal series.
 + 
 + 7. **AC-7: Error handling.** Invalid requests return `{ error: { code, message } }`. Specific errors: `not_compiled` (eval before compile), `compile_error` (bad YAML), `unknown_method`. The session continues after errors — it does not exit.
 + 
 + 8. **AC-8: Session state.** The session holds: compiled Plan, current parameter overrides, current state matrix (from most recent eval). `compile` replaces the entire session state. `eval` updates overrides and state. Multiple `eval` calls are independent (no accumulation).
 + 
 + 9. **AC-9: Performance.** For a model with 8 bins and ~10 series, `eval` with scalar overrides completes in under 1ms (excluding I/O). A Rust benchmark test evaluates 1,000 times in a loop and asserts total < 1 second.
 + 
 + 10. **AC-10: Integration test.** A Rust integration test spawns `flowtime-engine session` as a subprocess, sends compile + eval + eval (with different overrides) + get_params via the MessagePack protocol over stdin/stdout, and verifies all responses are correct. 
+ 
 + ## Technical Notes
 + 
 + ### Dependencies to add
 + 
 + - `rmp-serde` (MessagePack serialization for Rust) — workspace dependency
 + - `serde` derive on request/response types
 + 
 + ### Module structure
 + 
 + - `engine/core/src/session.rs` — Session struct, state management, command dispatch
 + - `engine/core/src/protocol.rs` — Request/Response types, MessagePack framing (read/write)
 + - `engine/cli/src/main.rs` — `cmd_session()` entry point
 + 
 + ### Message envelope
 + 
 + ```rust
 + #[derive(Serialize, Deserialize)]
 + struct Request {
 +     method: String,
 +     #[serde(default)]
 +     params: serde_json::Value, // flexible params per method
 + }
 + 
 + #[derive(Serialize)]
 + struct Response {
 +     #[serde(skip_serializing_if = "Option::is_none")]
 +     result: Option<serde_json::Value>,
 +     #[serde(skip_serializing_if = "Option::is_none")]
 +     error: Option<serde_json::Value>,
 + }
 + ```
 + 
 + Note: We use `serde_json::Value` as the flexible inner type even though the wire format is MessagePack. MessagePack and JSON share the same data model (maps, arrays, strings, numbers, bools, null). `rmp-serde` serializes/deserializes `serde_json::Value` correctly.
 + 
 + ### Series encoding
 + 
 + Series data (`Vec<f64>`) serializes naturally as MessagePack arrays of floats. For very large series, a future optimization could use MessagePack binary ext type for raw f64 bytes, but the standard array encoding is correct and sufficient for this milestone.
 + 
 + ### Post-eval pipeline
 + 
 + After `evaluate_with_params`, the session must also run:
 + - Class decomposition normalization + proportional allocation
 + - Edge series computation
 + - Analysis warnings
 + 
 + This means the session calls the same post-eval pipeline as `eval_model_with_params`. The simplest approach: the session stores the compiled Plan and the ModelDefinition, and each `eval` call runs `eval_model_with_params` reusing the model but with the new overrides.
 + 
 + For the compile-once optimization (skip recompilation), a future milestone can cache the Plan separately. 
For now, recompiling per eval is acceptable if latency is under the AC-9 target. + + ## Out of Scope + + - WebSocket transport (m-E17-01) + - .NET bridge for session mode (m-E17-01) + - UI parameter controls (m-E17-02) + - Parameter sweep batch mode (m-E18-03) + - Request IDs / multiplexing (single-client, sequential for now) + - Authentication or access control + - TLS/encryption + + ## Key References + + - `engine/core/src/compiler.rs` — `compile()`, `eval_model_with_params()` + - `engine/core/src/plan.rs` — `ParamTable`, `ParamValue` + - `engine/core/src/eval.rs` — `evaluate_with_params()` + - `engine/cli/src/main.rs` — existing CLI command dispatch + - `docs/architecture/headless-engine-architecture.md` — protocol design + - [rmp-serde crate](https://crates.io/crates/rmp-serde) — MessagePack for Rust + - [MessagePack spec](https://msgpack.org/) — wire format + - kind: milestone + id: M-003 + frontmatter: + title: Tiered Validation + status: done + parent: E-18 + body: | + ## Goal + + Expose model validation as a first-class, client-agnostic Time Machine operation. + Three tiers callable from the .NET SDK and from a new `POST /v1/validate` HTTP + endpoint. Tier 1 (schema) is also added to the Rust engine session protocol so + the Svelte What-If UI can get cheap per-edit feedback without a full compile. + + ## Scope + + **Tier 1 — Schema:** YAML parses + JSON schema validates + class references resolve. + Backed by `ModelSchemaValidator.Validate` + `ModelValidator.Validate` in Core. + Cheap: no compile, no eval. + + **Tier 2 — Compile:** Schema (tier 1) + model compiles into a Graph. + Backed by `ModelCompiler.Compile` + `ModelParser.ParseModel` in Core. + Catches structural errors (unresolved references, expression errors). + + **Tier 3 — Analyse:** Compile (tier 2) + deterministic evaluation + invariant + checks. Backed by `TemplateInvariantAnalyzer.Analyze` in Sim.Core. + Catches semantic issues (conservation violations, capacity breaches). 
+ + **In scope:** + - `src/FlowTime.TimeMachine/Validation/` — `TimeMachineValidator` (static service), + `ValidationResult`, `ValidationError`, `ValidationWarning`, `ValidationTier` enum + - `src/FlowTime.API/Endpoints/ValidationEndpoints.cs` — `POST /v1/validate` + - Rust engine session — new `validate_schema` command (tier 1 via session protocol) + - Unit tests: `tests/FlowTime.TimeMachine.Tests/Validation/` + - API tests: `tests/FlowTime.Api.Tests/ValidationEndpointsTests.cs` + - Rust integration tests: session `validate_schema` command + + **Out of scope:** + - Line/column mapping in error messages + - Editor LSP integration + - Svelte UI changes (validate button) — separate UI milestone + + ## Contract + + ### HTTP Endpoint + + ``` + POST /v1/validate + Content-Type: application/json + + { + "yaml": "...", + "tier": "schema" | "compile" | "analyse" + } + ``` + + Response (200 always, errors in body): + + ```json + { + "tier": "schema", + "isValid": false, + "errors": [ + { "message": "Unknown class reference: 'premium'" } + ], + "warnings": [] + } + ``` + + Tier 3 analyse response includes warnings in addition to errors: + + ```json + { + "tier": "analyse", + "isValid": true, + "errors": [], + "warnings": [ + { "nodeId": "Queue", "code": "high_utilization", "message": "..." } + ] + } + ``` + + ### Session Protocol Command (`validate_schema`) + + ``` + request: { method: "validate_schema", params: { yaml: "..." } } + response (valid): { result: { is_valid: true, errors: [] } } + response (invalid): { result: { is_valid: false, errors: ["..."] } } + ``` + + Tier 2 (compile) is already served by the existing `compile` command, which + returns `error: { code: "compile_error", ... }` on failure. 
+ + ## Acceptance Criteria + + - [x] `TimeMachineValidator.Validate(yaml, ValidationTier.Schema)` returns errors for invalid YAML + - [x] `TimeMachineValidator.Validate(yaml, ValidationTier.Compile)` catches structural errors (bad node refs, bad expressions) + - [x] `TimeMachineValidator.Validate(yaml, ValidationTier.Analyse)` returns warnings from invariant analyzer + - [x] `POST /v1/validate` responds 200 with `{ isValid, tier, errors, warnings }` for all three tiers + - [x] Invalid tier value → 400 Bad Request + - [x] Empty/null yaml → 400 Bad Request + - [x] Rust session `validate_schema` returns `{ is_valid, errors }` without full compile + - [x] `rg "FlowTime\.Generator" src/ tests/` still zero (no regressions) + - [x] `dotnet test FlowTime.sln` all green; Rust `cargo test` all green + - kind: milestone + id: M-004 + frontmatter: + title: Generator Extraction → TimeMachine + status: done + parent: E-18 + body: | + ## Goal + + Rename `FlowTime.Generator` → `FlowTime.TimeMachine`. Move all classes, update all + references in consumers (src + tests), remove `FlowTime.Generator` from the solution. + Pure structural refactor — no behavior change, all tests green, no coexistence window + (per D-2026-04-07-019 Path B). 
+ + ## Scope + + **In scope:** + - Create `src/FlowTime.TimeMachine/FlowTime.TimeMachine.csproj` with identical dependencies + - Move all Generator source files; update `FlowTime.Generator.*` namespaces → `FlowTime.TimeMachine.*` + - Rename `tests/FlowTime.Generator.Tests/` → `tests/FlowTime.TimeMachine.Tests/`; update its csproj + - Update project references in: FlowTime.Cli, FlowTime.Sim.Service, FlowTime.API, FlowTime.Api.Tests, FlowTime.Cli.Tests, FlowTime.Integration.Tests + - Update `using FlowTime.Generator.*` → `using FlowTime.TimeMachine.*` across all source files + - Register TimeMachine in FlowTime.sln; remove Generator entry + - Delete `src/FlowTime.Generator/` entirely + + **Out of scope:** + - Tiered validation (m-E18-06) + - Any behavior changes whatsoever + + ## Acceptance Criteria + + - [x] `src/FlowTime.TimeMachine/` exists; `src/FlowTime.Generator/` is gone + - [x] `tests/FlowTime.TimeMachine.Tests/` exists; `tests/FlowTime.Generator.Tests/` is gone + - [x] `dotnet build FlowTime.sln` succeeds with zero errors + - [x] `dotnet test FlowTime.sln` passes with the same test count + - [x] `rg "FlowTime\.Generator" src/ tests/ --include="*.cs" --include="*.csproj"` returns zero matches + - [x] Solution file contains TimeMachine entry; Generator entry is absent + + ## Namespace Mapping + + | Old | New | + |-----|-----| + | `FlowTime.Generator` | `FlowTime.TimeMachine` | + | `FlowTime.Generator.Artifacts` | `FlowTime.TimeMachine.Artifacts` | + | `FlowTime.Generator.Capture` | `FlowTime.TimeMachine.Capture` | + | `FlowTime.Generator.Models` | `FlowTime.TimeMachine.Models` | + | `FlowTime.Generator.Orchestration` | `FlowTime.TimeMachine.Orchestration` | + | `FlowTime.Generator.Processing` | `FlowTime.TimeMachine.Processing` | + - kind: milestone + id: M-005 + frontmatter: + title: ITelemetrySource Contract + status: done + parent: E-18 + body: | + ## Goal + + Define `ITelemetrySource` as the formal input contract for the Time Machine's external data + 
surface, with two concrete implementations from day one. Satisfies the deferred portion of + the spec's m-E18-01b scope (the tiered-validation half shipped as m-E18-06; this delivers + the source-contract half). + + ## Scope + + **`ITelemetrySource` interface** — in `src/FlowTime.TimeMachine/Telemetry/`: + - `ITelemetrySource` — single method: `Task ReadAsync(CancellationToken)` + - `TelemetryData` — typed payload: grid, series dictionary, optional provenance metadata + + **`CanonicalBundleSource : ITelemetrySource`** — reads the canonical bundle format + (`manifest.json` + CSV series files) written by the existing `TelemetryBundleBuilder` in + `FlowTime.Core`. Concrete class (not behind a second interface). + + **`FileCsvSource : ITelemetrySource`** — reads `file:`-referenced CSV inputs, extracting + the existing file-read logic already in `FlowTime.Core` into a named, injectable + implementation. + + **In scope:** + - `src/FlowTime.TimeMachine/Telemetry/ITelemetrySource.cs` + - `src/FlowTime.TimeMachine/Telemetry/TelemetryData.cs` + - `src/FlowTime.TimeMachine/Telemetry/CanonicalBundleSource.cs` + - `src/FlowTime.TimeMachine/Telemetry/FileCsvSource.cs` + - Unit tests: `tests/FlowTime.TimeMachine.Tests/Telemetry/` + + **Out of scope:** + - `ITelemetrySink` — explicitly deferred per D-2026-04-07-020 + - Real-world format adapters (Prometheus, OTEL, BPI) — m-E18 telemetry adapters milestone + - Time Machine `Evaluate` / `Reevaluate` consuming the source — separate milestone + - HTTP endpoint changes + + ## Contract + + ### `ITelemetrySource` + + ```csharp + namespace FlowTime.TimeMachine.Telemetry; + + /// + /// Input contract for external data fed into the Time Machine. + /// Each implementation snapshots data from its source at ReadAsync time, + /// returning a deterministic TelemetryData payload the Time Machine can consume. 
+ /// </summary> + public interface ITelemetrySource + { + Task<TelemetryData> ReadAsync(CancellationToken cancellationToken = default); + } + ``` + + ### `TelemetryData` + + ```csharp + public sealed class TelemetryData + { + /// Grid definition (bins, binSize, binUnit). + public required GridDefinition Grid { get; init; } + + /// Node-id → double[] series values (one per bin). + public required IReadOnlyDictionary<string, double[]> Series { get; init; } + + /// Optional provenance: source path, captured-at timestamp, content hash. + public TelemetryProvenance? Provenance { get; init; } + } + + public sealed class TelemetryProvenance + { + public string? SourcePath { get; init; } + public DateTimeOffset? CapturedAt { get; init; } + public string? ContentHash { get; init; } + } + ``` + + ### `CanonicalBundleSource` + + Reads a canonical bundle directory (containing `manifest.json` and `series/*.csv`). + + ```csharp + public sealed class CanonicalBundleSource : ITelemetrySource + { + public CanonicalBundleSource(string bundleDirectory) { ... } + public Task<TelemetryData> ReadAsync(CancellationToken cancellationToken = default) { ... } + } + ``` + + ### `FileCsvSource` + + Reads a single CSV file as a named series. + + ```csharp + public sealed class FileCsvSource : ITelemetrySource + { + /// <param name="filePath">Path to the CSV file.</param> + /// <param name="seriesId">Node ID to assign the series to.</param> + /// <param name="grid">Grid definition to validate series length against.</param> + public FileCsvSource(string filePath, string seriesId, GridDefinition grid) { ... } + public Task<TelemetryData> ReadAsync(CancellationToken cancellationToken = default) { ... 
} + } + ``` + + ## Acceptance Criteria + + - [x] `ITelemetrySource` interface exists in `FlowTime.TimeMachine.Telemetry` + - [x] `TelemetryData` carries Grid + Series + optional Provenance + - [x] `CanonicalBundleSource.ReadAsync` reads a bundle directory and returns correct series values + - [x] `FileCsvSource.ReadAsync` reads a single CSV and returns the series under the specified ID + - [x] Both implementations compile and have passing unit tests (23 tests across 2 suites) + - [x] `ITelemetrySink` is **not** introduced (explicitly documented as deferred) + - [x] `rg "FlowTime\.Generator" src/ tests/` still zero (no regressions) + - [x] `dotnet test FlowTime.sln` all green (72 TimeMachine tests, 0 failures) + - kind: milestone + id: M-006 + frontmatter: + title: Parameter Sweep + status: done + parent: E-18 + body: | + ## Goal + + Implement parameter sweep as a first-class Time Machine operation: given a model YAML, a + const-node ID, and an array of values, evaluate the model once per value and return a + structured table of (param_value → series outputs). 
+ + Builds on: + - m-E18-01 `evaluate_with_params` in the Rust engine (compile-once foundation) + - m-E18-07 `FlowTime.TimeMachine` project (host for the sweep domain model) + - m-E18-08 `ITelemetrySource` (pattern for injectable evaluation contracts) + + ## Scope + + **`FlowTime.TimeMachine.Sweep` namespace** — in `src/FlowTime.TimeMachine/Sweep/`: + - `IModelEvaluator` — injectable evaluation contract; decouples SweepRunner from the Rust binary in tests + - `SweepSpec` — validated input: ModelYaml, ParamId, Values[], optional CaptureSeriesIds + - `SweepPoint` — single evaluation result: ParamValue + Series dictionary + - `SweepResult` — full sweep result: ParamId + SweepPoint[] + - `ConstNodePatcher` — internal YAML DOM manipulation; patches a named const node's values array + - `SweepRunner` — orchestrates N evaluations via injected `IModelEvaluator` + - `RustModelEvaluator : IModelEvaluator` — wraps `RustEngineRunner`, maps series list to dictionary + + **`POST /v1/sweep`** — in `src/FlowTime.API/Endpoints/SweepEndpoints.cs`: + - Request: `{ yaml, paramId, values: [double...], captureSeriesIds?: [string...] 
}` + - Response (200): `{ paramId, points: [{ paramValue, series: { seriesId: double[] } }] }` + - 400: missing yaml / paramId / values + - 503: engine not enabled (RustEngine:Enabled=false) + + **In scope:** + - `src/FlowTime.TimeMachine/Sweep/IModelEvaluator.cs` + - `src/FlowTime.TimeMachine/Sweep/SweepSpec.cs` + - `src/FlowTime.TimeMachine/Sweep/SweepResult.cs` + - `src/FlowTime.TimeMachine/Sweep/ConstNodePatcher.cs` + - `src/FlowTime.TimeMachine/Sweep/SweepRunner.cs` + - `src/FlowTime.TimeMachine/Sweep/RustModelEvaluator.cs` + - `src/FlowTime.API/Endpoints/SweepEndpoints.cs` + - DI registration in `Program.cs` + - Unit tests: `tests/FlowTime.TimeMachine.Tests/Sweep/` + - API tests: `tests/FlowTime.Api.Tests/SweepEndpointsTests.cs` + + **Out of scope:** + - Sensitivity analysis (numerical gradient) — follow-on + - Multi-parameter sweeps (grid sweeps) — follow-on + - Session-based compile-once optimization — follow-on (each sweep point uses subprocess eval) + - Optimization / fitting — m-E18-10+ + - Sweep result persistence / artifact writing — follow-on + + ## Design Notes + + ### Implementation approach + + Each sweep point calls `RustEngineRunner.EvaluateAsync(patchedYaml)` independently (one + subprocess per point). The YAML is patched in-memory before each call via `ConstNodePatcher`, + which uses YamlDotNet's representation model to substitute the const node's values array. + + This deliberately trades compile-once efficiency for implementation simplicity: the Rust + session protocol requires a MessagePack NuGet dependency not yet in the tree, while the + subprocess approach reuses existing infrastructure with no new dependencies. + + The `IModelEvaluator` abstraction isolates this choice from `SweepRunner`, so a future + session-based evaluator can be dropped in without changing the sweep domain model or tests. 
+ + ### ConstNodePatcher behaviour + + - Finds the first `nodes` entry where `id == nodeId` AND `kind == "const"` + - Replaces its `values` sequence with `[value, value, ..., value]` (same bin count) + - Returns the original YAML unchanged if the node is not found or is not a const node + - Uses `InvariantCulture` formatting for decimal precision + + ## Acceptance Criteria + + - [x] `IModelEvaluator` interface exists in `FlowTime.TimeMachine.Sweep` + - [x] `SweepSpec` validates: non-null/whitespace ModelYaml, non-null/whitespace ParamId, non-null/non-empty Values + - [x] `ConstNodePatcher.Patch` correctly replaces const node values; returns original YAML for unknown/non-const nodes + - [x] `SweepRunner.RunAsync` returns one `SweepPoint` per input value, with correct ParamValue and Series + - [x] `SweepRunner` respects `CaptureSeriesIds` filter (null = all series) + - [x] `SweepRunner` respects `CancellationToken` between evaluation points + - [x] `RustModelEvaluator` wraps `RustEngineRunner` and maps series list to dictionary + - [x] `POST /v1/sweep` returns 400 for missing yaml / paramId / empty values + - [x] `POST /v1/sweep` returns 503 when Rust engine not enabled + - [x] Unit tests pass: 28 sweep unit tests (SweepSpec ×9, ConstNodePatcher ×7, SweepRunner ×12) + - [x] API validation tests pass: 7 tests (6×400, 1×503) + - [x] `dotnet test FlowTime.sln` all green (105 TimeMachine, 235 API — pre-existing integration failures unrelated) + - kind: milestone + id: M-007 + frontmatter: + title: Sensitivity Analysis + status: done + parent: E-18 + body: | + ## Goal + + Add numerical sensitivity analysis as a Time Machine operation: given a model YAML, a set + of const-node parameters, and a target metric series, compute ∂metric_mean/∂param for each + parameter using a central-difference approximation. Answers "which parameter has the most + impact on this metric?" 
+ + Builds on: + - m-E18-09 `SweepRunner` + `ConstNodePatcher` — two-point sweep per parameter reuses the + sweep infrastructure directly + - `ConstNodePatcher` — YAML DOM manipulation already in place + + ## Scope + + **`FlowTime.TimeMachine.Sweep` namespace** (extending m-E18-09's namespace): + - `ConstNodeReader` — companion to `ConstNodePatcher`; reads the current scalar value of a + named const node's first bin. Returns `null` if the node is not found or not a const node. + - `SensitivitySpec` — validated input: ModelYaml, ParamIds[], MetricSeriesId, Perturbation (default 5%) + - `SensitivityPoint` — single result: ParamId, BaseValue, Gradient (∂metric_mean/∂param) + - `SensitivityResult` — `SensitivityPoint[]` sorted by `|Gradient|` descending + - `SensitivityRunner` — composes `SweepRunner`; for each param: read base, 2-point sweep, + central difference + + **`POST /v1/sensitivity`** — in `src/FlowTime.API/Endpoints/SensitivityEndpoints.cs` + - Request: `{ yaml, paramIds: [string...], metricSeriesId, perturbation?: double }` + - Response (200): `{ metricSeriesId, points: [{ paramId, baseValue, gradient }] }` + - 400: missing yaml / paramIds (null or empty) / metricSeriesId + - 503: engine not enabled + + **In scope:** + - `src/FlowTime.TimeMachine/Sweep/ConstNodeReader.cs` + - `src/FlowTime.TimeMachine/Sweep/SensitivitySpec.cs` + - `src/FlowTime.TimeMachine/Sweep/SensitivityResult.cs` + - `src/FlowTime.TimeMachine/Sweep/SensitivityRunner.cs` + - `src/FlowTime.API/Endpoints/SensitivityEndpoints.cs` + - DI registration in `Program.cs` + - Unit tests: `tests/FlowTime.TimeMachine.Tests/Sweep/` + - API tests: `tests/FlowTime.Api.Tests/SensitivityEndpointsTests.cs` + + **Out of scope:** + - Multi-metric sensitivity — single metric per call + - Distribution-based sensitivity (Morris method, Sobol indices) — follow-on + - Forward-difference vs central-difference choice — central difference only + - Optimization / fitting — m-E18-11+ + + ## Design Notes + + ### 
Gradient formula (central difference) + + For each parameter `p` with base value `b` and perturbation fraction `ε`: + + ``` + hi = b × (1 + ε) + lo = b × (1 - ε) + gradient = (mean(metric_series_at_hi) − mean(metric_series_at_lo)) / (hi − lo) + = (mean_hi − mean_lo) / (2 × b × ε) + ``` + + **Zero-base edge case:** when `b == 0`, `hi == lo == 0` and the gradient is indeterminate. + Gradient is set to `0.0` and a note is included in the point. The parameter is still included + in the result so callers can see it was processed. + + **Missing metric series:** if the evaluator returns series that do not include `MetricSeriesId`, + `SensitivityRunner` throws `InvalidOperationException` with a clear message. This is a caller + error (wrong series ID), not a graceful skip. + + **Unknown param:** if `ConstNodeReader.ReadValue` returns `null` for a param ID (node not + found or not a const node), that param is skipped (omitted from result). Callers can detect + skipped params by comparing `spec.ParamIds.Length` vs `result.Points.Length`. + + ### `SensitivityRunner` composes `SweepRunner` + + `SensitivityRunner(SweepRunner sweepRunner)` — takes the full `SweepRunner` including its + injected `IModelEvaluator`. Tests pass a `SweepRunner(fakeEvaluator)` — no additional + test doubles needed. 
+ + ## Acceptance Criteria + + - [x] `ConstNodeReader.ReadValue(yaml, nodeId)` returns the first-bin value for known const + nodes; returns `null` for unknown nodes, non-const nodes, and missing `nodes` section + - [x] `SensitivitySpec` validates: non-null/whitespace ModelYaml, non-null/non-empty ParamIds, + non-null/whitespace MetricSeriesId, Perturbation in (0, 1) exclusive + - [x] `SensitivityRunner.RunAsync` returns one `SensitivityPoint` per found param, sorted by + `|Gradient|` descending + - [x] Gradient computed correctly via central difference + - [x] Zero-base param produces Gradient = 0.0 (no crash) + - [x] Unknown param ID silently skipped (omitted from result) + - [x] Missing metric series throws `InvalidOperationException` + - [x] `SensitivityRunner` respects `CancellationToken` + - [x] `POST /v1/sensitivity` returns 400 for missing yaml / paramIds / metricSeriesId + - [x] `POST /v1/sensitivity` returns 503 when Rust engine not enabled + - [x] Unit tests pass: 32 tests (ConstNodeReader ×8, SensitivitySpec ×12, SensitivityRunner ×12) + - [x] API tests pass: 7 tests (6×400, 1×503) + - [x] `dotnet test FlowTime.sln` all green (137 TimeMachine, 242 API) + - kind: milestone + id: M-008 + frontmatter: + title: Goal Seeking + status: done + parent: E-18 + body: | + ## Goal + + Add 1D goal seeking: given a model YAML, a const-node parameter, a metric series, and a + target value, find the parameter value that drives the metric mean to the target via bisection. + Answers "what arrival rate gives 80% utilization?" without a full parameter sweep. 
+ + Builds on: + - m-E18-09 `SweepRunner` + `ConstNodePatcher` / `ConstNodeReader` (m-E18-10) + - Same `IModelEvaluator` seam + + ## Scope + + **`FlowTime.TimeMachine.Sweep` namespace:** + - `GoalSeekSpec` — validated input: ModelYaml, ParamId, MetricSeriesId, Target, SearchLo, + SearchHi, Tolerance (default 1e-6), MaxIterations (default 50) + - `GoalSeekResult` — output: ParamValue, AchievedMetricMean, Converged, Iterations + - `GoalSeeker` — bisection over `SweepRunner`; handles non-bracketed case gracefully + + **`POST /v1/goal-seek`** — in `src/FlowTime.API/Endpoints/GoalSeekEndpoints.cs` + - Request: `{ yaml, paramId, metricSeriesId, target, searchLo, searchHi, tolerance?, maxIterations? }` + - Response (200): `{ paramValue, achievedMetricMean, converged, iterations }` + - 400: missing/invalid required fields (searchLo ≥ searchHi is invalid) + - 503: engine not enabled + + **In scope:** + - `src/FlowTime.TimeMachine/Sweep/GoalSeekSpec.cs` + - `src/FlowTime.TimeMachine/Sweep/GoalSeekResult.cs` + - `src/FlowTime.TimeMachine/Sweep/GoalSeeker.cs` + - `src/FlowTime.API/Endpoints/GoalSeekEndpoints.cs` + - DI registration in `Program.cs` + - Unit tests: `tests/FlowTime.TimeMachine.Tests/Sweep/` + - API tests: `tests/FlowTime.Api.Tests/GoalSeekEndpointsTests.cs` + - Architecture doc: `docs/architecture/time-machine-analysis-modes.md` (written alongside) + + **Out of scope:** + - Multi-dimensional optimization (Nelder-Mead) — m-E18-12+ + - Constraint handling beyond the `[searchLo, searchHi]` range + - Non-monotonic functions (bisection is undefined; `Converged=false` returned) + + ## Algorithm + + Bisection on the metric mean: + + ``` + 1. Evaluate at searchLo → meanLo = mean(metric at searchLo) + 2. Evaluate at searchHi → meanHi = mean(metric at searchHi) + 3. If target not in [min(meanLo,meanHi), max(meanLo,meanHi)]: + return best endpoint, Converged=false + 4. 
While iterations < maxIterations: + mid = (lo + hi) / 2 + midMean = mean(metric at mid) + if |midMean - target| < tolerance: return mid, Converged=true + if (midMean - target) same sign as (meanLo - target): lo = mid, meanLo = midMean + else: hi = mid, meanHi = midMean + 5. Return mid, Converged=false (max iterations reached) + ``` + + ## Acceptance Criteria + + - [x] `GoalSeekSpec` validates: non-null/whitespace ModelYaml/ParamId/MetricSeriesId; + SearchLo < SearchHi; Tolerance > 0; MaxIterations ≥ 1 + - [x] `GoalSeeker.SeekAsync` converges on a linear model to within tolerance + - [x] `GoalSeeker` returns `Converged=false` when target is not bracketed + - [x] `GoalSeeker` returns `Converged=false` (best guess) when max iterations exhausted + - [x] `GoalSeeker` respects `CancellationToken` + - [x] `POST /v1/goal-seek` returns 400 for missing/invalid required fields + - [x] `POST /v1/goal-seek` returns 503 when engine not enabled + - [x] Unit tests pass: 26 tests (GoalSeekSpec ×14, GoalSeeker ×12) + - [x] API tests pass: 8 tests (7×400, 1×503) + - [x] `dotnet test FlowTime.sln` all green (163 TimeMachine, 250 API) + - kind: milestone + id: M-009 + frontmatter: + title: Multi-parameter Optimization + status: done + parent: E-18 + body: | + ## Goal + + Add multi-parameter optimization: given a model, a set of const-node parameters with search + ranges, a metric series, and an objective (minimize or maximize), find the parameter values that + drive the metric mean to its optimum using Nelder-Mead simplex — a derivative-free method that + works for any number of parameters without needing gradients. + + Answers "what combination of arrival rate and capacity minimizes queue depth?" without a full + multi-dimensional grid search. 
+ + Builds on: + - `IModelEvaluator` seam (m-E18-09) + - `ConstNodePatcher` for multi-parameter YAML mutation (m-E18-09) + - `ConstNodeReader` (m-E18-10) — used in tests to read patched values + + ## Scope + + **`FlowTime.TimeMachine.Sweep` namespace:** + - `OptimizeObjective` — `Minimize | Maximize` enum + - `SearchRange` — `record(double Lo, double Hi)` with `Lo < Hi` invariant + - `OptimizeSpec` — validated input: ModelYaml, ParamIds, MetricSeriesId, Objective, + SearchRanges (one entry per ParamId), Tolerance (default 1e-4), MaxIterations (default 200) + - `OptimizeResult` — output: ParamValues, AchievedMetricMean, Converged, Iterations + - `Optimizer` — Nelder-Mead simplex over `IModelEvaluator`; patches all parameters + simultaneously per evaluation; respects CancellationToken + + **`POST /v1/optimize`** — in `src/FlowTime.API/Endpoints/OptimizeEndpoints.cs` + - Request: `{ yaml, paramIds, metricSeriesId, objective, searchRanges, tolerance?, maxIterations? }` + where `searchRanges` is `{ "<paramId>": { "lo": N, "hi": N }, ... }` + and `objective` is `"minimize"` or `"maximize"` (case-insensitive) + - Response (200): `{ paramValues, achievedMetricMean, converged, iterations }` + - 400: missing/invalid required fields, searchRange lo >= hi, unknown objective string + - 503: engine not enabled + + **In scope:** + - `src/FlowTime.TimeMachine/Sweep/OptimizeObjective.cs` + - `src/FlowTime.TimeMachine/Sweep/SearchRange.cs` + - `src/FlowTime.TimeMachine/Sweep/OptimizeSpec.cs` + - `src/FlowTime.TimeMachine/Sweep/OptimizeResult.cs` + - `src/FlowTime.TimeMachine/Sweep/Optimizer.cs` + - `src/FlowTime.API/Endpoints/OptimizeEndpoints.cs` + - DI registration in `Program.cs` + - Unit tests: `tests/FlowTime.TimeMachine.Tests/Sweep/` + - API tests: `tests/FlowTime.Api.Tests/OptimizeEndpointsTests.cs` + - Architecture doc update: `docs/architecture/time-machine-analysis-modes.md` + + **Out of scope:** + - Constraint handling (utilization < 0.8 etc.) 
 — future milestone + - Bayesian optimization — future milestone + - Parallel evaluation of simplex vertices + - Gradient-based methods (sensitivity-driven descent) + + ## Algorithm + + Nelder-Mead simplex (N parameters → N+1 vertices): + + ``` + Coefficients: α=1.0 (reflect), γ=2.0 (expand), ρ=0.5 (contract), σ=0.5 (shrink) + Objective f(v) = metricMean(v) for Minimize + f(v) = -metricMean(v) for Maximize (internally always minimize f) + + 1. Build initial N+1 simplex: + v[0] = midpoint of all search ranges + v[i] = v[0] with param[i-1] shifted +5% of its range (clamped) + + 2. Evaluate f at each vertex. + + 3. Sort vertices so v[0] is best (lowest f) and v[N] is worst. + + 4. Check pre-loop convergence: if |f[N] - f[0]| < tolerance → Converged(0 iterations) + + 5. For iteration = 1 to MaxIterations: + a. Compute centroid c of best N vertices (v[0]..v[N-1]). + b. Reflect: xr = c + α*(c - v[N]); clamp; fr = f(xr) + c. if fr < f[0]: expand: xe = c + γ*(xr-c); clamp; fe = f(xe) + replace v[N] with (fe < fr ? xe : xr) + d. else if fr < f[N-1]: replace v[N] with xr + e. else: contract: xc = c + ρ*(v[N]-c); clamp; fc = f(xc) + if fc < f[N]: replace v[N] with xc + else: shrink: v[i] = v[0] + σ*(v[i]-v[0]) for i=1..N; re-evaluate + f. Re-sort; if |f[N] - f[0]| < tolerance → Converged + 6. Return best vertex, Converged=false (max iterations reached) + ``` + + ## Acceptance Criteria + + - [x] `OptimizeSpec` validates: non-null/whitespace ModelYaml/MetricSeriesId; non-empty ParamIds; one SearchRange per ParamId with Lo < Hi; Tolerance > 0; MaxIterations ≥ 1 + - [x] `Optimizer.OptimizeAsync` converges on a 1D bowl function to within tolerance + - [x] `Optimizer.OptimizeAsync` converges on a 2D bowl function to within tolerance + - [x] `Optimizer.OptimizeAsync` supports Maximize objective (maximizes a linear metric) + - [x] `Optimizer` returns `Converged=false` when MaxIterations exhausted before convergence + - [x] `Optimizer` respects `CancellationToken` + - [x] `POST /v1/optimize` returns 400 for missing/invalid required fields + - [x] `POST /v1/optimize` returns 503 when engine not enabled + - [x] Unit tests pass: 29 tests (OptimizeSpec ×17, Optimizer ×12) + - [x] API tests pass: 10 tests (9×400, 1×503) + - [x] `dotnet test FlowTime.sln` all green (192 TimeMachine, 260 API) + - kind: milestone + id: M-010 + frontmatter: + title: SessionModelEvaluator + status: done + parent: E-18 + body: | + ## Goal + + Replace per-point subprocess compile overhead with a single persistent engine session. 
+ Today, every `IModelEvaluator.EvaluateAsync` call spawns `flowtime-engine eval` as a fresh + subprocess that re-parses YAML and re-compiles the Plan. For a sweep of 200 points this is + 200 compiles; for an optimization run it can be 100–1000 compiles. Each spawn is + ~100–500 ms of pure compile overhead. + + `SessionModelEvaluator` uses the m-E18-02 session protocol (MessagePack over stdin/stdout): + compile once on the first call, then send `eval` with parameter overrides for every + subsequent call. The expected speedup for large batches is ~10–50×. + + Also makes model fitting practical — fitting typically runs 100–1000 evaluations with + the optimizer as the inner loop, which is not viable with per-point subprocess compile. + + ## Scope + + **Namespace:** `FlowTime.TimeMachine.Sweep` + + - `SessionModelEvaluator : IModelEvaluator, IAsyncDisposable` — persistent session bridge: + - Lazy-spawns `flowtime-engine session` subprocess on first `EvaluateAsync` + - First call: sends `compile` request with the (already-patched) YAML; captures the list + of parameter IDs from the response; returns the series from the compile result + - Subsequent calls: uses `ConstNodeReader` to read the current value of each captured + parameter ID from the patched YAML; sends `eval { overrides: { ... } }`; returns series + from the response + - Serializes protocol I/O with `SemaphoreSlim` (one request at a time per instance) + - MessagePack via the `MessagePack` package (already used in integration tests); encodes + requests with `ContractlessStandardResolver` as `Dictionary` + - Wire framing: 4-byte big-endian length prefix + MessagePack payload (matches + `engine/cli/src/protocol.rs`) + - `DisposeAsync`: closes stdin, waits briefly for the subprocess to exit, kills the + process tree if still alive after the timeout + + **DI registration** (`src/FlowTime.API/Program.cs`): + - New config key `RustEngine:UseSession` (default `true`). 
Selects which `IModelEvaluator` + implementation is registered: + - `true` → `SessionModelEvaluator` (persistent session, compile-once) + - `false` → `RustModelEvaluator` (stateless subprocess per eval — retained as fallback) + - `IModelEvaluator`, `SweepRunner`, `SensitivityRunner`, `GoalSeeker`, `Optimizer` change + from `AddSingleton` → `AddScoped`. Session lifetime must match the analysis run; Scoped + gives one evaluator per HTTP request with automatic disposal. Even when `UseSession=false` + the Scoped lifetime is harmless — runners are stateless wrappers. + - `RustEngineRunner` remains `Singleton` (still used by E-20 bridge/parity tests and by + `RustModelEvaluator`). + + **Why keep `RustModelEvaluator`:** + - Fallback switch if the session protocol surfaces bugs in the wild (30 lines of code; negligible maintenance). + - Diagnostic comparison path — "does the non-session path agree?" is a cheap bug-triage question. + - Process isolation per eval is genuinely different behavior from a stateful session; both have legitimate deployment shapes (see cloud-deployment notes in `ROADMAP.md`). + - Two production impls make `IModelEvaluator` a real seam, not a testing-only interface. 
+ + **Package reference:** + - Add `MessagePack` 3.1.4 to `src/FlowTime.TimeMachine/FlowTime.TimeMachine.csproj` + (same version already used in `FlowTime.Integration.Tests`) + + **In scope:** + - `src/FlowTime.TimeMachine/Sweep/SessionModelEvaluator.cs` + - `src/FlowTime.TimeMachine/FlowTime.TimeMachine.csproj` (MessagePack package) + - `src/FlowTime.API/Program.cs` (config switch + DI scope changes) + - Unit tests: `tests/FlowTime.TimeMachine.Tests/Sweep/SessionModelEvaluatorTests.cs` + (covers override extraction logic and error paths that do not require the subprocess) + - Integration tests: `tests/FlowTime.Integration.Tests/SessionModelEvaluatorIntegrationTests.cs` + (requires the Rust binary; skipped if not present, following the existing + `EngineSessionWebSocketTests` pattern) + - Milestone tracking doc + - Update `docs/architecture/time-machine-analysis-modes.md` to note the new evaluator and config switch + + **Out of scope:** + - Session pooling / reuse across HTTP requests — each request gets its own session + - Auto-reconnect on session crash — if the subprocess dies, the next call surfaces the error + - Chunked evaluation (Mode 6) — separate later milestone + - Model-change detection (session always compiles the YAML it sees first; further calls + assume the same base model, which holds for all current analysis runners) + + ## Design + + ### Lifecycle + + ``` + T=0 SessionModelEvaluator ctor (no I/O yet) + T=1 SweepRunner calls EvaluateAsync(patchedYaml1) + → lazy spawn subprocess + → send compile { yaml: patchedYaml1 } + → receive compile result { params: [...], series: {...} } + → store paramIds from the response + → return series + T=2 SweepRunner calls EvaluateAsync(patchedYaml2) + → use ConstNodeReader to read each paramId value from patchedYaml2 + → send eval { overrides: { ... } } + → receive eval result { series: {...} } + → return series + ... 
+ + T=N HTTP request ends → DI scope disposes the evaluator + → DisposeAsync: close stdin, wait 1s, kill if still alive + ``` + + ### Why the `overrides` approach works + + The compile result captures the initial parameter defaults from the YAML (including whatever + values the first patch applied). On every subsequent call the evaluator sends an explicit + override for every tracked parameter, so the session always evaluates with the current + patched values. The compile-time defaults only matter for the very first call's return. + + ### Request/response shapes + + Requests are plain `Dictionary<string, object?>` (contractless MessagePack). Responses + are `Dictionary<object, object>` navigated by key. Matching the Rust protocol: + + | Method | Request params | Response `result` | + |--------|----------------|-------------------| + | `compile` | `{ yaml: string }` | `{ params: [...], series: { id: [double,...] }, bins, grid, graph, warnings }` | + | `eval` | `{ overrides: { paramId: double } }` | `{ series: { id: [double,...] }, elapsed_us, warnings }` | + + Errors arrive as `{ error: { code, message } }` with no `result` key. The evaluator + raises `InvalidOperationException` with the error code + message. 
+ + ## Acceptance Criteria + + - [x] `SessionModelEvaluator` exists, implements `IModelEvaluator` and `IAsyncDisposable` + - [x] Constructor validates engine path (non-null, non-whitespace) + - [x] First `EvaluateAsync` call spawns the subprocess exactly once; subsequent calls reuse it + - [x] First call sends `compile`; subsequent calls send `eval` with overrides extracted via `ConstNodeReader` + - [x] Returned series dictionary uses case-insensitive keys (matches `RustModelEvaluator`) + - [x] Error responses (`error` key present) raise `InvalidOperationException` with code + message + - [x] `DisposeAsync` closes stdin, waits for exit, kills the process tree on timeout + - [x] `DisposeAsync` is idempotent (safe to call multiple times) + - [x] Calling `EvaluateAsync` after `DisposeAsync` throws `ObjectDisposedException` + - [x] `CancellationToken` is observed during I/O + - [x] Concurrent `EvaluateAsync` calls on one instance are serialized (no interleaved frames) + - [x] DI: `IModelEvaluator`, `SweepRunner`, `SensitivityRunner`, `GoalSeeker`, `Optimizer` all registered as `Scoped` + - [x] DI: `RustEngine:UseSession` config (default `true`) selects `SessionModelEvaluator`; `false` selects `RustModelEvaluator` + - [x] `RustModelEvaluator.cs` retained as fallback; covered by an API test that flips the config switch + - [x] Unit tests pass: 32 tests total + - 6 constructor + disposal (SessionModelEvaluatorTests) + - 3 BuildOverrides (empty / all-found / some-missing) + - 5 ExtractResult (success / error-with-code-msg / error-missing-subfields / neither / malformed-result) + - 4 ExtractParamIds (missing-key / not-array / valid / malformed-items) + - 6 ExtractSeries (missing-key / not-dict / valid / case-insensitive / non-string-key / non-array-value) + - 1 WriteFrameAsync (length-prefixed MessagePack) + - 5 ReadFrameAsync (valid / zero / negative / excessive / truncated) + - 2 ReadExactAsync (full-read / EOF-mid-read) + - [x] Integration tests pass with the Rust binary 
present: 8 tests (SessionModelEvaluatorIntegrationTests) + - [x] Compile-once / eval-many returns correct series after parameter override + - [x] Parity on numeric values against per-eval path (keys differ by design — documented in `work/gaps.md`) + - [x] `SweepRunner` drives `SessionModelEvaluator` end-to-end over a 5-point sweep + - [x] Session subprocess does not leak after disposal + - [x] Invalid model raises `InvalidOperationException` with engine error code + - [x] Concurrent calls on one instance are serialized + - [x] API DI tests pass: 4 tests (ModelEvaluatorRegistrationTests — default/true/false/scope lifetime) + - [x] `dotnet build FlowTime.sln` green + - [x] `dotnet test FlowTime.sln` all green (1,620 passed / 9 skipped) + - [x] `docs/architecture/time-machine-analysis-modes.md` updated — now documents both evaluator paths, config switch, and scoped lifetime + + ## Coverage notes + + **Covered:** every reachable branch in the production implementation — 44 dedicated tests (32 unit + 8 integration + 4 DI). The unit tests deliberately exercise every parsing helper with hand-crafted protocol payloads that the real Rust engine would not produce (missing fields, malformed types, non-string keys, out-of-range frame lengths), because those are defense-in-depth paths against protocol corruption and must not fail silently. + + **Explicitly not covered (defensive paths, acceptable gaps):** + + | Path | Why untested | + |------|--------------| + | `DisposeAsync` graceful-timeout → `Process.Kill` (line ~380) | Requires simulating a stuck subprocess; no deterministic way in unit tests. Behavior is symmetrically correct with the kill-succeeds case which IS covered by `Dispose_TerminatesSubprocess`. | + | `DisposeAsync` generic exception while waiting for exit (line ~385) | Defense-in-depth catch — unreachable in practice. `WaitForExitAsync` only throws `OperationCanceledException` (covered) or completes normally. 
| + | `SpawnProcess` `Process.Start` returns null | Only happens on platform-level process creation failure with an executable path that exists. Not reproducible in test. | + | `ExchangeAsync` `stdin`/`stdout` null guard | Defensive — caller always invokes after `SpawnProcess` has assigned both streams. Unreachable in practice. | + | `EvaluateAsync` inner-after-mutex disposed check | Race between `DisposeAsync` and an in-flight `EvaluateAsync`. Hard to trigger deterministically. The outer check + mutex make this extremely narrow. | + | `EvalAsync` error response | The Rust session only errors on `eval` when no model has been compiled (covered by compile-error path instead) or on a programmer bug that isn't otherwise reachable. | + + These six branches remain in the code as defense-in-depth and would be removed only with explicit evidence that they cannot occur under any future refactor. + + ## Dependencies + + - m-E18-02 (engine session protocol) — delivered + - m-E18-08 (ITelemetrySource) — independent + - m-E18-09 (`IModelEvaluator` seam, `ConstNodePatcher`) — delivered + - m-E18-10 (`ConstNodeReader`) — delivered + - `MessagePack` 3.1.4 — already in integration tests, add to TimeMachine + + ## Risks / notes + + - **Scope lifetime change.** Moving the four runners from `Singleton` → `Scoped` is a DI + semantics change. Runners are stateless wrappers over `IModelEvaluator`, so the risk is + low, but verify the minimal API endpoints still resolve them correctly. + - **Test flakiness from subprocess I/O.** Integration tests must guard against slow spawn + on first call; use a 5 s initial-compile timeout and skip cleanly if the binary is absent. + - **Process leak on abnormal termination.** `DisposeAsync` kills the process tree; CI must + not accumulate stray `flowtime-engine` processes between tests. + - **MessagePack dependency surface.** Adding `MessagePack` to `FlowTime.TimeMachine` pulls + it into the runtime surface. 
Acceptable — it is already transitively available through + `FlowTime.Integration.Tests` and matches the wire format owned by the Rust engine. + - kind: milestone + id: M-011 + frontmatter: + title: .NET Time Machine CLI + status: done + parent: E-18 + body: | + ## Goal + + Expose the Time Machine analysis modes (validate / sweep / sensitivity / goal-seek / + optimize) through the `FlowTime.Cli` binary as pipeable JSON-over-stdio commands. + The CLI becomes the canonical pipeline entry point for Azure Functions custom handlers, + Container Apps jobs, scripted regression suites, shell composition, and AI-assistant + iteration — without requiring the ASP.NET API to be running. + + Spec success criterion from E-18: + ``` + cat model.yaml | flowtime validate + cat sweep-spec.json | flowtime sweep | jq '.points[].metricMean' + ``` + + ## Scope + + Five new commands under `src/FlowTime.Cli/Commands/` mirroring the `/v1/` API surface: + + | CLI command | Spec type | Runner | Matches API endpoint | + |-------------|-----------|--------|----------------------| + | `flowtime validate` | `TimeMachineValidator` (no wrapping spec) | `TimeMachineValidator` | `POST /v1/validate` | + | `flowtime sweep` | `SweepSpec` | `SweepRunner` | `POST /v1/sweep` | + | `flowtime sensitivity` | `SensitivitySpec` | `SensitivityRunner` | `POST /v1/sensitivity` | + | `flowtime goal-seek` | `GoalSeekSpec` | `GoalSeeker` | `POST /v1/goal-seek` | + | `flowtime optimize` | `OptimizeSpec` | `Optimizer` | `POST /v1/optimize` | + + ### JSON I/O contract + + Each command reads a JSON request on stdin (or via `--spec `), runs the analysis, + and writes a JSON response on stdout. The request/response shapes are **identical to the + corresponding `/v1/` endpoint bodies** — byte-for-byte compatible, so `cat spec.json | + flowtime sweep` produces the same payload as `curl -d @spec.json /v1/sweep`. + + `validate` is the exception: its input is raw YAML (on stdin or `--model `), + not JSON. 
Output is the same JSON response shape as `POST /v1/validate`. + + ### Unified options across commands + + - `--spec <path>` — read JSON request from a file instead of stdin (analysis commands) + - `--model <path>` — read model YAML from a file (validate only) + - `--output <path>` / `-o` — write JSON response to a file instead of stdout + - `--no-session` — use `RustModelEvaluator` (stateless, subprocess-per-eval) instead of + `SessionModelEvaluator` (default). Matches the `RustEngine:UseSession=false` config. + - `--engine <path>` — override engine binary path (default: `FLOWTIME_RUST_BINARY` env + var, then `/engine/target/release/flowtime-engine`) + - `-h` / `--help` — command-specific help + + ### Exit codes + + - **0** — success + - **1** — analysis produced an explicit failure (e.g., validate returned invalid, + optimize didn't converge and exited cleanly). The JSON response is still written to + stdout and describes the failure — stderr is clean. + - **2** — input error: missing required args, invalid JSON, engine binary not found, + spec failed validation. Error message on stderr; nothing on stdout. + - **3** — engine/runtime error: session subprocess crashed, protocol error, + `InvalidOperationException` from evaluator. Error message on stderr; nothing on stdout. + + ### Engine binary resolution + + Same precedence as the API, extracted to a shared helper: + 1. `--engine <path>` command-line flag + 2. `FLOWTIME_RUST_BINARY` environment variable + 3. `/engine/target/release/flowtime-engine` (found via `DirectoryProvider.FindSolutionRoot`) + 4. `flowtime-engine` on `$PATH` (fallback) + + Fail with exit 2 and clear stderr message if binary is not found or not executable. + + ### Shared infrastructure + + Extract two helpers to `src/FlowTime.Cli/Commands/`: + + - `CliEngineSetup` — resolves engine binary path, constructs the chosen `IModelEvaluator` + as `IAsyncDisposable` (so callers can `await using`). 
Also exposes a factory for + `RustEngineRunner` (needed by `RustModelEvaluator` fallback). + - `CliJsonIO` — reads JSON from stdin-or-file, writes JSON to stdout-or-file, common + serialization options matching the API (camelCase, web defaults). + + ### In scope + + - `src/FlowTime.Cli/Commands/ValidateCommand.cs` + - `src/FlowTime.Cli/Commands/SweepCommand.cs` + - `src/FlowTime.Cli/Commands/SensitivityCommand.cs` + - `src/FlowTime.Cli/Commands/GoalSeekCommand.cs` + - `src/FlowTime.Cli/Commands/OptimizeCommand.cs` + - `src/FlowTime.Cli/Commands/CliEngineSetup.cs` (helper) + - `src/FlowTime.Cli/Commands/CliJsonIO.cs` (helper) + - `src/FlowTime.Cli/Program.cs` — command routing + `PrintUsage` updates + - Unit tests per command: `tests/FlowTime.Cli.Tests/Commands/{Validate,Sweep,Sensitivity,GoalSeek,Optimize}CommandTests.cs` + - Integration tests that exercise end-to-end with the Rust binary: + `tests/FlowTime.Integration.Tests/TimeMachineCliIntegrationTests.cs` + - Update `docs/architecture/time-machine-analysis-modes.md` — new "CLI surface" section + - Update `CLAUDE.md` Current Work + + ### Out of scope + + - `fit` command (blocked on Telemetry Loop & Parity epic — not yet started) + - `chunked-eval` command (explicitly deferred by spec) + - `monte-carlo` command (explicitly deferred by spec) + - `System.CommandLine` framework migration — keep the existing minimal-args convention + used by the `run` and `artifacts` commands; introducing a library would be a separate + refactor + - YAML / CSV / table output formats — JSON only + - Interactive REPL mode + - Progress reporting beyond stderr status lines + + ## Design + + ### Command shape (every analysis command) + + ```csharp + public static class SweepCommand + { + public static async Task ExecuteAsync(string[] args) + { + var parsed = ParseArgs(args); // spec path, output, no-session, engine + if (parsed.ShowHelp) { PrintHelp(); return 0; } + + SweepSpec spec; + try { spec = CliJsonIO.Read(parsed.SpecPath); } + 
catch (JsonException ex) { Console.Error.WriteLine($"Invalid JSON: {ex.Message}"); return 2; } + + await using var evaluator = CliEngineSetup.CreateEvaluator(parsed); + var runner = new SweepRunner(evaluator); + + try + { + var result = await runner.RunAsync(spec); + CliJsonIO.Write(parsed.OutputPath, result); + return 0; + } + catch (InvalidOperationException ex) { Console.Error.WriteLine(ex.Message); return 3; } + } + } + ``` + + Each command is ~30-40 lines — parsing, spec deserialization, runner invocation, output. + + ### Why not `System.CommandLine`? + + The existing `FlowTime.Cli` uses hand-rolled arg parsing (see `Program.cs` — `for` loop + over args). Adding `System.CommandLine` would be a larger refactor that touches the + `run` command too. Keeping consistency is more important than getting the nicer library + for this milestone. A future cleanup milestone can migrate all commands together. + + ### Why JSON-over-stdio and not flag-driven? + + Specs like `OptimizeSpec` carry search ranges (one entry per param with lo/hi), objective + enums, tolerance, max iterations. Representing them as CLI flags is unergonomic: + + ``` + flowtime optimize --param arrivals --range-arrivals 0:100 --param capacity \ + --range-capacity 1:20 --metric util --objective minimize \ + --tolerance 1e-4 --max-iters 200 + ``` + + vs. + + ``` + cat optimize-spec.json | flowtime optimize + ``` + + The JSON path is pipeline-native: compose with `jq`, store specs as fixtures, invoke + from Azure Functions custom handlers, share spec files with the API. 
+ + ## Acceptance Criteria + + - [x] Five CLI commands (`validate`, `sweep`, `sensitivity`, `goal-seek`, `optimize`) wired into `Program.cs` router + - [x] Each command parses `--spec` / stdin, `--output` / stdout, `--no-session`, `--engine`, `--help` + - [x] `validate` reads YAML (not JSON) via `--model` / stdin; outputs `ValidationResult` as JSON + - [x] Each analysis command reads its matching `*Spec` as JSON and writes its matching result as JSON, byte-compatible with the corresponding `/v1/` endpoint + - [x] `CliEngineSetup` helper resolves binary path via `--engine` → `FLOWTIME_RUST_BINARY` → solution-relative default → `$PATH` + - [x] `CliEngineSetup` constructs `SessionModelEvaluator` by default; `--no-session` selects `RustModelEvaluator` + - [x] `CliJsonIO` helper reads JSON from stdin-or-file and writes JSON to stdout-or-file with camelCase / web defaults matching the API; `JsonStringEnumConverter` added so `objective: "minimize"` etc. deserialize correctly + - [x] Exit codes follow the 0/1/2/3 contract (success / analysis-failed / input-error / engine-error) + - [x] Missing engine binary produces exit 2 with a readable stderr message + - [x] Invalid JSON produces exit 2 with a stderr message; no partial stdout + - [x] `--help` on any command prints command-specific usage and exits 0 + - [x] Unit tests pass: 72 new CLI unit tests + - 15 CliJsonIO (read/write, file/stdin, camelCase, null literal, errors) + - 14 CliCommonArgs (all flag variants, missing values, unknown flag, positional, dash-as-positional) + - 8 CliEngineSetup (path precedence, evaluator selection, disposal idempotency) + - 13 ValidateCommand (help, arg errors, tier, valid/invalid YAML, output) + - 18 AnalysisCommandTests (help for each of 4 commands, shared error paths, IsOnPath, BarePath) + - 4 deferred (covered by integration tests instead — see below) + - [x] Integration tests pass with the Rust binary present: 10 tests (TimeMachineCliIntegrationTests) + - [x] `flowtime validate` with 
valid and invalid YAML + - [x] `flowtime sweep` end-to-end producing correct series (arrivals=10,20,30 → served=5,10,15) + - [x] `flowtime sensitivity` end-to-end (∂served/∂arrivals = 0.5) + - [x] `flowtime goal-seek` end-to-end (target served=25 → arrivals≈50) + - [x] `flowtime optimize` converging on a `MAX(x-7,7-x)` bowl around arrivals=14 + - [x] Session vs. per-eval flag (`--no-session`) both work + - [x] Output to file (`-o`) matches output to stdout + - [x] Engine compile error (unknown function) produces exit 3 + - [x] Every reachable path in the new command classes and helpers is covered (line-by-line audited) + - [x] `docs/architecture/time-machine-analysis-modes.md` — new "CLI surface" section documents the five commands, JSON I/O contract, exit codes, evaluator selection, engine resolution, and pipeline composition example + - [x] `Program.cs` `PrintUsage` updated with the five new commands + - [x] `dotnet build FlowTime.sln` green + - [x] `dotnet test FlowTime.sln` all green — 1,702 passed / 9 skipped + + ## Coverage notes + + **Covered:** every reachable branch in the command classes, helpers, and the `AnalysisCliRunner` shared path. 
The 89 CLI unit tests explicitly exercise: + + - Help paths for all 5 commands + - All `CliCommonArgs` flag variants (spec/model/output/no-session/engine/help) and their error paths (missing value, unknown flag) + - Positional spec path AND `-` as a positional + - JSON I/O to/from file and stdin/stdout; invalid JSON; null JSON literal; missing file + - Engine path precedence (explicit/env/default); empty explicit falls through + - Evaluator construction for both session and no-session; disposal idempotency for both + - Input-error paths (exit 2): unknown flag, missing spec file, invalid JSON, invalid spec (ArgumentException), missing engine binary + - `IsOnPath` branches (absolute / relative-with-separator / bare name) + - Bare-name engine path bypasses file-existence check and reaches the spawn step + + Integration tests (8) cover success paths and the exit-3 engine-error path. + + **Explicitly not covered:** + + | Path | Why untested | + |------|--------------| + | `CliEngineSetup.ResolveEnginePath` fallback to bare `"flowtime-engine"` when `DirectoryProvider.FindSolutionRoot()` returns null | Would require environment manipulation to move outside any .git-rooted directory tree. The env-var and explicit paths are covered; the default-path branch is covered when run inside the repo. | + + This is a single acceptable gap for platform-edge behavior. + + ## Dependencies + + - m-E18-06 (TimeMachineValidator) — delivered + - m-E18-09 (`IModelEvaluator`, `SweepRunner`, `ConstNodePatcher`) — delivered + - m-E18-10 (`SensitivityRunner`, `ConstNodeReader`) — delivered + - m-E18-11 (`GoalSeeker`) — delivered + - m-E18-12 (`Optimizer`, `OptimizeSpec`) — delivered + - m-E18-13 (`SessionModelEvaluator`, evaluator config switch) — delivered + + ## Risks / notes + + - **Argument parser consistency.** The existing CLI uses manual `for`-loop parsing. New + commands should follow the same convention; don't introduce a parsing library in this + milestone. 
A future cleanup epic can migrate all commands to `System.CommandLine`. + - **Test isolation.** Integration tests spawn the Rust engine subprocess per test. Same + skip-if-missing pattern as m-E18-13 integration tests. + - **Stdin handling in tests.** Tests should not actually redirect `Console.In` — use the + `--spec <path>` flag with a temp file for test inputs. Reserve stdin testing for a + single smoke test that sets `Console.SetIn`. + - **Binary resolution on Windows.** Paths use `Path.Combine`; binary name on Windows is + `flowtime-engine.exe`. The existing `DirectoryProvider` / `EngineSessionBridge` handle + this correctly — reuse their logic. + - kind: milestone + id: M-012 + frontmatter: + title: Compiled Semantic References + status: done + parent: E-16 + body: | + ## Goal + + Introduce typed semantic references in the compiled/runtime model so FlowTime stops deriving runtime relationships from raw authoring strings inside API and analytical code. This milestone creates the first hard boundary between authoring syntax and runtime truth. It also types the `Parallelism` field, which currently uses `object?` and is parsed at runtime in both Core and API. + + ## Context + + Today runtime-facing semantics still preserve raw strings such as `file:SupportQueue_queue.csv` or `series:NodeId`, and later layers parse those strings again to recover meaning. That is the root cause behind the current logical-type drift and duplicate parsers in `StateQueryService`. 
+ + Reference parsing is currently duplicated across four sites: + - `SemanticLoader.IsFileUri()` / `LoadSeries()` — Core, combines reference resolution with file I/O + - `ModelCompiler.IsFileReference()` — Core, reference detection only + - `StateQueryService.TryResolveSeriesNodeId()` — API, extracts node IDs from references for logicalType promotion + - `RunArtifactReader.NormalizeSeriesIdentifier()` — Generator, normalizes references during capture + + Runtime metadata recovery also still leaks across the authoring/runtime boundary: + - `RunManifestReader.ExtractTelemetrySourcesFromText()` — Core, reparses raw YAML text to recover telemetry-source facts when artifact metadata is missing + + Before analytical purity can be enforced, the compiler must own semantic reference resolution once. This cut is forward-only: old runs, fixtures, and approved snapshots can be regenerated once the compiled runtime shape changes. + + ## Pre-flight Gate + + Before implementation begins, confirm these invariants are testable and will gate review: + + 1. No new semantic string parsing helpers are introduced in API/UI for analytical identity. Remaining logical-type bridge helpers are isolated and explicitly deleted in m-E16-03. + 2. No new `file:` / `series:` / `@` parsing helper outside the compiler. + 3. `Parallelism` is no longer `object?` — it is a typed reference resolved at compile time. + 4. SemanticLoader accepts typed references for data loading, not raw authoring strings. + + ## Acceptance Criteria + + 1. Runtime-facing node semantics use typed references for the semantic fields that affect runtime behavior, including arrivals, served, queueDepth, capacity, processingTimeMsSum, servedCount, attempts, failures, retryEcho, and related analytical inputs. + 2. The compiler resolves self, node, file, and series references into one canonical representation with deterministic tests covering current repo-standard reference patterns. + 3. 
Raw source text is retained only for provenance/debug surfaces; runtime behavior no longer depends on reparsing those raw strings. + 4. Forward-only migration is explicit: existing run directories, generated fixtures, and approved snapshots that depend on the old runtime shape are regenerated; no compatibility reader or fallback path is added for the old analytical/runtime boundary. + 5. `StateQueryService` no longer contains raw-string semantic-reference parsing helpers for data loading, queue-source recovery, or parallelism resolution. Any remaining logical-type bridge code consumes compiled typed semantics and is tracked for deletion in m-E16-03. + 6. `Parallelism` becomes a typed reference (numeric constant or series ref) resolved at compile time. The `object?` type on `NodeSemantics.Parallelism` is replaced. + 7. `SemanticLoader` is split: reference resolution moves to the compiler; data loading stays as I/O and takes typed references as input instead of raw strings. + 8. A grep-based audit confirms no `file:` / `series:` string parsing remains in API or adapter code for runtime analytical behavior. + 9. Runtime metadata readers no longer recover telemetry-source facts by reparsing raw YAML/model text once regenerated artifacts can carry those facts explicitly; raw-text fallback readers are deleted in the same forward-only cut. + 10. `dotnet build` and `dotnet test --nologo` are green. + + ## Guards / DO NOT + + - **DO NOT** add a compatibility reader or fallback parser for the old raw-string reference shapes. Forward-only. + - **DO NOT** create a new reference-parsing helper in the API or adapter layer. If a new reference pattern is discovered, extend the compiler. + - **DO NOT** keep `NodeSemantics.Parallelism` as `object?`. The loose typing is the problem, not a feature. + - **DO NOT** make SemanticLoader parse raw strings itself. After this milestone, SemanticLoader receives typed refs for I/O. 
+ - **DO NOT** preserve raw-string parsing helpers such as `TryResolveSeriesNodeId` or `seriesFileRegex` as "temporary" helpers. Descriptor-driven logical-type deletion belongs to m-E16-03; do not blur the slice by pulling that milestone forward. + - **DO NOT** introduce adapter-side reference resolution "for convenience." The compiler owns reference resolution. + - **DO NOT** keep raw-YAML fallback extraction in runtime metadata readers once regenerated artifacts can carry telemetry-source facts explicitly. + + ## Deletion Targets + + These specific code paths must be removed or replaced by this milestone's completion: + + | Target | Location | Why | + |--------|----------|-----| + | `TryResolveSeriesNodeId()` | StateQueryService.cs:5288 | Parses `file:`, `series:`, `@` in the adapter to extract node IDs | + | `seriesFileRegex` | StateQueryService.cs:5250 | Regex for extracting filenames from `file:` URIs | + | `IsFileUri()` (as reference resolution) | SemanticLoader.cs:185 | Data loader should not decide what a reference means | + | `IsFileReference()` | ModelCompiler.cs:140 | Collapses into typed reference resolution in the compiler | + | `ParseParallelismScalar()` | StateQueryService.cs | Runtime parsing of `object?` parallelism — compiler resolves it | + | `BuildParallelismSeries()` | StateQueryService.cs:1694 | Runtime conversion of parallelism to series — compiler resolves it | + | `ExtractTelemetrySourcesFromText()` fallback | RunManifestReader.cs | Runtime metadata must not recover telemetry facts by reparsing raw YAML | + + ## Test Strategy + + - **Reference resolution tests:** Cover all current reference patterns (`file:name.csv`, `series:NodeId`, `series:NodeId@Class`, self-reference, numeric literals for parallelism, node references for parallelism) with deterministic compiler-level tests. + - **Round-trip parity tests:** Prove that compiled typed references produce the same runtime data loading as the old raw-string path, then delete the old path. 
+ - **Negative tests:** Verify that `StateQueryService` and adapter code do not contain reference-parsing helpers post-migration (grep-based audit as AC). + - **Fixture regeneration:** All existing approved snapshots that depend on the old runtime shape are regenerated and re-approved. + - **Runtime-metadata migration tests:** Regenerated runs prove telemetry-source facts are loaded from explicit artifact metadata, not raw-YAML fallback extraction. + + ## Execution Checklist + + Work these slices in order. Do not pull descriptor-era cleanup forward from m-E16-03. + + ### Slice 1: Typed runtime reference model + + - [ ] Introduce the typed semantic reference model at the Core/compiler boundary. + - [ ] Replace runtime-facing `object?` parallelism with an explicit typed shape in `src/FlowTime.Core/Models/NodeSemantics.cs`. + - [ ] Update authored-model/template surfaces that currently carry loose parallelism values: `src/FlowTime.Contracts/Dtos/ModelDtos.cs`, `src/FlowTime.Sim.Core/Templates/Template.cs`, and the related template validation/substitution paths. + - [ ] Preserve authored YAML/template syntax; authoring stays string/number friendly even though runtime semantics become typed. + - Review gate: compiler tests cover `file:`, `series:`, `@`, self, numeric literal, and node/series-backed parallelism cases. + + ### Slice 2: Compiler owns reference resolution + + - [ ] Move runtime-relevant reference parsing/resolution into `src/FlowTime.Core/Compiler/ModelCompiler.cs`. + - [ ] Narrow `src/FlowTime.Core/DataSources/SemanticLoader.cs` so it performs typed-ref I/O only, not reference interpretation. + - [ ] Audit generator/run-orchestration call sites that currently depend on normalized raw references, including `src/FlowTime.Generator/Orchestration/RunOrchestrationService.cs`. + - [ ] Remove raw-YAML telemetry-source fallback from `src/FlowTime.Core/TimeTravel/RunManifestReader.cs` once regenerated runtime metadata carries those facts explicitly. 
+ - Review gate: round-trip parity tests prove typed refs load the same runtime data as the old raw-string path before the old path is deleted. + + ### Slice 3: API stops reparsing semantics + + - [ ] Remove or replace `TryResolveSeriesNodeId()`, `seriesFileRegex`, `ParseParallelismScalar()`, and `BuildParallelismSeries()` usage in `src/FlowTime.API/Services/StateQueryService.cs`. + - [ ] Audit `src/FlowTime.API/Services/GraphService.cs` so graph projection consumes compiled/typed semantics rather than normalizing raw parallelism references. + - [ ] Keep any remaining logical-type bridge code isolated for m-E16-03; this slice ends when raw reference parsing is gone, not when descriptor cleanup is complete. + - Review gate: grep audit over `src/FlowTime.API` and adapters finds no new raw reference parsers for runtime behavior. + + ### Slice 4: Validation sweep + + - [ ] Core compiler/data-source tests: `tests/FlowTime.Core.Tests/Compiler/ModelCompilerTests.cs`, `tests/FlowTime.Core.Tests/Compiler/ModelCompilerParityTests.cs`, `tests/FlowTime.Core.Tests/DataSources/SemanticLoaderTests.cs`. + - [ ] Sim/template tests: `tests/FlowTime.Sim.Tests/NodeBased/TemplateParserTests.cs` and any template substitution/validation coverage touched by typed parallelism. + - [ ] API regression tests: `tests/FlowTime.Api.Tests/StateEndpointTests.cs` for service-with-buffer parallelism and current-state/state-window behavior. + - [ ] Generator/integration smoke coverage: `tests/FlowTime.Generator.Tests/` plus at least one full-loop scenario in `tests/FlowTime.Integration.Tests/`. + - [ ] Final gate: `dotnet build` and `dotnet test --nologo` green after forward-only fixture regeneration. + + ## Technical Notes + + - Introduce a `SeriesRef`-style value object or equivalent typed semantic reference model. Consider a discriminated union: `FileRef | SeriesRef | SelfRef | NodeRef | ConstantRef`. + - Keep provenance-friendly raw text separate from runtime compiled semantics. 
+ - Centralize reference parsing in the compiler/parser boundary rather than adding new adapter helpers. + - Prefer deleting and regenerating old runs/fixtures over carrying mixed old/new runtime shapes. + - **SemanticLoader split:** `SemanticLoader.LoadSeries(string uri, int bins)` becomes `SemanticLoader.LoadSeries(SeriesRef seriesRef, int bins)` or equivalent. The `UriResolver.ResolveFilePath()` call stays but receives a typed ref. + - **Parallelism typing:** Replace `object? Parallelism` with a typed ref (e.g., `ParallelismRef` that is either a constant double or a series reference). Resolve at compile time. The 21-file cross-cut noted in gaps.md is acceptable because E-16 is the right place to do it. + - **Milestone boundary:** Descriptor-driven deletions (`DetermineLogicalType`, `NormalizeKind`, `IsDlqKind`) happen in m-E16-03 once the compiled descriptor exists. This slice stops at typed references and parallelism typing. + + ## Out of Scope + + - Public contract changes for analytical facts + - Class-truth boundary cleanup + - Consolidation of analytical evaluator logic + - UI/client cleanup + + ## Dependencies + + - Builds on the existing compiler/model parser foundation in Core + - kind: milestone + id: M-013 + frontmatter: + title: Class Truth Boundary + status: done + parent: E-16 + body: | + ## Goal + + Separate real by-class truth from wildcard fallback before runtime analytical descriptors and evaluators depend on it. This milestone makes class truth explicit instead of letting later layers infer it from `*` and coverage side effects. + + ## Context + + The current class path mixes aggregation, coverage, warnings, and synthesized wildcard fallback. That is too late and too implicit for the rest of E-16: if evaluator and contract work build on it as-is, they will keep guessing whether a class result is real or synthesized. 
+ + Wildcard handling is currently scattered across: + - `StateQueryService` (line ~1711): blank class keys → `*` + - `StateQueryService` (line ~4712): `*` and `DEFAULT` detection, skip logic to avoid double-counting + - `Dashboard.razor.cs` `EnsureFallbackClassesFromWindow()`: client-side fallback class extraction + + ## Acceptance Criteria + + 1. Internal class surfaces distinguish real by-class data, synthesized fallback, and no-class coverage explicitly via a typed representation (not just string key conventions). + 2. Wildcard fallback is represented as an explicit fallback fact rather than inferred solely from the `*` key or the absence of real class data. + 3. Analytical evaluation and projection code consume explicit class-truth facts instead of silently relying on wildcard fallback. + 4. Tests cover real multi-class fixtures separately from fallback projection cases, and approved outputs are regenerated forward-only where needed. + 5. A test or assertion proves that fallback-only data cannot be confused with real by-class analytical results in downstream evaluation. + 6. Forward-only regenerated runtime metadata carries explicit fallback labeling at the class boundary; legacy `*` / `DEFAULT` normalization helpers are deleted rather than retained as compatibility translators. + 7. `dotnet build` and `dotnet test --nologo` are green. + + ## Guards / DO NOT + + - **DO NOT** add `IsFallback` booleans to every DTO or scatter fallback awareness across unrelated types. Use a small explicit runtime shape (e.g., a tagged union or wrapper) at the class-data boundary. + - **DO NOT** keep `*` as the sole signal for fallback. The `*` key may persist for serialization, but runtime code must not use string-equality checks against `*` or `DEFAULT` to decide truth vs fallback. + - **DO NOT** unify `*` and `DEFAULT` by normalizing to one string. Replace the string convention with a typed fact. 
+ - **DO NOT** keep a legacy class-id normalization helper as a permanent ingest shim once regenerated artifacts can encode fallback explicitly. + - **DO NOT** change the public API contract in this milestone. Class-truth is an internal boundary; contract publication comes in m-E16-06. + - **DO NOT** let tests pass by exercising only fallback projection. Real multi-class and fallback-only must be separate test categories. + + ## Deletion Targets + + | Target | Location | Why | + |--------|----------|-----| + | Blank-key → `*` string inference | StateQueryService.cs:~1711 | Replace with typed fallback fact at the source | + | `*`/`DEFAULT` string-equality skip logic | StateQueryService.cs:~4712 | Replaced by typed class-truth discriminator | + | `ClassEntry.FromLegacyClassId()` / `IsLegacyFallbackId()` | ClassEntry.cs | Forward-only runtime metadata should encode fallback explicitly, not via legacy string translation | + + ## Test Strategy + + - **Two distinct fixture categories:** (1) real multi-class data with 2+ named classes, (2) single-class or no-class data that produces fallback. Tests must not share fixtures. + - **Fallback-is-labeled tests:** Assert that downstream evaluation code receives an explicit fallback marker, not just a `*` key string. + - **No false parity tests:** A test that proves "by-class analytical results match expectations" must use real multi-class fixtures, not wildcard-only data. + - **Negative assertion:** Grep-based check that no new `== "*"` or `== "DEFAULT"` string comparisons are added for analytical class-truth decisions. + + ## Technical Notes + + - Favor a small explicit runtime shape over spreading fallback booleans across unrelated DTOs. A tagged type like `ClassResult` with `Real | Fallback` discriminator is one option. + - Keep this milestone internal to Core/API surfaces; public analytical contract publication comes later. 
+ - If wildcard fallback remains visible externally at this stage, it must be labeled as fallback in the internal representation even if the external key is still `*`, and regenerated runtime metadata must carry that fact without relying on `*` / `DEFAULT` translation helpers. + - Dashboard and other client fallback heuristics are deleted only when explicit fallback facts are published in m-E16-06. Do not pull client migration into this internal boundary slice. + + ## Out of Scope + + - Public analytical contract redesign + - Runtime analytical descriptor publication + - Client heuristic deletion + + ## Dependencies + + - [m-E16-01-compiled-semantic-references](m-E16-01-compiled-semantic-references.md) + - kind: milestone + id: M-014 + frontmatter: + title: Runtime Analytical Descriptor + status: done + parent: E-16 + body: | + ## Goal + + Compile an authoritative analytical descriptor onto runtime nodes and make all analytical capability checks consume that descriptor. This removes adapter-owned logical-type reconstruction and turns analytical identity into a compiled invariant. The descriptor absorbs and replaces `AnalyticalCapabilities`. + + ## Context + + `AnalyticalCapabilities` is currently a useful bridge abstraction resolved at query time from `kind + logicalType` strings. Once typed semantic references exist and class truth is explicit, runtime nodes can carry an analytical descriptor that is authoritative instead of forcing `StateQueryService` to infer one from strings and fallback behavior. + + The descriptor absorbs `AnalyticalCapabilities` rather than sitting alongside it. `AnalyticalCapabilities.Resolve(kind, logicalType)` was the right bridge for m-ec-p3a1, but string-based resolution is exactly what E-16 eliminates. The descriptor is produced by the compiler using typed semantic references. The computation methods formerly on `AnalyticalCapabilities` (`ComputeBin`, `ComputeWindow`, etc.) 
now live on the analytical evaluator; broader evaluator consolidation continues in m-E16-04. + + ## Structural Decision: Descriptor absorbs AnalyticalCapabilities + + | Aspect | AnalyticalCapabilities (current) | Analytical Descriptor (target) | + |--------|----------------------------------|-------------------------------| + | Resolution | `Resolve(string? kind, string? logicalType)` at query time | Produced by compiler at compile time from typed refs | + | Capability flags | `HasQueueSemantics`, `HasServiceSemantics`, etc. | Same flags, now compiled facts | + | `EffectiveKind` | String normalization bridge | Removed — replaced by typed analytical identity on the descriptor | + | Computation methods | `ComputeBin`, `ComputeWindow`, etc. | Owned by the descriptor-backed analytical evaluator; extended further in m-E16-04 | + | Queue origin | Not captured — recovered by `TryResolveSeriesNodeId` in API | Compiled fact: source-node identity for queue depth | + | Node category | Not captured — `GraphMapper.Classify()` and `TopologyCanvas.ClassifyNode()` reconstruct it | Compiled fact: expression / constant / service / queue / dlq / router | + | Parallelism | Not captured — resolved at runtime from `object?` | Compiled fact: resolved from typed parallelism reference | + + ## Acceptance Criteria + + 1. Runtime nodes carry a compiled analytical descriptor that captures: effective analytical identity, queue/service semantics, cycle-time applicability, warning applicability, queue-origin/source-node facts, node category, and resolved parallelism. + 2. Explicit `serviceWithBuffer` nodes and reference-resolved queue-backed nodes produce identical descriptors using typed references and real fixture shapes, not basename heuristics. + 3. Snapshot/window analytical paths, backlog warnings, flow-latency base composition, SLA helper logic, and internal state/graph projection paths consume the descriptor rather than reconstructing analytical identity from strings. + 4. 
`AnalyticalCapabilities` is deleted. Its capability flags are absorbed into the descriptor. Its computation methods live on the descriptor-backed evaluator surface. + 5. Adapter-side logical-type inference helpers used for runtime analytical behavior are deleted. + 6. Core and targeted API tests prove parity for both explicit and reference-resolved cases. + 7. Node category (expression/constant/service/queue/dlq) is a compiled descriptor field. No downstream code re-derives it from `kind` strings. + 8. `dotnet build` and `dotnet test --nologo` are green. + + ## Guards / DO NOT + + - **DO NOT** keep `AnalyticalCapabilities` alongside the descriptor. The descriptor replaces it. + - **DO NOT** resolve the descriptor from `kind + logicalType` strings. It must be produced by the compiler using typed semantic references. + - **DO NOT** put computation methods on the descriptor. Descriptor fields are facts, not deferred computations. Math moves to the evaluator. + - **DO NOT** add a `string EffectiveKind` field to the descriptor. That was a bridge concept for string-based resolution. The descriptor captures identity directly. + - **DO NOT** let any adapter or UI code reconstruct node category from `kind` strings after this milestone. + - **DO NOT** design the descriptor as a bag of booleans. Prefer a structured type with explicit semantics over `bool HasX` proliferation where a richer type is clearer. 
+ + ## Deletion Targets + + | Target | Location | Why | + |--------|----------|-----| + | `AnalyticalCapabilities` class | Core/Metrics/AnalyticalCapabilities.cs | Absorbed into the compiled descriptor | + | `AnalyticalCapabilities.Resolve()` | Core/Metrics/AnalyticalCapabilities.cs:32 | String-based resolution replaced by compiler | + | `DetermineLogicalType()` (if survived m-E16-01) | StateQueryService.cs:5252 | Adapter-side logicalType inference | + | `NormalizeKind()` (if survived m-E16-01) | StateQueryService.cs:5240 | String normalization — descriptor has the identity | + | `IsDlqKind()` (if survived m-E16-01) | StateQueryService.cs:5354 | DLQ classification becomes a compiled fact | + | `TryResolveServiceWithBufferDefinition()` | StateQueryService.cs:5267 | Queue-origin discovery from strings — now a compiled fact | + | UI-side `IsServiceLike()` for analytical gating (if any exists) | Various | Replaced by descriptor fact; UI deletion completes in m-E16-06 | + + ## Test Strategy + + - **Descriptor compilation tests:** Given a model with explicit `serviceWithBuffer`, reference-resolved queue, expression, constant, DLQ, and router nodes, prove the compiler produces correct descriptors. + - **Parity tests:** Explicit `serviceWithBuffer` and reference-resolved queue-backed nodes produce identical descriptor fields. + - **Adapter-does-not-reclassify tests:** Assert that `StateQueryService` and projection code read descriptor fields without re-deriving analytical identity. + - **Category tests:** Prove that node category (expression/constant/service/queue/dlq) is a compiled fact, not inferred from strings downstream. + - **Regression guard:** Grep-based check that no new `NormalizeKind`, `DetermineLogicalType`, or `kind.ToLowerInvariant()` patterns appear in API code. + + ## Technical Notes + + - Separate authoring `kind` from runtime analytical category. `kind` remains on the authored model; the descriptor owns the runtime truth. 
+ - Effective analytical identity is a typed descriptor fact, not a normalized string bridge. + - Descriptor fields should be facts, not deferred computations. + - Queue origin and source-node identity should come from compiled references rather than file-name or string-shape inference. + - Consider an enum for node category rather than strings: `NodeCategory { Expression, Constant, Service, Queue, Dlq, Router }`. + - Legacy projection hints may still be serialized for compatibility until m-E16-06, but they must be derived from descriptor facts rather than reparsed semantics or local string heuristics. + + ## Out of Scope + + - Public contract publication of the descriptor (m-E16-06) + - Consolidation of emitted-series truth and warning facts into the evaluator (m-E16-04, m-E16-05) + + ## Dependencies + + - [m-E16-02-class-truth-boundary](m-E16-02-class-truth-boundary.md) + - kind: milestone + id: M-015 + frontmatter: + title: Core Analytical Evaluation + status: done + parent: E-16 + body: | + ## Goal + + Move analytical values, emitted-series truth, and graph-level analytical computation into a pure Core evaluation surface so the API projects analytical results instead of deciding them. This includes both per-node analytical evaluation (from `AnalyticalCapabilities`) and graph-level flow latency propagation (from `StateQueryService.ComputeFlowLatency`). + + ## Context + + `AnalyticalCapabilities` moved current math into Core, but emitted-series truth is still partially computed in the adapter. That leaves the most important projection question — what should actually be emitted for this node/window/class — split across Core and API. + + Additionally, `flowLatencyMs` — the cumulative flow latency from entry to a node through the queueing network — is currently computed in `StateQueryService.ComputeFlowLatency()` as a topological graph traversal with flow-volume-weighted accumulation. 
This is graph-level queueing theory (expected sojourn time through a network), not an adapter concern. The Core IS a graph engine; graph-level analytical computation belongs there. + + Effective capacity computation (`capacity x parallelism`) is also currently split between `InvariantAnalyzer` (Core) and `StateQueryService.GetEffectiveCapacity` (API). It should have one owner in Core. + + Metrics query resolution also still carries a duplicate analytical path: `MetricsService` first asks `StateQueryService` for a window, then falls back to `ResolveViaModelAsync()` and recomputes analytical behavior from the model when state-window resolution fails. That is still a second analytical execution path in the adapter and E-16 must delete it. E-16 stays forward-only here as well: legacy runs that depended on that path are removed and regenerated rather than supported through a special failure/upgrade mode. + + **Supersedes D-2026-04-03-003** for `flowLatencyMs`: that decision was a bridge for m-ec-p3a1 scope. E-16 is the full purification — graph-level analytical computation moves to Core. + + ## Acceptance Criteria + + 1. Core exposes an analytical evaluation surface for snapshot, window, and by-class values driven by the compiled analytical descriptor and explicit class-truth boundary. + 2. Core returns one consolidated internal analytical result surface with explicit nested sections for derived values, emitted-series truth, effective-capacity/utilization facts, and graph-level flow-latency outputs sufficient for projection. + 3. `StateQueryService` no longer computes analytical emission truth or per-node/per-class analytical math locally in the current state paths. + 4. `flowLatencyMs` computation moves to Core as a pure function: `(compiledTopology, perNodeCycleTime[], edgeFlowVolume[]) → perNodeFlowLatency[]`. The adapter passes inputs, Core owns the graph traversal and accumulation. + 5. 
Effective capacity computation (`capacity x parallelism`) has one owner in Core. The API's `GetEffectiveCapacity` / `GetParallelismValue` / `ComputeUtilizationSeries` delegation is replaced by Core evaluation. + 6. Tests prove analytical evaluation against both real multi-class fixtures and explicit fallback cases without conflating the two. + 7. `MetricsService` and analogous analytical query surfaces consume the same Core evaluation surface instead of maintaining a second model-evaluation fallback path for analytical behavior; legacy runs that depended on the fallback path are removed or regenerated as part of the forward-only cut. + 8. `dotnet build` and `dotnet test --nologo` are green. + + ## Guards / DO NOT + + - **DO NOT** leave `flowLatencyMs` in the adapter. It is graph-level queueing theory — expected sojourn time through a network — not an orchestration concern. The Core is a graph engine; this computation belongs there. + - **DO NOT** leave partial computation in the adapter "for convenience." If the adapter computes any analytical value, that is a regression. + - **DO NOT** keep `GetEffectiveCapacity()` or `ComputeUtilizationSeries()` in `StateQueryService`. Effective capacity (with parallelism) is a flow algebra concept. + - **DO NOT** keep `MetricsService.ResolveViaModelAsync()` as a second analytical execution path once the unified Core evaluator exists. + - **DO NOT** introduce an "unsupported legacy run" mode or regeneration hint path for analytical fallback cleanup. Delete old runs and regenerate future-version runs instead. + - **DO NOT** make the evaluator depend on adapter types. The evaluator takes compiled descriptors + raw series data and returns analytical results. The adapter projects those results into DTOs. + - **DO NOT** use ad hoc dictionaries or flag tuples for results. Prefer explicit result types and one consolidated internal result surface with nested sections over many partial policy-bearing result fragments. 
+ - **DO NOT** make `flowLatencyMs` depend on implicit node ordering. The Core evaluator should perform explicit topological sorting or receive an explicit order. + + ## Deletion Targets + + | Target | Location | Why | + |--------|----------|-----| + | `ComputeFlowLatency()` | StateQueryService.cs:2748 | Graph-level analytical computation moves to Core | + | `BuildIncomingFlowEdges()` | StateQueryService.cs:2867 | Supporting method for flowLatencyMs — moves to Core | + | `BuildEdgeFlowVolumeLookup()` | StateQueryService.cs:2921 | Supporting method for flowLatencyMs — moves to Core | + | `GetEffectiveCapacity()` | StateQueryService.cs:2644 | Effective capacity moves to Core | + | `GetParallelismValue()` | StateQueryService.cs:2666 | Parallelism resolution moves to Core (compiler) | + | `ComputeUtilizationSeries()` | StateQueryService.cs:2595 | Utilization is derived from effective capacity — Core | + | `ConvertClassMetrics()` local computation | StateQueryService.cs:1011 | Class analytical math moves to Core evaluator | + | `ResolveViaModelAsync()` fallback | MetricsService.cs | Duplicate adapter-side analytical execution path must be deleted | + | `AnalyticalCapabilities.ComputeBin/Window/ClassBin/ClassWindow` | AnalyticalCapabilities.cs (if not deleted in m-E16-03) | These methods become the evaluator | + | Per-node analytical math in snapshot/window builders | StateQueryService.cs | Replaced by Core evaluator calls | + + ## Test Strategy + + - **Evaluator unit tests:** Pure function tests — given descriptor + raw series data, assert correct analytical results for snapshot, window, and by-class. + - **flowLatencyMs graph tests:** Given a small compiled topology with known cycle times and edge flow volumes, assert correct cumulative latency propagation. + - **Effective capacity tests:** Given capacity series + parallelism (constant and time-varying), assert correct effective capacity and utilization. 
+ - **Adapter-is-projector tests:** Assert that `StateQueryService` calls Core evaluator and maps results to DTOs without computing any analytical values locally. + - **Single analytical path tests:** Assert that metrics queries and state-window queries consume the same Core evaluator surface and do not diverge through an alternate model-fallback path. + - **Multi-class vs fallback tests:** Evaluator tests must include both real multi-class and fallback-only fixtures as separate test cases. + - **Parity tests:** End-to-end API tests prove the same analytical outputs as before the migration. + + ## Technical Notes + + - Keep pure math helpers where useful, but move policy composition out of the adapter. + - Prefer one consolidated internal analytical result model with nested sub-results over many partial adapter-composed results. + - Warning/analyzer facts are split into the next milestone so this slice stays independently shippable. + - The flowLatencyMs evaluator should take explicit topological order or compute it from the compiled graph. Do not rely on implicit iteration order. + - `flowLatencyMs` uses flow-volume-weighted accumulation: `upstream = sum(flow[i] * predLatency[i]) / sum(flow[i])`. This is the right formula — preserve it. + + ## Out of Scope + + - Warning/analyzer fact publication (m-E16-05) + - Public contract changes (m-E16-06) + - Client migration (m-E16-06) + + ## Dependencies + + - [m-E16-03-runtime-analytical-descriptor](m-E16-03-runtime-analytical-descriptor.md) + - kind: milestone + id: M-016 + frontmatter: + title: Analytical Warning Facts and Primitive Cleanup + status: done + parent: E-16 + body: | + ## Goal + + Move analytical warning facts into the consolidated Core analytical result surface and finish the primitive ownership cleanup so analytical policy has one owner per concept. 
+ + ## Context + + E16-04 correctly moved descriptor-backed analytical evaluation, emitted-series truth, effective capacity, utilization, and flow latency into Core. The remaining impurity is narrower: warning production and a few helper seams still leave the adapter and public helper APIs with partial analytical ownership. + + Current warning production is still split across adapter-local policy and Core predicates: + - `BuildBacklogWarnings()` in `StateQueryService` assembles analytical warnings in the API. + - `FindQueueGrowthStreak()`, `FindOverloadStreak()`, and `FindAgeRiskStreak()` still live in the adapter and compute analytical policy over evaluated series. + - `BuildStationarityWarnings()` still constructs warning DTOs in the adapter after calling `RuntimeAnalyticalEvaluator.CheckStationarity()`. + - Descriptor-backed warning eligibility is already in Core, but warning production itself is not. + + Current primitive ownership is also still split: + - `RuntimeAnalyticalEvaluator` is already the public analytical owner for descriptor-backed evaluation and emitted truth. + - `LatencyComputer` still duplicates `CycleTimeComputer.CalculateQueueTime()` in different units and is called both from the evaluator and directly from the API. + - `CycleTimeComputer.CheckNonStationary()` still acts as the public stationarity policy implementation even though stationarity is now an evaluator/analyzer concern. + - E16-04 already established one consolidated internal analytical result family. E16-05 must extend that surface with analytical warning facts rather than introduce a second parallel warning pipeline. + + ## Acceptance Criteria + + 1. Stationarity, backlog growth, overload, and age-risk analysis consume compiled descriptors plus evaluated analytical facts from Core; no current-state API path reconstructs analytical warning applicability from raw semantics or payload shape. + 2. 
Core extends the consolidated internal analytical result surface with structured analytical warning facts for the relevant window/current-state paths. The API projects those facts into contract DTOs without building analytical warning policy locally. + 3. Primitive ownership is singular: + - `RuntimeAnalyticalEvaluator`, or a tightly-scoped Core analyzer directly beneath it, is the only public analytical owner for latency and stationarity warning semantics. + - `LatencyComputer` is deleted or reduced to private/internal helper math beneath the evaluator/analyzer surface; `StateQueryService` does not call it directly. + - `CycleTimeComputer.CheckNonStationary()` is deleted or reduced to private/internal helper math; public stationarity policy is no longer exposed from `CycleTimeComputer`. + 4. `StateQueryService` no longer contains analytical warning producers such as `BuildBacklogWarnings()`, `BuildStationarityWarnings()`, `FindQueueGrowthStreak()`, `FindOverloadStreak()`, or `FindAgeRiskStreak()` for runtime analytical behavior. + 5. Duplicate analytical policy paths are removed. Each analytical concept relevant to this slice (`queueTimeMs`, `latencyMinutes`, stationarity, backlog growth, overload, age risk) has exactly one public owner in Core, and the API acts only as projector. + 6. `dotnet build` and `dotnet test --nologo` are green, with regenerated approved outputs where warning facts changed. + + ## Guards / DO NOT + + - **DO NOT** fold non-analytical warnings (e.g., configuration warnings, model validation warnings) into this milestone. Scope is analytical warnings only. + - **DO NOT** move warning formatting/presentation into Core. Core returns analytical facts; the adapter projects those facts into DTOs. + - **DO NOT** introduce a second parallel analytical warning pipeline or result surface beside the consolidated internal analytical result family established in m-E16-04. 
+ - **DO NOT** keep `FindQueueGrowthStreak`, `FindOverloadStreak`, `FindAgeRiskStreak`, or `BuildStationarityWarnings()` in the adapter. These are analytical policy over evaluated facts — they belong in Core. + - **DO NOT** let `LatencyComputer.Calculate()` survive as a public semantic owner parallel to `RuntimeAnalyticalEvaluator`. + - **DO NOT** keep `CycleTimeComputer.CheckNonStationary()` as a public analytical seam once stationarity ownership is moved behind the evaluator/analyzer layer. + - **DO NOT** add new analyzer policies in the adapter. If a new warning type is needed, it goes in Core. + + ## Deletion Targets + + | Target | Location | Why | + |--------|----------|-----| + | `BuildBacklogWarnings()` | StateQueryService.cs | Adapter-local analytical warning assembly moves to Core | + | `BuildStationarityWarnings()` | StateQueryService.cs | Adapter-local stationarity warning construction moves to Core | + | `FindQueueGrowthStreak()` | StateQueryService.cs | Backlog growth analyzer policy moves to Core | + | `FindOverloadStreak()` | StateQueryService.cs | Overload analyzer policy moves to Core | + | `FindAgeRiskStreak()` | StateQueryService.cs | Age-risk analyzer policy moves to Core | + | Direct `LatencyComputer.Calculate()` API call | StateQueryService.cs | API stops calling analytical primitives directly for warning production | + | Public `LatencyComputer` semantic ownership | LatencyComputer.cs | Little's Law latency has one public owner in Core | + | Public stationarity policy on `CycleTimeComputer` | CycleTimeComputer.cs | Stationarity becomes evaluator/analyzer-owned rather than a public helper seam | + + ## Test Strategy + + - **Core warning-fact tests:** Evaluator/analyzer tests prove structured warning facts for stationarity, backlog growth, overload, and age risk from descriptor-backed inputs. 
+ - **Projector tests:** API tests prove `state_window` warnings are projected from Core warning facts rather than assembled from adapter-local streak helpers. + - **Ownership audit:** Grep-based check that no analytical warning detection logic or direct analytical primitive calls remain in `StateQueryService`. + - **Helper-boundary tests:** If helper math survives internally, public behavior tests move to the evaluator/analyzer surface; helper tests do not assert those helpers are the public analytical owner. + - **Golden/API parity:** Existing backlog/stationarity endpoint coverage and approved outputs are regenerated from Core-produced warning facts as needed. + + ## Technical Notes + + - Extend the consolidated analytical result family from m-E16-04 rather than inventing a second analytical warning surface. + - Keep warnings as analyzers over evaluated facts, not as ad hoc projection helpers. + - If helper math survives, make it private/internal beneath evaluator/analyzer ownership rather than a public analytical seam. + - Avoid folding unrelated non-analytical warnings into this milestone. + - Use explicit analytical warning facts nested under the Core analytical result surface; the adapter maps those facts to warning DTOs. + + ## Out of Scope + + - Public contract redesign (m-E16-06) + - UI/client heuristic deletion (m-E16-06) + + ## Dependencies + + - [m-E16-04-core-analytical-evaluation](m-E16-04-core-analytical-evaluation.md) + - kind: milestone + id: M-017 + frontmatter: + title: Analytical Contract and Consumer Purification + status: done + parent: E-16 + body: | + ## Goal + + Publish authoritative analytical and categorical facts in the current state and graph contracts and delete the first-party consumer heuristics in one forward-only cut. + + ## Context + + Internal purity is not enough if current consumers still classify analytical behavior or node category from `kind + logicalType`. 
This milestone makes the first-party consumer scope explicit so the cleanup does not balloon into a vague "fix the UI later" bucket. + + The current first-party consumers reconstruct node classification from strings across both state and graph surfaces: + - `TimeTravelMetricsClient.IsServiceLike()` (line ~234) — filters nodes for metrics by string-matching kind/logicalType + - `Dashboard.razor.cs.IsServiceLike()` (line ~382) — identical copy of the same heuristic + - `Topology.razor` helpers (`IsComputedKind`, `IsSinkKind`, fallback-class extraction) — classify and patch topology state from string hints + - `GraphMapper.Classify()` (line ~227) — categorizes nodes as Expression/Constant/Service from kind strings for layout + - `TopologyCanvas.ClassifyNode()` (line ~499) — categorizes nodes for visual filtering with a broader enum + - `TooltipFormatter` and `topologyCanvas.js` helpers — classify sink/queue/computed nodes in the rendering path + + All of these can be replaced by consuming compiled node category and analytical facts from the API contracts. + + ## Scope Clarification: State vs Graph Consumers + + `/state` and `/state_window` consumers need analytical applicability facts. `/graph` consumers need node category facts. Both are compiled domain facts and both are in scope for this cut. + + What stays in UI: + - **Layout rules** that consume the category: spacing ratios (55%/70%/100%), lane assignment (left/center/right), rendering dimensions — these are visual presentation, not domain logic. + - **Visibility toggles** (IncludeExpressionNodes, IncludeConstNodes, etc.) — these are user preferences, not domain classification. + - **Rendering decisions** (IsQueueLikeKind for width, IsComputedKind for leaf circles) — these may consume the category from the contract instead of re-deriving it. + + What is deleted: + - The `Classify()` and `ClassifyNode()` methods themselves — replaced by reading the category from the contract. 
+ - The `IsServiceLike()` methods — replaced by reading analytical facts from the contract. + + ## Acceptance Criteria + + 1. `FlowTime.Contracts` and the current `/state`, `/state_window`, and `/graph` response shapes expose a compact authoritative fact surface sufficient to determine analytical behavior and node category without `kind + logicalType` inference. This includes at minimum: node category (expression/constant/service/queue/dlq/router/sink as needed by first-party consumers), analytical applicability flags (queue semantics, service semantics, cycle-time decomposition), fallback/class-truth labeling, and warning applicability. + 2. The explicit first-party consumer scope for this milestone is migrated to use engine-published facts: `src/FlowTime.UI/Services/TimeTravelMetricsClient.cs`, `src/FlowTime.UI/Pages/TimeTravel/Dashboard.razor.cs`, `src/FlowTime.UI/Pages/TimeTravel/Topology.razor`, `src/FlowTime.UI/Components/Topology/GraphMapper.cs`, `src/FlowTime.UI/Components/Topology/TopologyCanvas.razor.cs`, `src/FlowTime.UI/Components/Topology/TooltipFormatter.cs`, and `src/FlowTime.UI/wwwroot/js/topologyCanvas.js`. + 3. Old hint fields and targeted analytical heuristics are removed in the same forward-only cut once those consumers are migrated; runs, fixtures, and approved snapshots are regenerated rather than compatibility-layered. + 4. API/UI tests and a grep-based audit prove the targeted analytical classification helpers are deleted. + 5. Documentation and decision records are updated so E-10 Phase 3 can resume on the purified boundary. + + ## Guards / DO NOT + + - **DO NOT** expose the full internal Core descriptor type directly in the API contract. Prefer a compact fact surface designed for consumers, not a leak of internal types. + - **DO NOT** keep `kind + logicalType` as the primary way consumers determine analytical behavior. 
The old hint fields may remain for backward-compatible display purposes, but analytical behavior must come from the new fact surface. + - **DO NOT** grow scope implicitly. If another consumer surface appears before this milestone starts, add it explicitly to the consumer list or defer it with a documented reason. + - **DO NOT** let `IsServiceLike()`, `Classify()`, or `ClassifyNode()` survive in any form. These are the heuristics this milestone exists to delete. + - **DO NOT** move layout/rendering logic into Core. Layout spacing, lane assignment, and rendering dimensions stay in the UI — they consume the category, they don't define it. + - **DO NOT** add a new `kind`-string-based classification method in the UI as a "simpler" replacement. The contract provides the category as a fact. + + ## Deletion Targets + + | Target | Location | Why | + |--------|----------|-----| + | `IsServiceLike()` | TimeTravelMetricsClient.cs:234 | String-based analytical node classification | + | `IsServiceLike()` | Dashboard.razor.cs:382 | Duplicate of the above | + | `EnsureFallbackClassMetadataFromWindow()` | Topology.razor | Client-side fallback class heuristic on the topology page | + | `IsComputedKind()` / `IsSinkKind()` / related kind helpers | Topology.razor | String-based topology-page categorization | + | `Classify()` | GraphMapper.cs:227 | String-based node categorization | + | `ClassifyNode()` | TopologyCanvas.razor.cs:499 | String-based node categorization (broader enum) | + | `IsQueueLikeKind()` | TopologyCanvas.razor.cs | String-based queue detection for rendering | + | `IsComputedKind()` | TopologyCanvas.razor.cs | String-based expression detection for rendering | + | `IsSinkKind()` | TopologyCanvas.razor.cs | String-based sink detection | + | `IsSinkKind()` / dependency-kind helpers | TooltipFormatter.cs | String-based tooltip categorization | + | `isComputedKind()` / `isQueueLikeKind()` / `isSinkKind()` | wwwroot/js/topologyCanvas.js | String-based canvas/rendering 
categorization |
+ | `EnsureFallbackClassesFromWindow()` (if survived m-E16-02) | Dashboard.razor.cs | Client-side class fallback heuristic |
+ | Old `nodeLogicalType` hint field usage for analytical behavior | Contracts, consumers | Replaced by authoritative fact surface |
+
+ ## Test Strategy
+
+ - **Contract completeness tests:** Assert that the new fact surface on `/state`, `/state_window`, and `/graph` responses contains the category and analytical flags required by first-party consumers.
+ - **Consumer migration tests:** Each named first-party consumer has tests proving it reads from the contract fact surface, not from `kind + logicalType` strings.
+ - **Grep-based deletion audit:** `rg "IsServiceLike|Classify\b|ClassifyNode|IsQueueLikeKind|IsComputedKind|IsSinkKind|EnsureFallbackClassMetadataFromWindow|isComputedKind|isQueueLikeKind|isSinkKind" src/FlowTime.UI/` returns zero matches. (Note: ripgrep uses unescaped `|` for alternation; `\|` would match a literal pipe and make the audit pass vacuously.)
+ - **Negative contract test:** Assert that removing `nodeLogicalType` from the contract does not break any consumer's analytical behavior (display may degrade, but analytical classification must not).
+ - **End-to-end parity:** UI integration tests (if present) show the same analytical behavior as before the migration.
+
+ ## Technical Notes
+
+ - Prefer a small explicit fact surface over leaking internal Core types directly. A nested `analytical` object on state nodes and a compact `category` fact on graph nodes is one option.
+ - Visual presentation categorization that is unrelated to analytical truth may remain in the UI, but it must consume the contract-provided category rather than re-deriving it from strings.
+ - If another consumer surface appears before this milestone starts, add it explicitly or defer it; do not grow scope implicitly.
+
+ ## Out of Scope
+
+ - General UI redesign unrelated to analytical truth
+ - New analytical primitives
+ - Svelte UI migration (E-11) — the seven named consumer files are existing Blazor UI (plus its JS interop), not Svelte
+
+ ## Dependencies
+
+ - [m-E16-05-analytical-warning-facts-and-primitive-cleanup](m-E16-05-analytical-warning-facts-and-primitive-cleanup.md)
+ - kind: milestone
+ id: M-018
+ frontmatter:
+ title: WebSocket Engine Bridge
+ status: done
+ parent: E-17
+ depends_on:
+ - M-002
+ body: |
+ ## Goal
+
+ The .NET API exposes a WebSocket endpoint that proxies a client connection to a Rust engine session subprocess. The Svelte UI can connect, send compile/eval commands, and receive streaming results. This is the bridge between the browser and the headless engine.
+
+ ## Context
+
+ After m-E18-02, the Rust engine has a `session` CLI mode: persistent process, MessagePack over stdin/stdout, commands for compile/eval/get_params/get_series. But there's no way for a browser to connect to it — browsers speak WebSocket, not stdin/stdout.
+
+ The bridge is deliberately simple: the .NET API is a **transparent proxy**. It spawns one engine session process per WebSocket connection and pipes frames between the two. No protocol translation — the same MessagePack messages flow through. The API adds session lifecycle management (spawn on connect, kill on disconnect) but no business logic.
+
+ ### Why proxy through .NET instead of direct WebSocket from Rust
+
+ - The .NET API already runs, handles CORS, auth, logging, and health checks
+ - Adding a WebSocket server to the Rust engine would require `tokio`/async runtime — heavyweight for a CLI tool
+ - The proxy pattern keeps the engine as a pure stdin/stdout component, composable in pipelines
+ - Future: the API can add session pooling, auth, and rate limiting without changing the engine
+
+ ## Acceptance Criteria
+
+ 1. **AC-1: WebSocket endpoint.** `ws://localhost:8081/v1/engine/session` accepts WebSocket upgrade requests.
Responds with 101 Switching Protocols on success. + + 2. **AC-2: Engine process lifecycle.** On WebSocket connect, the API spawns `flowtime-engine session` as a subprocess. On WebSocket close (client disconnect or error), the subprocess is killed and resources cleaned up. + + 3. **AC-3: Bidirectional proxy.** WebSocket binary frames are forwarded to the engine's stdin. Engine stdout data is forwarded as WebSocket binary frames. The MessagePack length-prefix framing is preserved end-to-end — the API does not parse or modify the payload. + + 4. **AC-4: Multiple concurrent sessions.** Each WebSocket connection gets its own engine subprocess. Two clients can run independent sessions simultaneously. + + 5. **AC-5: Error handling.** If the engine process crashes or exits unexpectedly, the WebSocket is closed with a 1011 (Internal Error) status code. If the engine binary is not found, the WebSocket is closed with 1013 (Try Again Later) and an error is logged. + + 6. **AC-6: CORS compatibility.** The existing permissive CORS policy allows WebSocket connections from the Svelte UI origin (localhost:5173). + + 7. **AC-7: Health check.** `GET /v1/engine/session/health` returns 200 with `{ "available": true/false }` indicating whether the engine binary is found and executable. + + 8. **AC-8: Svelte WebSocket client.** A TypeScript client module in `ui/src/lib/api/engine-session.ts` that: + - Connects to the WebSocket endpoint + - Encodes requests as length-prefixed MessagePack + - Decodes responses from length-prefixed MessagePack + - Exposes typed async methods: `compile(yaml)`, `eval(overrides)`, `getParams()`, `getSeries(names?)` + - Handles reconnection on disconnect + + 9. **AC-9: Integration test.** A C# integration test that: + - Starts the API with WebApplicationFactory + - Connects a WebSocket client + - Sends compile + eval sequence via MessagePack + - Verifies correct series values in responses + - Verifies WebSocket closes cleanly + + 10. 
**AC-10: Svelte smoke test.** A minimal Svelte page at `/engine-test` that connects to the WebSocket, compiles a hardcoded model, displays the parameter list, and shows one series as numbers. This proves the end-to-end path works before m-E17-02 builds the real UI. + + ## Technical Notes + + ### .NET WebSocket middleware + + ```csharp + app.Map("/v1/engine/session", async (HttpContext context) => + { + if (!context.WebSockets.IsWebSocketRequest) + { + context.Response.StatusCode = 400; + return; + } + var ws = await context.WebSockets.AcceptWebSocketAsync(); + await ProxyToEngineSession(ws, enginePath, cancellationToken); + }); + ``` + + Requires `app.UseWebSockets()` in the middleware pipeline. + + ### Proxy implementation + + The proxy runs two concurrent tasks: + 1. **WS → stdin**: Read WebSocket binary frames, write to engine stdin + 2. **stdout → WS**: Read engine stdout, write as WebSocket binary frames + + Both tasks run until either side closes. Use `Task.WhenAny` to detect the first close and clean up both sides. + + ### MessagePack in TypeScript + + Use `@msgpack/msgpack` npm package: + ```typescript + import { encode, decode } from '@msgpack/msgpack'; + + // Encode request with length prefix + function encodeMessage(obj: unknown): Uint8Array { + const payload = encode(obj); + const frame = new Uint8Array(4 + payload.length); + new DataView(frame.buffer).setUint32(0, payload.length); + frame.set(payload, 4); + return frame; + } + ``` + + ### Engine binary path + + Reuse the existing `RustEngine:BinaryPath` configuration from Program.cs. Fall back to `engine/target/release/flowtime-engine` relative to solution root. 
+ + ## Out of Scope + + - Parameter UI controls (m-E17-02) + - Chart/topology reactive updates (m-E17-03) + - Session pooling or sharing (multiple clients per session) + - Authentication or authorization + - Binary frame compression + + ## Key References + + - `engine/cli/src/session.rs` — engine session loop + - `engine/cli/src/protocol.rs` — MessagePack framing + - `src/FlowTime.API/Program.cs` — API startup and middleware + - `ui/src/lib/api/client.ts` — existing Svelte HTTP client pattern + - `docs/architecture/headless-engine-architecture.md` — protocol design + - kind: milestone + id: M-019 + frontmatter: + title: Svelte Parameter Panel + status: done + parent: E-17 + depends_on: + - M-018 + body: | + ## Goal + + A polished Svelte page where the user picks a model, sees an auto-generated parameter panel (sliders + numeric inputs), and watches series values update live as they tweak parameters. This is the interactive what-if experience — the first real UI that exercises the WebSocket bridge at full speed. + + ## Context + + After m-E17-01, the `/engine-test` smoke test page proves the plumbing works: browser → .NET WebSocket proxy → Rust engine session, round-trip in milliseconds. But the smoke test only tweaks one hard-coded parameter and displays series as flat text. It's a plumbing verification, not a user experience. + + m-E17-02 builds the real interactive surface: + - A proper page layout under the existing Svelte UI shell (sidebar + topbar) + - Auto-generated controls from the parameter schema returned by `compile` + - Reactive Svelte stores for each series, bound to UI updates + - Debounced eval on slider drag (no flood of requests mid-drag) + - Loading state while eval is in flight + - Error surface for compile/eval failures + + The emphasis is **interactive feel**: the user should be able to grab a slider and see numbers move immediately. 
Chart/topology visualizations are m-E17-03 — this milestone shows series as small-multiple sparklines or numeric tables that update on parameter changes. + + ## Acceptance Criteria + + 1. **AC-1: Route `/what-if`.** A new SvelteKit route exists. The page uses the existing app layout (sidebar + topbar) and appears in sidebar navigation. + + 2. **AC-2: Model picker.** The page shows at least 3 built-in example models the user can select (simple const+expr, queue with WIP limit, class-based decomposition). Selecting a model compiles it via the engine session. Built-in models are bundled as string constants, not loaded from disk or network. + + 3. **AC-3: Parameter panel auto-generation.** After `compile`, the panel renders one control per parameter: + - **Scalar parameters** (ConstNode with uniform values, ArrivalRate, WipLimit): numeric input + slider. Slider range: `max(0, default × 0.1)` to `max(default × 3, default + 10)`, step = `max(default / 100, 0.1)`. + - **Vector parameters** (ConstNode with varying values): read-only display showing the default values (editing is out of scope for this milestone). + - **Parameter ID** displayed as the label. **Default value** shown as a small annotation. **Kind** shown as a colored badge (ConstNode / ArrivalRate / WipLimit). + + 4. **AC-4: Reactive series stores.** Each series from the compile/eval response is held in a Svelte writable store (or `$state` rune). When `eval` returns updated series, the stores are updated atomically and the UI re-renders. + + 5. **AC-5: Live eval on parameter change.** Dragging a slider or typing in a numeric input triggers `session.eval({ paramId: newValue })`. The returned series update the stores. The full current override set is sent on every eval (not just the changed one), so the state is always consistent. + + 6. **AC-6: Debounced slider updates.** During a continuous slider drag, eval requests are debounced to fire at most every 50ms. 
The final value after drag-end always triggers an eval to ensure the displayed state is accurate. Numeric-input changes (via typing) are debounced to 150ms. + + 7. **AC-7: Loading and error states.** While an `eval` is in flight, the parameter panel shows a subtle loading indicator (spinner next to the active control, or a top progress bar). If an eval returns an error, the error message is displayed in an alert banner and the UI remains interactive. The old series values stay visible until the error is resolved. + + 8. **AC-8: Reset button.** A "Reset to defaults" button resets all parameter controls to their defaults (from the parameter schema) and triggers a fresh eval. + + 9. **AC-9: Series display.** For every non-internal series in the eval response, the UI shows: + - The series name + - The current values as a compact numeric list (e.g., `[10.0, 10.0, 10.0, 10.0]`) + - A minimal inline sparkline (simple SVG path, not a full chart library) showing the shape over time + The sparkline's y-axis auto-scales to the series' min/max. This is intentionally lightweight — full charting is m-E17-03. + + 10. **AC-10: Latency display.** The page shows the elapsed_us from the most recent eval response, prominently (e.g., "Last eval: 42 µs"). This is the "look how fast" badge of honor. + + 11. **AC-11: Disconnect recovery.** If the WebSocket drops (e.g., API restarts), the next parameter change triggers automatic reconnect + replay (already handled by EngineSession). The UI shows a brief "reconnecting…" indicator, then resumes normal operation. No page reload required. + + 12. **AC-12: Unit tests for debounce + control generation.** Vitest unit tests cover: + - Debounce helper: multiple rapid calls collapse to one, trailing edge fires + - Control config derivation: `ParamInfo` → `{ type, min, max, step, initial }` + These are pure functions with no DOM dependencies. 
+ + ## Technical Notes + + ### Debounce helper + + Write a simple `createDebouncer(ms)` utility that returns `{ schedule, flush, cancel }`. Use `setTimeout` + `clearTimeout`. Keep it in `ui/src/lib/utils/debounce.ts` as a shared primitive. Unit tests with `vi.useFakeTimers()`. + + ### Parameter control config + + A pure function: + + ```ts + export function paramControlConfig(param: ParamInfo): ControlConfig { + if (Array.isArray(param.default)) return { type: 'vector', values: param.default }; + const d = param.default; + const min = Math.max(0, d * 0.1); + const max = Math.max(d * 3, d + 10); + const step = Math.max(d / 100, 0.1); + return { type: 'scalar', min, max, step, initial: d }; + } + ``` + + Keep this in `ui/src/lib/api/param-controls.ts`. Test as pure function. + + ### Slider component + + shadcn-svelte doesn't have `slider` installed yet. Add via `pnpm dlx shadcn-svelte@latest add slider` or implement a minimal native `<input type="range">` wrapped in a Svelte component. Prefer the native approach for this milestone — one dependency less to wrangle, and full control over reactivity. + + ### Session lifecycle + + Create a single `EngineSession` instance per page load. Store it in a `$state` or a module-level variable (carefully — SvelteKit SSR wants client-only code). Close on `onDestroy` to kill the subprocess. + + ### Built-in models + + Keep the model YAML strings in `ui/src/lib/api/example-models.ts`. Three models: + 1. **Simple pipeline**: const arrivals + expr served = arrivals × 0.8 (4 bins) + 2. **Queue with WIP**: serviceWithBuffer topology, WIP limit 50, scalar inflow/outflow (6 bins) + 3. **Class decomposition**: traffic.arrivals with 2 classes, served = MIN(arrivals, 8) (4 bins) + + ### Sparkline + + No chart library dependency. A 120×30 SVG `<svg>` with a single polyline drawn from min/max-normalized values. ~20 lines of Svelte. 
+ + ## Out of Scope + + - Full interactive charts with hover, zoom, legends (m-E17-03) + - Topology graph visualization with heatmap (m-E17-03) + - Loading models from URL, file upload, or template library (future) + - Editing vector parameters (editing per-bin const values) — show read-only + - Saving/loading parameter snapshots (future) + - Multi-model comparison views (future) + - Authentication, user sessions, or per-user state + - Model editing / YAML textarea — use the 3 built-ins only + + ## Key References + + - `ui/src/lib/api/engine-session.ts` — EngineSession WebSocket client (m-E17-01) + - `ui/src/routes/engine-test/+page.svelte` — smoke test pattern to build on + - `ui/src/lib/components/app-sidebar.svelte` — existing sidebar nav + - `ui/src/lib/components/ui/input/` — shadcn input primitive + - `work/epics/E-17-interactive-what-if-mode/spec.md` — epic context + - `docs/architecture/headless-engine-architecture.md` — WebSocket data flow + + ## Success Indicator + + Drag a slider in the parameter panel → watch the numeric series values update in real time, with `Last eval: µs` staying under 100 µs. That's the interactive what-if experience working. + - kind: milestone + id: M-020 + frontmatter: + title: Live Topology and Charts + status: done + parent: E-17 + depends_on: + - M-019 + body: | + ## Goal + + The What-If page shows a **topology graph** and **time-series charts** that both update reactively when parameters change. Topology node colors reflect current metrics (heatmap); charts show full time series with axes and hover tooltips. This is the "wow" moment: the user drags a slider, watches the graph recolor, and sees the chart curves animate in real time. + + ## Context + + After m-E17-02, the What-If page has auto-generated parameter controls, reactive state, debounced eval, and sparklines that show series shapes as small SVG paths. The data pipeline is fully working and sub-millisecond fast. 
What's missing is the **visual richness** that makes the interactivity compelling: + + - **Topology graph**: no visualization yet. The user sees series names and values as text. For topology-heavy models (queue, class decomposition), understanding which node feeds which is impossible without a graph. + - **Charts**: sparklines are 140×28 pixels. They convey shape but not exact values, no axes, no hover, no legend. For longer series or comparison across metrics, they're insufficient. + + m-E17-03 replaces both: a proper topology graph (using the existing `dag-map-view` component with metric overlays) and a richer chart component (SVG-based, no external library) with axes, hover tooltips, and multi-series support. + + **Layout stability is critical.** The graph must re-layout only on *structural* changes (model switch, new parameters). On *value* changes (slider drag), only colors and chart data update — nodes don't move, edges don't rewire. This preserves the user's mental map and makes the experience feel alive rather than jittery. + + ## Acceptance Criteria + + 1. **AC-1: Topology graph renders.** When a compiled model has 2+ nodes, the What-If page shows a DAG-style graph alongside the parameter panel and series list. Uses the existing `dag-map-view.svelte` component. Graph structure is derived from the compiled model's nodes + dependencies (not from a separate HTTP call). + + 2. **AC-2: Graph structure from compile response.** The engine session `compile` command's response includes a `graph` field: `{ nodes: [{id, kind}], edges: [{from, to}] }`. The server derives this from the parsed model definition. The Svelte client stores it in state and passes it to the graph renderer. + + 3. **AC-3: Layout stability on value changes.** Dragging a slider does NOT trigger a graph re-layout. The dag-map-view's computed positions are preserved across evals. This is verified by a Playwright test that checks node positions are identical before and after a parameter tweak. 
+ + 4. **AC-4: Reactive heatmap.** Each topology node is colored by its current series value (e.g., `served` for expression nodes, `queue_depth` for queue nodes). Colors update when the series store updates. A Svelte-derived store maps series → `Map<nodeId, value>` for the graph overlay. + + 5. **AC-5: Metric selection.** A small dropdown above the graph lets the user pick which metric drives the heatmap (e.g., "served", "queue_depth", "utilization"). The dropdown lists all non-internal series. Default selection is the first primary output series. + + 6. **AC-6: Time-series chart component.** A new `<Chart>` component replaces the inline sparkline usage in the series panel: + - SVG-based, no external library + - Configurable width/height, default 300×120 + - X-axis: bin index labels + - Y-axis: min/max labels (auto-scaled) + - Hover: shows bin index + exact value at mouse position + - Smooth line with optional point markers + + 7. **AC-7: Chart updates are reactive.** When series data changes, the chart re-renders in place. The axis auto-scales to the new range. No flicker, no re-mount. + + 8. **AC-8: Multi-series overlay (optional per chart).** A chart can render multiple series as overlapping lines with different colors and a legend. Default: one series per chart. Overlay mode is used for related metrics (e.g., arrivals vs served). + + 9. **AC-9: Chart hover tooltip.** On mouseover, a crosshair line shows the current bin index, and a tooltip displays `{name}: {value}` for each series rendered in that chart. The tooltip follows the cursor and does not cause layout shift. + + 10. **AC-10: Pure unit tests.** Vitest unit tests cover: + - Graph derivation from compile response (node/edge mapping) + - Metric map computation (series → heatmap metric) + - Chart path computation (extending sparkline-path tests to include axis-aware layout) + - Hover bin index resolution (cursor x → nearest bin) + No DOM-level component tests required; pure functions only. + + 11. 
**AC-11: Playwright E2E.** Extend `tests/ui/specs/svelte-what-if.spec.ts` with: + - Topology graph renders when model is loaded + - Drag slider → graph colors change (verify a specific node's fill attribute changes) + - Drag slider → node positions don't change (layout stability) + - Model switch → graph re-layouts (new structure) + - Chart hover → tooltip appears with correct values + + 12. **AC-12: Visual latency unchanged.** Tweaking a parameter to seeing the graph/chart update should still feel instant. Reuse the existing `Last eval: N µs` badge — it should stay under 1000 µs for the simple-pipeline model even with the new rendering. + + ## Technical Notes + + ### Graph in compile response + + Extend `engine/cli/src/protocol.rs` `CompileResult`: + + ```rust + pub struct CompileResult { + pub params: Vec<ParamInfo>, + pub series: HashMap<String, Vec<f64>>, + pub bins: usize, + pub grid: GridInfo, + pub graph: GraphInfo, // NEW + } + + pub struct GraphInfo { + pub nodes: Vec<GraphNodeInfo>, + pub edges: Vec<GraphEdgeInfo>, + } + + pub struct GraphNodeInfo { + pub id: String, + pub kind: String, // "const", "expr", "pmf", "queue", "router" + } + + pub struct GraphEdgeInfo { + pub from: String, + pub to: String, + } + ``` + + Graph derivation in session: + - Iterate model.nodes → collect (id, kind) for `nodes` + - For expr nodes → parse the expression to find referenced nodes → edges + - For topology.nodes (queue/service) → add node with kind="queue", edges from semantics.arrivals source to the queue, from queue to semantics.served target + - For router nodes → edges from `inputs.queue` source to each route target + + This is a one-shot derivation — cached per compile, not recomputed on eval. 
+ + ### Client-side graph type mapping + + In `ui/src/lib/api/engine-session.ts`: + + ```typescript + export interface CompileResult { + params: ParamInfo[]; + series: Record<string, number[]>; + bins: number; + grid: GridInfo; + graph: EngineGraph; + } + + export interface EngineGraph { + nodes: { id: string; kind: string }[]; + edges: { from: string; to: string }[]; + } + ``` + + In the what-if page, adapt to the `GraphResponse` shape that `dag-map-view` expects: + + ```typescript + function adaptGraph(eg: EngineGraph): GraphResponse { + return { + nodes: eg.nodes.map(n => ({ id: n.id, kind: n.kind })), + edges: eg.edges.map((e, i) => ({ id: `edge-${i}`, from: e.from, to: e.to })), + order: [], // dag-map-view computes its own order + }; + } + ``` + + ### Metric map + + Pure function in `ui/src/lib/api/topology-metrics.ts`: + + ```typescript + export function buildMetricMap( + series: Record<string, number[]>, + metricName: string, + bins: number, + ): Map<string, { value: number; label: string }> { + const map = new Map(); + for (const [seriesName, values] of Object.entries(series)) { + if (isInternalSeries(seriesName)) continue; + // Use the mean over all bins as the heatmap value + const mean = values.reduce((a, b) => a + b, 0) / values.length; + map.set(seriesName, { value: mean, label: seriesName }); + } + return map; + } + ``` + + Initially: heatmap colors every node by its series' mean. Future enhancement: let the user pick a bin slider (scrub through time). + + ### Chart component + + New `ui/src/lib/components/chart.svelte`: + - SVG element sized to viewport + - Pure derivation: `computeChartGeometry(values, { width, height, padding })` → `{ path, xTicks, yTicks, xScale, yScale }` + - Hover: listen to `mousemove`, compute `binFromX(mouseX)`, render crosshair line + tooltip + - Multi-series: accept `series: { name, values, color }[]` as prop + + Keep the pure geometry in `chart-geometry.ts`, testable without DOM. Follow the sparkline-path.ts pattern. 
+ + ### Layout stability + + `dag-map-view.svelte` uses `$derived.by(() => ...)` to compute the DAG + layout when `graph` changes. As long as the parent passes the *same* graph object reference across evals (not a new one each time), Svelte's reactivity won't retrigger layout. Use `let graph = $state(null)` and only assign it in `compileModel`, never in `runEval`. + + Verify in Playwright by reading `cx`/`cy` (or `transform`) attributes of node elements before and after a slider drag — they should be identical. + + ### Playwright selectors + + Add `data-testid` to the graph container and tooltip in the component. For the dag-map-view, the nodes render as SVG elements; use `[data-node-id="..."]` or similar. Check what dag-map-view already exposes. + + ## Out of Scope + + - Animated transitions between values (ease-in color changes) — the page already feels instant at 42µs/eval; animation is pure polish and can come later + - Zooming/panning the graph — future + - Time scrubber (pick a specific bin for the heatmap) — future + - Export chart as image — future + - Comparison mode (multiple parameter snapshots side-by-side) — future + - Editing graph structure in the UI — out of scope forever; models are YAML-authored + - Replacing the chart component library later — we'll add uPlot or ECharts if the custom SVG chart proves insufficient, but that's a future concern + + ## Success Indicator + + Load the **queue-with-wip** model → see the topology graph with queue node and its inputs → drag the WIP limit slider → watch the queue node's color shift from green to orange as the queue depth increases → watch the `queue_queue` chart curve flatten at the new WIP cap. All in under 100ms from drag to visual update. 
+ + ## Key References + + - `ui/src/lib/components/dag-map-view.svelte` — existing topology renderer + - `ui/src/lib/api/types.ts` — `GraphResponse`, `GraphNode`, `GraphEdge` types + - `ui/src/routes/what-if/+page.svelte` — target page for new additions + - `ui/src/lib/components/sparkline-path.ts` — pattern for pure geometry functions + - `ui/src/lib/api/engine-session.ts` — WebSocket client to extend with graph type + - `engine/cli/src/protocol.rs` — session protocol types to extend + - `engine/cli/src/session.rs` — session compile handler to extend + - `work/epics/E-17-interactive-what-if-mode/m-E17-02-svelte-parameter-panel.md` — prior milestone + - kind: milestone + id: M-021 + frontmatter: + title: Warnings Surface + status: done + parent: E-17 + depends_on: + - M-020 + body: | + ## Goal + + The What-If page surfaces engine warnings — conservation violations, queue mismatches, non-stationary arrivals, negative values — as banners and node badges. When the user tweaks a parameter to push the system past a limit, warnings light up in real time; when they pull it back, warnings clear. The interactive experience becomes a complete feedback loop: *see not just what changes, but what's wrong*. + + ## Context + + After m-E17-03 the What-If page shows topology, charts, and a latency badge — but nothing tells the user when their model is broken or degenerate. The Rust engine already computes a rich set of warnings during `eval_model` (via `analyze()` in `analysis.rs`), but none of them make it through the session protocol. The `CompileResult` and `EvalResult` messages carry series and timing but drop warnings on the floor. 
+ + ### Warnings the engine currently produces + + From `engine/core/src/analysis.rs`: + + | Code | Severity | Fires when | Node context | + |---|---|---|---| + | `arrivals_negative` | warning | `semantics.arrivals` contains `< -1e-6` in any bin | topology node | + | `served_negative` | warning | `semantics.served` contains negative values | topology node | + | `queue_negative` | warning | `queueDepth` column contains negative values | topology node | + | `errors_negative` | warning | `errors` column contains negative values | topology node | + | `served_exceeds_arrivals` | warning | `served > arrivals` in any bin (non-queue kinds only) | topology node | + | `served_exceeds_capacity` | warning | `served > capacity` in any bin | topology node | + | `queue_depth_mismatch` | warning | computed queue vs actual queue differs beyond tolerance | topology node | + | `non_stationary` | warning | first-half vs second-half arrivals mean differs > 25% | topology node | + + Each warning has a `node_id`, `code`, `message`, `bins` (the affected bin indices), and `severity`. + + **Key property:** because warnings are computed in the post-eval pipeline (which runs on every `eval` call), they are already recomputed per parameter override. The only thing missing is transport + rendering. + + ## Acceptance Criteria + + 1. **AC-1: Warnings in protocol — compile response.** `CompileResult` (Rust `protocol.rs`, TS `engine-session.ts`) includes a `warnings: WarningInfo[]` field. The session handler populates it from the initial eval's `result.warnings`. Each entry carries `node_id`, `code`, `message`, `bins`, `severity`. + + 2. **AC-2: Warnings in protocol — eval response.** `EvalResultMsg` includes the same `warnings: WarningInfo[]` field, populated from the eval's `result.warnings`. The field is **always present** — empty array means "no warnings," not "unknown". + + 3. 
**AC-3: Example model that triggers a warning.** A new example model `capacity-constrained.yaml` is added to `ui/src/lib/api/example-models.ts`: + - `arrivals` const (default 15, tweakable) + - `capacity` const (default 10, tweakable) + - `served` expression forcing served equal to arrivals + - Topology node with `semantics.capacity = capacity` + - With defaults: `served=15 > capacity=10` → `served_exceeds_capacity` warning fires on bins 0..N + - Dropping `arrivals` below `capacity` clears the warning + - Raising `capacity` above `arrivals` clears the warning + + 4. **AC-4: Warnings banner.** When the warnings array is non-empty, the What-If page shows a colored alert banner near the top (below the model picker). The banner shows: + - Warning count + - Compact list of `{node_id} · {code}` entries (up to 5, then "…and N more") + - A severity color (amber for `warning`, red for future `error`) + When the array is empty, the banner is hidden entirely (no empty state). + + 5. **AC-5: Warning details panel.** Below the topology panel (or as a collapsible section within it), a `Warnings` list shows every warning: + - Each row: severity icon, `node_id`, `code`, full `message`, affected bin count + - Rows are grouped by `node_id` + - The panel is hidden when there are no warnings + + 6. **AC-6: Node badge on topology graph.** Each topology node with at least one warning is visually flagged: + - A small ⚠ icon overlay in the top-right of the node, OR + - A red/amber outline around the node + - Implementation: Svelte-level SVG overlay on top of the dag-map-view SVG, using node positions exposed by dag-map-view (or computed from the DOM) + - When the node has no warnings, no badge is rendered + - Badges update reactively as the warnings array changes + + 7. **AC-7: Warnings update reactively on eval.** When a parameter change clears a warning, the banner, panel, and node badge all disappear. When a change introduces a warning, they all appear. No page reload. + + 8. 
**AC-8: Pure unit tests.** + - `format.ts` or new `warnings.ts`: group-by-node helper, severity-to-class mapping (8+ tests) + - Tests use fixtures with 0, 1, N warnings across multiple nodes + - Unknown warning codes fall back to a default icon/color + + 9. **AC-9: Playwright E2E.** New spec cases: + - Load `capacity-constrained` model → warnings banner visible, panel has ≥1 entry, `Service` node shows the badge + - Tweak `capacity` from 10 to 20 → banner disappears within 500ms, panel empty, badge gone + - Tweak `capacity` back to 10 → banner returns + - Load `simple-pipeline` → **no** banner, **no** panel (no topology warnings possible) + + 10. **AC-10: Compile error surface unchanged.** Compile errors (invalid YAML, missing grid) still flow through the existing `error` field on the Response envelope — not the `warnings` array. Warnings are a separate, orthogonal channel. A model with compile errors produces an error response with no warnings; a valid model produces a result response with a (possibly empty) warnings array. + + ## Technical Notes + + ### Rust protocol extension + + Add to `engine/cli/src/protocol.rs`: + + ```rust + #[derive(Debug, Serialize)] + pub struct WarningMsg { + pub node_id: String, + pub code: String, + pub message: String, + pub bins: Vec<usize>, + pub severity: String, + } + + pub struct CompileResult { + // ...existing fields... + pub warnings: Vec<WarningMsg>, + } + + pub struct EvalResultMsg { + // ...existing fields... + pub warnings: Vec<WarningMsg>, + } + ``` + + In `engine/cli/src/session.rs` `handle_compile` and `handle_eval`, convert `result.warnings` (type `Vec<Warning>` from `analysis.rs`) to `Vec<WarningMsg>` before populating the message. + + ### TypeScript type + + Add to `ui/src/lib/api/engine-session.ts`: + + ```typescript + export interface WarningInfo { + node_id: string; + code: string; + message: string; + bins: number[]; + severity: string; + } + + export interface CompileResult { + // ...existing... 
+ warnings: WarningInfo[]; + } + + export interface EvalResult { + // ...existing... + warnings: WarningInfo[]; + } + ``` + + Existing vitest tests that mock compile/eval responses need `warnings: []` added to their fixtures. + + ### Warnings helper module + + New file `ui/src/lib/api/warnings.ts`: + + ```typescript + export interface WarningGroup { + nodeId: string; + warnings: WarningInfo[]; + } + + /** Group warnings by node_id, preserving insertion order within each group. */ + export function groupWarningsByNode(warnings: WarningInfo[]): WarningGroup[]; + + /** True if any warning in the array references the given node. */ + export function nodeHasWarning(warnings: WarningInfo[], nodeId: string): boolean; + + /** Tailwind/CSS class suffix for a severity. Defaults to 'warning'. */ + export function severityClass(severity: string): 'warning' | 'error'; + ``` + + Pure functions, fully unit-tested. + + ### Svelte UI changes + + - **Banner**: new inline block between model picker and topology panel. Uses existing `AlertCircleIcon` and color tokens. Conditionally rendered on `warnings.length > 0`. + - **Panel**: new section below topology panel (or embedded in it). Iterates `groupWarningsByNode(warnings)`. + - **Node badges**: this is the trickiest part. `dag-map-view` renders an SVG. The Svelte page has the graph container; I can overlay a second SVG positioned absolutely on top with badge circles at the same coordinates as the nodes. Node coordinates come from the dag-map layout — which dag-map-view doesn't currently expose. + + **Pragmatic approach**: query the rendered SVG after each metric update, find nodes by their dag-map-generated `data-node-id` or text label, read their bounding boxes, and render badges at those positions. Done via `bind:this` + `tick()` + `getBoundingClientRect`. Not ideal but avoids modifying `dag-map-view`. 
+ + **Cleaner approach** (optional, if the pragmatic one is messy): extend `dag-map-view` to accept a `nodeBadges: Map` prop and render badges as part of its SVG. This is a cross-cutting change to the shared component. + + I'll start with the pragmatic approach and fall back to extending the component only if positioning is unreliable. + + ### Capacity-constrained example model + + Add to `ui/src/lib/api/example-models.ts`: + + ```typescript + const CAPACITY_CONSTRAINED: ExampleModel = { + id: 'capacity-constrained', + name: 'Capacity constrained', + description: 'Served forced equal to arrivals; exceeds capacity by default. Drop arrivals or raise capacity to clear the warning.', + yaml: `grid: + bins: 4 + binSize: 1 + binUnit: hours + nodes: + - id: arrivals + kind: const + values: [15, 15, 15, 15] + - id: capacity + kind: const + values: [10, 10, 10, 10] + - id: served + kind: expr + expr: "arrivals" + topology: + nodes: + - id: Service + kind: serviceWithBuffer + semantics: + arrivals: arrivals + served: served + capacity: capacity + edges: [] + constraints: [] + `, + }; + ``` + + **Important:** verify this model actually produces the `served_exceeds_capacity` warning during Rust unit testing. Adjust values if needed. + + ### Field naming + + The Rust `Warning` struct uses `node_id` (snake_case). MessagePack serialization preserves the field name. On the TS side, I'll keep it as `node_id` rather than converting to `nodeId` — matches what's on the wire, matches the C# `RustWarning` DTO (which already uses `NodeId` but is a separate struct). + + ## Out of Scope + + - **Suggestion actions** — no "click to fix" — the user tweaks sliders manually. + - **Animated badge transitions** — plain show/hide is fine. + - **Bin-level warning visualization** — chart doesn't highlight the specific bins where warnings occurred (future polish). + - **Warning history / timeline** — only the current state is shown. 
+ - **Custom warning rules / thresholds** — the engine's defaults stand. + - **Editable severity mapping** — warning/error for now, future may add "info" or "critical". + - **Error surface** — compile/protocol errors stay in their existing path. + + ## Key References + + - `engine/core/src/analysis.rs` — warning generation + - `engine/core/src/compiler.rs` `eval_model` — runs analysis on every eval + - `engine/cli/src/protocol.rs` — `CompileResult`, `EvalResultMsg` + - `engine/cli/src/session.rs` — `handle_compile`, `handle_eval` + - `ui/src/lib/api/engine-session.ts` — client types + - `ui/src/lib/components/dag-map-view.svelte` — existing graph renderer + - `ui/src/routes/what-if/+page.svelte` — target page + - `work/epics/E-17-interactive-what-if-mode/m-E17-03-live-topology-and-charts.md` — prior milestone + + ## Success Indicator + + Load `capacity-constrained` → amber banner at top says "1 warning: Service · served_exceeds_capacity", `Service` node on the topology graph has a ⚠ badge, warnings panel shows the full message "served > capacity in 4 bins". Drag `capacity` slider from 10 up to 20 — banner disappears, badge gone, panel empty, charts still animate. Drop `capacity` back to 5 — warnings reappear. End-to-end under 200ms per change. + - kind: milestone + id: M-022 + frontmatter: + title: Edge Heatmap + status: done + parent: E-17 + depends_on: + - M-021 + body: | + ## Goal + + Color the edges in the What-If topology graph by the throughput they carry, so the user can immediately see where flow is heavy, where it is throttled, and how it redistributes when parameters change. + + ## Context + + After m-E17-04, the topology graph colors **nodes** by their series mean (utilization, queue depth, served rate). The edges between nodes are currently monochrome — a missed opportunity to show the *flow* on each connection. + + The `dag-map-view` component already accepts an `edgeMetrics?: Map` prop — the rendering support exists but has never been wired up. 
This milestone computes the edge metric map from the series data and passes it in. + + **What "flow on an edge" means here.** For an edge from node A to node B, the flow is the output series of A — the rate at which A produces values that travel toward B. This is the same series used to color A as a node, applied to its outgoing edges. The interpretation: + + - `const` node A → expr node B: A's series (the constant value) is the flow on the edge. + - `expr` node A → topology node B (as `semantics.arrivals`): A's series is the arrival rate flowing into B. + - Topology node Q → whatever consumes Q's served output: Q's `served` series (the `{snake(Q)}_served` column, or Q's own primary series if that's what the heatmap uses) is the edge flow. + + This interpretation is consistent and symmetric: a node that feeds into multiple downstream nodes will color all its outgoing edges with its own output series. The user sees "this value leaves A, flows along each edge to downstream nodes." + + ## Acceptance Criteria + + 1. **AC-1: Edge metric map computation.** A new pure function `buildEdgeMetricMap` in `ui/src/lib/api/topology-metrics.ts`: + + ```typescript + export function buildEdgeMetricMap( + graph: EngineGraph, + series: Record, + ): MetricMap; + ``` + + For each edge `{ from, to }` in `graph.edges`: + - Look up the `from` node's primary series using the same `findNodeSeries` logic used for nodes. + - Compute the mean across all bins. + - Insert into the map with key `${from}->${to}` (see Technical Notes for key format). + - If no series is found for `from`, omit the edge from the map (edge renders with default styling). + + 2. **AC-2: Edge metric map normalization.** `normalizeMetricMap` (already exists, normalizes to `[0, 1]`) is applied to the raw edge metric map before passing to `dag-map-view`. The function is shared — no duplication. + + 3. 
**AC-3: Edge metrics wired into `dag-map-view`.** In `+page.svelte`, a new `$derived.by` computes the normalized edge metric map and passes it as `edgeMetrics` to ``. The map recomputes when `series` or `engineGraph` changes. + + 4. **AC-4: Layout unaffected.** Adding `edgeMetrics` does not trigger a DAG re-layout. The `edgeMetrics` prop is consumed only in `renderSVG`, not in `layoutMetro`. This is the existing separation in `dag-map-view.svelte`. Verify: the `layout` derived store does not read `edgeMetrics`. + + 5. **AC-5: Visual result.** Load the `capacity-constrained` model. The edge from `arrivals` to `Service` is colored (not gray/default). Dragging the `arrivals` slider shifts the edge color intensity. This is a visual smoke-test via Playwright. + + 6. **AC-6: Edge key format confirmed.** The `dag-map` library's `edgeMetrics` key format is `${fromId}\u2192${toId}` — the Unicode right-arrow character `→` (`\u2192`) between node IDs (confirmed in `dag-map/src/render.js:151`). The `buildEdgeMetricMap` function uses this exact format. A comment in the source records this. + + 7. **AC-7: Pure unit tests.** Vitest tests in `ui/src/lib/api/topology-metrics.test.ts` cover: + - `buildEdgeMetricMap` with a simple two-node graph where `from` has a known series → map entry present with correct mean. + - `buildEdgeMetricMap` with an edge whose `from` node has no series → edge omitted from map. + - `buildEdgeMetricMap` with a multi-edge graph → correct keys for all edges. + - Map has same size as number of edges with known series (no extra entries). + - At least 8 tests total for the new function. + + 8. **AC-8: Playwright E2E.** Extend `tests/ui/specs/svelte-what-if.spec.ts`: + - After model load, the topology graph SVG contains at least one colored (non-default) edge element. + - Dragging `arrivals` slider on `capacity-constrained` model changes an edge's visual attribute (color or stroke). 
+ - Layout stability: edge positions (path data or endpoint coords) do not change when a parameter is tweaked — only fill/stroke attributes change.
+
+ 9. **AC-9: No regression on prior ACs.** All 173 vitest and 19 Playwright tests from m-E17-04 continue to pass.
+
+ ## Technical Notes
+
+ ### Edge key format in dag-map
+
+ Before implementation, inspect the `dag-map` package to confirm the key format for `edgeMetrics`. The two candidates initially considered were:
+
+ - **`${from}->${to}`** — matches the deduplication key used inside `dag-map-view.svelte` (`const key = \`${from}->${to}\``).
+ - **`e-${index}`** — matches the synthesized edge IDs in `graph-adapter.ts`.
+
+ Run a quick spike: pass a known edge metric with both key formats to a rendered `<DagMapView>` in the engine-test page and observe which one colors the edge. Record the confirmed format as a comment near `buildEdgeMetricMap`.
+
+ If `dag-map` uses a different format entirely, adapt accordingly and note it. (Resolution: it does — the confirmed key format is `${fromId}→${toId}`, joined by the Unicode right-arrow `\u2192`, per AC-6 and `dag-map/src/render.js:151`.)
+
+ ### `findNodeSeries` reuse
+
+ `findNodeSeries` is currently a private function in `topology-metrics.ts`. Export it (or extract the lookup logic into a shared internal helper) so `buildEdgeMetricMap` can call it without duplicating the lookup order:
+
+ 1. `series[nodeId]` — exact match
+ 2. `series[\`${toSnakeCase(nodeId)}_queue\`]` — topology queue column
+ 3. `undefined` — omit
+
+ ### Edge metric map key example
+
+ For a model with:
+ - `arrivals` const → `Service` topology node
+ - `served` expr → `Service.semantics.served`
+
+ The `EngineGraph.edges` produced by the Rust session will include `{ from: "arrivals", to: "Service" }`.
+
+ The map entry (using the confirmed `→` key format): `"arrivals→Service" → { value: mean(series["arrivals"]), label: "arrivals" }`.
+
+ ### Page-level derived store
+
+ In `+page.svelte`:
+
+ ```typescript
+ const edgeMetricMap = $derived.by(() => {
+   if (!engineGraph) return new Map();
+   return normalizeMetricMap(buildEdgeMetricMap(engineGraph, series));
+ });
+ ```
+
+ Pass to `<DagMapView … edgeMetrics={edgeMetricMap} />`.
+ + The `edgeMetricMap` reacts to `series` changes on every eval — colors update in real time without re-layout. + + ## Out of Scope + + - Per-bin edge coloring (time scrubber drives this — m-E17-06). + - Edge labels (flow rate text on edges) — visual noise at current scale. + - Animated edge color transitions. + - Edge click / inspect interaction. + - Separate edge metric selector (use same heatmap metric as nodes for now). + + ## Success Indicator + + Load `queue-with-wip` model → both nodes and edges have heatmap colors. Drag the WIP limit slider — node colors shift AND edge colors shift in tandem, both within 100ms. The topology graph "lights up" the whole flow network. + + ## Key References + + - `ui/src/lib/api/topology-metrics.ts` — `buildMetricMap`, `normalizeMetricMap`, `findNodeSeries` (to export) + - `ui/src/lib/components/dag-map-view.svelte` — `edgeMetrics` prop (already present) + - `ui/src/lib/api/graph-adapter.ts` — `adaptEngineGraph` (edge ID synthesis) + - `ui/src/routes/what-if/+page.svelte` — page to update + - `work/epics/E-17-interactive-what-if-mode/m-E17-04-warnings-surface.md` — prior milestone + - kind: milestone + id: M-023 + frontmatter: + title: Time Scrubber + status: done + parent: E-17 + depends_on: + - M-022 + body: | + ## Goal + + Add a bin-position scrubber below the topology graph. Dragging it switches the heatmap from "mean across all bins" to "value at bin T", and stamps a crosshair on all charts at the same bin. The user can step through time and watch the model "breathe" — seeing exactly how node load and edge flow evolve bin by bin. + + ## Context + + After m-E17-05, both nodes and edges in the topology graph are colored by the **mean** of their series across all bins. The mean is a useful summary, but it collapses time — users cannot see: + + - How does queue depth build up in early bins? + - When does the capacity constraint first bite? + - Where does flow peak in a variable-arrival model? 
+ + The time scrubber answers these questions by letting the user "scrub" through the time axis just as a video scrubber steps through frames. All heatmap colors and the chart crosshair update immediately — no new eval, no new WebSocket round-trip. + + The m-E17-03 spec explicitly noted this as a future enhancement: + > *"Future enhancement: let the user pick a bin slider (scrub through time)."* + + ## Acceptance Criteria + + ### Scrubber state + + 1. **AC-1: `selectedBin` state.** The What-If page introduces `let selectedBin = $state(null)`. `null` means "mean mode" (existing behavior, unchanged). A number means "bin T mode". + + 2. **AC-2: Scrubber control.** Below the topology panel (before the warnings panel), a scrubber section renders when a model is compiled and has `bins > 1`: + - Label: `Bin: {T} / {bins - 1}` (or `Mean` when null). + - A range `` from `0` to `bins - 1`, step `1`, bound to `selectedBin` (0 when null). + - A `Mean` toggle button / checkbox that sets `selectedBin` back to `null`. + - When `selectedBin` is `null`, the slider displays at position 0 but is visually dimmed or labeled "Mean". + - `data-testid="bin-scrubber"` on the range input, `data-testid="bin-mean-toggle"` on the reset button. + + 3. **AC-3: Scrubber hidden when `bins === 1`.** Models with a single bin have nothing to scrub. The scrubber section is not rendered when `bins <= 1`. + + ### Heatmap integration + + 4. **AC-4: `buildMetricMap` accepts optional bin.** The signature of `buildMetricMap` in `topology-metrics.ts` is extended: + + ```typescript + export function buildMetricMap( + graph: EngineGraph, + series: Record, + bin?: number, // undefined → mean mode + ): MetricMap; + ``` + + When `bin` is a valid index, use `values[bin]` instead of the mean. When `bin` is out of range or undefined, fall back to the mean. The existing mean behavior is unchanged — no callers break. + + 5. 
**AC-5: `buildEdgeMetricMap` accepts optional bin.** The same optional `bin` parameter is added to `buildEdgeMetricMap` (from m-E17-05), with identical semantics. + + 6. **AC-6: `metricMap` and `edgeMetricMap` react to `selectedBin`.** In `+page.svelte`, both derived stores read `selectedBin` and pass it to the metric builders. Svelte's reactivity ensures the topology recolors without re-layout when the scrubber moves. + + ### Chart crosshair + + 7. **AC-7: `Chart` accepts `crosshairBin`.** The `` component (in `chart.svelte`) gains a new optional prop: + + ```typescript + interface Props { + // ...existing... + crosshairBin?: number; // undefined → no crosshair + } + ``` + + When set, a vertical SVG line is rendered at the x-position corresponding to `crosshairBin`. The line is styled as a light gray or muted dashed line that does not interfere with the hover tooltip or series paths. + + 8. **AC-8: `crosshairX` pure helper.** A new export in `chart-geometry.ts`: + + ```typescript + export function crosshairX(bin: number, geom: ChartGeometry): number | null; + ``` + + Returns the SVG x-coordinate for `bin`, or `null` if `bin` is out of range. Uses `geom.plotLeft`, `geom.plotRight`, and `geom.bins` for the linear interpolation. Fully unit-tested. + + 9. **AC-9: All charts show crosshair at `selectedBin`.** In `+page.svelte`, every `` is passed `crosshairBin={selectedBin ?? undefined}`. When the scrubber moves, all charts update the crosshair position in lockstep. + + ### Unit tests + + 10. **AC-10: `buildMetricMap` per-bin tests.** New vitest cases in `topology-metrics.test.ts`: + - `bin=0` → returns value at index 0, not mean. + - `bin=N-1` → returns value at last index. + - `bin=N` (out of range) → falls back to mean. + - `bin=undefined` → returns mean (unchanged behavior, existing test keeps passing). + - At least 6 new tests. + + 11. **AC-11: `buildEdgeMetricMap` per-bin tests.** Same coverage as AC-10 but for edge metrics. At least 4 new tests. + + 12. 
**AC-12: `crosshairX` tests.** In `chart-geometry.test.ts`: + - Bin 0 → returns `plotLeft`. + - Bin `bins-1` → returns `plotRight`. + - Bin at midpoint → returns midpoint x. + - Bin out of range (negative, >= bins) → returns `null`. + - At least 5 tests. + + 13. **AC-13: Chart crosshair render test (Svelte component smoke test).** Using the existing Vitest + jsdom setup (matching the chart-geometry tests), verify that when `crosshairBin` is set, a `` element appears in the Chart SVG. + + ### Playwright E2E + + 14. **AC-14: Scrubber visible and functional.** On the `queue-with-wip` model (multiple bins): + - Scrubber is present with `data-testid="bin-scrubber"`. + - Drag scrubber to bin `N/2` → topology node colors change (at least one node's fill differs from the mean-mode color). + - All chart SVGs contain a `` element (crosshair) after scrubber is moved. + - Click `data-testid="bin-mean-toggle"` → crosshair disappears from charts, heatmap reverts to mean colors. + + 15. **AC-15: Scrubber absent on single-bin model.** Load `capacity-constrained` model (4 bins — scrubber should appear). Load a hypothetical 1-bin model or verify by counting bins from the latency badge area. If no suitable model exists, verify the condition is covered by unit test only and note the gap. + + 16. **AC-16: No regression.** All previous vitest (173+) and Playwright (19+) tests pass. 
+ + ## Technical Notes + + ### `buildMetricMap` extension + + Current implementation: + + ```typescript + export function buildMetricMap( + graph: EngineGraph, + series: Record, + ): MetricMap { + const map = new Map(); + for (const node of graph.nodes) { + const values = findNodeSeries(node.id, series); + if (values === undefined) continue; + map.set(node.id, { value: seriesMean(values), label: node.id }); + } + return map; + } + ``` + + Extended: + + ```typescript + export function buildMetricMap( + graph: EngineGraph, + series: Record, + bin?: number, + ): MetricMap { + const map = new Map(); + for (const node of graph.nodes) { + const values = findNodeSeries(node.id, series); + if (values === undefined) continue; + const value = pickValue(values, bin); + map.set(node.id, { value, label: node.id }); + } + return map; + } + + function pickValue(values: number[], bin?: number): number { + if (bin !== undefined && bin >= 0 && bin < values.length) { + return values[bin]; + } + return seriesMean(values); + } + ``` + + `pickValue` is a pure helper extractable for direct unit testing. + + ### `crosshairX` implementation + + ```typescript + export function crosshairX(bin: number, geom: ChartGeometry): number | null { + if (bin < 0 || bin >= geom.bins) return null; + if (geom.bins <= 1) return geom.plotLeft; + return geom.plotLeft + (bin / (geom.bins - 1)) * (geom.plotRight - geom.plotLeft); + } + ``` + + ### Chart crosshair SVG + + In `chart.svelte`, after the series `` elements: + + ```svelte + {#if crosshairBin !== undefined && crosshairX(crosshairBin, geom) !== null} + + {/if} + ``` + + The crosshair is rendered at pointer-events none so it doesn't interfere with hover. + + ### Scrubber position in page layout + + Insert between the topology panel and the warnings panel: + + ```svelte + + {#if bins > 1} +
+ <div class="mt-3 rounded border p-2">
+   <div class="flex items-center gap-2">
+     <span class="text-xs font-medium">Time</span>
+     <span class="text-xs tabular-nums">
+       {selectedBin !== null ? `Bin ${selectedBin}` : 'Mean'}
+     </span>
+   </div>
+   <div class="flex items-center gap-2">
+     <input
+       type="range"
+       min="0"
+       max={bins - 1}
+       step="1"
+       value={selectedBin ?? 0}
+       oninput={(e) =>
+         selectedBin = parseInt((e.target as HTMLInputElement).value)}
+       class="flex-1"
+       data-testid="bin-scrubber"
+     />
+     <button type="button" onclick={() => (selectedBin = null)} data-testid="bin-mean-toggle">
+       Mean
+     </button>
+   </div>
+ </div>
+ {/if} + ``` + + The `bins` value comes from the `CompileResult.bins` field — store it as `let bins = $state(0)` and populate in `compileModel`. + + ### Derived store updates + + ```typescript + const metricMap = $derived.by(() => { + if (!engineGraph) return new Map(); + const bin = selectedBin !== null ? selectedBin : undefined; + return normalizeMetricMap(buildMetricMap(engineGraph, series, bin)); + }); + + const edgeMetricMap = $derived.by(() => { + if (!engineGraph) return new Map(); + const bin = selectedBin !== null ? selectedBin : undefined; + return normalizeMetricMap(buildEdgeMetricMap(engineGraph, series, bin)); + }); + ``` + + No layout change — only metrics change. The `$effect` that applies `.has-warning` classes already runs after metric/graph updates, so warning badges remain correct at all scrubber positions. + + ## Out of Scope + + - Play/pause animation (auto-advancing the scrubber through bins) — future polish. + - Highlighting the selected bin in the chart axes/ticks. + - Per-bin warning filtering (show warnings only in the selected bin). + - Synchronized scrubber across multiple chart panels. + - Keyboard navigation of the scrubber (native range input handles this). + + ## Success Indicator + + Load `queue-with-wip` model. Drag the bin scrubber from bin 0 to bin 3. The topology nodes and edges shift colors as the queue builds up across time bins. All charts show a vertical crosshair at bin 3. Click "Mean" — crosshairs vanish, heatmap reverts to mean colors. The whole interaction is seamless: no eval, no network, just derived state recomputing. 
+ + ## Key References + + - `ui/src/lib/api/topology-metrics.ts` — `buildMetricMap`, `buildEdgeMetricMap` (to extend) + - `ui/src/lib/components/chart-geometry.ts` — add `crosshairX` + - `ui/src/lib/components/chart.svelte` — add `crosshairBin` prop + - `ui/src/routes/what-if/+page.svelte` — add `selectedBin` state, scrubber UI, wire `crosshairBin` + - `work/epics/E-17-interactive-what-if-mode/m-E17-05-edge-heatmap.md` — prior milestone + - kind: milestone + id: M-024 + frontmatter: + title: Supported Surface Inventory, Boundary ADR & Exit Criteria + status: done + parent: E-19 + body: | + ## Goal + + Produce the authoritative inventory, boundary ADR, and retention/deletion decisions that govern the rest of E-19. When this milestone closes, every first-party compatibility and legacy seam outside E-16's analytical boundary has an explicit classification (supported / transitional / delete / archive), an owning downstream milestone, and a grep guard specification. No code is deleted in this milestone; decisions are locked so m-E19-02, m-E19-03, and m-E19-04 can execute forward-only without re-litigating scope. + + ## Context + + E-16 purified the analytical truth and consumer fact surfaces. Everything else — Sim authoring/orchestration endpoints, server-side drafts, archived run bundles, Engine bundle import, runtime catalogs, deprecated schema/template/example material, and Blazor compatibility wrappers — is still present in the tree. None of it is analytical, so folding it back into E-16 is wrong. But leaving it unowned makes temporary compatibility shims harden into a de facto product support promise. + + This milestone does three things: + + 1. **Inventories** every in-scope surface exhaustively so later milestones inherit a single source of truth. + 2. **Decides** the six open retention/responsibility questions the epic flagged (A1–A6), plus the responsibility-clarification framing for Engine/Core/Sim/Time Machine. + 3. 
**Publishes** the boundary ADR (extended) and a new supported-surfaces matrix, both as durable documents that later milestones cite instead of rediscovering. + + The spec also records the shared framing that governs every downstream E-19 milestone: no project renames in E-19, `FlowTime.TimeMachine` is a new separate component owned by E-18, `FlowTime.Core` stays pure, `FlowTime.API` stays as the query/operator surface over canonical runs, and Sim owns authoring (with transitional execution hosting until the Time Machine ships). + + ## Decisions Locked By This Milestone + + These are recorded here as the authoritative source. `work/decisions.md` gets corresponding short entries pointing at this spec. + + ### Shared Framing + + 1. **No project renames in E-19.** `FlowTime.Core`, `FlowTime.Generator`, `FlowTime.API`, and `FlowTime.Sim.*` keep their names. The boundary ADR documents what each actually does. + 2. **`FlowTime.Core` is the evaluation engine.** Pure library. `ModelCompiler.Compile` + `ModelParser.ParseModel` + `Graph.Evaluate`, plus the authoritative validators (`ModelSchemaValidator`, `ModelValidator`) and the invariant analyzers. No HTTP, no orchestration, no storage, no client awareness. E-19 does not touch it. **Forward (post-E-18): Core remains pure and unchanged in these invariants.** The Time Machine depends on Core, never the reverse. Core is the library of deterministic operations (the "instruction set"); the Time Machine is the hosted component that composes those operations into first-class callable services. Nothing new gets added to Core that would reintroduce HTTP, orchestration, or client awareness. When the Time Machine needs a capability, it either composes existing Core primitives or — if Core is genuinely missing a pure computational primitive — the primitive is added to Core as a pure library function, not to the Time Machine as a parallel implementation. + 3. 
**`FlowTime.Generator` is today's shared orchestration layer** between `Sim.Service` and `API`. `RunOrchestrationService`, `RunArtifactWriter`, deterministic run ID logic, RNG seeding, and dry-run/plan mode all live here. **During E-19, Generator is unchanged — name, structure, and responsibilities all stay the same.** **Generator's forward fate is decided and scoped to E-18: Path B — extraction and deletion.** Most of Generator's current responsibilities (compile, evaluate, artifact write, run IDs, RNG seeding, dry-run) overlap the Time Machine's scope and cannot coexist with it. In E-18, Generator's execution-pipeline responsibilities are **extracted** into the new `FlowTime.TimeMachine` project, and `FlowTime.Generator` is **deleted** in the same milestone. No "Generator and Time Machine coexist in parallel" window is permitted — this matches the no-coexistence discipline established in E-16. The specific tier 3 analyser binding (`TemplateInvariantAnalyzer` currently in `FlowTime.Sim.Core.Analysis`) is also subject to the E-18 extraction: the invariant rules belong conceptually in Core, with the Time Machine composing them into the tier 3 validation surface. See decision D-2026-04-07-019. + 4. **`FlowTime.API` is the query/operator surface over canonical run artifacts.** It reads canonical run artifacts and exposes the current read/query and operator endpoints. It does not execute models, and when an obsolete API write path is retired E-19 deletes it outright instead of preserving a 410 or advisory tombstone. + 5. **`FlowTime.Sim.Service` hosts authoring and, transitionally, execution.** Templates, parameter UX, provenance are permanent Sim responsibilities. Execution hosting is transitional — it exists in Sim only because no other HTTP host is wired to `FlowTime.Core` today. + 6. 
**The Time Machine (`FlowTime.TimeMachine`) is owned by E-18 and is a new separate component.** Responsibilities: compile, tiered validation (schema / compile / analyse), evaluate, reevaluate, parameter override with stable runtime parameter identity, artifact write. Surfaces: in-process SDK, CLI, optional sidecar protocol. Not analytical primitives, not template authoring, not query/analysis of past runs. The Time Machine does not live inside Sim or API. **Dependency direction: Time Machine → Core, never reverse.** The Time Machine composes Core's pure operations (`ModelSchemaValidator`, `ModelCompiler`, `ModelParser`, `Graph.Evaluate`, invariant analyzers) into first-class callable services with consistent request/response shapes. It never reimplements what Core already does. In the BEAM/JVM framing: Core is the instruction set and execution kernel as a pure library; the Time Machine is the hosted machine that loads programs (compiled graphs), drives them, exposes iteration and reevaluation protocols, and presents a client-agnostic API. Naming rationale: FlowTime's execution component is an abstract machine in the BEAM/JVM sense — instructions (compiled graph), state (time grid plus accumulating series), deterministic stepping through time. "Time Machine" also aligns with the existing Blazor "Time Travel" UI feature that navigates runs the Time Machine produces, and the reevaluation semantics (rewind a compiled model, run it forward with different parameters) are literally time travel. + 7. **When the Time Machine ships, Sim's orchestration endpoints are deleted by default.** If a temporary facade is kept at all, it must be justified by a concrete technical migration constraint, documented in the owning E-18 milestone, and treated as a short-lived bridge rather than a supported steady state. That migration is E-18's job. E-19 records the commitment so no new non-UI callers land on Sim orchestration in the meantime. + 8. 
**The Time Machine serves all clients on equal footing.** Sim UI, Blazor UI, Svelte UI, MCP servers, external AI agents, tests, and CI are all first-class callers of Time Machine operations. No client is privileged. In particular, validation (A6) is a client-agnostic operation; MCP servers and AI agents generating candidate models need the same validation contract that UIs need for editor-time feedback. + 9. **Telemetry is an adapter concern outside the Time Machine, with one exception.** The Time Machine itself does not contain external-telemetry-format-specific code (no Prometheus, no OTEL, no BPI event log parsing). External-format ingestion lives in adapter projects under `FlowTime.Telemetry.*`. The exception: writing the **canonical bundle** format (defined by E-15's schema, today produced by `TelemetryBundleBuilder` in Generator) is a Time Machine core capability, not a pluggable adapter, because it serves the **telemetry loop** that is fundamental to FlowTime's bootstrap, self-consistency, and AI-iteration use cases. The canonical run directory (`data/runs//model/`, `series/`, `run.json`) and the canonical bundle (`model.yaml`, `manifest.json`, `series/`, CSV) are **two distinct artifacts with different purposes** — runs are the in-place clear-text debugging surface, bundles are the portable interchange format — and both are preserved by Path B. The bundle format may evolve independently of the run directory format. **`ITelemetrySource` is introduced** by E-18 m-E18-01b (after the Path B extraction cut in m-E18-01a creates the concrete `CanonicalBundleSource`), with multiple implementations once 01b ships (`CanonicalBundleSource`, `FileCsvSource`, plus future Prometheus/OTEL/event-log adapters under `FlowTime.Telemetry.*` delivered by m-E18-06). **`ITelemetrySink` is explicitly deferred** until a second sink format exists; canonical bundle writing is a concrete Time Machine capability, not behind an interface. 
The **telemetry loop** (capture → bundle → replay → parity, established vocabulary from `work/epics/telemetry-loop-parity/spec.md`) is a first-class use case with three primary purposes: **specification/bootstrap** (generate target telemetry from a model to define what the real system must emit), **self-consistency testing** (round-trip verification of capture+replay correctness), and **AI iteration / model fitting** (compare model-generated telemetry to real observed telemetry, adjust model, iterate). Path B extracts both Generator's execution code (into the Time Machine) and Generator's telemetry-generation code (`TelemetryBundleBuilder`, `TelemetryCapture`, `CaptureManifestWriter`, `RunArtifactReader`) into the canonical bundle writer and `CanonicalBundleSource`. Existing public surfaces — `POST /telemetry/captures` API and `flowtime telemetry capture` CLI — are re-wired to the new home without changing their contracts. The parity harness itself, drift tolerance rules, and CI gating are not E-18's responsibility; they are owned by the Telemetry Loop & Parity epic. + + ### A1 — Sim orchestration endpoints + + **`/api/v1/orchestration/runs` remains supported as the Sim authoring + transitional execution host for first-party UIs (Blazor and Svelte).** `/api/v1/drafts/run` (inline source only) remains supported as a narrower inline-YAML execution surface. + + - `/api/v1/orchestration/runs` is the active first-party UI run path today. + - `/api/v1/drafts/run` is retained for explicit inline-YAML "run this now" flows, not as the default UI orchestration path. + - They are not the Time Machine contract, not the programmable contract, and not an external-integration surface. + - No new non-UI callers during E-19. + - No new responsibilities bolted onto them (no external auth, no batch APIs, no programmatic surfaces). + - Explicit sunset hook to E-18 Time Machine: when the Time Machine ships, these endpoints are deleted by default. 
A temporary thin facade is allowed only if a concrete technical migration constraint is documented in the owning E-18 milestone. + + ### A2 — Stored drafts + + **Retire stored drafts entirely.** No UI exercises `/api/v1/drafts` CRUD today; the only callers are `DraftEndpointsTests.cs`. Active Blazor and Svelte run flows use `/api/v1/orchestration/runs`; retaining `/api/v1/drafts/run` is only about the inline-source "run this YAML right now" surface, not the default UI orchestration path. + + Deletion scope (executed by m-E19-02): + - `/api/v1/drafts` CRUD endpoints: GET, PUT, POST create, DELETE, list + - `StorageKind.Draft` and `data/storage/drafts/` directory + - `draftId` resolution branches in `/api/v1/drafts/validate`, `/api/v1/drafts/generate`, `/api/v1/drafts/run` + - `DraftEndpointsTests.cs` tests for CRUD paths (inline-source tests survive) + + Retained: `/api/v1/drafts/run` with `DraftSource.type = "inline"` only. This is the narrow "run this YAML right now" surface. First-party template-driven run creation remains `/api/v1/orchestration/runs`. + + If real model versioning is wanted later, it must be designed against compiled-graph identity (E-18 territory), not resurrected drafts. + + ### A3 — `data/storage/runs` + `bundleRef` + + **Delete the ZIP/bundle archive layer.** Sim's post-hoc ZIP write to `data/storage/runs/` has no production reader; `bundleRef` is consumed only by `RunOrchestrationTests.cs` exercising Engine bundle import. + + Deletion scope (executed by m-E19-02): + - `StorageKind.Run` bundle ZIP writes in `RunOrchestrationService.CreateSimulationRunAsync` + - `BundleRef` / `StorageRef` return values on `RunCreateResponse` + - `data/storage/runs/` directory and backend write path for run bundles + - Any Sim-side references to bundle ZIPs + + **Explicitly out of scope for deletion:** the canonical run directory layout at `data/runs//` (`model/`, `series/`, `run.json`). That layout stays unchanged. 
+ + ### A4 — Engine `POST /v1/runs` bundle import + + **Delete bundle-import branches.** Only `RunOrchestrationTests.cs` exercises them; no UI, CLI, background job, or production workflow depends on Sim-exports-bundle → Engine-imports-bundle. The "loop" is designed but never wired. + + Deletion scope (executed by m-E19-02): + - `bundlePath`, `bundleArchiveBase64`, and `BundleRef` branches in `RunOrchestrationEndpoints.cs` `POST /v1/runs` + - `ExtractArchiveAsync` support helpers if unused after deletion + - Bundle-import tests in `RunOrchestrationTests.cs` (forward-only deletion) + + Deletion includes the `POST /v1/runs` route itself. No 410-style rejection stub is retained once the bundle-import branches are removed. + + If cross-environment run transfer is needed later, it comes back as an E-18 concern (Time Machine runs, programmable execution, portable artifacts) and gets designed properly. + + ### A5 — Catalogs + + **Delete entirely.** `data/catalogs/` is empty. `TemplateServiceImplementations.GetCatalogsAsync` calls `GetMockCatalogsAsync` in both demo and API modes. `TemplateRunner.razor` hardcodes `CatalogId = "default", // No longer using catalogs`. No UI creates or selects a catalog. No tests assert meaningful catalog behavior. + + Deletion scope (executed by m-E19-02): + - `/api/v1/catalogs` endpoints (GET, PUT, POST validate) in Sim.Service + - `CatalogService`, `ICatalogService`, mock catalog service implementations + - `CatalogPicker.razor` (Blazor) and any Svelte catalog selector + - `CatalogId = "default"` placeholder callers and the `catalogId` field on request/response DTOs where present + - `data/catalogs/` directory + - Catalog-only tests + + If catalogs ever come back, redesign from scratch against a real use case. + + ### A6 — Validation as a first-class, client-agnostic operation + + **Retire the current `POST /api/v1/drafts/validate` endpoint in m-E19-02. Preserve every library piece a future validation operation composes. 
Record a hard E-18 dependency: the Time Machine must expose tiered validation as a first-class, client-agnostic operation alongside compile, evaluate, reevaluate, parameter override, and artifact write.** + + **Principle (recorded in the boundary ADR):** Validation — answering "is this YAML a correct FlowTime model?" — is a first-class, client-agnostic operation. `FlowTime.Core` owns the authoritative answer via `ModelSchemaValidator`, `ModelCompiler`, `ModelParser`, and `InvariantAnalyzer`. Sim UI, Blazor UI, Svelte UI, MCP servers, and external AI agents are all legitimate callers of validation, on equal footing. No single client — including Sim — is a privileged host for the validation operation. + + **Context (grounded in code, April 2026):** + - `POST /api/v1/drafts/validate` exists in `src/FlowTime.Sim.Service/Program.cs:540-615`. It calls `TemplateInvariantAnalyzer.Analyze` in `Sim.Core.Analysis`, which internally chains `ModelCompiler.Compile` → `ModelParser.ParseModel` → `RouterAwareGraphEvaluator.Evaluate` → `InvariantAnalyzer.Analyze`. So the endpoint does use Core — but it also executes the full graph, not just schema/compile checking. The name is misleading; it is really "compile + evaluate + analyse, without artifact write." + - No UI (Blazor or Svelte) calls `/api/v1/drafts/validate`. Only `DraftEndpointsTests.cs` exercises it. Same unused-endpoint pattern as stored drafts (A2), bundle archive layer (A3), and Engine bundle import (A4). + - `FlowTime.Core` already exposes cheaper validators that nothing calls: `ModelSchemaValidator` (`src/FlowTime.Core/Models/ModelSchemaValidator.cs:21`) for pure schema checking, and `ModelValidator` (`src/FlowTime.Core/Models/ModelValidator.cs:21`) for schemaVersion/grid/structure checks. Both return `ValidationResult`. 
+ - `TemplateInvariantAnalyzer` is the right *implementation* of the heaviest validation tier, but it lives behind one mislabeled HTTP endpoint on Sim, which is not the right *home* for a client-agnostic operation. + + **Deletion scope (executed by m-E19-02):** + - `POST /api/v1/drafts/validate` endpoint handler in `src/FlowTime.Sim.Service/Program.cs:540-615` + - Endpoint-specific tests (forward-only — the inline/draft-source validation path through this endpoint is unused) + + **Explicitly preserved (out of scope for deletion now and later):** + - `FlowTime.Core.Models.ModelSchemaValidator` — tier 1 (schema-only) library + - `FlowTime.Core.Models.ModelValidator` — tier 2 adjacent (schemaVersion/grid/structure + legacy field detection) library + - `FlowTime.Core.Compiler.ModelCompiler` and `FlowTime.Core.Models.ModelParser` — tier 2 (compile without execute) library + - `FlowTime.Sim.Core.Analysis.TemplateInvariantAnalyzer` — tier 3 (full invariant analysis including graph eval) library + - `FlowTime.Sim.Core.Analysis.InvariantAnalyzer` — invariant rules themselves + + These are the ingredients the future Time Machine validation operation will compose. Deleting the HTTP wrapper does not delete the validation capability. + + **Hard dependency on E-18 (recorded in `work/decisions.md` and appended to the E-18 epic spec as an explicit scope item):** the Time Machine must expose **tiered validation**: + + - **Tier 1 — schema:** YAML parses, JSON schema validates, class references resolve. Cheap, no compile. Intended for per-keystroke editor feedback and per-iteration AI inner-loop feedback. Backed by `ModelSchemaValidator`. + - **Tier 2 — compile:** Model compiles and parses into a `Graph`. Catches structural errors (topology, dependencies, expression compile). No execution. Backed by `ModelCompiler.Compile` + `ModelParser.ParseModel`. + - **Tier 3 — analyse:** Full invariant analysis. Includes graph evaluation under deterministic conditions. 
Catches semantic issues that only emerge after evaluation (capacity violations, conservation errors, runtime warnings). Backed by `TemplateInvariantAnalyzer` logic composed into the Time Machine. + + All three tiers are callable from the Time Machine's in-process SDK, CLI, and sidecar protocol, with consistent request/response shapes. Clients: Sim UI, Blazor UI, Svelte UI, MCP servers, external AI agents, tests, CI. No client is privileged. + + This is not optional for E-18. Validation and compile-only are natural siblings of compile-then-evaluate; leaving them out forces every client that needs "just check this" to pay the full evaluate cost, which breaks AI inner-loop performance and makes editor-time UX expensive. + + **Retained unchanged in E-19:** `/api/v1/templates/{id}/generate` and `/api/v1/drafts/generate` (Sim). These are template authoring surfaces that validate as a *side effect* of materialising the model. They are not replacements for the first-class validation operation; they stay scoped to "give me the expanded model for this template and these parameters." + + ### Blazor / Svelte Support Policy (principles-level) + + 1. Blazor remains a supported first-party UI for debugging, operator workflows, and as a plan-B to Svelte. No retirement as a cleanup goal. + 2. **Feature parity between Blazor and Svelte is not a goal.** Svelte is intentionally behind Blazor. Each UI carries only the features it actually has; neither is blocked waiting for the other. + 3. Both UIs consume current Engine and Sim contracts. Shared contract changes (endpoints, DTOs, schemas) keep both UIs compiling and functional, but "functional" does not mean "featurally equivalent." + 4. Blazor proceeds with planned deprecations and removals on its own track. Features Blazor is removing do not need to be built in Svelte first, and Svelte is not required to inherit them. + 5. 
No stale compatibility wrappers, duplicate endpoint probes, or local metrics/state reconstruction where canonical endpoints exist — in either UI. + 6. Neither UI carries demo/template generation or schema shapes that are deprecated on the shared contract surface. + 7. Blazor-specific workflows (operator/debugging tools that only exist in Blazor) are supported as long as they call current contracts. + + The inventory table (AC 3) will naturally show asymmetry: Blazor rows will include "deprecated, scheduled for removal" entries that have no Svelte counterpart. That is expected, not a gap. + + ## Acceptance Criteria + + 1. **Boundary ADR extended.** `docs/architecture/template-draft-model-run-bundle-boundary.md` contains a new "Responsibility Clarification" section (Core = evaluation library, Generator = orchestrator, API = query/operator surface over canonical runs, Sim = authoring + transitional execution host, Time Machine = new E-18 component) and three Mermaid sequence diagrams labelled **Current**, **Transitional (end of E-19)**, and **Target (post-E-18)**. The Target diagram shows the Time Machine as a distinct participant with both UI and AI/MCP clients as equal callers of tiered validation and execution operations. Diagrams correctly distinguish canonical run directory (`data/runs/<runId>/`) from bundle ZIP (`data/storage/runs/`). The ADR also records the A6 principle that validation is a first-class client-agnostic operation owned by Core and surfaced through the Time Machine. + + 2. **Supported-surfaces matrix published.** New file `docs/architecture/supported-surfaces.md` exists and contains the exhaustive inventory table (see AC 3), the Blazor/Svelte support policy verbatim from this spec, and the shared framing (no renames, Time Machine ownership by E-18, Core purity, API identity, Sim responsibilities, no privileged validation client). + + 3.
**Exhaustive inventory table populated.** A single table in `supported-surfaces.md` covers every in-scope surface element with these columns: + + | Surface | Element | Current status | Decision | Target state | Owning milestone | Grep guard | + + Populated by systematic sweep of: + - Every route in `src/FlowTime.API/Endpoints/*.cs` + - Every route in `src/FlowTime.Sim.Service/Program.cs` and `src/FlowTime.Sim.Service/Extensions/*EndpointExtensions.cs` + - Every HTTP call site in `src/FlowTime.UI/Services/*` and the Svelte UI equivalents (`ui/src/lib/api/*` or current path) + - Every public DTO in `src/FlowTime.Contracts` + - Every JSON/YAML schema file tracked in the repo + - Every template under the active Sim template directory + - Every example under `docs/examples/` (or equivalent current-surface example location) + - Every `docs/` page that documents a contract on a current surface + + Every row with `Decision = delete` or `Decision = archive` has an owning downstream milestone (m-E19-02, m-E19-03, or m-E19-04) and a grep guard specification. Every row with `Decision = supported` has a one-line rationale. Rows where the decision is still unclear are listed as explicit open questions at the bottom of the document, not silently marked supported. + + 4. **A1–A6 decisions are cited, not reinvented, in the inventory.** Every orchestration-endpoint, draft, bundle, import, catalog, and validation row in the inventory links to the corresponding decision section of this spec (or to `work/decisions.md` entries derived from it) rather than reargued inline. + + 5. **`work/decisions.md` updated.** Short entries exist for: the shared framing (no renames, Time Machine ownership by E-18), A1, A2, A3, A4, A5, A6, the Time Machine naming decision, and the Blazor/Svelte support policy. Each entry points at this milestone spec and/or the supported-surfaces doc for detail. + + 6. 
**E-18 epic spec updated with the validation requirement and Time Machine naming.** `work/epics/E-18-headless-pipeline-and-optimization/spec.md` (directory path preserved for historical stability) is updated in content to title the epic `E-18 Time Machine`, gains an explicit scope item for tiered validation (schema / compile / analyse) as a first-class operation alongside compile/evaluate/reevaluate/parameter-override/artifact-write with the client list (Sim UI, Blazor UI, Svelte UI, MCP servers, external AI agents, tests, CI) and the "no privileged client" principle, and has body references from "Headless" / `FlowTime.Headless` updated to "Time Machine" / `FlowTime.TimeMachine`. The same wrap pass also syncs `ROADMAP.md`, `work/epics/epic-roadmap.md`, and `CLAUDE.md` to the new naming and m-E19-01 status. + + 7. **`CLAUDE.md` "Current Work" section updated.** E-19 status reflects that m-E19-01 is complete (when the milestone closes) and names m-E19-02 as the next milestone, consistent with the status-sync discipline in the repo's project rules. + + 8. **Epic status surfaces reconciled.** `work/epics/E-19-surface-alignment-and-compatibility-cleanup/spec.md` milestone table, `ROADMAP.md`, and `work/epics/epic-roadmap.md` all reflect m-E19-01 status in a single pass at wrap time. + + 9. **Tracking doc maintained.** `work/epics/E-19-surface-alignment-and-compatibility-cleanup/m-E19-01-supported-surface-inventory-tracking.md` exists and is updated after each AC is satisfied. + + 10. **No code deletion in this milestone.** The inventory names what will be deleted and in which downstream milestone, but no endpoint, DTO, UI client, schema, template, example, or doc is deleted as part of m-E19-01 itself. If the sweep discovers something obviously and trivially dead that cannot wait, it is logged in `work/gaps.md` with a target milestone rather than removed here. + + ## Guards / DO NOT + + - **DO NOT** delete any code in this milestone. 
m-E19-01 is a decision and documentation milestone. Every deletion is an AC of a downstream milestone with an explicit grep guard. + - **DO NOT** rename `FlowTime.Core`, `FlowTime.Generator`, `FlowTime.API`, or `FlowTime.Sim.*`. The shared framing explicitly disallows renames in E-19. + - **DO NOT** design the Time Machine component in this milestone. The Time Machine is E-18's responsibility. m-E19-01 only records the commitment, the sunset hook, and the tiered-validation scope requirement (A6). + - **DO NOT** treat the current Sim orchestration path as the future Time Machine contract, in diagrams, ADR text, matrix entries, or decision records. + - **DO NOT** privilege any client (Sim UI, Blazor UI, Svelte UI, MCP, AI agent) in the Time Machine validation or execution contract. The "no privileged client" principle is load-bearing for the AI/MCP use case (A6) and must survive into E-18 design. + - **DO NOT** mark an inventory row `supported` to avoid making a decision. If a row cannot be decided now, it goes into the explicit open-questions section with a named owner, not silently into `supported`. + - **DO NOT** require Svelte feature parity with Blazor as a condition of any decision. Feature parity is explicitly not a goal. + - **DO NOT** fold analytical-series concerns into the inventory. Anything E-16 owns is out of scope. + - **DO NOT** fold E-10 Phase 3 primitives (`p3d`, `p3c`, `p3b`) into the inventory. Those are analytical primitives, not compatibility surfaces. + - **DO NOT** extend the boundary ADR with speculative future architectures beyond the three locked diagrams. The ADR is a snapshot of decided direction, not a design doc for E-18. + - **DO NOT** let the inventory be curated to only the seams already named in the epic. AC 3 requires exhaustive sweep across the listed surfaces. + + ## Sequence Diagrams (canonical content for the boundary ADR) + + These three diagrams are the deliverable for AC 1. 
They are reproduced here so the spec itself is self-contained; the ADR extension uses them verbatim. + + ### Current + + ```mermaid + sequenceDiagram + actor User + participant UI as FlowTime.UI + participant Sim as FlowTime.Sim.Service + participant Tpl as TemplateService + participant Orch as RunOrchestrationService + participant Core as FlowTime.Core + participant Runs as data/runs + participant Store as data/storage + participant API as FlowTime.API + + Note over UI: Blazor and Svelte, both first-party + + User->>UI: Choose template and click Run + UI->>Sim: POST /api/v1/orchestration/runs + Note over UI,Sim: UI is the caller. Sim owns template-driven run creation. + + Sim->>Tpl: Load template from templates/ + Tpl-->>Sim: Template YAML and metadata + Sim->>Orch: CreateRunAsync templateId params mode + Orch->>Core: Compile and evaluate graph + Core-->>Orch: Canonical run artifacts + Orch->>Runs: Write canonical run directory for runId + Orch->>Store: Zip run and save as StorageKind.Run + Note over Store: data/storage/runs is a post-hoc archive bundle layer + + Sim-->>UI: RunCreateResponse with metadata and bundleRef + + UI->>API: GET /v1/runs/runId/graph + UI->>API: GET /v1/runs/runId/state_window + UI->>API: GET /v1/runs/runId/metrics + API->>Runs: Read canonical run artifacts + Runs-->>API: graph state series metrics + API-->>UI: Time-travel responses + + opt Optional inline-YAML run path + Note over UI,Sim: Adjacent surface. Not the primary first-party UI run path. + UI->>Sim: POST /api/v1/drafts/run with inline YAML + Sim->>Orch: Same CreateRunAsync flow + end + + opt Draft CRUD path tests only + Note over UI,Store: /api/v1/drafts GET PUT POST DELETE exists. data/storage/drafts exists. + Note over UI,Store: No UI calls it. Only DraftEndpointsTests.cs exercises it. + end + + opt Engine bundle import tests only + Note over API,Store: POST /v1/runs with bundlePath, bundleArchiveBase64, or bundleRef exists. 
+ Note over API,Store: Only RunOrchestrationTests.cs exercises it. No production caller. + end + + opt Catalog residue + Note over UI,Sim: /api/v1/catalogs exists. CatalogPicker.razor exists. + Note over UI,Sim: TemplateRunner.razor hardcodes CatalogId default. GetCatalogsAsync returns mocks. + end + ``` + + ### Transitional (end of E-19) + + ```mermaid + sequenceDiagram + actor User + participant UI as FlowTime.UI + participant Sim as FlowTime.Sim.Service + participant Tpl as TemplateService + participant Orch as RunOrchestrationService + participant Core as FlowTime.Core + participant Runs as data/runs + participant API as FlowTime.API + + Note over Sim: Transitional execution host. Sunsets when E-18 Time Machine ships. + + User->>UI: Choose template and click Run + UI->>Sim: POST /api/v1/orchestration/runs + Note over UI,Sim: Supported authoring surface. Not the Time Machine contract. + + Sim->>Tpl: Load template + Tpl-->>Sim: Template YAML and metadata + Sim->>Orch: CreateRunAsync templateId params mode + Orch->>Core: Compile and evaluate + Core-->>Orch: Canonical run artifacts + Orch->>Runs: Write canonical run directory for runId + Sim-->>UI: RunCreateResponse with metadata + + UI->>API: GET /v1/runs/runId/graph + UI->>API: GET /v1/runs/runId/state_window + UI->>API: GET /v1/runs/runId/metrics + API->>Runs: Read canonical run artifacts + API-->>UI: Time-travel responses + + opt Optional inline-YAML run path + Note over UI,Sim: Adjacent surface. Not the primary first-party UI run path. + UI->>Sim: POST /api/v1/drafts/run with inline YAML + Sim->>Orch: Same CreateRunAsync flow + end + + Note over Sim,Runs: No data/storage/runs bundles. No bundleRef on responses. + Note over Sim,Runs: No /api/v1/drafts CRUD. No data/storage/drafts. + Note over Sim,Runs: No /api/v1/catalogs. No CatalogPicker. No catalogId placeholders. + Note over API: No POST /v1/runs route remains. 
+ ``` + + ### Target (post-E-18) + + ```mermaid + sequenceDiagram + actor User + participant UI as FlowTime.UI + participant Agent as AI or MCP client + participant Sim as FlowTime.Sim.Service + participant Tpl as TemplateService + participant TM as FlowTime.TimeMachine + participant Core as FlowTime.Core + participant Runs as data/runs + participant API as FlowTime.API + + Note over Sim: Authoring only. No execution. + Note over TM: New component owned by E-18. Compile, tiered validation, evaluate, reevaluate, artifact write. + Note over Agent: MCP servers and external AI agents. First-class clients on equal footing with UIs. + + Note over User,API: Authoring. UI talks to Sim for templates and parameters. + + User->>UI: Choose template and edit parameters + UI->>Sim: GET /api/v1/templates + UI->>Sim: POST /api/v1/templates/id/generate + Sim->>Tpl: Load template and expand parameters + Tpl-->>Sim: Resolved model YAML + Sim-->>UI: Preview YAML and provenance + + Note over User,API: Validation is tiered. Any client can call any tier. + + opt Tier 1 schema + Note over UI,TM: Cheap per-keystroke feedback. Any client can call. + UI->>TM: Validate schema only + TM->>Core: ModelSchemaValidator + Core-->>TM: Schema result + TM-->>UI: Valid or errors + end + + opt Tier 2 compile + Note over Agent,TM: Structural check. No execution. + Agent->>TM: Validate compile only + TM->>Core: ModelCompiler and ModelParser + Core-->>TM: Compile result + TM-->>Agent: Valid or errors + end + + opt Tier 3 analyse + Note over UI,TM: Full invariant analysis including evaluation. + UI->>TM: Validate with invariant analysis + TM->>Core: Compile plus evaluate plus InvariantAnalyzer + Core-->>TM: Warnings and errors + TM-->>UI: Analysis result + end + + Note over User,API: Execution. Clients talk directly to the Time Machine. 
+ + User->>UI: Click Run + UI->>TM: Compile and evaluate model with params and seed + TM->>Core: Compile and evaluate graph + Core-->>TM: Series and compiled graph + TM->>Runs: Write canonical run directory for runId + TM-->>UI: RunCreateResponse with runId metadata warnings + + opt AI driven iteration + Note over Agent,TM: Generate then validate then run then refine. + Agent->>TM: Compile and evaluate candidate model + TM->>Core: Compile and evaluate graph + Core-->>TM: Series + TM->>Runs: Write canonical run directory + TM-->>Agent: runId and metadata + end + + opt Parameter override or sweep or fit + UI->>TM: Reevaluate with parameter overrides + TM->>Core: Evaluate with overrides + Core-->>TM: New series + TM->>Runs: Write new canonical run directory + TM-->>UI: New runId + end + + UI->>API: GET /v1/runs/runId/graph + UI->>API: GET /v1/runs/runId/state_window + UI->>API: GET /v1/runs/runId/metrics + API->>Runs: Read canonical run artifacts + API-->>UI: Time-travel responses + ``` + + ## Test Strategy + + This milestone produces documents and decisions, not code. "Tests" are artifact-existence and consistency checks. + + - **ADR artifact check:** `docs/architecture/template-draft-model-run-bundle-boundary.md` contains the Responsibility Clarification section and the three Mermaid diagrams. The Mermaid blocks parse (render via the repo's existing Mermaid pipeline or a lint pass). + - **Supported-surfaces doc check:** `docs/architecture/supported-surfaces.md` exists, contains the inventory table with all required columns, contains the Blazor/Svelte policy, and contains the shared framing. + - **Inventory completeness check:** A short script or manual checklist confirms every endpoint in `src/FlowTime.API/Endpoints/` and `src/FlowTime.Sim.Service/` has a row. Every `src/FlowTime.Contracts` public DTO has a row. Every tracked schema file has a row. 
- **Decision-link check:** Every inventory row with `Decision in {delete, archive}` has a non-empty `Owning milestone` and `Grep guard` cell. Every row with `Decision = supported` has a non-empty rationale. + - **`work/decisions.md` consistency check:** Entries for A1–A6, shared framing, and Blazor/Svelte policy exist and point at this spec or `supported-surfaces.md`. + - **Status-sync check:** `CLAUDE.md`, `ROADMAP.md`, `work/epics/epic-roadmap.md`, and the E-19 epic spec milestone table all reflect the same m-E19-01 status at wrap time. + - **Grep guard baselines (specification only, not enforcement):** For each delete-decision row, a candidate `rg` pattern is specified. The patterns are not asserted by this milestone — enforcement is the downstream milestone's AC — but they exist so the downstream milestone inherits them directly. + + ## Out of Scope + + - All code deletion. Executed by m-E19-02 (Sim authoring & runtime boundary), m-E19-03 (schema/template/example retirement), and m-E19-04 (Blazor support alignment). + - Any change to `FlowTime.Core`, `FlowTime.Generator`, or the canonical run directory layout at `data/runs/<runId>/`. + - Designing, implementing, or scoping the Time Machine component beyond recording requirements. The Time Machine is owned by E-18. m-E19-01's only E-18-touching actions are (a) updating the epic title and body references to the new name and (b) appending the tiered-validation scope requirement. + - Renaming any existing project or namespace. `FlowTime.TimeMachine` is a new component added by E-18, not a rename of an existing one. `FlowTime.Core`, `FlowTime.Generator`, `FlowTime.API`, and `FlowTime.Sim.*` keep their names. + - Analytical-series work, warning fact ownership, consumer fact publication, or by-class purity. All owned by E-16 (complete). + - E-10 Phase 3 analytical primitives (`p3d`, `p3c`, `p3b`). + - Svelte feature parity with Blazor. Explicitly disavowed by the Blazor/Svelte policy.
+ - New compatibility shims, additive backward-compatibility phases, or "temporary" wrappers carried past this milestone. + - Cross-environment run transfer / portable bundle interchange. If ever needed, comes back as an E-18 concern. + + ## Dependencies + + - **E-16** complete. Analytical truth boundary purified, consumer facts published. This milestone depends on E-16 being the authoritative owner of everything analytical so the E-19 scope line is clean. + - **Boundary ADR seed** already landed in commit `ef644d1` (`docs(work): define E19 surface boundary and ADR`). This milestone extends that document, it does not create it. + - No dependency on E-10 Phase 3, E-11 Svelte UI buildout, or E-18 Time Machine execution. m-E19-01 runs as a parallel cleanup-planning lane. E-19's references to the Time Machine are forward commitments; E-19 does not wait for E-18 to ship. + + ## References + + - `work/epics/E-19-surface-alignment-and-compatibility-cleanup/spec.md` — epic scope, constraints, milestone table + - `work/epics/E-16-formula-first-core-purification/spec.md` — prior boundary + - `work/epics/E-18-headless-pipeline-and-optimization/spec.md` — **E-18 Time Machine** epic (directory path preserved historically; content titled *E-18 Time Machine*). Owner of the Time Machine component and the tiered validation scope requirement. Sunset hook target for Sim transitional execution hosting. 
+ - `docs/architecture/template-draft-model-run-bundle-boundary.md` — boundary ADR (extended by this milestone) + - `docs/architecture/supported-surfaces.md` — supported-surfaces matrix (created by this milestone) + - `work/decisions.md` — short decision entries (updated by this milestone) + - `ROADMAP.md`, `work/epics/epic-roadmap.md`, `CLAUDE.md` — status surfaces reconciled at wrap time + - kind: milestone + id: M-025 + frontmatter: + title: Sim Authoring & Runtime Boundary Cleanup + status: done + parent: E-19 + body: | + ## Goal + + Execute the runtime deletions locked by [m-E19-01](./m-E19-01-supported-surface-inventory.md) A1–A6: remove stored drafts, the Sim ZIP archive layer, Engine bundle-import and dead direct-eval routes, runtime catalogs, and the Sim-only `POST /api/v1/drafts/validate` wrapper. Narrow `POST /api/v1/drafts/run` to inline-source only. When this milestone closes, Sim authoring surfaces expose only the explicitly supported paths and Engine exposes only the canonical query/operator surface over `data/runs/<runId>/`. + + ## Context + + m-E19-01 published the supported-surface matrix in [docs/architecture/supported-surfaces.md](../../../docs/architecture/supported-surfaces.md) and locked retention/deletion decisions A1–A6 in the milestone spec. No code changed in m-E19-01 — every deletion was assigned an owning downstream milestone and a grep guard. This milestone is the first deletion pass and executes every row whose `Owning milestone` column is `m-E19-02`. + + Scope boundaries inherited from m-E19-01: + + - `FlowTime.Core`, `FlowTime.Generator`, `FlowTime.API`, and `FlowTime.Sim.*` are **not renamed** and their high-level responsibilities do not change in E-19. Generator stays frozen; its Path B extraction belongs to E-18 m-E18-01a. + - The canonical run directory under `data/runs/<runId>/` is unchanged. + - Analytical surfaces purified by E-16 are out of scope.
+ - Blazor stale-wrapper cleanup (beyond the catalog selector, which is coupled to A5) belongs to m-E19-04. + - Schema, template, example, and current-doc cleanup belong to m-E19-03. + - `FlowTime.TimeMachine` is not introduced here — that is E-18 m-E18-01a. + + The default execution path for first-party UIs during and after this milestone remains `POST /api/v1/orchestration/runs` on `FlowTime.Sim.Service` per A1. Sunsetting that endpoint is an E-18 decision, not this milestone's. + + ## Acceptance Criteria + + ### AC1 — Stored drafts retired (A2) + + Forward-only deletion of the stored-draft product surface. + + **Delete:** + + - Sim routes in [src/FlowTime.Sim.Service/Program.cs](../../../src/FlowTime.Sim.Service/Program.cs): `GET /api/v1/drafts` (line 399), `GET /api/v1/drafts/{draftId}` (line 418), `POST /api/v1/drafts` (line 444), `PUT /api/v1/drafts/{draftId}` (line 489), `DELETE /api/v1/drafts/{draftId}` (line 527). + - `StorageKind.Draft` enum value in [src/FlowTime.Contracts/Storage/StorageContracts.cs](../../../src/FlowTime.Contracts/Storage/StorageContracts.cs) and every call site that writes or reads drafts through `IStorageBackend`. + - `data/storage/drafts/` directory references in Sim service configuration and any backend code that materialises that path. + - `DraftEndpointsTests.cs` CRUD test cases in `tests/FlowTime.Sim.Tests/` (inline-source tests that survive A2 narrowing must stay). + - `DraftCreateRequest`, `DraftUpdateRequest`, and other stored-draft request/response contracts that only serve the deleted CRUD routes. + + **Preserve:** + + - `POST /api/v1/drafts/generate` (A2 narrowing: stays as an authoring generate-materialised-model surface, but must not resolve any `draftId`). + - `POST /api/v1/drafts/map-profile` (A2: supported profile authoring helper).
+ + **Grep guards (must return zero matches in `src/` and `tests/` after deletion):** + + - `"drafts/{draftId"`, `"drafts\"\s*,\s*async"` on CRUD handlers + - `StorageKind.Draft` + - `data/storage/drafts` + + ### AC2 — `/api/v1/drafts/run` narrowed to inline-source only (A1, A2) + + `POST /api/v1/drafts/run` at [src/FlowTime.Sim.Service/Program.cs:675](../../../src/FlowTime.Sim.Service/Program.cs) remains a live route but only accepts `DraftSource.type == "inline"`. Any `draftId` resolution branch is removed. + + - No request shape accepts `draftId` on this endpoint after the milestone. + - Inline-source tests in `DraftEndpointsTests.cs` survive and are the only tests left covering this route. + - Documentation for this endpoint (in `docs/reference/contracts.md` and elsewhere) is updated by m-E19-03 — this milestone only removes the code branch. + + **Grep guard:** No `draftId` reference remains in the `/api/v1/drafts/run` handler or its request shape. + + ### AC3 — Sim-only `/api/v1/drafts/validate` deleted (A6) + + `POST /api/v1/drafts/validate` at [src/FlowTime.Sim.Service/Program.cs:540](../../../src/FlowTime.Sim.Service/Program.cs) is removed along with its endpoint-specific tests. The library pieces that back it remain untouched (they become the tier 1/2/3 ingredients the future Time Machine composes per [D-2026-04-07-017](../../decisions.md)): + + **Preserved unchanged:** + + - `FlowTime.Core.Models.ModelSchemaValidator` + - `FlowTime.Core.Models.ModelValidator` + - `FlowTime.Core.Compiler.ModelCompiler` + - `FlowTime.Core.Models.ModelParser` + - `FlowTime.Sim.Core.Analysis.TemplateInvariantAnalyzer` + - `FlowTime.Sim.Core.Analysis.InvariantAnalyzer` + + **Grep guard:** No `/api/v1/drafts/validate` route literal or `drafts/validate` handler remains in `src/` or `tests/`. 
+ + ### AC4 — Sim-side ZIP archive layer deleted (A3) + + Remove the post-hoc run-bundle archive path that writes ZIPs to `data/storage/runs/` and the `BundleRef` / `StorageRef` return values that surface them. + + **Delete:** + + - `StorageKind.Run` bundle ZIP writes inside `RunOrchestrationService.CreateSimulationRunAsync` (wherever that service currently calls the archive writer). + - `BundleRef` and `StorageRef` members on `RunCreateResponse` in [src/FlowTime.Contracts/TimeTravel/RunContracts.cs](../../../src/FlowTime.Contracts/TimeTravel/RunContracts.cs). + - `StorageKind.Run` enum value in [src/FlowTime.Contracts/Storage/StorageContracts.cs](../../../src/FlowTime.Contracts/Storage/StorageContracts.cs) and the `data/storage/runs/` backend write path, together with any helper that only services that writer. + - Any Sim-side helper whose only caller was the archive writer. + + **Explicitly preserved:** the canonical run directory layout at `data/runs/<runId>/` (`model/`, `series/`, `run.json`). This is not a bundle and is untouched by this milestone. + + **Grep guards:** No `StorageKind.Run`, `BundleRef`, `StorageRef`, or `data/storage/runs` reference remains in `src/` or `tests/` on the current surface. + + ### AC5 — Engine `POST /v1/runs` deleted outright (A4) + + `POST /v1/runs` in [src/FlowTime.API/Endpoints/RunOrchestrationEndpoints.cs:19](../../../src/FlowTime.API/Endpoints/RunOrchestrationEndpoints.cs) is removed entirely. No 410-style rejection stub remains. The read endpoints `GET /v1/runs` (line 20) and `GET /v1/runs/{runId}` (line 21) stay — they are the canonical run discovery/detail contract. + + **Delete:** + + - `group.MapPost("/runs", HandleCreateRunAsync)` at line 19 and the `HandleCreateRunAsync` handler itself. + - `bundlePath`, `bundleArchiveBase64`, and `BundleRef` resolution branches (wherever they live once bundled into the removed handler).
+ - `RunImportRequest` fields `BundlePath`, `BundleArchiveBase64`, and the `BundleRef` type on import contracts in [src/FlowTime.Contracts/TimeTravel/RunContracts.cs](../../../src/FlowTime.Contracts/TimeTravel/RunContracts.cs). + - `ExtractArchiveAsync` and any support helpers that only served bundle import. + - Bundle-import test cases in `RunOrchestrationTests.cs` (forward-only deletion — do not keep them as "preserved for future import redesign"). + + **Preserve:** `GET /v1/runs` and `GET /v1/runs/{runId}` — they are the canonical run query surface consumed by the Svelte UI and operator workflows. + + **Grep guards:** `MapPost("/runs", HandleCreateRunAsync)`, `BundlePath`, `BundleArchiveBase64`, and `BundleRef` return zero matches in `src/` and `tests/` on the current API surface. + + ### AC6 — Engine debug route deleted (scope narrowed during implementation) + + The m-E19-01 audit originally scheduled three Engine routes for deletion in this milestone: `GET /v1/debug/scan-directory/{dirName}`, `POST /v1/run`, and `POST /v1/graph`. During implementation, discovery showed that `POST /v1/run` is used by 50+ test call sites across the Engine Provenance, Parity, and Legacy test suites as the primary run-creation mechanism, and `POST /v1/graph` is used by `Legacy/ApiIntegrationTests.cs`. The matrix entry claim that these routes are "not used by current first-party UIs" is technically correct but underweighted the test-infrastructure coupling. Deleting them in this milestone would either regress ~50 tests of Engine-side runtime provenance coverage (forward-only test deletion — unacceptable) or pull substantial test-migration work that is out of scope for a runtime-cleanup milestone. + + **Scope for this milestone:** + + - Delete `GET /v1/debug/scan-directory/{dirName}` handler from [src/FlowTime.API/Program.cs](../../../src/FlowTime.API/Program.cs). It is a genuine zombie — no product caller, no test caller. 
+ + **Deferred (tracked in [work/gaps.md](../../gaps.md)):** + + - `POST /v1/run` and `POST /v1/graph` deletion is deferred out of m-E19-02. Retained as a transitional test-infrastructure surface until the Provenance/Parity/Legacy test suites are migrated to an alternative run-creation path (either a test-only in-process adapter over `Graph.Evaluate` / `RunOrchestrationService` or the supported Sim orchestration endpoint with template fixtures). A decisions.md entry (D-2026-04-08-029) records the scope change and the reason. + + **Grep guard (narrowed):** No `"/v1/debug/scan-directory"` literal remains in runtime code. + + ### AC7 — Catalogs retired entirely (A5) + + Catalog surfaces are zombie residue with no supported first-party caller. Delete them atomically across runtime and the Blazor catalog selector (the one UI site coupled to this server deletion). + + **Delete:** + + - Sim routes: `GET /api/v1/catalogs`, `GET /api/v1/catalogs/{id}`, `PUT /api/v1/catalogs/{id}`, `POST /api/v1/catalogs/validate` in [src/FlowTime.Sim.Service/Program.cs](../../../src/FlowTime.Sim.Service/Program.cs). + - `CatalogService`, `ICatalogService`, and any mock catalog service implementation in Sim. + - `CatalogPicker.razor` in the Blazor UI and any Svelte catalog selector if one exists. + - `CatalogId = "default"` placeholder callers (including the hardcoded value in `TemplateRunner.razor`). + - The `catalogId` field on any request/response DTO where it appears. + - `data/catalogs/` directory references. + - Catalog-only tests. + + **Grep guards:** `/api/v1/catalogs`, `CatalogService`, `ICatalogService`, `CatalogPicker`, and the literal `CatalogId = "default"` return zero matches in `src/` and `tests/` on the current surface. 
+ + ### AC8 — Public contracts cleanup consolidated + + All public contract changes forced by AC1–AC7 above land in [src/FlowTime.Contracts/](../../../src/FlowTime.Contracts/) in a single consistent pass: + + - `RunImportRequest.BundlePath`, `RunImportRequest.BundleArchiveBase64`, `RunCreateResponse.BundleRef`, and the `BundleRef` / `StorageRef` types removed. + - `StorageKind.Draft` and `StorageKind.Run` enum values removed from [src/FlowTime.Contracts/Storage/StorageContracts.cs](../../../src/FlowTime.Contracts/Storage/StorageContracts.cs). Any storage-kind switch statements lose their draft/run cases. + - Stored-draft request/response contracts (`DraftCreateRequest`, `DraftUpdateRequest`) removed unless a surviving inline-only route still needs them. + + `StorageBackendOptions`, `IStorageBackend`, `StorageWriteRequest`, `StorageWriteResult`, `StorageReadResult`, `StorageListRequest`, and `StorageItemSummary` remain on the public surface — they still serve surviving storage needs (for example, series storage referenced in the supported-surfaces matrix). This milestone removes only the draft/run kinds, not the underlying storage abstraction. + + ### AC9 — Build, tests, and grep guards green + + - `dotnet build FlowTime.sln` is green with no new warnings introduced by this milestone. + - `dotnet test FlowTime.sln` is green across all test projects (deleted tests for deleted code are acceptable; failing tests or reduced coverage for surviving code is not). + - Every grep guard from AC1–AC7 is asserted by a simple repo-root script or CI check that `rg` returns zero matches in `src/` and `tests/` for the deleted symbols. The check can be a single shell script runnable locally; it does not need to become a full CI pipeline step in this milestone but it must exist and be documented. 
+ + ### AC10 — Status surfaces reconciled at wrap time + + At milestone wrap: + + - [work/epics/E-19-surface-alignment-and-compatibility-cleanup/spec.md](./spec.md) milestone table marks m-E19-02 complete and m-E19-03 next. + - [ROADMAP.md](../../../ROADMAP.md) and [work/epics/epic-roadmap.md](../../epic-roadmap.md) reflect the same status. + - [CLAUDE.md](../../../CLAUDE.md) Current Work section names m-E19-02 complete and m-E19-03 next. + - The tracking doc [m-E19-02-sim-authoring-and-runtime-boundary-cleanup-tracking.md](./m-E19-02-sim-authoring-and-runtime-boundary-cleanup-tracking.md) records every AC checked, the final test count, and the grep guard results. + - `work/decisions.md` does **not** need new entries — this milestone executes decisions A1–A6 already recorded under D-2026-04-07-023 through D-2026-04-07-028. If an implementation judgment call surfaces that m-E19-01 did not anticipate, it is logged in `work/gaps.md` or as a new D-entry at wrap time. + + ## Technical Notes + + ### Recommended sequence + + Each step should leave the build green and the test suite passing before the next step begins. This is a forward-only cleanup — no compatibility shims, no temporary wrappers. + + 1. **Catalogs (AC7).** Fully self-contained: routes, services, Blazor picker, placeholder callers, `data/catalogs/`, catalog-only tests. Lowest coupling, highest confidence. + 2. **`/api/v1/drafts/validate` (AC3).** Trivial — unused route, clean deletion, library pieces explicitly preserved. + 3. **Stored drafts CRUD (AC1).** Delete routes, `StorageKind.Draft`, `data/storage/drafts/`, CRUD tests. + 4. **Narrow `/api/v1/drafts/run` (AC2).** Strip `draftId` branches; keep inline-source path. + 5. **Sim ZIP archive layer (AC4).** Remove `StorageKind.Run` writes in `RunOrchestrationService`, drop `BundleRef`/`StorageRef` return values, delete `data/storage/runs/` backend writer, remove `StorageKind.Run` from the enum. + 6. 
**Engine `POST /v1/runs` + bundle-import (AC5).** Delete handler, remove bundle-import fields from `RunImportRequest`, delete bundle-import tests, delete the route registration. + 7. **Engine debug route (AC6).** Delete `GET /v1/debug/scan-directory/{dirName}`. `POST /v1/run` and `POST /v1/graph` deletion deferred out of the milestone per the AC6 scope change above. + 8. **Public contracts finalisation (AC8).** Sanity pass to confirm every deleted runtime symbol is also gone from `FlowTime.Contracts`. + 9. **Grep guards + build/test finalisation (AC9).** Run every grep guard, fix any straggler references, rerun the full test suite. + 10. **Wrap (AC10).** Tracking doc, status surfaces, `CLAUDE.md` current work. + + ### Supporting data + + - Sim service endpoints are defined inline in [src/FlowTime.Sim.Service/Program.cs](../../../src/FlowTime.Sim.Service/Program.cs) (75 catalog/drafts references counted during planning). + - Engine run orchestration is isolated to [src/FlowTime.API/Endpoints/RunOrchestrationEndpoints.cs](../../../src/FlowTime.API/Endpoints/RunOrchestrationEndpoints.cs). + - Engine debug/eval routes are in [src/FlowTime.API/Program.cs](../../../src/FlowTime.API/Program.cs). + - Storage abstractions are in [src/FlowTime.Contracts/Storage/](../../../src/FlowTime.Contracts/Storage/). + - Run-contract DTOs that need trimming are in [src/FlowTime.Contracts/TimeTravel/RunContracts.cs](../../../src/FlowTime.Contracts/TimeTravel/RunContracts.cs). + + ### Test strategy + + Forward-only deletion, not migration: + + - Tests that exist only to exercise deleted routes are deleted alongside the routes. + - Tests covering surviving inline paths (`/api/v1/drafts/run` with `inline` source, `/api/v1/drafts/generate`, `/api/v1/drafts/map-profile`, `/api/v1/orchestration/runs`, Engine `GET /v1/runs*`) must stay green. + - No new unit tests are required by this milestone unless a deletion surfaces a regression that existing coverage did not catch. 
In that case, the regression test is added alongside the fix. + - Grep guards (AC9) are the load-bearing regression check for this milestone. Every deleted symbol is asserted absent. + + ### Do NOT touch + + - `FlowTime.Core` — no changes. Library pieces preserved for A6 are explicitly unchanged. + - `FlowTime.Generator` — frozen in E-19; any Generator work belongs to E-18 m-E18-01a. + - Canonical run directory layout at `data/runs/<runId>/` — unchanged. + - `/api/v1/orchestration/runs` — supported per A1. No changes. + - `/api/v1/templates/*` authoring surface — supported. No changes. + - `/api/v1/drafts/generate` and `/api/v1/drafts/map-profile` — supported authoring surfaces. No changes beyond removing `draftId` resolution. + - `/api/v1/series/*`, `/api/v1/profiles/*`, `/api/v1/models/*` — supported Sim authoring/data-intake surfaces. No changes. + - Blazor stale-wrapper cleanup beyond `CatalogPicker.razor` — that is m-E19-04's job. + - Schema files under `docs/schemas/` — m-E19-03 owns any deprecated-schema removal. + - Template files under `templates/` — m-E19-03 owns any deprecated-template removal. + - Example files under `examples/` — m-E19-03 owns schema-compatibility example retirement. + + ## Out of Scope + + - Introducing or referencing `FlowTime.TimeMachine`. That component is new in E-18 m-E18-01a and does not exist yet. + - Any Path B extraction of `FlowTime.Generator`. Generator is frozen. + - Schema, template, example, or docs cleanup (m-E19-03 owns those). + - Blazor stale compatibility wrappers outside the catalog picker (m-E19-04). + - Replacing the deleted validation endpoint with a tiered validation API on Sim — that is explicitly an E-18 m-E18-01b deliverable per A6. + - Reintroducing any deleted surface as a "temporary compatibility shim." + - Refactoring `RunOrchestrationService` or `IStorageBackend` beyond removing the deleted code paths. + - Performance, observability, or error-handling improvements unrelated to deletion.
+ - Introducing new tests for surviving endpoints beyond what already exists. + + ## Guards / DO NOT + + - **DO NOT** preserve a 410-style rejection stub or advisory tombstone for any deleted route. Forward-only deletion per shared framing in [m-E19-01 § Shared Framing](./m-E19-01-supported-surface-inventory.md#shared-framing). + - **DO NOT** design or stub anything under `FlowTime.TimeMachine` or any `Headless` namespace. The Time Machine is E-18 m-E18-01a. + - **DO NOT** extend the `POST /api/v1/orchestration/runs` surface. It stays as-is; sunsetting is an E-18 decision. + - **DO NOT** add new compatibility wrappers, feature flags, or configuration toggles to keep deleted behaviour reachable in any environment. + - **DO NOT** widen the scope into schema/template/example cleanup. Those are m-E19-03. + - **DO NOT** touch the canonical run directory layout at `data/runs/<runId>/`. The bundle archive layer is separate. + - **DO NOT** re-home `TemplateInvariantAnalyzer` into `FlowTime.Core` in this milestone. That is an E-18 m-E18-01b concern. + - **DO NOT** leave partially deleted symbols behind. Every grep guard must pass at wrap time. + + ## Dependencies + + - [m-E19-01 Supported Surface Inventory, Boundary ADR & Exit Criteria](./m-E19-01-supported-surface-inventory.md) — locks A1–A6 decisions and the boundary ADR this milestone executes against. + - [docs/architecture/supported-surfaces.md](../../../docs/architecture/supported-surfaces.md) — authoritative row-by-row ownership for deletions. + - [docs/architecture/template-draft-model-run-bundle-boundary.md](../../../docs/architecture/template-draft-model-run-bundle-boundary.md) — current/transitional/target diagrams that deletions must not contradict.
+ + ## References + + - [E-19 epic spec](./spec.md) + - [m-E19-01 spec](./m-E19-01-supported-surface-inventory.md) + - [work/decisions.md](../../decisions.md) — D-2026-04-07-017 (A6), D-2026-04-07-022 through D-2026-04-07-028 (shared framing and A1–A5) + - [E-18 epic spec](../E-18-headless-pipeline-and-optimization/spec.md) — downstream dependency for validation replacement + - kind: milestone + id: M-026 + frontmatter: + title: Schema, Template & Example Retirement + status: done + parent: E-19 + body: | + ## Goal + + Remove deprecated schema shapes, demo-template residue, schema-migration compatibility examples, and stale authoring docs from active first-party surfaces. When this milestone closes, no active `src/`, `templates/`, `examples/`, or `docs/` surface emits or promotes the deprecated `binMinutes` YAML authoring shape, and no schema-migration fixture or pre-v1 authoring spec survives on the current `examples/` or `docs/ui/` surfaces. + + ## Context + + [m-E19-01](./m-E19-01-supported-surface-inventory.md) inventoried active schema, template, example, and docs surfaces in the supported-surfaces matrix at [docs/architecture/supported-surfaces.md](../../../docs/architecture/supported-surfaces.md) and assigned owning milestones. Every row whose `Owning milestone` column is `m-E19-03` is executed here. + + [m-E19-02](./m-E19-02-sim-authoring-and-runtime-boundary-cleanup.md) already deleted the runtime seams (stored drafts, Sim ZIP archive layer, Engine bundle-import, runtime catalogs, `/api/v1/drafts/validate`, Engine `/v1/debug/scan-directory`) and narrowed `/api/v1/drafts/run` to inline-only. This milestone is the schema/authoring cleanup pass over the same supported-surface baseline. + + Scope boundaries inherited from m-E19-01: + + - `FlowTime.Core`, `FlowTime.Generator`, `FlowTime.API`, and `FlowTime.Sim.*` are **not renamed** and their high-level responsibilities do not change in E-19. 
+ - Analytical surfaces purified by E-16 (notably `MetricsContracts.MetricsGrid.BinMinutes` as a retained wire-format field, the `TimeGrid.BinMinutes` computed property, `ModelValidator`'s `binMinutes` rejection gate, and `TargetSchemaValidationTests` that assert the gate) are explicitly out of scope and must remain untouched. + - Engine and Sim runtime route deletions are not re-opened here — m-E19-02 owns them. + - Blazor stale-wrapper cleanup and demo-mode policy belong to m-E19-04. + - `POST /v1/run` / `POST /v1/graph` remain deferred per [D-2026-04-08-029](../../decisions.md#d-2026-04-08-029-defer-post-v1run-and-post-v1graph-deletion-out-of-m-e19-02-ac6-scope-narrowing). Tests that deserialize `Grid { int binMinutes }` against those routes stay as-is. + + The distinction this milestone enforces: + + - **`binMinutes` as a YAML authoring schema field** — deprecated. Engine's `ModelValidator` rejects it at parse time. Current authoring schema is `binSize` + `binUnit`. Any active surface emitting `binMinutes` in an authored YAML shape is in scope for this milestone. + - **`binMinutes` as the derived internal concept** (bin duration in minutes) — still live. `TimeGrid.BinMinutes`, `MetricsContracts.MetricsGrid.BinMinutes`, internal analytical math, and mathematical notation in architecture docs are out of scope. + + ## Acceptance Criteria + + ### AC1 — UI demo template generators emit current schema (schema-migration residue) + + `src/FlowTime.UI/Services/TemplateServiceImplementations.cs` is the Blazor mock template service used by demo mode. It currently declares two `JsonSchemaProperty` entries keyed `"binMinutes"` and emits three demo YAML strings with `grid: binMinutes: 60` / `binMinutes: 1440`. These are active surfaces promoting the deprecated YAML authoring shape. 
+ + **Rewrite:** + + - [src/FlowTime.UI/Services/TemplateServiceImplementations.cs:431](../../../src/FlowTime.UI/Services/TemplateServiceImplementations.cs) — remove the `["binMinutes"]` `JsonSchemaProperty` entry. Replace with a `binSize` / `binUnit` pair matching the current authoring schema, or drop the property if no demo template exposes a bin-duration parameter. + - [src/FlowTime.UI/Services/TemplateServiceImplementations.cs:475](../../../src/FlowTime.UI/Services/TemplateServiceImplementations.cs) — same treatment as line 431. + - [src/FlowTime.UI/Services/TemplateServiceImplementations.cs:1356](../../../src/FlowTime.UI/Services/TemplateServiceImplementations.cs) — demo YAML generator currently writes `grid:\n binMinutes: 60`. Rewrite to `binSize: 1\n binUnit: hours`. + - [src/FlowTime.UI/Services/TemplateServiceImplementations.cs:1462](../../../src/FlowTime.UI/Services/TemplateServiceImplementations.cs) — same; rewrite `binMinutes: 60` to `binSize: 1, binUnit: hours`. + - [src/FlowTime.UI/Services/TemplateServiceImplementations.cs:1536](../../../src/FlowTime.UI/Services/TemplateServiceImplementations.cs) — rewrite `binMinutes: 1440` to `binSize: 1, binUnit: days` (daily bins). + - Remove the top-of-file `⚠️ SCHEMA MIGRATION IN PROGRESS` warning comment once the file is clean. + + **Preserve:** + + - Any `JsonIgnore`-annotated computed `BinMinutes` property used purely for UI display (e.g. in `GridInfo`, `TimeTravelMetricsGridDto`) — these are internal convenience fields, not authoring shapes. + - Demo mode itself — m-E19-03 does not retire demo mode. Blazor demo-mode policy is m-E19-04. + + **Grep guard:** No `binMinutes` literal remains anywhere under `src/FlowTime.UI/Services/TemplateServiceImplementations.cs`. Broader `src/FlowTime.UI/` check is deferred to AC7 (grep guard script). 
+ + ### AC2 — UI sample fixture uses current schema + + [src/FlowTime.UI/wwwroot/sample/run-example.json](../../../src/FlowTime.UI/wwwroot/sample/run-example.json) currently reads: + + ```json + { "grid": { "bins": 8, "binMinutes": 60 }, ... } + ``` + + This is a static authoring fixture shipped with the Blazor UI and is not a wire-format response. Rewrite the grid shape to current schema: + + ```json + { "grid": { "bins": 8, "binSize": 1, "binUnit": "hours" }, ... } + ``` + + **Grep guard:** No `binMinutes` literal remains under `src/FlowTime.UI/wwwroot/`. + + ### AC3 — CLI verbose output label uses current schema + + [src/FlowTime.Cli/Program.cs:98](../../../src/FlowTime.Cli/Program.cs) currently prints: + + ```csharp + Console.WriteLine($" Grid: bins={grid.Bins}, binMinutes={grid.BinMinutes}"); + ``` + + The underlying `TimeGrid` record already exposes `BinSize` and `BinUnit` ([src/FlowTime.Core/Models/TimeGrid.cs:58-59](../../../src/FlowTime.Core/Models/TimeGrid.cs)). Rewrite the label to expose the current schema shape: + + ```csharp + Console.WriteLine($" Grid: bins={grid.Bins}, binSize={grid.BinSize}, binUnit={grid.BinUnit.ToString().ToLowerInvariant()}"); + ``` + + The computed `TimeGrid.BinMinutes` property itself stays — it is the live internal concept, not a deprecated schema field. Only the user-facing label string changes. + + **Grep guard:** No `binMinutes` literal remains under `src/FlowTime.Cli/`. + + ### AC4 — Active architecture docs use current schema in YAML examples + + Two active architecture docs contain YAML authoring examples still using the deprecated grid shape. Rewrite every YAML example; leave mathematical notation that uses `binMinutes` as the live derived concept (AC4 is about authoring shapes, not math). + + **Rewrite YAML examples:** + + - [docs/architecture/whitepaper.md:250](../../../docs/architecture/whitepaper.md) — `grid: { bins: 6, binMinutes: 5 }` → `grid: { bins: 6, binSize: 5, binUnit: minutes }`. 
+ - [docs/architecture/retry-modeling.md:417](../../../docs/architecture/retry-modeling.md) — `grid: { bins: 24, binMinutes: 60 }` → `grid: { bins: 24, binSize: 1, binUnit: hours }`. + - [docs/architecture/retry-modeling.md:466](../../../docs/architecture/retry-modeling.md) — same rewrite. + - [docs/architecture/retry-modeling.md:527](../../../docs/architecture/retry-modeling.md) — same rewrite. + + **Explicitly leave alone:** + + - [docs/architecture/whitepaper.md:77](../../../docs/architecture/whitepaper.md) — Little's Law formula `W[t] ≈ Q[t] / served_rate[t] * binMinutes`. This is mathematical notation for the live derived concept (bin duration in minutes), not a schema reference. `TimeGrid.BinMinutes`, `MetricsGrid.BinMinutes`, and internal evaluator math all still use this concept. **Append the inline marker `<!-- m-E19-03:allow-binminutes-notation -->` to the end of this line** so the grep guard script can deterministically allowlist it. + - [docs/architecture/reviews/*](../../../docs/architecture/reviews/) — historical point-in-time review snapshots. Out of scope. + - [docs/schemas/model.schema.md](../../../docs/schemas/model.schema.md) and [docs/schemas/model.schema.yaml](../../../docs/schemas/model.schema.yaml) — authoritative migration docs that explain the historical transition. Their `binMinutes` references are documented history, not current guidance. + + **Grep guard:** No `binMinutes` literal remains in `docs/architecture/whitepaper.md` or `docs/architecture/retry-modeling.md` **except** lines containing the marker `m-E19-03:allow-binminutes-notation`. The marker is an HTML comment that Markdown renderers strip from display; it lets the grep-guard script allowlist legitimate derived-concept notation without depending on drift-prone line numbers. + + ### AC5 — Schema-migration example fixtures archived + + The three schema-migration example YAMLs under `examples/` exist solely as back-compat coverage fixtures, not as current user-facing examples.
Per m-E19-01's supported-surfaces matrix (row for schema-migration compatibility examples), their decision is `archive`. + + **Move (preserve git history via `git mv`):** + + - [examples/test-old-schema.yaml](../../../examples/test-old-schema.yaml) → `examples/archive/test-old-schema.yaml` + - [examples/test-no-schema.yaml](../../../examples/test-no-schema.yaml) → `examples/archive/test-no-schema.yaml` + - [examples/test-new-schema.yaml](../../../examples/test-new-schema.yaml) → `examples/archive/test-new-schema.yaml` + + **Delete:** + + - [examples/time-travel/](../../../examples/time-travel/) — empty leftover directory. `rmdir` it. + + **Audit:** + + - Search for callers of the moved files in `src/`, `tests/`, `docs/`, and scripts. Update any references to point at the new `examples/archive/` path or remove the reference if it is dead. + - Add a short `examples/archive/README.md` (or update any existing README in that folder) noting that the files are schema-migration fixtures preserved for historical reference, not current examples. + + **Grep guard:** No path `examples/test-old-schema.yaml`, `examples/test-no-schema.yaml`, or `examples/test-new-schema.yaml` remains referenced anywhere in `src/`, `tests/`, or active `docs/` content. Matches under `examples/archive/` and `docs/archive/` are allowed. + + ### AC6 — Stale template-integration spec archived + + [docs/ui/template-integration-spec.md](../../../docs/ui/template-integration-spec.md) is a pre-v1 UI spec that references `/api/templates/{templateId}/schema` and `/api/templates/generate` routes (pre-v1 template surface), contains `binMinutes` references, and carries its own `⚠️ SCHEMA MIGRATION IN PROGRESS` warning. 
Per m-E19-01's matrix (`archive/update`), move it to the archive tree: + + **Move:** + + - [docs/ui/template-integration-spec.md](../../../docs/ui/template-integration-spec.md) → `docs/archive/ui/template-integration-spec.md` + + **Audit:** + + - Search for inbound links in `docs/`, `README.md`, `CLAUDE.md`, and any other active doc. Remove dead links or update to the archive path. + + **Grep guard:** No active docs (outside `docs/archive/`) reference `docs/ui/template-integration-spec.md` or the pre-v1 routes `/api/templates/{id}/schema` or `/api/templates/generate`. + + ### AC7 — Catalog-stale phrasing in active docs updated + + m-E19-02 deleted all catalog routes, services, UI components, and DTOs per A5. Two active docs still carry leftover phrasing describing Sim as owning "template/catalog" endpoints. Rewrite the phrasing: + + **Rewrite:** + + - [docs/guides/UI.md:3](../../../docs/guides/UI.md) — drop `template/catalog calls` to `template calls` (the Sim API hosts template authoring, not catalogs). + - [docs/reference/contracts.md:111](../../../docs/reference/contracts.md) — drop `template/catalog endpoints for model generation` to `template endpoints for model generation`. + - [docs/reference/engine-capabilities.md:30](../../../docs/reference/engine-capabilities.md) — rewrite `no catalog/export/import/registry endpoints` to drop `catalog/` for consistency with m-E19-02's catalog retirement. The statement becomes `no streaming endpoints; no export/import/registry endpoints` (or the closest natural phrasing). The line is factually true either way — this is a consistency edit, not a correction. + + **Explicitly leave alone (not in scope):** + + - [docs/templates/profiles.md:58](../../../docs/templates/profiles.md) — incidental English phrase `catalog authors stay consistent`, not a FlowTime catalog reference. 
+ - [docs/architecture/template-draft-model-run-bundle-boundary.md](../../../docs/architecture/template-draft-model-run-bundle-boundary.md) — already documents catalogs in their historical/retired context correctly. + - [docs/reference/contracts.md:124](../../../docs/reference/contracts.md) — already correctly notes `no catalog endpoints are shipped`. + + **Grep guard:** No `template/catalog` literal remains in `docs/guides/UI.md` or `docs/reference/contracts.md`. No `catalog/export/import/registry` phrasing remains in `docs/reference/engine-capabilities.md`. + + ### AC8 — Test fixtures with stale parameter keys cleaned + + [tests/FlowTime.UI.Tests/ParameterConversionIntegrationTests.cs](../../../tests/FlowTime.UI.Tests/ParameterConversionIntegrationTests.cs) uses `["binMinutes"] = 60` as a template parameter key in three test dictionaries (lines 23, 51, 107). Active templates expose `binSize` (not `binMinutes`) — see [templates/transportation-basic.yaml:23](../../../templates/transportation-basic.yaml) — so the test key references a template parameter that does not exist. The test itself is about parameter type conversion (string arrays vs number arrays being serialized to `demandPattern` / `capacityPattern`) and does not assert anything about the grid parameter key, so a rename preserves semantic meaning. + + **Rewrite:** + + - Lines 23, 51, 107 — rename the key `["binMinutes"]` to `["binSize"]` in each dictionary literal. Value stays `60` (which is now "60 minutes" interpreted per `binUnit: minutes`, matching the transportation-basic template's default parameter). The assertions on `demandPattern` and `capacityPattern` serialization remain unchanged and continue to exercise the type-conversion behavior the test is named after. + - Confirm at commit time that the test class still passes after the rename with no other edits. + + **Explicitly leave alone:** + + - Any test that uses `binMinutes` as an internal local variable name (e.g. 
[tests/FlowTime.UI.Tests/TemplateServiceMetadataTests.cs](../../../tests/FlowTime.UI.Tests/TemplateServiceMetadataTests.cs)) — local naming, not schema. + - Any test that asserts `binMinutes` is rejected by validators (e.g. [tests/FlowTime.Tests/Schema/TargetSchemaValidationTests.cs](../../../tests/FlowTime.Tests/Schema/TargetSchemaValidationTests.cs)) — legitimate invariant test. + - Any test that asserts `binMinutes` does **not** appear in serialized JSON (e.g. [tests/FlowTime.UI.Tests/GridInfoSchemaTests.cs](../../../tests/FlowTime.UI.Tests/GridInfoSchemaTests.cs), `SimGridInfoSchemaTests.cs`) — legitimate invariant test. + - [tests/FlowTime.Tests/ApiIntegrationTests.cs:93](../../../tests/FlowTime.Tests/ApiIntegrationTests.cs), [tests/FlowTime.Api.Tests/Legacy/ApiIntegrationTests.cs:188](../../../tests/FlowTime.Api.Tests/Legacy/ApiIntegrationTests.cs) — `Grid { int binMinutes }` DTOs that deserialize the retained `MetricsGrid.BinMinutes` wire-format field from `POST /v1/run` and `POST /v1/graph`. Deferred per D-2026-04-08-029. + - [tests/FlowTime.Api.Tests/StateEndpointTests.cs](../../../tests/FlowTime.Api.Tests/StateEndpointTests.cs), [tests/FlowTime.Api.Tests/StateResponseSchemaTests.cs](../../../tests/FlowTime.Api.Tests/StateResponseSchemaTests.cs) — state query tests passing the current request-shape including the retained `binMinutes` field. Active contract. + - [tests/FlowTime.Api.Tests/Golden/metrics-run_metrics_fixture.json](../../../tests/FlowTime.Api.Tests/Golden/metrics-run_metrics_fixture.json) — golden fixture for the retained `MetricsGrid` response shape. + - [tests/FlowTime.Core.Tests/Safety/NaNPolicyTests.cs](../../../tests/FlowTime.Core.Tests/Safety/NaNPolicyTests.cs) — internal `ComputeLatencyMinutes(binMinutes: …)` helper parameter name, not a schema reference. + + **Grep guard:** No `["binMinutes"]` dictionary-key literal remains in `tests/FlowTime.UI.Tests/ParameterConversionIntegrationTests.cs`. 
+ + ### AC9 — Grep-guard script codified + + Create `scripts/m-E19-03-grep-guards.sh` mirroring the structure of `scripts/m-E19-02-grep-guards.sh`. Every guard listed in AC1–AC8 becomes a line in the script. The script must exit 0 when all guards pass. + + **Guards, as implemented in the script (each a named test):** + + 1. No `binMinutes` in `src/FlowTime.UI/Services/TemplateServiceImplementations.cs` + 2. No `binMinutes` in `src/FlowTime.UI/wwwroot/` + 3. No `binMinutes` in `src/FlowTime.Cli/` + 4. No `binMinutes` in `docs/architecture/whitepaper.md` **except** lines containing the comment marker `m-E19-03:allow-binminutes-notation`. The script filters with `grep -v 'm-E19-03:allow-binminutes-notation'` before counting matches. + 5. No `binMinutes` in `docs/architecture/retry-modeling.md` + 6. No `examples/test-old-schema.yaml`, `examples/test-no-schema.yaml`, or `examples/test-new-schema.yaml` path literal outside `examples/archive/` and `docs/archive/` + 7. No active reference (outside `docs/archive/`) to `docs/ui/template-integration-spec.md` + 8. No active reference to pre-v1 routes `/api/templates/{id}/schema` or `/api/templates/generate` outside `docs/archive/` and release notes + 9. No `template/catalog` literal in `docs/guides/UI.md` or `docs/reference/contracts.md` + 10. No `catalog/export/import/registry` literal in `docs/reference/engine-capabilities.md` + 11. No `["binMinutes"]` dictionary-key literal in `tests/FlowTime.UI.Tests/ParameterConversionIntegrationTests.cs` + + Scoped searches are limited to `src/`, `tests/`, `docs/`, `examples/`, and `templates/` by default, with per-guard exclusions for `docs/archive/`, `docs/releases/`, `docs/architecture/reviews/`, `work/epics/completed/`, and anywhere the guard explicitly allowlists (including the comment-marker allowlist on guard 4). + + The script runs locally and in the wrap pass. 
It is not wired into CI in this milestone — `scripts/m-E19-02-grep-guards.sh` remains the pattern, and CI wiring is deferred. + + ### AC10 — Tracking doc and status surfaces reconciled + + - Create `work/epics/E-19-surface-alignment-and-compatibility-cleanup/m-E19-03-schema-template-example-retirement-tracking.md` at milestone start and update it after each AC lands. Tracking doc records: per-AC file changes, grep-guard results, test counts, and deviations from the spec (if any). + - Flip milestone status in a single reconciliation pass at wrap time: + - This spec: `draft` → `in-progress` at start → `completed` at wrap. + - [work/epics/E-19-surface-alignment-and-compatibility-cleanup/spec.md](./spec.md) milestone table: `m-E19-03` status `next` → `in-progress` → `completed`; header `Status:` line updated; `## Milestones` sequence note updated to point at `m-E19-04`. + - [ROADMAP.md](../../../ROADMAP.md) E-19 section: sync m-E19-03 completion and name `m-E19-04` as next. + - [work/epics/epic-roadmap.md](../epic-roadmap.md) E-19 row: same sync. + - [CLAUDE.md](../../../CLAUDE.md) Current Work section: sync E-19 topology and next-step pointer. + - All status-surface updates happen in a single wrap commit after the grep guards pass. + + ## Technical Notes + + ### Commit plan (bundled) + + ACs are grouped into five focused commits plus the wrap. Each bundle is a single atomic concept so bisect points to one conceptual slice of the milestone. + + 1. **Bundle A — deprecated `binMinutes` authoring shape in code (AC1 + AC2 + AC3 + AC8).** Four code/fixture edits that all remove `binMinutes` as an authoring/parameter/display label: `TemplateServiceImplementations.cs` demo generators, `run-example.json` fixture, `Cli/Program.cs` verbose label, `ParameterConversionIntegrationTests.cs` parameter key. One conceptual cleanup, one commit. + 2. 
**Bundle B — active docs cleanup (AC4 + AC7).** Rewrites deprecated YAML examples in `whitepaper.md`/`retry-modeling.md` and the catalog-stale phrasing in `UI.md`/`contracts.md`/`engine-capabilities.md`. One docs-cleanup pass, one commit. Includes appending the `m-E19-03:allow-binminutes-notation` marker to `whitepaper.md:77`. + 3. **Bundle C — archive moves (AC5 + AC6).** Move 3 test-schema YAMLs to `examples/archive/`, move `template-integration-spec.md` to `docs/archive/ui/`, delete empty `examples/time-travel/`, update any inbound references. All archive operations in one commit so the tree is never half-migrated. + 4. **Grep guard script (AC9).** Its own commit. The script must pass against the tree from commits 1–3, proving the cleanup is complete before the wrap. + 5. **Wrap (AC10).** Tracking doc finalization and status-surface reconciliation in a single commit after the grep guards pass. + + If any bundle surfaces a complication at implementation time (e.g. inbound reference to an archived file requires a cross-bundle edit), stop and present options before widening or splitting the bundle, the way m-E19-02 handled the AC6 scope narrowing. + + ### Implementation notes + + - All archive moves use `git mv` so rename history stays intact. + - Do not rewrite or delete historical review docs, completed-milestone specs, or archived Sim docs under `docs/archive/docs-sim/`. Anything under `docs/archive/`, `docs/releases/`, `work/epics/completed/`, or `docs/architecture/reviews/` is out of scope regardless of whether it contains `binMinutes` or `catalog` references. + - Do not introduce new demo templates or new sample fixtures. If a demo yaml generator no longer has a natural `binMinutes`-equivalent parameter after the rewrite, drop the parameter rather than inventing a new one. + - Do not add advisory comments like `// deprecated, see m-E19-04` to files being rewritten. Forward-only — once the schema shape is current, no migration commentary is needed. 
+ + - When moving files to `docs/archive/ui/` and `examples/archive/`, ensure the target directory exists (create it with `mkdir -p` if needed) before the `git mv`. + - The grep-guard script allowlists `whitepaper.md:77` via an HTML comment marker on the line itself (`<!-- m-E19-03:allow-binminutes-notation -->`). Markdown renderers strip the comment from display; the script filters matching lines with `grep -v`. This avoids the drift problem that line-number allowlists hit. + + ## Preserved Surfaces + + Explicit list of surfaces that must remain untouched by this milestone. Any accidental change to these surfaces is a milestone regression. + + - `src/FlowTime.Core/Models/TimeGrid.cs` — `BinMinutes` computed property is the live internal concept. + - `src/FlowTime.Core/Models/ModelValidator.cs` — `binMinutes` rejection gate at `ValidateGrid`. + - `src/FlowTime.Core/Metrics/RuntimeAnalyticalEvaluator.cs` — internal `binMinutes` parameter on helper methods. + - `src/FlowTime.Contracts/TimeTravel/MetricsContracts.cs` — `MetricsGrid.BinMinutes` retained wire-format field. + - `src/FlowTime.API/Services/MetricsService.cs`, `StateQueryService.cs`, `AggregatesCsvExporter.cs`, `NdjsonExporter.cs`, `ParquetExporter.cs` — internal `BinMinutes` options and computed fields driven by the retained `MetricsGrid` contract. + - `src/FlowTime.UI/Services/FlowTimeApiModels.cs`, `FlowTimeSimApiClient.cs` — `[JsonIgnore]`-annotated computed `BinMinutes` display helpers and their `NOT serialized to/from JSON` comments. + - `src/FlowTime.UI/Services/TimeTravelMetricsClient.cs`, `TimeTravelApiModels.cs` — `binMinutes` consumption of the retained `MetricsGrid` field for display. + - `src/FlowTime.UI/Services/SimResultsService.cs` — `var binMinutes = 60` internal variable for mock-data generation. + - `src/FlowTime.UI/Pages/Simulate.razor`, `Pages/TimeTravel/Topology.razor` — UI consumers of the computed `GridInfo.BinMinutes` display helper. 
+ - `docs/schemas/model.schema.md`, `docs/schemas/model.schema.yaml` — authoritative migration docs. + - `docs/architecture/reviews/*` — historical review snapshots. + - `docs/architecture/whitepaper.md:77` — Little's Law math notation. + - `examples/m0.const.yaml`, `m0.const.sim.yaml`, `m0.poisson.sim.yaml`, `m15.complex-pmf.yaml`, `m2.pmf.yaml`, `class-enabled.yaml`, `hello/model.yaml`, `http-demo/*.csv` — active examples, already on current schema. + - All 12 YAML files under `templates/` — already on current schema per the template sweep. + - `tests/FlowTime.Tests/Schema/TargetSchemaValidationTests.cs`, `tests/FlowTime.UI.Tests/GridInfoSchemaTests.cs`, `tests/FlowTime.UI.Tests/SimGridInfoSchemaTests.cs`, `tests/FlowTime.UI.Tests/GraphRunResultSchemaTests.cs`, `tests/FlowTime.Core.Tests/Safety/NaNPolicyTests.cs`, `tests/FlowTime.UI.Tests/TemplateServiceMetadataTests.cs`, `tests/FlowTime.Tests/ApiIntegrationTests.cs`, `tests/FlowTime.Api.Tests/Legacy/ApiIntegrationTests.cs`, `tests/FlowTime.Api.Tests/StateEndpointTests.cs`, `tests/FlowTime.Api.Tests/StateResponseSchemaTests.cs`, `tests/FlowTime.Api.Tests/Golden/metrics-run_metrics_fixture.json` — all exercising retained contracts or internal naming. + + ## Out of Scope + + - Touching or rewriting `MetricsGrid.BinMinutes` or any other E-16-purified analytical contract field. + - Rewriting the Little's Law formula in `whitepaper.md:77`. Math notation for the live derived concept stays. + - Rewriting historical review docs under `docs/architecture/reviews/`. + - Rewriting authoritative migration docs under `docs/schemas/model.schema.md` and `docs/schemas/model.schema.yaml`. + - Retiring Blazor demo mode itself. That is m-E19-04 territory. + - Deleting demo-mode `TemplateServiceImplementations.cs` wholesale. m-E19-03 narrows, m-E19-04 decides demo-mode policy. + - `POST /v1/run` and `POST /v1/graph` and their test fixtures — deferred per D-2026-04-08-029. 
+ - Removing the `binMinutes` rejection gate in `ModelValidator` or its covering test. The gate is load-bearing. + - Introducing or documenting `FlowTime.TimeMachine`. That is E-18 m-E18-01a. + - New template files, new examples, or new demo YAML generators. + - Performance, observability, or error-handling improvements unrelated to the schema/template/example retirement. + - CI wiring for `scripts/m-E19-03-grep-guards.sh`. The script exists and runs locally; CI integration is deferred. + - Updating release notes, completed-epic specs, or other historical material under `docs/releases/`, `docs/archive/`, or `work/epics/completed/`. + + ## Guards / DO NOT + + - **DO NOT** touch `MetricsContracts.MetricsGrid.BinMinutes`, `TimeGrid.BinMinutes`, the `ModelValidator` rejection gate, or `docs/schemas/model.schema.md`/`.yaml`. These are retained surfaces and their grep matches are legitimate. + - **DO NOT** rewrite the Little's Law formula on `whitepaper.md:77`. The mathematical notation is not a schema reference. + - **DO NOT** delete `examples/test-old-schema.yaml`, `test-no-schema.yaml`, or `test-new-schema.yaml`. They are archived, not deleted — schema-transition coverage is still useful history. + - **DO NOT** archive or delete `docs/schemas/model.schema.md` or `docs/schemas/model.schema.yaml`. These are authoritative migration docs. + - **DO NOT** retire Blazor demo mode or `CatalogService`-equivalent residue not already covered by m-E19-02. Demo-mode policy is m-E19-04. + - **DO NOT** introduce compatibility shims, `binMinutes`-to-`binSize`/`binUnit` converters, or new helper utilities. Rewrite YAML examples in place; they are static content. + - **DO NOT** add advisory comments pointing at m-E19-04 or at deleted surfaces. Forward-only. + - **DO NOT** leave partially archived directories behind. If a moved file has inbound references, update the references in the same commit (or file a grep-guard failure for the wrap pass to catch). 
+ - **DO NOT** widen the milestone scope to include runtime endpoint changes, Contracts-level refactors, or cross-project deletions. Those are other milestones. + - **DO NOT** commit before explicit human approval per the repo's Hard Rules. + + ## Dependencies + + - [m-E19-01 Supported Surface Inventory, Boundary ADR & Exit Criteria](./m-E19-01-supported-surface-inventory.md) — supplies the retention/archive decisions and grep-guard taxonomy this milestone executes. + - [m-E19-02 Sim Authoring & Runtime Boundary Cleanup](./m-E19-02-sim-authoring-and-runtime-boundary-cleanup.md) — already removed the runtime seams (catalogs, drafts CRUD, bundle import) whose residue AC7 finishes cleaning up in the docs layer. + - [docs/architecture/supported-surfaces.md](../../../docs/architecture/supported-surfaces.md) — authoritative row-by-row ownership. + + ## References + + - [E-19 epic spec](./spec.md) + - [m-E19-01 spec](./m-E19-01-supported-surface-inventory.md) + - [m-E19-02 spec](./m-E19-02-sim-authoring-and-runtime-boundary-cleanup.md) + - [work/decisions.md](../../decisions.md) — D-2026-04-07-022 (shared framing), D-2026-04-07-027 (catalogs retired), D-2026-04-08-029 (deferred `/v1/run` `/v1/graph`) + - [scripts/m-E19-02-grep-guards.sh](../../../scripts/m-E19-02-grep-guards.sh) — template for the m-E19-03 grep-guard script + - kind: milestone + id: M-027 + frontmatter: + title: Blazor Support Alignment + status: done + parent: E-19 + body: | + ## Goal + + Remove stale `FlowTime.UI` Sim-client compatibility wrappers and the broken caller assumptions built on them, rewire the surviving Blazor authoring and run-query flows onto the supported Sim orchestration endpoint and Engine query API, and confirm the Svelte client layer stays aligned to current contracts. 
When this milestone closes, Blazor remains a supported first-party UI whose Sim client surface is exactly the row 63 supported set (`HealthAsync`, `GetDetailedHealthAsync`, `GetTemplatesAsync`, `GetTemplateAsync`, `GenerateModelAsync`, `CreateRunAsync`) and every Blazor caller either reaches a live endpoint or has been deleted alongside its wrapper. + + ## Context + + [m-E19-01](./m-E19-01-supported-surface-inventory.md) published the supported-surfaces matrix in [docs/architecture/supported-surfaces.md](../../../docs/architecture/supported-surfaces.md) and assigned the Blazor HTTP call-site rows (63–65) and the Svelte alignment rows (66–67) to this milestone. [m-E19-02](./m-E19-02-sim-authoring-and-runtime-boundary-cleanup.md) deleted the Sim runtime seams those wrappers would have depended on (stored drafts CRUD, Sim ZIP archive layer, Engine bundle-import, runtime catalogs, `/api/v1/drafts/validate`, `GET /v1/debug/scan-directory`) and narrowed `/api/v1/drafts/run` to inline-only. [m-E19-03](./m-E19-03-schema-template-example-retirement.md) retired deprecated schema, template, and example residue from active surfaces. + + This milestone is the Blazor client-layer cleanup pass over that cleaned-up baseline. Every row whose `Owning milestone` column in the matrix is `m-E19-04` is executed here. + + Scope boundaries inherited from the epic and m-E19-01: + + - Blazor is not retired. Blazor remains a supported first-party UI for debugging, operator workflows, and as plan-B to Svelte per the Blazor/Svelte support policy in [docs/architecture/supported-surfaces.md](../../../docs/architecture/supported-surfaces.md). + - Feature parity between Blazor and Svelte is not a goal. Svelte is intentionally behind Blazor. + - Demo mode stays. `FlowTimeSimService.RunDemoModeSimulationAsync` and the demo-data generators in `TemplateServiceImplementations.cs` are preserved as-is. 
+ - `FlowTime.Core`, `FlowTime.Generator`, `FlowTime.API`, and `FlowTime.Sim.*` are not renamed and their high-level responsibilities do not change. + - Analytical surfaces purified by E-16 are out of scope. + - Engine and Sim runtime route deletions are not re-opened. m-E19-02 owns them. + - Schema/template/example/docs retirement is not re-opened. m-E19-03 owns it. + - `POST /v1/run` and `POST /v1/graph` remain deferred per [D-2026-04-08-029](../../decisions.md#d-2026-04-08-029-defer-post-v1run-and-post-v1graph-deletion-out-of-m-e19-02-ac6-scope-narrowing). `TemplateRunner.razor`'s engine-eval flow at line 744 consuming `IRunClient.RunAsync` (routed via `ApiRunClient` → `IFlowTimeApiClient.RunAsync` → Engine `POST /v1/run`) stays on that deferred surface — this milestone does not touch it. + - `FlowTimeSimApiClientWithFallback` and `PortDiscoveryService` are legitimate dev-environment port discovery, not a compatibility shim. Their pass-through methods for deleted interface members are removed in this milestone; their port-discovery bootstrap stays. + + The key distinction this milestone enforces: + + - **Stale wrapper** — an `IFlowTimeSimApiClient` method that calls a Sim route that no longer exists (`RunAsync` → removed `/api/v1/run`; `GetIndexAsync`/`GetSeriesAsync` → `/api/v1/runs/{id}/index`, `/api/v1/runs/{id}/series/{id}` which were never added on Sim). These are the row 64 and row 65 targets. Delete. + - **Supported Sim client call** — methods backed by live Sim routes (`HealthAsync`, `GetDetailedHealthAsync`, `GetTemplatesAsync`, `GetTemplateAsync`, `GenerateModelAsync`, `CreateRunAsync`). These are the row 63 targets. Keep, audit for drift. + - **Engine API client** (`IFlowTimeApiClient`) — the correct surface for run queries (index, series, state, metrics). Rewired callers use this for every run query previously routed at the stale Sim wrappers. 
+ + ## Acceptance Criteria + + ### AC1 — Stale `RunAsync` wrapper deleted (row 64) + + `RunAsync(string yaml, ...)` on `IFlowTimeSimApiClient` targets `POST /api/v1/run` on the Sim service, which was removed on 2025-10-01 and does not return even when Sim is reachable. The file itself marks the method broken with a TODO comment. + + **Delete:** + + - `Task<ServiceResult<SimRunResponse>> RunAsync(string yaml, CancellationToken ct = default)` from the `IFlowTimeSimApiClient` interface at [src/FlowTime.UI/Services/FlowTimeSimApiClient.cs:9](../../../src/FlowTime.UI/Services/FlowTimeSimApiClient.cs). + - The implementation body and its TODO comment at [src/FlowTime.UI/Services/FlowTimeSimApiClient.cs:100-130](../../../src/FlowTime.UI/Services/FlowTimeSimApiClient.cs). + - The pass-through method `FlowTimeSimApiClientWithFallback.RunAsync` at [src/FlowTime.UI/Services/FlowTimeSimApiClientWithFallback.cs:72-76](../../../src/FlowTime.UI/Services/FlowTimeSimApiClientWithFallback.cs). + - `SimRunResponse` DTO and related deserialization types if they are unused after this AC and AC2 complete. + + **Preserve:** + + - `ApiRunClient.RunAsync` at [src/FlowTime.UI/Services/ApiRunClient.cs:14](../../../src/FlowTime.UI/Services/ApiRunClient.cs) — routes through `IFlowTimeApiClient.RunAsync` → Engine `POST /v1/run`, which is deferred per D-2026-04-08-029. + - `RunClientRouter.RunAsync` at [src/FlowTime.UI/Services/RunClientRouter.cs:24](../../../src/FlowTime.UI/Services/RunClientRouter.cs) and `SimulationRunClient.RunAsync` — these are `IRunClient` members, not `IFlowTimeSimApiClient` members, and feed the deferred Engine direct-eval path. + - `FlowTimeSimApiClient.CreateRunAsync` (row 63) — the supported Sim orchestration wrapper. + + **Grep guard:** No declaration or use of `IFlowTimeSimApiClient.RunAsync` remains in `src/FlowTime.UI/` or `tests/FlowTime.UI.Tests/`. The literal `api/v1/run` (i.e. 
the Sim `/api/v1/run` path, as distinct from Sim `/api/v1/runs/...` query routes which also do not exist and are covered by AC2) must not appear anywhere in `src/FlowTime.UI/Services/FlowTimeSimApiClient.cs` or `src/FlowTime.UI/Services/FlowTimeSimApiClientWithFallback.cs`. + + ### AC2 — Stale `GetIndexAsync` and `GetSeriesAsync` wrappers deleted (row 65) + + `GetIndexAsync` and `GetSeriesAsync` on `IFlowTimeSimApiClient` target `GET /api/v1/runs/{runId}/index` and `GET /api/v1/runs/{runId}/series/{seriesId}` on the Sim service. Neither route exists on Sim today and both files are marked broken with TODO comments pointing at Engine API as the correct target. + + **Delete:** + + - `Task<ServiceResult<SeriesIndex>> GetIndexAsync(string runId, ...)` from the `IFlowTimeSimApiClient` interface at [src/FlowTime.UI/Services/FlowTimeSimApiClient.cs:10](../../../src/FlowTime.UI/Services/FlowTimeSimApiClient.cs). + - `Task<ServiceResult<double[]>> GetSeriesAsync(string runId, string seriesId, ...)` from the `IFlowTimeSimApiClient` interface at [src/FlowTime.UI/Services/FlowTimeSimApiClient.cs:11](../../../src/FlowTime.UI/Services/FlowTimeSimApiClient.cs). + - Both implementation bodies and their TODO comments at [src/FlowTime.UI/Services/FlowTimeSimApiClient.cs:132-183](../../../src/FlowTime.UI/Services/FlowTimeSimApiClient.cs). + - The pass-through methods `FlowTimeSimApiClientWithFallback.GetIndexAsync` and `FlowTimeSimApiClientWithFallback.GetSeriesAsync` at [src/FlowTime.UI/Services/FlowTimeSimApiClientWithFallback.cs:78-88](../../../src/FlowTime.UI/Services/FlowTimeSimApiClientWithFallback.cs). + + **Preserve:** + + - `IFlowTimeApiClient.GetRunIndexAsync` and `GetRunSeriesAsync` — the canonical Engine run-query client methods. Every Blazor run-query caller routes through these after AC3 and AC5. + - `SeriesIndex` type itself — still consumed by the Engine client return type. 
+ + **Grep guard:** No declaration or use of `IFlowTimeSimApiClient.GetIndexAsync` or `IFlowTimeSimApiClient.GetSeriesAsync` remains in `src/FlowTime.UI/` or `tests/FlowTime.UI.Tests/`. No literal `api/v1/runs/{` followed by `/index` or `/series/` constructed against a Sim base address remains in `src/FlowTime.UI/Services/FlowTimeSimApiClient.cs` or `src/FlowTime.UI/Services/FlowTimeSimApiClientWithFallback.cs`. + + ### AC3 — `FlowTimeSimService` API-mode data generation rewired to orchestration + + `FlowTimeSimService.RunApiModeSimulationAsync` at [src/FlowTime.UI/Services/TemplateServiceImplementations.cs:951-1008](../../../src/FlowTime.UI/Services/TemplateServiceImplementations.cs) currently: + + 1. Generates a local YAML spec. + 2. Calls `simClient.RunAsync(yamlSpec)` → the broken wrapper AC1 deletes. + 3. Builds a `ResultsUrl` pointing at `/{apiVersion}/sim/runs/{runId}/index` — a path that does not exist on either Sim or Engine. + + The supported replacement path is `CreateRunAsync` (row 63) on the Sim orchestration endpoint `POST /api/v1/orchestration/runs`, which accepts a `RunCreateRequestDto(templateId, mode, parameters, rng, telemetry)` and returns a canonical `RunCreateResponseDto` with a runId that the Engine API read surface recognises. + + **Rewrite:** + + - Replace the `GenerateSimulationYamlAsync` → `simClient.RunAsync(yamlSpec)` path inside `RunApiModeSimulationAsync` with a direct `simClient.CreateRunAsync(new RunCreateRequestDto(request.TemplateId, "simulation", request.Parameters, rng: null, telemetry: null))` call. The Sim orchestration endpoint owns template expansion — the local YAML-generation helper is no longer needed for this flow. + - Remove `ResultsUrl = $"/{simConfig.ApiVersion}/sim/runs/{runId}/index"` at [src/FlowTime.UI/Services/TemplateServiceImplementations.cs:987](../../../src/FlowTime.UI/Services/TemplateServiceImplementations.cs). 
Blazor consumers of `SimulationRunResult` already look up results by `RunId` through `SimResultsService`; the ad-hoc URL was dead guidance. + - Update `GetRunStatusAsync` at [src/FlowTime.UI/Services/TemplateServiceImplementations.cs:1012-1060](../../../src/FlowTime.UI/Services/TemplateServiceImplementations.cs) to call `apiClient.GetRunIndexAsync(runId, ct)` (Engine) instead of `simClient.GetIndexAsync(runId)` (deleted wrapper). The status-inference logic (completed / not_found / running) stays the same. + - If `GenerateSimulationYamlAsync` and any supporting schema-translation helpers in `TemplateServiceImplementations.cs` become unreachable after the rewrite, delete them. If they still have at least one live caller outside the rewritten method, leave them alone. + + **Preserve:** + + - `RunDemoModeSimulationAsync` and every demo-mode code path at [src/FlowTime.UI/Services/TemplateServiceImplementations.cs:898-949](../../../src/FlowTime.UI/Services/TemplateServiceImplementations.cs). Demo mode is not retired by this milestone. + - `FlowTimeSimService.RunSimulationAsync`'s outer demo-vs-api branch — only the API-mode branch body changes. + + **Grep guard:** No `simClient.RunAsync(` or `simClient.GetIndexAsync(` call site remains in `src/FlowTime.UI/Services/TemplateServiceImplementations.cs`. No `/sim/runs/` URL literal remains anywhere in `src/FlowTime.UI/`. + + ### AC4 — `SimResultsService` run queries go through the Engine API only + + `SimResultsService.GetSimulationResultsAsync` at [src/FlowTime.UI/Services/SimResultsService.cs:38-124](../../../src/FlowTime.UI/Services/SimResultsService.cs) currently branches on `isEngineRun` (a `runId.StartsWith("run_")` check) and calls either `apiClient.GetRunIndexAsync`/`GetRunSeriesAsync` (for engine runs) or the stale `simClient.GetIndexAsync`/`GetSeriesAsync` (for non-engine runs). 
After AC3 rewires data generation onto `CreateRunAsync`, every API-mode run produces a canonical Engine-format runId, so the branch is dead. + + **Rewrite:** + + - Delete the `isEngineRun` branch at [src/FlowTime.UI/Services/SimResultsService.cs:57-96](../../../src/FlowTime.UI/Services/SimResultsService.cs). Route every non-demo run through `apiClient.GetRunIndexAsync(runId, ct)` for the series index and `apiClient.GetRunSeriesAsync(runId, series.Id, ct)` for each series stream. + - Remove the `simClient` field, constructor parameter, and any `IFlowTimeSimApiClient` dependency on `SimResultsService` once the Sim-branch is gone. Update the `Program.cs` DI registration accordingly. + - Preserve the `demo://` prefix handling and the `UseDemoMode` feature-flag check. Demo mode is not retired. + + **Preserve:** + + - `GetDemoModeResultsAsync` and its synthetic-data generators. + - `SimResultData` result type including the `BinMinutes` computed display property (preserved per m-E19-03 spec). + + **Grep guard:** No `simClient.GetIndexAsync(` or `simClient.GetSeriesAsync(` call site remains in `src/FlowTime.UI/Services/SimResultsService.cs`. `IFlowTimeSimApiClient` must no longer appear in the `SimResultsService` constructor signature. + + ### AC5 — Dead Sim run-query URL construction removed from `SimulationResults.razor` + + [src/FlowTime.UI/Components/Templates/SimulationResults.razor:295-312](../../../src/FlowTime.UI/Components/Templates/SimulationResults.razor) constructs a download URL conditional on demo vs API mode: + + - Demo branch (line 302-303): `downloadUrl = $"{baseUrl}/{simConfig.ApiVersion}/sim/runs/{runId}/series/{seriesId}"` — a path that does not exist on Sim. + - API branch (line 307-311): `downloadUrl = $"{baseUrl}/{apiConfig.ApiVersion}/runs/{runId}/series/{seriesId}"` — a path that does not match the canonical Engine run-series route (`/v1/runs/{runId}/series/{seriesId}`). 
+ + After AC3 rewires API-mode data generation to produce canonical Engine run IDs, the single correct download URL for every non-demo run is the Engine API's `GET /v1/runs/{runId}/series/{seriesId}`. + + **Rewrite:** + + - Collapse the demo-vs-API branch at [src/FlowTime.UI/Components/Templates/SimulationResults.razor:295-312](../../../src/FlowTime.UI/Components/Templates/SimulationResults.razor). Non-demo runs use `$"{apiBaseUrl}/{apiConfig.ApiVersion}/runs/{runId}/series/{seriesId}"` resolved from `FlowTimeApiOptions`. Confirm at implementation time whether the existing `apiConfig.ApiVersion` value produces the canonical `/v1/runs/...` shape; if not, correct the format string to match the live Engine route. + - Demo-mode download behaviour: either reuse the Engine download URL (if demo runs are materialised to canonical run directories) or remove the download button in demo mode. Decide at implementation time after checking whether demo runs actually produce downloadable series files; if not, the mode-mismatch warning path already covers the user story and the download button can be hidden in demo mode. + + **Preserve:** + + - The mode-mismatch warning logic at [src/FlowTime.UI/Components/Templates/SimulationResults.razor:280-293](../../../src/FlowTime.UI/Components/Templates/SimulationResults.razor). That UX guidance still applies. + + **Grep guard:** No `/sim/runs/` literal remains in `src/FlowTime.UI/Components/`. No `{apiConfig.ApiVersion}/runs/{runId}/series/` literal that does not match the canonical Engine route shape remains in `src/FlowTime.UI/Components/Templates/SimulationResults.razor`. + + ### AC6 — Supported Blazor Sim client surface confirmed aligned (row 63) + + Row 63 of the supported-surfaces matrix lists `HealthAsync`, `GetDetailedHealthAsync`, `GetTemplatesAsync`, `GetTemplateAsync`, `GenerateModelAsync`, `CreateRunAsync` as the supported Blazor Sim client surface. 
After AC1 and AC2 complete, those are the only methods remaining on `IFlowTimeSimApiClient`. + + **Audit:** + + - Confirm each surviving method targets a live Sim route per the Sim route sweep in [docs/architecture/supported-surfaces.md](../../../docs/architecture/supported-surfaces.md). + - Confirm no surviving method reconstructs metrics, state, or run shapes locally where a canonical endpoint already exists. + - Confirm the response DTOs the surviving methods deserialize (e.g. `FlowTimeSimDetailedHealthResponse`, `ApiTemplateInfo`, `TemplateGenerationResponse`, `RunCreateResponseDto`) match the current Sim wire shapes. Any drift is a milestone regression. + + **Expected outcome:** no code change if no drift is found. If drift is discovered, fix it in the same commit bundle as the alignment audit and document the change in the tracking doc. Adding new capability is out of scope — only bringing the surface back into alignment with a current contract is. + + **Grep guard:** `IFlowTimeSimApiClient` at [src/FlowTime.UI/Services/FlowTimeSimApiClient.cs](../../../src/FlowTime.UI/Services/FlowTimeSimApiClient.cs) exposes exactly the row 63 supported set after this milestone. No method names outside `{BaseAddress, HealthAsync, GetDetailedHealthAsync, GetTemplatesAsync, GetTemplateAsync, GenerateModelAsync, CreateRunAsync}` remain on the interface. + + ### AC7 — Svelte client surfaces confirmed aligned (rows 66, 67) + + Rows 66 and 67 of the matrix list the Svelte `Sim` client at [ui/src/lib/api/sim.ts](../../../ui/src/lib/api/sim.ts) and Engine client at [ui/src/lib/api/flowtime.ts](../../../ui/src/lib/api/flowtime.ts) as supported first-party surfaces. m-E19-04 is the owning milestone for their alignment audit. 
+ + **Audit:** + + - Confirm `ui/src/lib/api/sim.ts` call sites target the current Sim routes listed in the HTTP Call Site Sweep (supported-surfaces.md): `/api/v1/healthz`, `/api/v1/templates`, `/api/v1/templates/{id}`, `/api/v1/templates/categories`, `/api/v1/orchestration/runs`. No stale draft CRUD, catalog, or bundle-import probes remain. + - Confirm `ui/src/lib/api/flowtime.ts` call sites target the current Engine routes: `/healthz`, `/v1/healthz`, `/v1/runs`, `/v1/runs/{runId}`, `/v1/artifacts*`, `/v1/runs/{runId}/graph`, `/v1/runs/{runId}/state`, `/v1/runs/{runId}/index`, `/v1/runs/{runId}/state_window`. No stale bundle-import, `POST /v1/runs`, or `/v1/debug/` probes remain. + - Confirm neither file reconstructs metrics, state, or run shapes locally where a canonical Engine endpoint already exists. + + **Expected outcome:** no code change if no drift is found. If drift is discovered, fix it in the same commit bundle as the alignment audit. + + **Grep guard:** `ui/src/lib/api/sim.ts` must not contain literals matching `catalogs`, `drafts`, `bundle`, `bundlePath`, `bundleArchiveBase64`, or `bundleRef`. `ui/src/lib/api/flowtime.ts` must not contain literals matching `POST /v1/runs`, `bundlePath`, `bundleArchiveBase64`, `bundleRef`, or `/v1/debug/`. + + ### AC8 — Grep-guard script codified + + Create `scripts/m-E19-04-grep-guards.sh` mirroring the structure of `scripts/m-E19-03-grep-guards.sh`. Every guard listed in AC1–AC7 becomes a named test in the script. The script must exit 0 when all guards pass. + + **Guards, as implemented in the script (each a named test):** + + 1. No `RunAsync(` declaration on `IFlowTimeSimApiClient` in `src/FlowTime.UI/Services/FlowTimeSimApiClient.cs` or implementation in `FlowTimeSimApiClient`/`FlowTimeSimApiClientWithFallback`. + 2. No `api/v1/run"` literal (Sim `/api/v1/run` path) in `src/FlowTime.UI/Services/FlowTimeSimApiClient.cs` or `FlowTimeSimApiClientWithFallback.cs`. + 3. 
No `GetIndexAsync(` or `GetSeriesAsync(` declaration on `IFlowTimeSimApiClient` or implementation in `FlowTimeSimApiClient`/`FlowTimeSimApiClientWithFallback`. + 4. No `api/v1/runs/{` literal constructed against a Sim base address in `FlowTimeSimApiClient.cs` or `FlowTimeSimApiClientWithFallback.cs`. + 5. No `simClient.RunAsync(` or `simClient.GetIndexAsync(` or `simClient.GetSeriesAsync(` call site in `src/FlowTime.UI/Services/TemplateServiceImplementations.cs`. + 6. No `simClient.GetIndexAsync(` or `simClient.GetSeriesAsync(` call site in `src/FlowTime.UI/Services/SimResultsService.cs`. + 7. No `IFlowTimeSimApiClient` dependency on the `SimResultsService` constructor signature. + 8. No `/sim/runs/` URL literal anywhere in `src/FlowTime.UI/`. + 9. `IFlowTimeSimApiClient` interface surface is exactly `{BaseAddress, HealthAsync, GetDetailedHealthAsync, GetTemplatesAsync, GetTemplateAsync, GenerateModelAsync, CreateRunAsync}` — no extra methods. + 10. No `catalogs`, `drafts`, `bundlePath`, `bundleArchiveBase64`, or `bundleRef` literal in `ui/src/lib/api/sim.ts`. + 11. No `POST /v1/runs`, `bundlePath`, `bundleArchiveBase64`, `bundleRef`, or `/v1/debug/` literal in `ui/src/lib/api/flowtime.ts`. + + Scoped searches are limited to `src/FlowTime.UI/`, `ui/src/lib/api/`, and `tests/FlowTime.UI.Tests/` by default. The script runs locally and in the wrap pass. CI wiring stays deferred, matching the pattern in `scripts/m-E19-02-grep-guards.sh` and `scripts/m-E19-03-grep-guards.sh`. + + ### AC9 — Build, tests, and grep guards green + + - `dotnet build FlowTime.sln` is green with no new warnings introduced by this milestone. + - `dotnet test FlowTime.sln` is green across all test projects. Test deletions for deleted code are acceptable; failing tests or reduced coverage for surviving code are not. 
In particular, `tests/FlowTime.UI.Tests/TimeTravelDataServiceTests.cs`, `DashboardTests.cs`, and `ArtifactListRenderTests.cs` define mock implementations of an `IFlowTimeApiClient`-like interface whose method names happen to include `RunAsync` and `GetSeriesAsync` — those are Engine-client members, not Sim-client members, and must remain untouched. Only the `IFlowTimeSimApiClient` declarations and implementations are in scope. + - The Svelte `ui/` project's existing `npm`/`pnpm` build (if wired) is green after the alignment audit. + - `scripts/m-E19-04-grep-guards.sh` exits 0 from the repo root. + + ### AC10 — Tracking doc and status surfaces reconciled + + - Create `work/epics/E-19-surface-alignment-and-compatibility-cleanup/m-E19-04-blazor-support-alignment-tracking.md` at milestone start and update it after each AC lands. Tracking doc records: per-AC file changes, grep-guard results, test counts, alignment-audit findings (drift or no drift), and deviations from the spec (if any). + - Flip milestone status in a single reconciliation pass at wrap time: + - This spec: `draft` → `in-progress` at start → `completed` at wrap. + - [work/epics/E-19-surface-alignment-and-compatibility-cleanup/spec.md](./spec.md) milestone table: `m-E19-04` status `next` → `in-progress` → `completed`; header `Status:` line updated; epic `Success Criteria` checkboxes for "first-party clients no longer maintain duplicate endpoint, metrics, or health fallback logic" and "grep and regression audits prove targeted legacy/fallback helpers are removed or isolated" flipped to checked if m-E19-04 closes them. + - [ROADMAP.md](../../../ROADMAP.md) E-19 section: sync m-E19-04 completion. If m-E19-04 is the final E-19 milestone before epic closure, advance the E-19 section to completed and name the next epic/milestone. + - [work/epics/epic-roadmap.md](../epic-roadmap.md) E-19 row: same sync. + - [CLAUDE.md](../../../CLAUDE.md) Current Work section: sync E-19 topology and next-step pointer. 
+ - All status-surface updates happen in a single wrap commit after the grep guards pass. + + ## Technical Notes + + ### Commit plan (bundled) + + ACs are grouped into four focused commits plus the wrap. Each bundle is a single atomic concept so bisect points to one conceptual slice of the milestone. + + 1. **Bundle A — stale Sim client deletion + caller rewire (AC1 + AC2 + AC3 + AC4 + AC5).** Delete the three stale interface methods and their implementations in `FlowTimeSimApiClient` and `FlowTimeSimApiClientWithFallback`. Rewire `FlowTimeSimService.RunApiModeSimulationAsync` onto `CreateRunAsync` and `GetRunStatusAsync` onto `apiClient.GetRunIndexAsync`. Simplify `SimResultsService` to use Engine API for every non-demo query and drop its `IFlowTimeSimApiClient` dependency. Collapse the dead download-URL branch in `SimulationResults.razor`. This is one conceptual cleanup — the stale Sim client and its ripple effects — and goes in one commit. + 2. **Bundle B — alignment audit findings (AC6 + AC7).** Typically no code change. If the audits surface drift, fix it here. If no drift, this bundle is a no-op commit or folded into the tracking-doc wrap (if folded, the tracking doc records "no drift found"). + 3. **Bundle C — grep guard script (AC8).** Its own commit. The script must pass against the tree from Bundle A (and Bundle B if it produced changes), proving the cleanup is complete before the wrap. + 4. **Wrap (AC9 + AC10).** Tracking doc finalization and status-surface reconciliation in a single commit after the grep guards and build/test pass. + + If any bundle surfaces a complication at implementation time (e.g. Bundle A discovers a deeper caller chain that cannot be rewired cleanly, or AC5's `SimulationResults.razor` demo-mode download decision needs human input), stop and present options before widening or splitting the bundle, the way m-E19-02 handled the AC6 scope narrowing and m-E19-03 handled the Little's Law allowlist marker. 
+ + ### Recommended implementation sequence within Bundle A + + Each step should leave the build green and the test suite passing before the next step begins. Forward-only; no compatibility shims. + + 1. **Rewire `FlowTimeSimService` first.** Change `RunApiModeSimulationAsync` to call `simClient.CreateRunAsync(...)` instead of `simClient.RunAsync(yamlSpec)`. Change `GetRunStatusAsync` to call `apiClient.GetRunIndexAsync(runId, ct)` instead of `simClient.GetIndexAsync(runId)`. Build and run `dotnet test` before removing anything — this isolates the rewire from the deletion. + 2. **Simplify `SimResultsService`.** Remove the `isEngineRun` branch, drop the `simClient` field and constructor parameter, update the `Program.cs` DI registration. Build and test again. + 3. **Collapse `SimulationResults.razor` download URL.** Decide the demo-mode download handling (reuse the Engine URL if demo runs materialise, otherwise hide the demo-mode download button). Build and test again. + 4. **Delete the interface methods and implementations.** Now that every caller is rewired, delete `RunAsync`/`GetIndexAsync`/`GetSeriesAsync` from `IFlowTimeSimApiClient`, `FlowTimeSimApiClient`, and `FlowTimeSimApiClientWithFallback`. Build and test. Any lingering reference surfaces as a compile error and must be rewired in this same commit, not deferred. + 5. **Clean up orphaned helpers.** If `GenerateSimulationYamlAsync` or related schema-translation helpers in `TemplateServiceImplementations.cs` are unreachable, delete them. If `SimRunResponse` DTO is unused, delete it. Build and test once more. + + ### Supporting data + + - `IFlowTimeSimApiClient` interface declaration: [src/FlowTime.UI/Services/FlowTimeSimApiClient.cs:6-18](../../../src/FlowTime.UI/Services/FlowTimeSimApiClient.cs). + - Concrete implementation: [src/FlowTime.UI/Services/FlowTimeSimApiClient.cs:20-439](../../../src/FlowTime.UI/Services/FlowTimeSimApiClient.cs). 
+ - Port-discovery wrapper (retains port-discovery bootstrap, loses pass-throughs): [src/FlowTime.UI/Services/FlowTimeSimApiClientWithFallback.cs](../../../src/FlowTime.UI/Services/FlowTimeSimApiClientWithFallback.cs). + - `FlowTimeSimService` API-mode orchestration: [src/FlowTime.UI/Services/TemplateServiceImplementations.cs:867-1060](../../../src/FlowTime.UI/Services/TemplateServiceImplementations.cs). + - `SimResultsService` result loading: [src/FlowTime.UI/Services/SimResultsService.cs:38-124](../../../src/FlowTime.UI/Services/SimResultsService.cs). + - Download URL construction: [src/FlowTime.UI/Components/Templates/SimulationResults.razor:295-315](../../../src/FlowTime.UI/Components/Templates/SimulationResults.razor). + - Blazor `TemplateRunner.razor` data-generation caller: [src/FlowTime.UI/Pages/TemplateRunner.razor:831](../../../src/FlowTime.UI/Pages/TemplateRunner.razor). + - Blazor `TemplateRunner.razor` engine-eval caller (preserved, deferred per D-2026-04-08-029): [src/FlowTime.UI/Pages/TemplateRunner.razor:744](../../../src/FlowTime.UI/Pages/TemplateRunner.razor). + - Engine API client read methods used by the rewired callers: `IFlowTimeApiClient.GetRunIndexAsync` and `GetRunSeriesAsync` in [src/FlowTime.UI/Services/FlowTimeApiClient.cs](../../../src/FlowTime.UI/Services/FlowTimeApiClient.cs). + - Svelte Sim client: [ui/src/lib/api/sim.ts](../../../ui/src/lib/api/sim.ts). + - Svelte Engine client: [ui/src/lib/api/flowtime.ts](../../../ui/src/lib/api/flowtime.ts). + + ### Test strategy + + Forward-only deletion and rewire, not migration: + + - Tests that exist only to exercise the deleted stale wrappers are deleted alongside the wrappers. + - Tests that mock `IFlowTimeSimApiClient` for an unrelated scenario (e.g. template-metadata tests) must be updated to drop the deleted methods from the mock implementation. If the mock no longer compiles because the interface shrank, that is a desired forcing function — update the mock to the shrunken surface. 
+ - Tests that mock `IFlowTimeApiClient` (Engine client) are out of scope. Method-name collisions with `RunAsync`/`GetSeriesAsync` on the Engine mocks are not stale-wrapper residue — Engine-side deletion is deferred per D-2026-04-08-029. + - No new unit tests are required by this milestone unless a rewire surfaces a regression that existing coverage did not catch. In that case, the regression test is added alongside the fix. + - Grep guards (AC8) are the load-bearing regression check for this milestone. Every deleted symbol and every rewired caller path is asserted absent or present via a guard. + + ## Preserved Surfaces + + Explicit list of surfaces that must remain untouched by this milestone. Any accidental change to these surfaces is a milestone regression. + + - `IFlowTimeApiClient` and its Engine-facing implementations (`FlowTimeApiClient`, `ApiRunClient`) — they are the canonical Engine query surface. + - `IRunClient`, `RunClientRouter`, `SimulationRunClient`, `ApiRunClient` — these feed the deferred Engine direct-eval path per D-2026-04-08-029. Their `RunAsync` members are not stale wrappers. + - `FlowTimeSimApiClient.CreateRunAsync`, `HealthAsync`, `GetDetailedHealthAsync`, `GetTemplatesAsync`, `GetTemplateAsync`, `GenerateModelAsync` — row 63 supported surface. + - `FlowTimeSimApiClientWithFallback` class, its `PortDiscoveryService` integration, and its bootstrap in `Program.cs` — legitimate dev-environment port discovery. Only the pass-through methods for deleted interface members are removed. + - `PortDiscoveryService` and `FlowTimeSimApiOptions` — unchanged. + - `FlowTimeSimService.RunSimulationAsync` outer demo-vs-api branch, `RunDemoModeSimulationAsync`, and every demo-data generator in `TemplateServiceImplementations.cs`. + - `SimResultsService.GetDemoModeResultsAsync` and every demo-data generator. 
+ - `SimResultData` result type including its `BinMinutes` computed display property (explicitly preserved by m-E19-03 as a display helper, not a schema field). + - `TemplateRunner.razor` — flow analysis path at line 744 routing through `IRunClient.RunAsync` stays untouched. Only the data-generation path at line 831 is affected, and only transitively via `FlowTimeSimService` rewire. + - `Simulate.razor` — listed in m-E19-03 preserved surfaces; any incidental reads of `SimResultsService` continue to work after the rewire. + - `TemplateServiceImplementations.TemplateService` (the template metadata class distinct from `FlowTimeSimService`) — template authoring is out of scope here; m-E19-03 already retired its `binMinutes` demo residue. + - `ui/src/lib/api/sim.ts` and `ui/src/lib/api/flowtime.ts` — no code change expected from the alignment audit unless drift is found. + - `IFlowTimeSimApiClient` methods that are NOT in the stale-wrapper set: only AC1 and AC2 targets are removed. `CreateRunAsync` in particular must be preserved and is the replacement for the deleted `RunAsync`. + + ## Out of Scope + + - Retiring Blazor as a first-party UI. Blazor remains supported per the Blazor/Svelte support policy. + - Retiring Blazor demo mode. Demo mode is explicitly preserved. + - Forcing feature parity between Blazor and Svelte. Feature parity is not a goal. + - Adding new capability to either UI. Only alignment with current contracts is in scope. + - Deleting `POST /v1/run` or `POST /v1/graph` from Engine or their test consumers. Deferred per D-2026-04-08-029. + - Touching the `IRunClient` / `ApiRunClient` / `RunClientRouter` / `SimulationRunClient` abstractions. Those feed the deferred Engine direct-eval path. + - Touching `FlowTime.Core`, `FlowTime.Generator`, `FlowTime.API`, `FlowTime.Sim.*`, or any non-UI project. + - Re-opening schema, template, example, or docs retirement. m-E19-03 owns those and is complete. + - Re-opening Sim runtime route deletion. 
m-E19-02 owns those and is complete. + - Introducing or referencing `FlowTime.TimeMachine`. That component is new in E-18 m-E18-01a and does not exist yet. + - Reintroducing any deleted Sim route via a Blazor-side compatibility shim. + - Refactoring `FlowTimeSimService`, `SimResultsService`, or `SimulationResults.razor` beyond what the deletions and rewires require. The commit bundle stays scoped to the stale-wrapper cleanup ripple. + - Performance, observability, or error-handling improvements unrelated to deletion and rewire. + - CI wiring for `scripts/m-E19-04-grep-guards.sh`. The script exists and runs locally; CI integration is deferred matching the pattern in m-E19-02 and m-E19-03. + - Updating release notes, completed-epic specs, or other historical material under `docs/releases/`, `docs/archive/`, or `work/epics/completed/`. + + ## Guards / DO NOT + + - **DO NOT** delete `CreateRunAsync`, `HealthAsync`, `GetDetailedHealthAsync`, `GetTemplatesAsync`, `GetTemplateAsync`, or `GenerateModelAsync` from `IFlowTimeSimApiClient`. They are the row 63 supported surface and the rewired API-mode data-generation path depends on `CreateRunAsync`. + - **DO NOT** delete or modify `ApiRunClient.RunAsync`, `RunClientRouter.RunAsync`, or `SimulationRunClient.RunAsync`. Those are `IRunClient` members that feed the deferred Engine direct-eval path and are out of scope. + - **DO NOT** delete `FlowTimeSimApiClientWithFallback` or its `PortDiscoveryService` integration. Only the pass-through methods for deleted interface members are removed. + - **DO NOT** retire Blazor demo mode or any demo-data generator. Demo mode is explicitly preserved. + - **DO NOT** introduce new HTTP clients, new interface abstractions, or new DI lifetimes during the rewire. The rewire reuses `IFlowTimeApiClient` for run queries and `IFlowTimeSimApiClient.CreateRunAsync` for run creation. 
+ - **DO NOT** reintroduce any deleted Sim route literal (`/api/v1/run`, `/api/v1/runs/{id}/index`, `/api/v1/runs/{id}/series/{id}`) anywhere in `src/FlowTime.UI/`. + - **DO NOT** add advisory comments pointing at E-18 Time Machine or at the deleted wrappers. Forward-only — once the wrappers are gone, no migration commentary is needed. + - **DO NOT** widen the scope into template authoring, schema cleanup, runtime endpoint changes, or Contracts-level refactors. Those are other milestones (or already complete). + - **DO NOT** touch the Svelte UI unless the AC7 alignment audit surfaces drift. The default expected outcome of AC7 is "no code change." + - **DO NOT** introduce compatibility shims, feature flags, or configuration toggles to keep deleted wrappers reachable. + - **DO NOT** commit before explicit human approval per the repo's Hard Rules. + + ## Dependencies + + - [m-E19-01 Supported Surface Inventory, Boundary ADR & Exit Criteria](./m-E19-01-supported-surface-inventory.md) — supplies matrix rows 63–67 and the Blazor/Svelte support policy this milestone executes. + - [m-E19-02 Sim Authoring & Runtime Boundary Cleanup](./m-E19-02-sim-authoring-and-runtime-boundary-cleanup.md) — already removed the Sim runtime routes these wrappers would have depended on, so the current state is "broken wrappers" not "unused-but-working wrappers." + - [m-E19-03 Schema, Template & Example Retirement](./m-E19-03-schema-template-example-retirement.md) — already retired the deprecated `binMinutes` authoring residue from `TemplateServiceImplementations.cs` demo generators and the UI sample fixture, so the only `TemplateServiceImplementations.cs` residue left is the stale Sim client caller chain this milestone rewires. + - [docs/architecture/supported-surfaces.md](../../../docs/architecture/supported-surfaces.md) — authoritative row-by-row ownership. 
+ + ## References + + - [E-19 epic spec](./spec.md) + - [m-E19-01 spec](./m-E19-01-supported-surface-inventory.md) — see matrix rows 63–67 and the Blazor/Svelte Support Policy section + - [m-E19-02 spec](./m-E19-02-sim-authoring-and-runtime-boundary-cleanup.md) + - [m-E19-03 spec](./m-E19-03-schema-template-example-retirement.md) + - [work/decisions.md](../../decisions.md) — D-2026-04-08-029 (deferred `/v1/run` `/v1/graph`), Blazor/Svelte support policy decision + - [scripts/m-E19-03-grep-guards.sh](../../../scripts/m-E19-03-grep-guards.sh) — template for the m-E19-04 grep-guard script + - kind: milestone + id: M-028 + frontmatter: + title: Scaffold, Types, and Parsers + status: done + parent: E-20 + body: | + ## Goal + + Stand up the Rust project, port all model types with YAML deserialization, port the expression parser, and extract reference model fixtures. After this milestone, the Rust crate can parse any FlowTime model YAML and any FlowTime expression — the complete data layer with no computation. + + ## Context + + No Rust code exists in the repo. The devcontainer does not have Rust installed. The C# engine defines ~22 model types in `ModelParser.cs` and a recursive-descent expression parser in `FlowTime.Expressions/`. Existing YAML model files in `examples/` and `fixtures/` serve as integration test fixtures. + + ## Acceptance Criteria + + 1. **AC-1: Rust workspace and crate structure.** A Rust workspace at `engine/` (repo root) with two crates: + - `engine/core/` — library crate (`flowtime-core`) containing model types, expression parser, and (future) compiler/evaluator. + - `engine/cli/` — binary crate (`flowtime-engine`) that depends on `flowtime-core`. For this milestone, it only parses a model YAML and prints a summary (node count, grid dimensions). + - `Cargo.toml` workspace at `engine/` level. + + 2. **AC-2: Devcontainer Rust toolchain.** The devcontainer gains Rust support: + - `rustup` and `cargo` available on `$PATH`. 
 + - `cargo build` and `cargo test` work from `engine/`. + - Installation via devcontainer feature or post-create script — not manual. + + 3. **AC-3: Model types with serde deserialization.** Rust structs mirroring every C# model type that participates in YAML deserialization: + - `ModelDefinition`, `GridDefinition`, `NodeDefinition`, `TopologyDefinition`, `TopologyNodeDefinition`, `TopologyNodeSemanticsDefinition`, `TopologyEdgeDefinition`, `ConstraintDefinition`, `ConstraintSemanticsDefinition`, `ClassDefinition`, `TrafficDefinition`, `ArrivalDefinition`, `ArrivalPatternDefinition`, `OutputDefinition`, `RouterDefinition`, `RouterInputsDefinition`, `RouterRouteDefinition`, `DispatchScheduleDefinition`, `PmfDefinition`, `InitialConditionDefinition`, `UiHintsDefinition`. + - All fields use camelCase JSON/YAML naming (matching existing schema). + - Optional fields are `Option<T>`. + - `serde_yaml` for deserialization. + + 4. **AC-4: All existing model fixtures deserialize.** Every `.yaml` model file in `examples/` and `fixtures/` (excluding `examples/archive/`) parses into the Rust `ModelDefinition` without error. Test: `cargo test` includes a parameterized test that loads each fixture. + + 5. **AC-5: Expression parser.** Port of the C# `ExpressionParser` (recursive descent) producing equivalent AST types: + - AST: `Expr` enum with variants `Literal(f64)`, `ArrayLiteral(Vec<f64>)`, `NodeRef(String)`, `BinaryOp { op, left, right }`, `FunctionCall { name, args }`. + - `BinaryOp`: Add, Subtract, Multiply, Divide. + - Grammar: `Expression = Term (('+' | '-') Term)*`, `Term = Factor (('*' | '/') Factor)*`, `Factor = Number | Array | NodeRef | FunctionCall | '(' Expression ')'`. + - Error reporting with position. + + 6. **AC-6: Expression parser parity.** Every expression that appears in existing model fixtures and C# test fixtures parses correctly. 
Additionally, the following expressions must parse and produce correct AST structure (extracted from C# tests): + - `"capacity"` → NodeRef + - `"100.0"` → Literal + - `"a + b"` → BinaryOp(Add) + - `"a * b + c"` → precedence: Add(Mul(a, b), c) + - `"(a + b) * c"` → Mul(Add(a, b), c) + - `"SHIFT(demand, 1)"` → FunctionCall("SHIFT", [NodeRef("demand"), Literal(1)]) + - `"CONV(errors, [0.0, 0.6, 0.3, 0.1])"` → FunctionCall with ArrayLiteral + - `"CLAMP(queue_depth / 50, 0, 1)"` → nested function + binary op + - `"raw_arrivals * (1 - SHIFT(pressure, 1))"` → nested binary ops with function call + - `"MIN(capacity, arrivals)"` → FunctionCall + - `"MAX(0, SHIFT(queue_depth, 1) + arrivals)"` → nested + + 7. **AC-7: Reference model fixtures extracted.** A directory `engine/fixtures/` containing YAML model files at graduated complexity levels, copied or symlinked from existing `examples/` and `fixtures/`. At minimum: + - Simple: const-only model (e.g., `m0.const.yaml`) + - Expression: model with expr nodes + - Queue: model with serviceWithBuffer topology + - PMF: model with PMF nodes + - Router: model with router nodes + - Constraint: model with constraints + - Multi-class: model with class definitions + - WIP limit: model with wipLimit/wipOverflow + - These serve as the progressive parity test fixtures for M2-M6. + + ## Technical Notes + + - **Crate naming:** `flowtime-core` (library) and `flowtime-engine` (binary) follow Rust conventions (kebab-case crate names). + - **Workspace location:** `engine/` at repo root keeps Rust separate from the .NET solution. `Cargo.lock` lives in `engine/`. + - **serde field naming:** Use `#[serde(rename_all = "camelCase")]` on structs to match the existing YAML camelCase convention. Use `#[serde(default)]` for optional fields that have C# defaults. + - **Expression parser:** The C# parser is 371 lines. The Rust port should be similar size. Use `&str` + byte position for error reporting. 
+ - **No computation:** This milestone deliberately excludes compilation, evaluation, and artifact writing. The types and parsers are the foundation; computation starts in m-E20-02. + - **Sim YAML models:** Some fixtures use Sim-specific fields (`metadata.generator`, `parameters`, etc.) that are not in `ModelDefinition`. Use `#[serde(flatten)]` or ignore unknown fields with `#[serde(deny_unknown_fields)]` disabled — existing models must parse without error. + + ## Out of Scope + + - Model compilation (topo sort, column map, plan generation) — m-E20-02 + - Evaluation (matrix ops) — m-E20-02+ + - Model validation (schema checks, initial condition validation) — m-E20-02+ + - Artifact writing — m-E20-06 + - WebAssembly compilation — future + - Expression evaluation (only parsing) — m-E20-02 + + ## Dependencies + + - None (first milestone in E-20) + - kind: milestone + id: M-029 + frontmatter: + title: Compiler and Core Evaluator + status: done + parent: E-20 + body: | + ## Goal + + Compile simple FlowTime models (const + expr nodes, no topology) into an evaluation plan, execute the plan against a flat matrix, and produce correct series output. This is the first milestone where the Rust engine computes something — the "hello world" of the matrix model. + + ## Context + + m-E20-01 delivered the Rust workspace, model types with YAML deserialization, and the expression parser. The crate can parse any model and any expression, but cannot compile or evaluate anything. + + The C# engine evaluates models via: + 1. `ModelParser.ParseNodes()` — creates `INode` instances from `NodeDefinition` + 2. `Graph(nodes)` — topological sort + 3. `Graph.Evaluate(grid)` — iterate in topo order, each node produces a `Series` + + The matrix engine replaces this with: + 1. Compiler: assign column indices, emit ops from node definitions + 2. Evaluator: iterate ops, execute against flat `f64[]` matrix + + ## Acceptance Criteria + + 1. 
**AC-1: ColumnMap.** Bidirectional mapping between series names (strings) and column indices (usize). `name_to_index()` and `index_to_name()`. Constructed during compilation. + + 2. **AC-2: Op enum and evaluator.** `Op` enum with variants for the element-wise operations needed by const + expr models: + - `Const { out, values }` — write constant values to a column + - `VecAdd { out, a, b }`, `VecSub`, `VecMul`, `VecDiv` — element-wise binary ops + - `ScalarMul { out, input, k }`, `ScalarAdd { out, input, k }` — scalar ops + - `VecMin { out, a, b }`, `VecMax { out, a, b }` — element-wise min/max + - `Clamp { out, val, lo, hi }` — clamp to range + - `Mod { out, a, b }` — modulo + - `Floor { out, input }`, `Ceil { out, input }`, `Round { out, input }` — rounding + - `Step { out, input, threshold }` — step function + - `Pulse { out, period, phase, amplitude }` — periodic pulse + + Evaluator function: `fn evaluate(plan: &[Op], bins: usize, series_count: usize) -> Vec<f64>` — allocates matrix, iterates ops, returns filled matrix. + + 3. **AC-3: Expression compiler.** Compile an expression AST (`Expr`) into a sequence of `Op`s given a `ColumnMap`. Each binary op and function call emits one or more ops, using temporary columns for intermediate results. Node references resolve to column indices via the ColumnMap. + + 4. **AC-4: Model compiler (const + expr).** `fn compile(model: &ModelDefinition) -> Result<(Plan, ColumnMap), CompileError>`: + - Assigns a column index to each node's output series. + - Topological sort based on expression dependencies. + - Emits `Const` ops for `kind: "const"` nodes. + - Emits expression ops for `kind: "expr"` nodes. + - Returns the plan (ordered ops) and column map. + + 5. **AC-5: End-to-end evaluation.** `fn eval_model(model: &ModelDefinition) -> Result` that compiles and evaluates, returning named series. 
Test with the `hello.yaml` fixture: + - `demand` = [10, 10, 10, 10, 10, 10, 10, 10] + - `served` = demand * 0.8 = [8, 8, 8, 8, 8, 8, 8, 8] + + 6. **AC-6: Parity with C# on simple models.** Create a parity test that evaluates a model with both the Rust engine and the C# engine (via pre-computed reference outputs) and compares series values. At minimum: + - Const-only model: all series match + - Const + expr model: expression results match (binary ops, scalar multiply) + - Nested expressions: `MIN(a, b)`, `MAX(a, b)`, `CLAMP(x, lo, hi)` + - Multiple dependent expressions (chain: a → b → c) + + 7. **AC-7: Plan inspection.** `fn format_plan(plan: &Plan, column_map: &ColumnMap) -> String` that prints a human-readable plan. The CLI `flowtime-engine plan <model.yaml>` command uses this. Output shows op type, column names (not just indices). + + ## Technical Notes + + - **Matrix layout:** Row-major `Vec<f64>` of size `series_count * bins`. Column `c` at bin `t` is at index `c * bins + t`. All bins for one series are contiguous. + - **Temporary columns:** Expression compilation may need intermediate columns (e.g., `a + b` in `(a + b) * c` needs a temp column for `a + b`). The compiler allocates these from the column map with generated names like `__temp_0`. + - **Topo sort:** Collect dependencies from expression AST (node references). Kahn's algorithm (same as C#). Reject cycles. + - **Fixture update:** `simple-const.yaml` uses legacy field `expression` instead of `expr`. Update the fixture to use `expr` so it works with the Rust model types. + - **No topology:** This milestone handles flat node lists only. Topology synthesis (serviceWithBuffer queue nodes) comes in m-E20-03. + - **PMF nodes:** `kind: "pmf"` computes a constant expected value from the distribution. Can be included here as a simple op, or deferred to m-E20-03. Include if straightforward. 
+ + ## Out of Scope + + - Topology synthesis (queue nodes, retry echo) — m-E20-03 + - Sequential ops (QueueRecurrence, Shift, Convolve, DispatchGate) — m-E20-03 + - Routing and constraints — m-E20-04 + - Derived metrics — m-E20-05 + - Artifact writing (CSVs, JSON) — m-E20-06 + - SHIFT/feedback handling — m-E20-03 + + ## Dependencies + + - m-E20-01 complete (model types + expression parser) + - kind: milestone + id: M-030 + frontmatter: + title: Topology and Sequential Ops + status: done + parent: E-20 + body: | + ## Goal + + Add topology synthesis and sequential operations to the Rust engine so it can evaluate models with queues, retry echo, dispatch schedules, WIP limits, overflow routing, and SHIFT-based backpressure. After this milestone, the matrix engine handles the core flow dynamics — everything except routing, constraints, derived metrics, and artifact writing. + + ## Context + + m-E20-02 delivered the compiler and evaluator for flat models (const + expr + PMF). The compiler produces a plan of element-wise ops, the evaluator executes against a flat matrix. 38 Rust tests passing. + + The C# engine handles topology via: + 1. `ModelCompiler` — synthesizes `serviceWithBuffer` and `retryEcho` nodes from topology definitions + 2. `ServiceWithBufferNode.Evaluate` — sequential queue recurrence: `Q[t] = max(0, Q[t-1] + inflow - outflow - loss)` + 3. `ExprNode.EvaluateShiftFunction` — temporal shift: `out[t] = input[t - lag]` + 4. `DispatchScheduleProcessor` — gates outflow to dispatch bins only + 5. `WipOverflowEvaluator` — post-evaluation routing of WIP overflow to target queues + 6. `Graph.EvaluateFeedbackSubgraph` — bin-by-bin evaluation for SHIFT feedback cycles + + In the matrix model, all of these become ops in the plan. Sequential ops (QueueRecurrence, Shift, Convolve) process bins in order, reading from previous bins in the same matrix — feedback falls out naturally without special handling. + + ## Acceptance Criteria + + 1. 
**AC-1: Sequential op variants.** Add to the `Op` enum: + - `Shift { out, input, lag }` — `out[t] = input[t - lag]` (0 for t < lag) + - `Convolve { out, input, kernel }` — causal convolution: `out[t] = Σ(k) input[t-k] * kernel[k]` + - `QueueRecurrence { out, inflow, outflow, loss, init, wip_limit, overflow_out }` — sequential queue depth with optional WIP limit and overflow tracking + - `DispatchGate { out, input, period, phase, capacity }` — gates output to dispatch bins, optionally capping at capacity + + 2. **AC-2: Topology synthesis.** The compiler processes `model.topology.nodes` to synthesize queue and retry echo nodes (same logic as C# `ModelCompiler`): + - For each `serviceWithBuffer`/`queue`/`dlq` topology node: synthesize a `QueueRecurrence` op from `semantics.arrivals`, `semantics.served` (or capacity), `semantics.errors`, and `initialCondition.queueDepth`. + - For each topology node with `retryEcho` + `retryKernel`: synthesize a `Convolve` op. + - Queue node ID follows the C# snake_case convention (`Queue → queue_queue`). + - Dispatch schedule on topology node → `DispatchGate` op on the outflow before `QueueRecurrence`. + + 3. **AC-3: WIP limits and overflow routing.** + - `QueueRecurrence` op supports optional `wip_limit` column (scalar const or series) and `overflow_out` column. + - Overflow routing: compiler resolves `wipOverflow` topology node ID to the target's inflow column, emits an additional `VecAdd` to inject overflow into the target's inflow. + - Overflow cycle validation at compile time (same as C# `ValidateNoOverflowCycles`). + + 4. **AC-4: SHIFT feedback.** Models with SHIFT-based cross-node feedback cycles evaluate correctly without special handling. The Shift op reads `state[input, t - lag]` which was written in a previous bin iteration since the evaluator processes ops in plan order and sequential ops process bins in order. 
Test: the backpressure model from E-10 p3b (queue → pressure → SHIFT → effective_arrivals → queue) produces the same stabilization pattern. + + 5. **AC-5: Parity fixtures.** Create simulation-mode topology fixtures (with `grid` + inline `const`/`expr` nodes + topology) and verify parity with C# output: + - Simple queue: const arrivals/served → queue depth matches hand-calculated values + - Queue with WIP limit: overflow tracked correctly + - Queue with dispatch schedule: outflow gated to period bins + - Retry echo: CONV(failures, kernel) produces correct retry series + - Backpressure feedback: SHIFT-based throttle stabilizes queue + - Cascading WIP overflow: A→B→C overflow chain + + 6. **AC-6: Existing tests unbroken.** All 38 existing Rust tests still pass. The compiler changes don't break const/expr compilation. + + ## Technical Notes + + - **Evaluation order for sequential ops:** The plan is still executed as a linear op list. Sequential ops (QueueRecurrence, Shift, Convolve) internally loop over bins. This is correct because the evaluator processes ops in dependency order (topo sort), and within a sequential op, each bin reads from previous bins that are already written. No special "feedback mode" needed. + - **Overflow routing without re-evaluation:** Unlike the C# `WipOverflowEvaluator` (which iterates evaluate → override → re-evaluate), the matrix compiler can emit the overflow routing as additional ops after the queue ops. The QueueRecurrence op writes overflow to `overflow_out` column; a subsequent `VecAdd` adds it to the target's inflow. For cascading (A→B→C), the compiler orders the QueueRecurrence ops so A runs before B which runs before C. No iteration needed — single-pass. + - **Topology node ID → queue column:** The compiler maintains a mapping from topology node ID to the synthesized queue column index, used for overflow routing and later for derived metrics (m-E20-05). 
+ - **New fixtures needed:** Existing fixtures in `engine/fixtures/` are either flat models (hello, simple-const) or telemetry models (file: references). This milestone needs simulation-mode topology fixtures with inline data. Create them as Rust test inline YAML or as new fixture files. + - **Dispatch schedule:** The C# `DispatchScheduleProcessor` zeros outflow on non-dispatch bins, then caps at capacity on dispatch bins. In the matrix model, this is a `DispatchGate` op applied to the outflow column before it reaches `QueueRecurrence`. + + ## Out of Scope + + - Routing (router flow materialization) — m-E20-04 + - Constraints (proportional allocation) — m-E20-04 + - Multi-class flows — m-E20-04 + - Derived metrics (utilization, latency, etc.) — m-E20-05 + - Invariant analysis — m-E20-05 + - Artifact writing — m-E20-06 + - File-based series references (`file:*.csv`) — future (telemetry mode) + + ## Dependencies + + - m-E20-02 complete (compiler + evaluator + element-wise ops) + - kind: milestone + id: M-031 + frontmatter: + title: Routing and Constraints + status: done + parent: E-20 + body: | + ## Goal + + Add router flow materialization and constraint allocation to the Rust engine so it can evaluate models with routers (weight-based and class-based flow splitting) and shared-capacity constraints (proportional allocation when demand exceeds capacity). After this milestone, the matrix engine handles the full evaluation pipeline except derived metrics, invariant analysis, and artifact writing. + + ## Context + + m-E20-03 delivered topology synthesis (QueueRecurrence, Shift, Convolve, DispatchGate), WIP overflow routing, and SHIFT-based backpressure feedback. The evaluator uses bin-major evaluation. 65 Rust tests passing. + + The C# engine handles routing and constraints via: + 1. `RouterFlowMaterializer.ComputeOverrides()` — distributes flows from a source to targets via class-based routing (priority 1) then weight-based routing (remaining flow, priority 2). + 2. 
`ConstraintAllocator.AllocateProportional()` — when total demand exceeds capacity, allocates proportionally: `allocated[node] = capacity * (demand[node] / totalDemand)`. + 3. `ConstraintAwareEvaluator` — applies router overrides first, then constraint overrides, then re-evaluates. + 4. `ClassContributionBuilder` — decomposes totals into per-class series, propagates through graph. + + In the matrix model, all of these become plan ops. Router splitting is `ScalarMul` (weight fractions) or direct column copying (class routing). Constraint allocation is a new `ProportionalAlloc` op that reads multiple demand columns + capacity and writes capped output columns. Multi-class is tracked as separate columns per class. + + ## Acceptance Criteria + + 1. **AC-1: Router weight-based splitting.** The compiler processes `NodeDefinition.router` to split a source series across targets by weight: + - For each route: `target_arrivals += source * (weight / totalWeight)`. + - Routes without explicit weight default to 1.0. + - Multiple routes to the same target accumulate via VecAdd. + - The router's source is resolved from `router.inputs.queue` (the queue node whose outflow feeds the router) or the node's own series. + - Emitted as ScalarMul + VecAdd ops — no new Op variant needed. + + 2. **AC-2: Router class-based routing.** Routes with a `classes` list route per-class flow to specific targets: + - The compiler resolves per-class arrival columns from `model.traffic.arrivals` (each entry has a `classId` and `nodeId`). + - Class routes extract per-class columns and sum them for the target. + - Remaining flow (after class routes) is distributed by weight among weight-only routes. + - Per-class columns use the naming convention `{nodeId}__class_{classId}`. + + 3. **AC-3: Constraint proportional allocation.** New `ProportionalAlloc` op: + - Reads N demand columns + 1 capacity column. + - Per bin: if `totalDemand > capacity`, writes `capped[i] = capacity * (demand[i] / totalDemand)`. 
Otherwise writes demands unchanged. + - The compiler processes `topology.constraints` to emit ProportionalAlloc ops, connecting each constraint's `semantics.arrivals` (demand total) and `semantics.served` (capacity) to the constrained topology nodes via `topologyNode.constraints` lists. + - Constrained nodes' inflow columns are replaced with the capped versions. + + 4. **AC-4: Router → Constraint evaluation order.** The compiler emits router ops before constraint ops (matching C# `RouterAwareGraphEvaluator` → `ConstraintAwareEvaluator` order). Constraint allocation reads from router-adjusted columns. The unified topo sort orders: data nodes → router splits → constraint allocation → queue recurrence. + + 5. **AC-5: Parity fixtures.** Create test models and verify parity with C# output: + - Weight-based router: 3 routes with weights [0.5, 0.3, 0.2], verify target arrivals sum to source + - Class-based router: 2 classes routed to different targets + - Mixed router: some routes class-based, remainder weight-based + - Simple constraint: 2 nodes sharing capacity, demand > capacity → proportional split + - Constraint below capacity: demand < capacity → no capping + - Router + constraint combined: router feeds constrained nodes + + 6. **AC-6: Existing tests unbroken.** All 65 existing Rust tests still pass. + + ## Technical Notes + + - **Router source resolution:** A router node in the YAML has `router.inputs.queue` pointing to a queue node. The router distributes the queue's outflow (served series) across targets. Each target gets a fraction of the served flow as its arrivals. + - **No new Op for routing:** Weight-based routing decomposes to ScalarMul + VecAdd (existing ops). Class routing decomposes to Copy + VecAdd. The compiler emits these standard ops — the router abstraction lives in the compiler, not the evaluator. 
+ - **New Op for constraints:** `ProportionalAlloc` is a genuinely new operation — it reads N+1 columns and writes N columns with per-bin conditional logic. This is similar in spirit to QueueRecurrence (reads multiple columns, writes with conditional logic) but operates on groups of columns. + - **Per-class column naming:** `{nodeId}__class_{classId}` (double underscore to avoid collision with user-defined node IDs). These are internal columns that may not appear in outputs. + - **Constraint topology nodes:** Each constraint in `topology.constraints` has `semantics.arrivals` (total demand reference) and `semantics.served` (capacity reference). Topology nodes reference constraints via their `constraints` list. The compiler maps constraint IDs to the topology nodes they constrain. + - **Bin-major evaluation:** ProportionalAlloc processes one bin at a time (like all ops), reading demand[t] and capacity[t] and writing capped[t]. This is compatible with the bin-major evaluator from m-E20-03. + + ## Out of Scope + + - Derived metrics (utilization, latency, etc.) — m-E20-05 + - Invariant analysis — m-E20-05 + - Artifact writing — m-E20-06 + - File-based series references (`file:*.csv`) — future (telemetry mode) + - Per-class output series in artifacts — m-E20-06 (artifact layer decides what to write) + + ## Dependencies + + - m-E20-03 complete (topology synthesis, sequential ops, bin-major evaluation) + - kind: milestone + id: M-032 + frontmatter: + title: Derived Metrics and Analysis + status: done + parent: E-20 + body: | + ## Goal + + Add derived metric computation and invariant analysis to the Rust engine. Derived metrics (utilization, cycle time, flow efficiency, Kingman approximation) are emitted as additional plan ops on the evaluation matrix. Invariant analysis (conservation checks, warnings) runs as a post-evaluation pass over the matrix columns. After this milestone, the engine produces all analytical output — only artifact writing and CLI remain. 
+ + ## Context + + m-E20-04 delivered routing and constraint allocation. The engine now handles the full evaluation pipeline: const/expr/PMF nodes, topology synthesis (queues, retry echo, dispatch), WIP overflow, SHIFT feedback, routers, and constraints. 78 Rust tests passing. + + The C# engine computes derived metrics in `RuntimeAnalyticalEvaluator` and runs invariant checks in `InvariantAnalyzer`. In the matrix model, derived metrics are additional columns computed from evaluation output. Invariant analysis is a read-only pass that produces warnings. + + ## Acceptance Criteria + + 1. **AC-1: Utilization metric.** Compute `utilization[t] = served[t] / effectiveCapacity[t]` for topology nodes with capacity semantics. Emit as a derived column per node. Returns 0 when capacity is 0. Effective capacity = base capacity × parallelism (when parallelism is defined). + + 2. **AC-2: Cycle time components.** Compute per-bin: + - `queueTimeMs[t] = (queueDepth[t] / served[t]) × binMs` (0 when served ≤ 0) + - `serviceTimeMs[t] = processingTimeMsSum[t] / servedCount[t]` (0 when servedCount ≤ 0) + - `cycleTimeMs[t] = queueTimeMs[t] + serviceTimeMs[t]` (sum of available components) + - `flowEfficiency[t] = serviceTimeMs[t] / cycleTimeMs[t]` (0 when cycleTime ≤ 0) + - `latencyMinutes[t] = queueTimeMs[t] / 60000` + - Which components are emitted depends on node category: queue-only → queueTime+latency, service-only → serviceTime, serviceWithBuffer → all. + + 3. **AC-3: Kingman G/G/1 approximation.** Compute `E[Wq] ≈ (ρ/(1-ρ)) × ((Ca² + Cs²)/2) × E[S]` where: + - `ρ` = utilization (must be in (0, 1)) + - `Ca` = coefficient of variation of arrivals + - `Cs` = coefficient of variation of service + - `E[S]` = mean service time (ms) + - Returns 0 for invalid inputs (ρ ≥ 1, negative Cv, etc.) + - Cv is computed from PMF nodes (σ/μ) or as 0.0 for constant series. + + 4. 
**AC-4: Invariant warnings.** Post-evaluation analysis producing a `Vec<Warning>`: + - **Non-negativity:** Flag bins where arrivals, served, errors, or queueDepth < -ε (ε = 1e-6) + - **Conservation:** Flag bins where served > arrivals + ε (for non-queue nodes) or served > capacity + ε + - **Queue balance:** Flag bins where computed queue depth diverges from actual (|computed - actual| > ε) + - **Stationarity:** Flag when arrivals first-half vs second-half mean diverges > 25% + - Warning struct: `{ node_id, code, message, bins, severity }` + + 5. **AC-5: Derived metrics integration in compiler.** The compiler emits derived metric ops after topology ops, reading from queue depth, served, capacity, and other evaluation columns. A new `compile_derived_metrics` phase appends ops to the plan. The `EvalResult` includes a method to retrieve warnings. + + 6. **AC-6: Parity tests.** Test models verifying: + - Utilization: served=8, capacity=10 → utilization=0.8 + - Queue time: queueDepth=10, served=5, binMs=60000 → queueTimeMs=120000 + - Kingman: ρ=0.8, Ca=1.0, Cs=0.5, E[S]=10 → E[Wq]=25 + - Conservation violation: served > arrivals detected as warning + - Stationarity: increasing arrivals flagged + + 7. **AC-7: Existing tests unbroken.** All 78 existing Rust tests still pass. + + ## Technical Notes + + - **Derived columns naming:** `{nodeId}_utilization`, `{nodeId}_queue_time_ms`, `{nodeId}_cycle_time_ms`, `{nodeId}_flow_efficiency`, `{nodeId}_latency_min`, `{nodeId}_kingman_wq`. + - **Bin duration:** Resolved from `grid.binSize` × `grid.binUnit` → milliseconds. Needed for queue time computation. + - **Cv computation:** For PMF nodes, Cv = σ/μ computed at compile time from the PMF definition. For const nodes, Cv = 0. For expr nodes, Cv is not computed (future: sample Cv from evaluated series). + - **Invariant analysis is read-only.** It does not modify the matrix. It returns a list of warnings. The warnings are stored alongside the EvalResult. 
+ - **No new Op for most derived metrics.** Utilization = VecDiv(served, capacity). Queue time = VecDiv(queueDepth, served) then ScalarMul by binMs. These compose from existing ops. Kingman may need a dedicated op or can be computed at compile time from scalar inputs. + + ## Out of Scope + + - Artifact writing (CSVs, index.json, run.json) — m-E20-06 + - CLI commands — m-E20-06 + - Per-class derived metrics — future + - Window-level aggregation (multi-bin statistics) — future + - Edge-specific warnings (edge flow conservation) — future + - Streak detection (backlog growth, overload, age risk) — future (could add in m-E20-05 if time permits) + + ## Dependencies + + - m-E20-04 complete (routing, constraints, full evaluation pipeline) + - kind: milestone + id: M-033 + frontmatter: + title: Artifacts, CLI, and Integration + status: done + parent: E-20 + body: | + ## Goal + + Add artifact writing (per-series CSVs, index.json, run.json) and complete the CLI so the Rust engine can be invoked as a standalone binary that reads a YAML model and produces a run directory with all output artifacts. This is the final milestone — after it, the Rust engine is a complete standalone replacement for the C# evaluation pipeline. + + ## Context + + m-E20-05 delivered derived metrics and invariant analysis. The engine now handles the full computation pipeline: parsing, compilation, evaluation, derived metrics, and warnings. 113 Rust tests passing. + + The CLI already has `parse`, `plan`, and `eval` commands. The `eval` command prints series to stdout. This milestone extends `eval` to write structured artifacts to an output directory, matching the C# engine's output format. + + ## Acceptance Criteria + + 1. 
**AC-1: CSV series writer.** Write each named (non-temp) column as a CSV file: + - Format: `bin_index,value\n` header, then `{t},{value}\n` per bin + - File naming: `{seriesId}.csv` (using the column name from the column map) + - Written to `{output}/series/` directory + - Values formatted in invariant culture (`.` decimal separator, no thousands separator) + + 2. **AC-2: series/index.json.** Write a JSON index of all output series: + - Schema: `{ "schemaVersion": 1, "grid": {...}, "series": [{id, path, points}] }` + - Grid: bins, binSize, binUnit from the model + - One entry per non-temp series, referencing its CSV path + + 3. **AC-3: run.json.** Write run metadata: + - Schema: `{ "schemaVersion": 1, "engineVersion": "0.1.0", "grid": {...}, "warnings": [...], "series": [{id, path}] }` + - Includes evaluation warnings from invariant analysis + - Warning format: `{ "nodeId", "code", "message", "severity" }` + + 4. **AC-4: CLI eval --output flag.** Extend the `eval` command: + - `flowtime-engine eval <model.yaml> --output <dir>` — evaluates and writes artifacts to `<dir>` + - Creates `<dir>/series/` directory structure + - Writes CSVs + index.json + run.json + - Without `--output`, prints summary to stdout (existing behavior) + - Exit code 0 on success, 1 on error + + 5. **AC-5: CLI validate command.** Add `validate` command: + - `flowtime-engine validate <model.yaml>` — parses, compiles, and runs analysis without artifact writing + - Prints warnings to stdout as JSON + - Exit code 0 if no errors, 1 if compilation fails + + 6. **AC-6: Round-trip parity test.** End-to-end test: + - Load a reference model fixture, run `eval --output`, verify the CSV contents match expected values + - Verify index.json is valid JSON with correct series count + - Verify run.json contains grid and warnings + + 7. **AC-7: Existing tests unbroken.** All 113 existing Rust tests still pass. + + ## Technical Notes + + - **New module: `writer.rs`** in `flowtime-core` handles artifact writing. 
It takes an `EvalResult` + `ModelDefinition` and writes to a directory. The CLI calls this module. + - **CSV precision:** Use `{value}` default f64 formatting (full precision). The C# engine uses invariant culture which is equivalent. + - **No hashing in this milestone.** The C# engine produces SHA256 hashes for series, model, and scenario. Deferring hashing to keep scope tight — can add in a follow-up. + - **No manifest.json in this milestone.** The manifest includes RNG/provenance data that the Rust engine doesn't produce yet. + - **No per-class series output.** Per-class columns use internal naming (`__class_`) and are not written as separate artifacts yet. + - **Temp column filtering:** Columns starting with `__temp_` are internal intermediates and are not written. + + ## Out of Scope + + - SHA256 hashing of artifacts — follow-up + - manifest.json (RNG, provenance) — follow-up + - Per-class series output — follow-up + - Parquet aggregate output — future + - .NET subprocess bridge wiring — future (separate integration work) + - stdin/stdout pipeline mode — future + + ## Dependencies + + - m-E20-05 complete (derived metrics, analysis, warnings) + - kind: milestone + id: M-034 + frontmatter: + title: .NET Subprocess Bridge + status: done + parent: E-20 + body: | + ## Goal + + Bridge the Rust `flowtime-engine` binary into the .NET API as a subprocess call, with SHA256 hashing for provenance (per D-2026-04-10-030), and a configuration switch to run the Rust engine alongside (not replacing) the C# engine. + + ## Context + + m-E20-06 delivered the Rust CLI binary and artifact writer (CSVs, index.json, run.json). The E-20 spec lists the ".NET subprocess bridge" and "full parity harness" as in-scope deliverables (spec line 66). The epic was marked complete before these were built. This milestone delivers the bridge and foundational parity tests. 
+ + D-2026-04-10-030 established the provenance strategy: port SHA256 basics (model hash + per-series hashes in manifest.json) as part of the bridge work. Sim-specific provenance is deferred until the bridge is exercised by real Sim runs. + + ## Acceptance Criteria + + 1. **AC-1: SHA256 hashing in Rust writer.** The Rust artifact writer computes: + - SHA256 of the raw model YAML text (model hash) + - SHA256 of each series CSV file (series hashes) + - Writes `manifest.json` with `modelHash` and per-series `hash` fields + - Hash format: `"sha256:{hex}"` + - When no YAML text is available, `modelHash` is `null` + + 2. **AC-2: RustEngineRunner in FlowTime.Core.** A subprocess bridge class that: + - Writes model YAML to a temp file (UTF-8, no BOM) + - Invokes `flowtime-engine eval <model> --output <dir>` + - Reads back `run.json`, `manifest.json`, and series CSVs + - Returns typed DTOs (grid, warnings, series values, manifest with hashes) + - Cleans up temp directory on both success and failure (finally block) + - Configurable process timeout (default 60s) with process tree kill on expiry + - Clean `RustEngineException` for missing binary, non-zero exit, and timeout + + 3. **AC-3: Configuration switch.** Opt-in via `appsettings.json`: + - `RustEngine:Enabled` (default `false`) + - `RustEngine:BinaryPath` (default: auto-discover from solution root) + - DI registration in `Program.cs` when enabled + - Does not replace C# evaluation path — both engines available side by side + + 4. 
**AC-4: Integration tests.** At least one parity test plus error coverage: + - C#/Rust parity on simple const+expr model + - C#/Rust parity on topology model (serviceWithBuffer queue) + - C#/Rust parity on negative/precision values + - Empty model (0 nodes) + - Invalid YAML error handling + - Binary not found error handling + - Process timeout handling + - Temp directory cleanup on success and failure + - Manifest hash presence and determinism + + ## Out of Scope + + - Replacing the C# evaluation path in `POST /v1/run` + - Sim-specific provenance fields (template IDs, parameter bindings) + - Plan hashing (deferred to E-17/E-18) + - Per-class output support + - Full C# parity harness across all fixture models + + ## Test Summary + + - 4 Rust unit tests (manifest structure, hash determinism, null hash, SHA256 correctness) + - 14 C# integration tests (see AC-4) + - All 123 existing Rust tests pass + - All 1,301 existing .NET tests pass + - kind: milestone + id: M-035 + frontmatter: + title: Full Parity Harness + status: done + parent: E-20 + depends_on: + - M-034 + body: | + ## Goal + + Establish an automated parity test that runs every Rust engine fixture (21 models) through both the Rust and C# engines and compares series values. Produces a green/red matrix showing exactly which models match and which diverge, and where. This is the baseline before any engine core work. + + ## Context + + m-E20-07 delivered the .NET subprocess bridge and 3 parity tests (simple const+expr, topology queue, negative/precision values). The E-20 spec promises a "full parity harness" across reference models. Only 3 of 21 fixtures are tested today — topology, routing, constraint, PMF, and class-enabled models are untested. + + The `outputs:` filtering feature (YAML `outputs` section) is parsed by the Rust model but not used. Some models rely on it to select output series. This must be implemented for the harness to test those models correctly. + + ## Acceptance Criteria + + 1. 
**AC-1: `outputs:` filtering in Rust compiler.** When the model has an `outputs` section, the Rust engine filters its output to only include the listed series. The `as` field renames the series in the output. When no `outputs` section is present, all non-temp series are included (current behavior). + + 2. **AC-2: Parameterized parity test.** A single test method that: + - Iterates over all `engine/fixtures/*.yaml` files + - Evaluates each through the Rust engine (via `RustEngineRunner`) + - Evaluates each through the C# engine (`ModelService.ParseAndConvert` → `RouterAwareGraphEvaluator.Evaluate`) + - Compares shared series values bin-by-bin with configurable tolerance (default: 1e-10) + - Uses case-insensitive series matching (Rust lowercases topology node IDs) + - Reports per-fixture, per-series pass/fail with divergence details on failure + + 3. **AC-3: All non-class, non-edge fixtures pass parity.** The following fixtures must produce identical series values in both engines: + - `hello.yaml`, `simple-const.yaml` — trivial models + - `complex-pmf.yaml`, `pmf.yaml` — PMF nodes + - `http-service.yaml` — expression-based service + - `topology-simple-queue.yaml`, `topology-backpressure.yaml`, `topology-cascading-overflow.yaml`, `topology-wip-limit.yaml`, `topology-dispatch.yaml`, `topology-retry-echo.yaml` — topology models + - `constraint-below-capacity.yaml`, `constraint-proportional.yaml` — constraint allocation + - `router-weight.yaml`, `router-with-constraint.yaml` — weight-based routing + - `retry-service-time.yaml` — retry kernels + - `order-system.yaml`, `microservices.yaml` — complex multi-node models + + 4. **AC-4: Class and edge fixtures documented.** Fixtures that use classes (`class-enabled.yaml`, `router-class.yaml`, `router-mixed.yaml`) are tested but expected divergences are documented. The harness marks them as "known divergence — per-class decomposition not yet implemented" rather than failing the test run. + + 5. 
**AC-5: Parity matrix output.** The test run produces a clear summary (in test output or a generated report) showing pass/fail status for each fixture. This becomes the baseline for m-E20-09. + + ## Out of Scope + + - Per-class column decomposition (m-E20-09) + - Edge series materialization (m-E20-09) + - Artifact layout changes (m-E20-10) + - Any changes to the C# engine + + ## Key References + + - `engine/fixtures/` — 21 YAML fixtures + - `engine/core/src/model.rs` — `OutputDefinition` struct (parsed, not yet used) + - `tests/FlowTime.Integration.Tests/RustEngineBridgeTests.cs` — existing 14 bridge tests + - `work/gaps.md` — Rust Engine Parity section + - kind: milestone + id: M-036 + frontmatter: + title: Per-Class Decomposition and Edge Series + status: done + parent: E-20 + depends_on: + - M-035 + body: | + ## Goal + + The Rust engine core returns complete evaluation results including per-class series decomposition and per-edge metrics. After this milestone, the engine is feature-complete for evaluation — it computes everything the C# engine computes. This is the critical prerequisite for E-17 and E-18. + + ## Context + + The Rust engine currently computes class-based routing internally (using `__class_` prefixed temporary columns) but does not expose per-class series in its output. The C# engine uses `ClassContributionBuilder` (1,695 lines, 4-pass algorithm) and `EdgeFlowMaterializer` (764 lines) to produce per-class and per-edge series after evaluation. + + Per D-2026-04-10-031, the engine core must return complete results. The artifact sink (m-E20-10) then formats and persists them. This milestone focuses on the evaluation layer only. + + ### Design consideration: port vs. redesign + + The C# `ClassContributionBuilder` is a post-evaluation 4-pass algorithm that decomposes total series into per-class contributions using proportional allocation and expression-tree re-evaluation. 
In the Rust matrix engine, an alternative approach may be more natural: + + - **Option A (port):** Implement a post-evaluation decomposition pass similar to C#. Receives the evaluated state matrix and splits columns by class proportions. + - **Option B (plan ops):** Extend the compiler to emit per-class columns as explicit plan operations during compilation. Each class gets its own columns, evaluated in the main bin-major loop. No post-processing needed. + + Option B leverages the matrix architecture — it's "classes as columns" rather than "classes as post-processing." But it may increase matrix size for models with many classes. The spec author should evaluate both approaches during implementation and choose based on correctness and simplicity. Document the choice in the tracking doc. + + ## Acceptance Criteria + + 1. **AC-1: Class assignment map.** The engine extracts `traffic.arrivals` entries to build a node-to-class mapping. Equivalent to `ClassAssignmentMapBuilder` (trivial — 37 lines of C#). + + 2. **AC-2: Per-class series in EvalResult.** The `EvalResult` struct (or its successor) includes per-class series: for each node that has class contributions, the result contains `(node_id, class_id) → f64[]` series values. At minimum, arrival nodes, expression nodes, and serviceWithBuffer nodes must have per-class decomposition. + + 3. **AC-3: Expression-tree per-class evaluation.** For `expr` nodes, per-class decomposition must handle the expression tree correctly: binary ops (add, sub, mul, div), scalar ops, and functions (SHIFT, CONV, MIN, MAX, CLAMP). The per-class series must sum to the total series within floating-point tolerance. + + 4. **AC-4: ServiceWithBuffer per-class decomposition.** Queue nodes produce per-class queue depth, per-class served, per-class arrivals. Queue depth decomposition follows proportional allocation based on arrival class fractions. + + 5. 
**AC-5: Edge series in EvalResult.** The result includes per-edge metrics: `(edge_id, metric) → f64[]` where metric is one of: `flowVolume`, `attemptsVolume`, `failuresVolume`, `retryVolume`. Per-class edge decomposition: `(edge_id, metric, class_id) → f64[]`. + + 6. **AC-6: Router per-class distribution.** Class-based routes distribute flow to their designated targets. Weight-based routes distribute remaining (non-class-assigned) flow by weight. Router diagnostics (leakage, accuracy) are available in the result. + + 7. **AC-7: Parity harness green for class fixtures.** The 3 class-enabled fixtures (`class-enabled.yaml`, `router-class.yaml`, `router-mixed.yaml`) pass the parity harness from m-E20-08. Known divergences resolved. + + 8. **AC-8: Parity harness green for edge fixtures.** The 6 edge-bearing fixtures pass the parity harness with edge series comparison. Edge series values match C# `EdgeFlowMaterializer` output within tolerance. + + 9. **AC-9: Normalization invariant.** For every node, the sum of per-class series equals the total series within 1e-10 tolerance. A Rust test asserts this invariant across all class-enabled fixtures. 
+ + ## Out of Scope + + - Artifact directory layout changes (m-E20-10) + - Series ID naming convention (`{node}@{component}@{class}`) — that's sink concern + - Per-class CSV file writing — that's sink concern + - StateQueryService compatibility — that's sink concern + - Sim-specific provenance (D-2026-04-10-030 deferral still applies) + + ## Key References + + - `src/FlowTime.Core/Artifacts/ClassContributionBuilder.cs` — C# reference (1,695 lines) + - `src/FlowTime.Core/Routing/EdgeFlowMaterializer.cs` — C# reference (764 lines) + - `src/FlowTime.Core/Artifacts/ClassAssignmentMapBuilder.cs` — C# reference (37 lines) + - `engine/core/src/compiler.rs` — existing class-aware routing (lines 397-533) + - `engine/fixtures/class-enabled.yaml`, `router-class.yaml`, `router-mixed.yaml` — class fixtures + - D-2026-04-10-031 — three-layer architecture decision + - kind: milestone + id: M-037 + frontmatter: + title: Artifact Sink Parity + status: done + parent: E-20 + depends_on: + - M-036 + body: | + ## Goal + + The Rust artifact sink produces the full directory layout that `StateQueryService` can read. After this milestone, the C# `RunArtifactWriter` is no longer needed for Rust-evaluated runs. E-17 and E-18 are unblocked. + + ## Context + + The Rust engine (after m-E20-09) returns complete evaluation results: total series, per-class series, edge series, warnings, grid info, and metadata. The current `writer.rs` produces a minimal artifact set (bare series CSVs, simple index.json, run.json, manifest.json). The C# `RunArtifactWriter` produces a much richer layout that `StateQueryService` expects: `model/` directory, normalized `spec.yaml`, per-class CSV naming, full JSON schemas with class metadata, and provenance files. + + Per D-2026-04-10-031, the artifact sink is a separate layer from the engine core. It receives the model input and EvalResult, and persists them durably. This milestone builds the full sink as a Rust library used by both the CLI and the .NET bridge. 
+ + ## Acceptance Criteria + + 1. **AC-1: `model/` directory.** The sink writes: + - `model/model.yaml` — copy of the input model YAML + - `model/metadata.json` — template metadata extracted from YAML provenance section and/or passed metadata: `{ schemaVersion, templateId, templateTitle, templateVersion, mode, modelHash, source, hasTelemetrySources, telemetrySources, nodeSources, parameters }` + - `model/provenance.json` — written when provenance metadata is provided (pass-through). Omitted when absent (backward compatible). + + 2. **AC-2: `spec.yaml` at run root.** Normalized model YAML with topology semantics rewritten to `file://` URIs pointing to series CSV paths. This is what `StateQueryService` reads to resolve topology node bindings. + + 3. **AC-3: Series ID naming convention.** Series files use the format `{nodeId}@{COMPONENT_ID}@{CLASS_ID}.csv`: + - Default (no class): `{nodeId}@{COMPONENT_ID}@DEFAULT.csv` + - Per-class: `{nodeId}@{COMPONENT_ID}@{classId}.csv` + - Edge: `edge_{edgeId}_{metric}@{COMPONENT_ID}@{classId}.csv` + - Component IDs follow C# conventions: `ARRIVALS`, `SERVED`, `QUEUE`, `ERRORS`, etc. + + 4. **AC-4: Full `series/index.json` schema.** Each series entry includes: + - `id`, `kind` (flow/stock/ratio/time), `path`, `unit`, `componentId`, `class`, `classKind` (fallback/specific), `points`, `hash` + - `formats` section with aggregates table reference + - `classes` array with declared class definitions + - `classCoverage` field (full/partial/missing) + + 5. **AC-5: Full `run.json` schema.** Includes: + - `schemaVersion`, `runId`, `engineVersion`, `source`, `inputHash` + - `grid` (bins, binSize, binUnit, timezone, align) + - `scenarioHash`, `modelHash` + - `classesCoverage` + - `warnings` array (nodeId, code, message, severity, bins) + - `series` array (id, path, unit) + - `classes` array (id, displayName, description) + + 6. 
**AC-6: Full `manifest.json` schema.** Extends existing to include: + - `rng` section (kind, seed) + - `provenance` section (hasProvenance, modelId, templateId, inputHash) + - `classes` array + - `seriesHashes` (per-series SHA256) + - `createdUtc` timestamp + + 7. **AC-7: `aggregates/` directory.** Created as a placeholder (empty directory). Matches C# behavior. + + 8. **AC-8: Deterministic run ID.** When deterministic mode is requested, run ID is derived from `sha256(normalized_spec + seed + bias)` truncated to 16 hex chars. Matches C# `DeterministicRunNaming`. + + 9. **AC-9: StateQueryService integration test.** A C# integration test that: + - Evaluates a class-enabled model through the Rust engine + sink + - Loads the produced run directory via `StateQueryService.LoadContextAsync` + - Verifies: topology resolved, per-class series loadable, provenance hash valid, warnings present + - This is the definitive proof that the Rust sink is compatible. + + 10. **AC-10: Parity with C# artifact layout.** For a reference model, produce artifacts from both C# `RunArtifactWriter` and Rust sink. Compare directory structures and file contents. Document any intentional differences. 
+ + ## Out of Scope + + - Replacing `RunArtifactWriter` callers (that's wiring work for when the switch happens) + - Parquet aggregates (placeholder only — future work) + - Telemetry bundle building (stays in C# `TelemetryBundleBuilder`) + - Template orchestration (stays in C# `RunOrchestrationService`) + - Storage backend abstraction (filesystem only — S3/database is future) + + ## Key References + + - `src/FlowTime.Core/Artifacts/RunArtifactWriter.cs` — C# reference (1,287 lines) + - `src/FlowTime.API/Services/StateQueryService.cs` — reads artifacts back (5,195 lines) + - `src/FlowTime.Core/Artifacts/DeterministicRunNaming.cs` — run ID generation + - `engine/core/src/writer.rs` — current minimal Rust writer + - D-2026-04-10-031 — three-layer architecture (engine core / artifact sink / consumer adapters) + - D-2026-04-10-030 — provenance strategy + - kind: milestone + id: M-038 + frontmatter: + title: Workbench Foundation + status: done + parent: E-21 + body: | + ## Goal + + Establish the compact design system, implement dag-map click/hover events in the library, and build the workbench panel with click-to-pin node inspection — the foundation that every subsequent E-21 milestone builds on. + + ## Context + + The Svelte UI has topology rendering (dag-map with heatmap mode), timeline scrubbing, run orchestration, and what-if parameter manipulation. But there is no way to click a node and inspect it. The layout uses shadcn-svelte's consumer-product defaults (generous padding, large text, wide sidebar) which waste space in a data-dense workbench. + + This milestone delivers three things in sequence: + 1. Compact design tokens that replace the spacious defaults + 2. dag-map library events so nodes/edges are clickable + 3. 
Workbench panel where clicked nodes show metrics and sparklines + + ### Prior art + + - dag-map already emits `data-node-id` and `data-edge-from`/`data-edge-to` attributes on SVG elements (`lib/dag-map/src/render.js`) + - Topology page (`ui/src/routes/time-travel/topology/+page.svelte`) has timeline scrubbing and state API integration + - What-if page (`ui/src/routes/what-if/+page.svelte`) has real-time metric display via WebSocket + - `DagMapView.svelte` wraps dag-map with theme switching and metric mapping + + ## Acceptance Criteria + + ### Density system (AC1-AC5) + + 1. **Main content area padding reduced.** The root layout no longer applies blanket `p-6`. Page-level padding is context-dependent: topology/workbench pages use minimal padding (`p-1` or `p-2`), form/config pages may use moderate padding. + + 2. **Sidebar narrowed.** Expanded sidebar width ≤ 208px (from 280px). Collapsed width ≤ 40px. All nav items still readable and clickable. + + 3. **Compact design tokens defined in `app.css`.** Two token layers: + - **Chrome tokens:** `--ft-bg`, `--ft-bg-elevated`, `--ft-border`, `--ft-text`, `--ft-text-muted`, `--ft-text-emphasis`. Calm values. Dark mode: near-black backgrounds (`hsl(220 10% 4%)`-range), subtle borders, muted gray text. Light mode: warm light backgrounds, subtle borders, dark text. + - **Data-viz tokens:** `--ft-viz-teal`, `--ft-viz-pink`, `--ft-viz-coral`, `--ft-viz-blue`, `--ft-viz-green`, `--ft-viz-amber` plus sequential/diverging scale entry points. Vivid against both dark and light backgrounds. + - **Spacing tokens:** `--ft-space-xs` (2px), `--ft-space-sm` (4px), `--ft-space-md` (6px), `--ft-space-lg` (8px), `--ft-space-xl` (12px). Tighter than the current 4/8/12/16/24px scale. + - **Border radius:** `--ft-radius` at `0.25rem` or less (from `0.5rem`). + - **Type:** working text size is `text-xs` (12px). Emphasis is `text-sm` (14px). Headers use `text-sm font-semibold` or `text-base`. + + 4. 
**shadcn component overrides applied.** Cards, buttons, inputs, and sidebar components use the compact tokens. No component uses raw `p-4`, `p-6`, `gap-4` etc. — spacing comes from the token scale. + + 5. **Existing pages still function.** What-if page, run orchestration, topology page, health page all render correctly with the new density. Visual audit confirms no layout breakage. Vitest and Playwright suites still pass. + + ### dag-map library events (AC6-AC8) + + 6. **`bindEvents()` exported from dag-map.** Given an SVG container element, `bindEvents(container, callbacks)` uses event delegation to fire: + - `onNodeClick(nodeId, event)` — click on any `[data-node-id]` element + - `onNodeHover(nodeId | null, event)` — mouseenter/mouseleave on node elements + - `onEdgeClick(fromId, toId, event)` — click on any `[data-edge-from]` element + - `onEdgeHover(fromId, toId | null, event)` — mouseenter/mouseleave on edge elements + - Returns a cleanup function that removes all listeners. + - Edge hit areas: edge paths are thin lines. `bindEvents` should set `pointer-events: stroke` and use a wider invisible stroke or a transparent hit-area overlay (≥ 8px clickable width) so edges are practically clickable. + + 7. **`selected` render option in dag-map.** `renderSVG(dag, layout, { ..., selected: Set })` draws a selection indicator (ring, outline, or highlight) on nodes whose ID is in the set. The selection visual must compose correctly with heatmap mode (heatmap fills + selection ring, not one replacing the other). + + 8. **dag-map tests cover events and selection.** Unit tests (dag-map's existing test infrastructure) verify: `bindEvents` fires correct callbacks for node/edge clicks and hovers; `selected` set renders the selection indicator; selection composes with heatmap mode. dag-map version bumped and published (or linked via workspace protocol). + + ### Workbench panel (AC9-AC14) + + 9. 
**Topology page restructured as split layout.** The topology page shows the DAG in the upper area and the workbench panel in the lower area, separated by a resizable split (drag to resize, reasonable default like 60/40 or 65/35). When no nodes are pinned, the workbench shows a minimal empty state hint ("Click a node to inspect"). + + 10. **Click-to-pin interaction.** Clicking a node in the topology DAG pins it to the workbench. The node appears with a selection indicator in the DAG (via `selected` set) and a card in the workbench. Clicking a pinned node again unpins it (removes card, removes selection indicator). Multiple nodes can be pinned simultaneously. + + 11. **Node card content.** Each workbench card shows: + - Node ID and kind (service, queue, dlq, source, router, etc.) + - Key metrics at the current timeline bin: utilization, queue depth, arrivals, served, errors, capacity — as available from the state API response + - Sparkline showing the selected metric over the full time window (all bins) + - Values formatted with appropriate precision (see `format.ts` utilities) + - Compact layout using the density tokens — the card should fit meaningful content in ~180-220px width + + 12. **Timeline integration.** When the timeline scrubs (bin changes), all workbench card metric values update to the new bin. Sparklines show a position indicator (vertical line or dot) at the current bin. + + 13. **Cards dismissible.** Each card has a small close/unpin control. Dismissing a card removes the node from the `selected` set and the workbench. + + 14. **Auto-pin highest-utilization node on first load.** When a run loads and state data is available, the node with the highest utilization at bin 0 is auto-pinned to the workbench so it is never empty on first view. If utilization data is unavailable, skip auto-pin (empty state is acceptable). + + ### Cross-cutting (AC15-AC16) + + 15. 
**Playwright test coverage.** At least one Playwright spec covering: (a) topology loads and renders, (b) clicking a node opens a workbench card, (c) clicking the close control removes the card, (d) scrubbing the timeline updates card values. Specs skip gracefully if the API or dev server is unavailable. + + 16. **Vitest coverage for new pure logic.** Any new helper functions (metric extraction, card data shaping, sparkline data preparation) have vitest tests with branch coverage. + + ## Technical Notes + + ### Density system approach + + - Introduce the `--ft-*` custom properties alongside the existing shadcn `--background`, `--foreground` etc. variables. The shadcn variables can initially alias the `--ft-*` tokens, keeping component library compatibility while allowing the token layer to diverge. + - The dark mode near-black should feel like the `.scratch/colors.png` reference: `hsl(220 10% 4%)` background, `hsl(220 8% 8%)` elevated, `hsl(220 6% 14%)` border. Text at `hsl(220 10% 65%)` for muted, `hsl(220 5% 85%)` for default. + - Light mode: `hsl(220 10% 97%)` background, `hsl(0 0% 100%)` elevated, `hsl(220 10% 88%)` border. Text at `hsl(220 10% 40%)` for muted, `hsl(220 10% 15%)` for default. + - Data-viz colors from `reference-palette.png` (epic folder) hue families: teal `#94E2D5`/`#2B8A8E`, pink `#F38BA8`/`#C45B4A`, coral `#EB6F92`, blue `#89B4FA`/`#3D5BA9`, green `#A6E3A1`/`#4A8C5C`, amber `#F9E2AF`/`#D4944C`. The dark-mode values are lighter/more vivid; light-mode values are darker/more saturated to maintain contrast. + + ### dag-map events + + - dag-map already emits `data-node-id` on station `<g>` groups and `data-edge-from`/`data-edge-to` on edge `<path>` elements. `bindEvents()` delegates from the SVG container using these attributes. + - For edge hit areas: add an invisible wider stroke path (same shape, stroke-width 8-12px, `opacity: 0`, `pointer-events: stroke`) behind each visible edge path. 
This is a render-time addition in `render.js`, not a separate overlay. + - The `selected` visual should be a ring or glow around the node circle/rect, using a dedicated CSS class (`dag-map-selected`) so consumers can override the style. Default: 2px outline in the theme's `ink` color, offset by 2px. + + ### Workbench panel + + - Implement the split as a CSS grid with `grid-template-rows` and a draggable splitter. No library dependency — a simple `mousedown` → `mousemove` → `mouseup` handler on a narrow divider element. Store the split ratio in `localStorage`. + - Node cards are a new Svelte component (`WorkbenchCard.svelte`). They consume the state API response that the topology page already fetches. + - Sparkline: reuse the existing `Sparkline.svelte` component. It already exists in `ui/src/lib/components/`. + - The workbench state (pinned node IDs) lives in a Svelte store so it persists across route navigations within the same session. Not persisted to `localStorage` (ephemeral per session). + + ### What-if page audit + + - The what-if page (`/what-if`) uses its own layout with dag-map and parameter panels. It does NOT use the workbench. The density pass adjusts its spacing tokens but does not add workbench functionality to it. The what-if page is a standalone surface that predates the workbench paradigm. 
+ + ## Out of Scope + + - Edge cards (m-E21-02) + - Metric selector chip bar (m-E21-02) + - Class filter (m-E21-02) + - Analysis tab surfaces (m-E21-03/04) + - Heatmap view (m-E21-05) + - Validation/warning surfaces (m-E21-06) + - Final visual polish and dark mode QA (m-E21-07) + - Color palette iteration beyond the initial token values (user will bring examples for future iteration) + - dag-map layout engine changes (separate concern) + - Expert authoring surface + + ## Dependencies + + - dag-map library (`lib/dag-map/`) — we own it; changes ship in this milestone + - E-18 Time Machine APIs available on port 8081 (already merged to main) + - E-17 what-if infrastructure (`/what-if` route, engine session API) — must not regress + - E-11 M6 run orchestration (`/run` route) — must not regress + - kind: milestone + id: M-039 + frontmatter: + title: Metric Selector & Edge Cards + status: done + parent: E-21 + body: | + ## Goal + + Complete the workbench as a general inspection tool by adding a metric selector chip bar (choose which metric colors the topology), edge click-to-pin with edge cards, and a class filter dropdown. + + ## Context + + m-E21-01 delivered the workbench foundation: density system, dag-map click/hover events, and node cards with utilization-based heatmap coloring. But the topology only colors by utilization (hardcoded), there's no way to inspect edges, and class filtering doesn't exist. + + This milestone adds the remaining "what am I looking at?" controls that the Blazor feature bar provided (15+ toggles) — but in the simplified workbench paradigm: one metric selector, one class filter, and edge inspection via pinning. + + ## Acceptance Criteria + + ### Metric selector (AC1-AC3) + + 1. **Chip bar renders below the toolbar.** A horizontal row of metric chips: Utilization, Queue Depth, Arrivals, Served, Errors, Flow Latency. One active at a time (radio behavior). Default: Utilization. + + 2. 
**Selecting a metric changes the topology heatmap coloring.** Each chip maps to a specific field in the state API response (`derived.utilization`, `metrics.queueDepth`, `metrics.arrivals`, `metrics.served`, `metrics.errors`, `derived.flowLatencyMs`). Selecting a chip re-extracts metrics from the current state data and passes them to DagMapView. Node metric labels update accordingly (e.g., "85%" for utilization, "14.5" for queue depth). + + 3. **Workbench card sparklines reflect the selected metric.** When the selected metric changes, the sparkline in each pinned workbench card updates to show that metric's values over the full time window (requires fetching state window data or caching per-bin values). + + ### Edge cards (AC4-AC6) + + 4. **Clicking an edge in the topology pins it to the workbench.** Uses the `bindEvents` `onEdgeClick` callback. Pinned edges appear as cards in the workbench alongside node cards. Clicking a pinned edge again unpins it. + + 5. **Edge card content.** Each edge workbench card shows: + - Source and target node IDs + - Flow volume at current bin (from state API edge data if available, or from node-level served/arrivals) + - Sparkline of flow volume over time (if data available) + - Compact layout matching node cards + + 6. **Edge selection indicator in DAG.** Pinned edges get a visual highlight in the topology (e.g., brighter color, thicker stroke, or glow). This uses a CSS class approach since dag-map doesn't have a `selectedEdges` option — the Svelte wrapper applies the class after render. + + ### Class filter (AC7-AC8) + + 7. **Class filter dropdown appears when classes exist.** If the current run has per-class data (any node's state includes `byClass` entries), a dropdown/chip filter appears in the toolbar area. Lists all class IDs found in the data. Multi-select: toggle individual classes on/off. + + 8. 
**Class filter controls topology visibility.** When classes are filtered, the topology heatmap shows metrics for only the selected classes (using `byClass[classId]` data instead of aggregate). If no class filter is active, show aggregate (default behavior). + + ### Cross-cutting (AC9-AC10) + + 9. **Vitest coverage for new helpers.** Metric extraction by selected metric, edge data extraction, class discovery from state data — all have vitest tests. + + 10. **Existing Playwright specs still pass.** The m-E21-01 workbench specs and E-17 what-if specs continue to work. + + ## Technical Notes + + - The metric selector state lives in the workbench store (or a co-located topology store) — persists across bin scrubs but resets on run change. + - For sparkline data across all bins: the simplest approach is to cache metric values per node as each bin loads. When the user scrubs, accumulate values. Full-window sparklines need either a state_window API call or progressive accumulation. Start with progressive (show what's been visited), upgrade to full-window fetch if it feels incomplete. + - Edge data in the FlowTime state API: check if `/v1/runs/{id}/state?bin=N` returns edge-level metrics. If not, edge cards show source→target label only with node-level served/arrivals as proxy metrics. + - Class discovery: scan `stateNodes[].byClass` keys across all nodes to build the class list. + + ## Out of Scope + + - Analysis tab surfaces (m-E21-03/04) + - Heatmap view (m-E21-05) + - Validation surface (m-E21-06) + - New dag-map layout changes + - Edge metric labels on the DAG itself (edges show color only, detail in workbench) + + ## Dependencies + + - m-E21-01 (complete) — workbench foundation, dag-map events, density system + - FlowTime API state endpoint — already available on port 8081 + + ## Coverage Notes + + Branches that are defensive or reachable only under rare runtime conditions; explicitly noted here per the branch-coverage rule. 
+ + ### Pure logic (unit-tested) + + - `metric-defs.ts` — every reachable branch of `extractMetricValue`, `extractMetricValueFiltered`, `buildMetricMapForDef`, `buildMetricMapForDefFiltered`, `buildSparklineSeries`, `extractSeriesValues`, `discoverClasses`, `computeTicks`, `computePointerPct` has at least one explicit vitest test. Includes non-object intermediates (string, number), path-prefix stripping variants, class-filter empty/present/mixed-finite, array-length caps, non-array values, null/NaN/Infinity rejection, and empty-result short-circuits. + - `workbench-metrics.ts` — `extractNodeMetrics`, `extractEdgeMetrics`, `findHighestUtilizationNode` — every null/undefined/non-finite/negative-errors/zero-failures branch covered. + - `sparkline-path.ts` — NaN/Infinity gap handling covered for start/mid/end positions; all-NaN and empty cases short-circuit tested. + - `workbench.svelte.ts` — full state machine tested in `workbench.svelte.test.ts` (pin/unpin/toggle idempotency, edge direction, metric reset on clear). + + ### Component rendering (Playwright-covered) + + - `metric-selector.svelte` — active vs inactive chip styling: covered by the "metric selector changes topology coloring" Playwright test. + - `workbench-card.svelte` + `workbench-edge-card.svelte` — header/metrics/sparkline conditionals: covered by the click-to-pin Playwright tests. + - `timeline-scrubber.svelte` — pointer rendering, controls rendering, drag state: covered by the topology-loads Playwright test (controls present in topology mode) plus the what-if page integration (pointer-hidden + no-controls mode, exercised by the existing what-if Playwright spec). + + ### Defensive / prop-default branches (documented, not explicitly tested) + + - `workbench-card.svelte` and `workbench-edge-card.svelte`: the `onClose === undefined` path (renders no close button). 
In production the topology page always supplies `onClose`; the optional prop exists so the component can be embedded in a read-only context later. Deleting the prop would require a contract change; keeping it is minimal-cost defense. + - `workbench-card.svelte`: the `currentBin === undefined` path (no vertical indicator on sparkline). Auto-pin always supplies `currentBin`; the branch exists because the prop is optional. + - `timeline-scrubber.svelte`: `binCount <= 1` disables the track. Every caller today only renders the component when `binCount > 0` and typically `> 1`; the tick helper still handles `binCount <= 1` correctly (tested in `computeTicks`). + + These branches are intentionally defensive — removing them would force callers to guard, which is worse. They are noted here rather than tested via component-driver scaffolding since the logic they guard is trivial (render vs. don't render) and the prop interface is stable. + - kind: milestone + id: M-040 + frontmatter: + title: Sweep & Sensitivity Surfaces + status: done + parent: E-21 + body: | + ## Goal + + Deliver the first two Time Machine analysis surfaces in Svelte: parameter sweep and sensitivity analysis. These are new capabilities that Blazor never had — the headline proof that the fork delivers value. + + ## Context + + E-18 shipped `POST /v1/sweep` and `POST /v1/sensitivity` against the Rust session engine. Until now there is no UI for either. This milestone introduces a new `/analysis` route with tabbed surfaces that let an expert: + + 1. Pick a run → parse its model for sweepable parameters (const nodes) + 2. Run a sweep over a chosen parameter with a range of values → see result series per point + 3. 
Run sensitivity analysis across multiple parameters against a target metric → see ranked gradients + + The workbench paradigm established in m-E21-01/02 sets the conventions: compact density, calm chrome with vivid data-viz colors, semantic tokens, `--ft-viz-*` palette, the shared `TimelineScrubber` and `Chart` components. + + ### API contracts (confirmed via code) + + **POST /v1/sweep** + ```json + Request: { "yaml": "...", "paramId": "arrivals", "values": [10, 15, 20], "captureSeriesIds": ["served"] } + Response: { "paramId": "arrivals", "points": [ { "paramValue": 10, "series": { "served": [8, 8, 8, 8] } }, ... ] } + ``` + + **POST /v1/sensitivity** + ```json + Request: { "yaml": "...", "paramIds": ["arrivals", "capacity"], "metricSeriesId": "queue.queueTimeMs", "perturbation": 0.05 } + Response: { "metricSeriesId": "queue.queueTimeMs", "points": [ { "paramId": "capacity", "baseValue": 50, "gradient": -2.35 }, ... ] } + ``` + + **Parameter discovery**: clients parse the model YAML and collect nodes with `kind: const`. Their `id` is the parameter name; `values[0]` is a reasonable baseline. + + ## Acceptance Criteria + + ### Analysis route shell (AC1-AC3) + + 1. **New `/analysis` route.** SvelteKit page at `ui/src/routes/analysis/+page.svelte`. Accessible from the sidebar under Tools. Compact layout consistent with the workbench paradigm. + + 2. **Run picker.** Dropdown at the top of `/analysis` to select a run. Defaults to the most recent run (same pattern as `/time-travel/topology`). Loading the model YAML for the selected run populates a param list. + + 3. **Tab bar.** Four tabs: Sweep, Sensitivity, Goal Seek, Optimize. Only Sweep and Sensitivity are wired in this milestone; Goal Seek and Optimize render placeholder "coming in m-E21-04" content. Tab state preserved in `localStorage` or URL query. + + ### Sweep surface (AC4-AC7) + + 4. **Parameter selector.** Dropdown listing the run's const-node parameters discovered from the model YAML. 
Each option shows the parameter id and its baseline value. Empty state when no const nodes exist. + + 5. **Value range inputs.** Three inputs (from, to, step) compute the sweep values. A text input for "or custom (comma-separated)" supersedes from/to/step when non-empty. A live preview shows the final value list and count; flag runs > 50 points with an inline warning (soft cap, still runnable). + + 6. **Run sweep and render results.** A "Run sweep" button calls `POST /v1/sweep`. While running, show a spinner. On result, render: + - A line chart: x = param value, y = selected output series aggregate (mean per point) — picked via a series selector populated from response keys. + - A per-point table: param value column + one column per captured series showing aggregate (mean) with a compact sparkline of that series across bins. + - Reasonable handling for errors (API 400/503) with inline error messages. + + 7. **Captured series filter.** Optional multi-select chip bar listing common series (`arrivals`, `served`, `errors`, `queue`, `utilization`, `flowLatencyMs`). Empty selection = capture all. Sends `captureSeriesIds` in the request. + + ### Sensitivity surface (AC8-AC10) + + 8. **Param multi-select.** Chip-bar of all discovered const params. Clicking toggles selection. Defaults to all selected. + + 9. **Target metric picker + perturbation.** A text input for the target series id (common ones offered as chips: `served`, `queue`, `flowLatencyMs`, `utilization`). A slider for perturbation (default 0.05, range 0.01–0.30). + + 10. **Run sensitivity and render results.** A "Run sensitivity" button calls `POST /v1/sensitivity`. On result, render a horizontal bar chart sorted by |gradient| descending, colored by sign (positive/negative), with numeric gradient labels and the base value shown per row. Empty/error states handled. + + ### Cross-cutting (AC11-AC12) + + 11. 
**Vitest coverage for pure logic.** New helpers (param discovery from YAML, sweep value range generator, aggregate/mean computation) have vitest tests with explicit branch coverage including error paths. + + 12. **Playwright coverage.** New spec `svelte-analysis.spec.ts`: page loads, sweep can be configured and run against a real run, sensitivity can be configured and run. Graceful skip when infra is down. + + ## Technical Notes + + - **YAML parsing in browser**: `js-yaml` is already a transitive dep via other libs, but we should explicitly add it. Alternative: the API could provide a `/v1/runs/{id}/params` endpoint that returns const-node ids + baselines. For this milestone, browser-parse with `js-yaml`; if it proves fragile, promote to a server endpoint in a later milestone. + - **Chart reuse**: existing `Chart.svelte` handles multi-series line data. Sweep result chart passes `{name: paramValue, values: [aggregate]}` per captured series — a new shape. Consider a dedicated `ParamSweepChart` wrapper that transposes sweep results into Chart's format. + - **Bar chart**: no current component. Build a simple horizontal-bar SVG in `SensitivityBarChart.svelte` — pure SVG with the viz palette (coral for negative, teal for positive gradients). + - **Analysis state**: small store `analysis.svelte.ts` to hold current run YAML, last sweep/sensitivity results, selected tab. Session-ephemeral. + - **Loading state**: use existing `Loader2` icon from lucide; debounce run-button clicks. + + ## Out of Scope + + - Goal Seek + Optimize surfaces (m-E21-04) + - Server-side parameter discovery endpoint + - Saving/re-running past analyses (history panel) + - Exporting sweep/sensitivity results + - Constraints on optimization (already out of scope globally per gaps.md) + + ## Dependencies + + - m-E21-01/02 (complete) — workbench paradigm, chart component, density tokens + - `POST /v1/sweep`, `POST /v1/sensitivity` — available on port 8081 + + ## Coverage Notes + + (Filled at wrap time.) 
+ - kind: milestone + id: M-041 + frontmatter: + title: Goal Seek Surface + status: done + parent: E-21 + body: | + **Started:** 2026-04-21 + **Completed:** 2026-04-22 + + ## Goal + + Wire the `/analysis` Goal Seek tab to live Time Machine so single-parameter target-seeking is usable from the Svelte workbench, and extend `/v1/goal-seek` (plus its sibling `/v1/optimize`) with the per-iteration `trace` they already compute internally but currently discard. Ship the shared convergence chart + analysis result card components here so the subsequent Optimize milestone (m-E21-05) consumes them directly. + + ## Context + + m-E21-03 shipped the `/analysis` route shell with a four-tab bar (Sweep, Sensitivity, Goal Seek, Optimize). Goal Seek and Optimize currently render `coming in m-E21-04` placeholders. This milestone activates the Goal Seek tab and lands the two shared visualization components; the Optimize tab stays placeholder (pointing at m-E21-05) until the follow-up milestone. + + The backend `trace` extension covers **both** `/v1/goal-seek` and `/v1/optimize` in one change because they ship under one decision (D-2026-04-21-034). That work landed early in this milestone (commit `29ac3e9`); the optimize trace is ready for m-E21-05 to consume without further backend work. 
+ + Shared infrastructure already in place from m-E21-03: + + - `ui/src/routes/analysis/+page.svelte` — run/sample picker, scenario card, tab bar, active-tab persistence + - `ui/src/lib/utils/analysis-helpers.ts` — `discoverConstParams`, `ConstParam` type, numeric helpers + - `ui/src/lib/api/flowtime.ts` — `flowtime.sweep(...)`, `flowtime.sensitivity(...)` methods + - `GET /v1/runs/{runId}/model` — read-only model fetch (D-2026-04-17-033) + - Density tokens, `--ft-viz-*` palette, `Loader2` spinner pattern, inline-error pattern + - `sensitivity-bar-geometry.ts` — template for pure-SVG geometry helpers with vitest coverage + + ### Scope split note + + This milestone was originally drafted as `m-E21-04-goal-seek-optimize` covering both Goal Seek and Optimize tabs. It was split on 2026-04-21 after Phase 1 backend landed: Goal Seek remains here; Optimize moved to a new **m-E21-05 Optimize Surface**; heatmap / validation / polish renumbered to 06 / 07 / 08. Rationale: 16 ACs across backend + shared components + two surfaces was too large, and "Phase 1 / Phase 2" sub-phasing in the tracking doc was the smell. The backend trace change on `/v1/optimize` that landed here is kept; m-E21-05 consumes it. + + ### API contracts — current and extended + + The existing endpoints already compute per-iteration state but discard it before returning. This milestone extends both response shapes with an additive `trace` field (see Decision Record below). Requests are unchanged. `POST /v1/sweep` and `POST /v1/sensitivity` are untouched. 
+ + **POST /v1/goal-seek** — `src/FlowTime.API/Endpoints/GoalSeekEndpoints.cs` + + Request (unchanged): + ```json + { + "yaml": "...", + "paramId": "capacity", + "metricSeriesId": "derived.utilization", + "target": 0.8, + "searchLo": 10, + "searchHi": 100, + "tolerance": 1e-6, + "maxIterations": 50 + } + ``` + + Response (extended): + ```json + { + "paramValue": 42.187, + "achievedMetricMean": 0.7999, + "converged": true, + "iterations": 12, + "trace": [ + { "iteration": 0, "paramValue": 10, "metricMean": 0.42, "searchLo": 10, "searchHi": 100 }, + { "iteration": 0, "paramValue": 100, "metricMean": 0.95, "searchLo": 10, "searchHi": 100 }, + { "iteration": 1, "paramValue": 55, "metricMean": 0.88, "searchLo": 10, "searchHi": 55 }, + { "iteration": 2, "paramValue": 32.5,"metricMean": 0.72, "searchLo": 32.5, "searchHi": 55 } + /* ... */ + ] + } + ``` + + Trace semantics: + - Two `iteration: 0` entries for the initial boundary evaluations (`searchLo`, `searchHi`), in that order. + - One entry per bisection step with `iteration: 1..N`, where the recorded `paramValue` is the midpoint evaluated at that step and `searchLo` / `searchHi` are the **post-step** bracket (after narrowing). + - `metricMean` is the unsigned mean at that `paramValue` — same value that drives the bisection decision. + - When the target is already hit at a boundary (converged in 0 iterations), the trace contains only the two boundary entries. When the target is not bracketed, the trace contains only the two boundary entries and the response reports `converged: false`, `iterations: 0`. + + 400 / 503 behaviour unchanged. + + **POST /v1/optimize** — `src/FlowTime.API/Endpoints/OptimizeEndpoints.cs` + + The `trace` extension on `/v1/optimize` is owned by this milestone's backend AC (AC2) since it shares D-2026-04-21-034 with goal-seek. The surface that consumes it — the Optimize tab — lives in m-E21-05. 
**Full request/response shape is owned by m-E21-05's spec** (`m-E21-05-optimize.md` → API contract section); do not duplicate it here. AC2's tests lock these trace invariants: + + - One entry per iteration (one post-sort entry before the main loop as `iteration: 0`, plus one per main-loop iteration after its post-iteration sort). + - `paramValues` is the current best vertex (`simplex[0]`), `metricMean` is its **unsigned** mean — the internal minimize-sign flip is reversed at record time for maximize runs. + - Trace length equals `iterations + 1` on every return path (pre-loop converged, main-loop converged, max-iterations exhausted). + - 0-iteration convergence yields a single `iteration: 0` entry. The per-evaluation probe log (reflection / expansion / contraction / shrink intermediate vertices) is intentionally not exposed. + + ### Decision Record + + **D-2026-04-21-034 — Additive `trace` field on `/v1/goal-seek` and `/v1/optimize`** — appended to `work/decisions.md` at start-milestone time (commit `5988f5c`). Scope covers both endpoints; implementation landed in commit `29ac3e9` of this milestone. No rewording needed for the split. + + ## Acceptance Criteria + + ### Backend — trace extension (AC1-AC3) + + 1. **Goal-seek trace plumbed end-to-end.** `GoalSeeker.SeekAsync` records the two boundary evaluations and each bisection midpoint with the post-step bracket. `GoalSeekResult` gains a `Trace` property (`IReadOnlyList`). `GoalSeekEndpoints` passes the trace through to the response. All five return paths (`Converged` at `searchLo`, `Converged` at `searchHi`, not-bracketed, tolerance hit mid-loop, max-iterations exhausted) return a trace whose shape matches the semantics above. Existing `GoalSeekEndpointsTests.cs` gains coverage for trace shape + ordering + post-step bracket invariants on each return path. + + 2. 
**Optimize trace plumbed end-to-end.** `Optimizer.OptimizeAsync` records the post-sort best vertex once before the main loop (as `iteration: 0`) and once per iteration thereafter. `OptimizeResult` gains a `Trace` property (`IReadOnlyList`). `OptimizeEndpoints` passes the trace through. Maximize runs report unsigned `metricMean` on trace entries (sign reversed internally). Existing `OptimizeEndpointsTests.cs` gains coverage for trace shape + ordering + unsigned-metric invariant on both objectives + trace-length / iterations consistency. _The consuming Optimize surface is delivered in m-E21-05._ + + 3. **`D-2026-04-21-034` appended to `work/decisions.md` at start-milestone time.** Body matches the draft in Context. E-21 epic spec Scope / Constraints updated in the same commit to reference the new decision alongside D-2026-04-17-033 (read-only run-adjacent) and to list the additive compute-response change as the other explicit carve-out. + + ### UI — Tab activation + shared components (AC4-AC5) + + 4. **Goal Seek placeholder replaced.** The `goal-seek` tab panel in `ui/src/routes/analysis/+page.svelte` renders live content (not the `coming in m-E21-04` stub). The `optimize` tab panel keeps its placeholder copy updated to reference **m-E21-05**. `TAB_INFO` copy for Goal Seek stands as-is — "convergence info" now accurately describes what the UI renders. + + 5. **Shared result card + shared convergence chart extracted up front.** `ui/src/lib/components/analysis-result-card.svelte` and `ui/src/lib/components/convergence-chart.svelte` land as reusable components in this milestone so m-E21-05's Optimize surface can consume them without further extraction work. 
Required behaviours (exact prop/slot names are an implementation decision): + - **Result card** — accepts a distinct header region, a primary-value region (large monospace), and a meta region for compact key-value pairs (iterations / converged badge / tolerance / direction / target / residual as applicable per surface). + - **Convergence chart** — consumes a **normalized** input shape `Array<{ iteration: number; metricMean: number }>`; each caller adapts its response into that shape before passing it in. The chart does not branch on surface type. Goal Seek's bracket and (future) Optimize's `paramValues` are rendered elsewhere (interval bar, per-param table) and do not enter the chart. Required behaviours: optional horizontal reference line when a target is supplied (dashed); caller-supplied y-axis label; line colour reflects converged state (teal when converged, amber when not); the converged/final point is visually emphasized (e.g. a larger marker) relative to intermediate points. + - Geometry lives in pure `.ts` siblings with vitest coverage, mirroring `sensitivity-bar-geometry`. + + ### UI — Goal Seek surface (AC6-AC9) + + 6. **Parameter selector.** Single-select dropdown listing the current model's const-node parameters (reuses `discoverConstParams`). Each option shows `{id} (base {baseline})` — same format as the Sweep tab. Empty state when no const params exist (same copy as Sweep). + + 7. **Search interval + target + advanced inputs.** Two numeric inputs `searchLo` and `searchHi` with inline validation (both required, `searchLo < searchHi`, defaults `0.5 × baseline` / `2 × baseline` of the selected parameter). Free-text input for `metricSeriesId` with the same chip shortcuts as Sensitivity (`served`, `queue`, `flowLatencyMs`, `utilization`). Numeric input for `target`. A collapsed "Advanced" disclosure exposes `tolerance` (default 1e-6) and `maxIterations` (default 50). All required fields must be valid before the Run button enables. + + 8. 
**Run goal-seek and render results.** "Run goal seek" button calls `flowtime.goalSeek(...)` (new API method, response type includes `trace`). While running, show a spinner (`Loader2Icon`) and disable the button. On success, render: + - The shared result card (AC5) with the final `paramValue`, `achievedMetricMean`, `target`, `|achieved − target|` residual, converged badge, and iteration count. + - The shared convergence chart (AC5) plotting `metricMean` vs `iteration` as a line, with a horizontal reference line at `target`. Boundary evaluations (`iteration: 0`) are plotted as two initial points on the x-axis at position 0. The converged/final point is visually emphasized per AC5. + - A **search-interval bar** (SVG) showing the original `[searchLo, searchHi]` range with a marker at the final `paramValue`, using `intervalMarkerGeometry` from `interval-bar-geometry.ts`. This is the Goal Seek consumer that justifies landing that geometry file in this milestone; Optimize reuses it for per-param mini bars in m-E21-05. + - 400 and 503 errors surfaced as inline messages using the existing analysis-page error pattern. + + 9. **Not-bracketed and not-converged states.** When the API returns `converged: false` with `iterations: 0` (target not bracketed), the result card shows an amber warning explaining that the target was not reachable within the search interval and suggests widening the bounds. The convergence chart still renders the two boundary evaluations. When `converged: false` with `iterations == maxIterations`, the card shows an amber "did not converge" badge and the chart is drawn over the full trace. + + ### Cross-cutting (AC10-AC12) + + 10. **Session form state — goal-seek.** The Goal Seek form retains its last input values across tab switches within the same page session (in-memory is sufficient). Form values reset when the scenario (run / sample model) changes. Mirrors the Sweep tab behaviour. _Optimize session state lives in m-E21-05._ + + 11. 
**Vitest coverage for pure logic.** New helpers added to `ui/src/lib/utils/analysis-helpers.ts` (or a sibling `goal-seek-helpers.ts` if the file grows unwieldy) have vitest tests with branch coverage: + - `defaultSearchBounds(baseline)` — `0.5 × baseline` / `2 × baseline`; guards for `baseline === 0`, negative baselines, non-finite inputs. + - `validateSearchInterval({lo, hi})` — structured error for missing / non-finite / `lo >= hi`. + - `intervalMarkerGeometry({ lo, hi, value, width })` — clamping when `value ∉ [lo, hi]`, degenerate `hi === lo`, non-finite inputs. _(Shared with Optimize's per-param range bars in m-E21-05.)_ + - `convergence-chart-geometry.ts` — operates on the **normalized** `Array<{ iteration, metricMean }>` shape defined in AC5. `convergencePath({ trace, width, height, padding, yDomain })` with tests for empty trace, single-point trace, trace with multiple entries at the same `iteration` (goal-seek boundary case: two points at `iteration: 0`), monotonic vs non-monotonic traces, flat metric (all equal), non-finite values, y-domain override vs auto-fit, target-line y-coordinate computation. + - `analysis-result-card-geometry.ts` (if needed) — whatever pure logic the card uses (badge-colour selection given `converged`, residual formatting). Skip the file if the card is pure markup with no computation worth testing. + - No mocks; no DOM. + - `validateOptimizeForm` is out of scope here; it lives in m-E21-05. + + 12. **Playwright coverage.** Extend `tests/ui/specs/svelte-analysis.spec.ts` (preferred) or add `svelte-analysis-goal-seek.spec.ts`: + - Goal Seek happy path: page loads, param selector populates, interval defaults render, Run button disabled until form is complete, run against a real engine returns a result card with `paramValue`, `converged` badge, iterations, **and a rendered convergence chart with at least one plotted point beyond iteration 0**. 
+ + - Goal Seek not-bracketed deterministic repro — uses the tuple recorded in the tracking doc's Notes section (first bundled sample in `SAMPLE_MODELS`, its first discovered const param, `target: 1e12` unreachable). Assert the warning message + the chart rendering only the two boundary points. + - Graceful skip when Engine API (8081) or Svelte dev server (5173) is down, matching the existing probe-and-skip pattern in `svelte-analysis.spec.ts`. + - _Optimize Playwright coverage is owned by m-E21-05._ + + ### Branch-coverage audit (AC13) + + 13. **Line-by-line branch audit** performed in two passes, each captured in the tracking doc's Coverage Notes before its respective commit-approval prompt: + - **AC13a — Backend pass (already complete, commit `29ac3e9`).** Five goal-seek return paths; pre-loop and main-loop exits in Nelder-Mead; shrink-vs-no-shrink branches. The optimize branches are audited here even though the consumer is m-E21-05, because the implementation lives on this milestone's commits. + - **AC13b — UI pass (pending).** New frontend components, geometry helpers, form validators, and render-condition branches in the Goal Seek tab. + Both passes enumerate every reachable branch and match each to a named test (xUnit / vitest / Playwright). Unreachable / defensive-default branches are documented with rationale, following m-E21-03's pattern. + + ## Technical Notes + + ### Backend + + - **`GoalSeekTracePoint` record** in `FlowTime.TimeMachine.Sweep` — `(int Iteration, double ParamValue, double MetricMean, double SearchLo, double SearchHi)`. Serializes to camelCase JSON automatically via existing endpoint serialization settings. + - **`OptimizeTracePoint` record** in `FlowTime.TimeMachine.Sweep` — `(int Iteration, IReadOnlyDictionary<string, double> ParamValues, double MetricMean)`. + - **Trace buffer inside the runners** — accumulate in a `List<...>` and hand the result to `MakeResult` / `Converged` / `NotConverged` helpers. Avoid allocating per-iteration closures.
+ - **Max trace size** — bounded by `maxIterations + 2` for goal-seek and `maxIterations + 1` for optimize. No separate cap needed. + - **Serialization** — endpoint response records already use System.Text.Json camelCase; adding `Trace` on both response records picks up the same convention. Verify with a round-trip test. + - **.NET CLI (m-E18-14) impact** — the `goal-seek` and `optimize` CLI subcommands pipe JSON through; the new `trace` field appears automatically. No CLI code change required; add a CLI test confirming trace is present in the JSON output. + + ### Frontend + + - **API client addition** (`ui/src/lib/api/flowtime.ts`): + + ```ts + async goalSeek(body: { + yaml: string; + paramId: string; + metricSeriesId: string; + target: number; + searchLo: number; + searchHi: number; + tolerance?: number; + maxIterations?: number; + }) { + return post<{ + paramValue: number; + achievedMetricMean: number; + converged: boolean; + iterations: number; + trace: { + iteration: number; + paramValue: number; + metricMean: number; + searchLo: number; + searchHi: number; + }[]; + }>(`${API}/goal-seek`, body); + } + ``` + + The matching `flowtime.optimize(...)` method is owned by m-E21-05 — draft preserved in that milestone's Technical Notes. + + - **`ConvergenceChart.svelte`** — pure SVG, ~80-120 lines. Consumes the normalized `Array<{ iteration, metricMean }>` shape (see AC5). Geometry in `convergence-chart-geometry.ts` handles y-domain computation, point projection, target-line y-coord, and multi-point-at-same-x placement (goal-seek's two `iteration: 0` entries). Single-series line for simplicity; no legend. Uses `--ft-viz-*` palette tokens; teal when `converged`, amber otherwise; dashed horizontal reference line at `target` when provided. Axis labels: `iteration` on x, `yLabel` prop (caller-supplied, e.g. "metric mean" or "queue.queueTimeMs") on y. + + - **`AnalysisResultCard.svelte`** — compact card using existing density tokens. 
Header (title + converged badge), primary value (large monospace), meta grid (iterations, tolerance, direction, target if present). No new shadcn components required. + + - **Interval bar + per-param range bars** — extracted into `interval-bar-geometry.ts` with vitest. Reused by Goal Seek's interval visualization (single bar) and by Optimize's per-param table (one mini bar per row) — the file lands here, m-E21-05 reuses it. + + - **Form state** — co-located in the route component using `$state` runes. Promote to a `goal-seek-state.svelte.ts` store only if readability degrades; m-E21-03 kept state local and that's the baseline to beat. + + - **Scenario-change reset** — when `selectedRunId` or `selectedSampleId` changes, reset the Goal Seek form. Wire into the same reactivity that already drives scenario changes in `/analysis`. + + - **Error messaging** — reuse the existing error surface pattern from m-E21-03; do not introduce a new toast or modal system. + + - **Density / styling** — small inputs, tight gutters, 8–12 px steps. Use the analysis page's existing typography scale; no new font sizes. + + ## Out of Scope + + - Optimize tab surface — separate milestone **m-E21-05**. + - Per-evaluation probe log for optimize (raw reflection/expansion/contraction/shrink intermediate vertices). The exposed trace is per-iteration best only. + - Multi-objective / Pareto optimization (not in the engine). + - Constraints on optimization (deferred — tracked in `work/gaps.md`). + - History panel for past goal-seek / optimize runs. + - Exporting results (CSV, JSON download). + - Persisting form values to `localStorage` across browser sessions. + - Keyboard shortcuts beyond what the analysis page already supports. + - Server-side parameter discovery (browser-side `discoverConstParams` still owns this). + - Trace extension on `/v1/sweep` or `/v1/sensitivity` (not needed; not covered by D-2026-04-21-034). 
+ + ## Dependencies + + - m-E21-03 (complete) — analysis route shell, tab bar, run/sample picker, param discovery, density tokens, inline-error pattern, sensitivity bar geometry (as a template for the interval bar + convergence chart geometry). + - `POST /v1/goal-seek`, `POST /v1/optimize` — available on port 8081 against `RustEngine:Enabled=true`. Both covered by existing API tests (`GoalSeekEndpointsTests.cs`, `OptimizeEndpointsTests.cs`) that this milestone extends with trace-shape assertions. + - D-2026-04-21-034 — appended to `work/decisions.md` at start-milestone time; covers AC3. + - Sample models bundled at `ui/src/lib/utils/sample-models.ts`. At least one sample must have const nodes, a reachable metric target (for the happy-path Playwright goal-seek), and accommodate the unreachable-target case from AC12. + + ## Notes + + - **Branch name vs milestone title.** The milestone branch is `milestone/m-E21-04-goal-seek-optimize` — it keeps its original name after the split because it already carries the Phase 1 backend commit (`29ac3e9`) and is referenced across CLAUDE.md Current Work and status surfaces. The branch name is the one documented mismatch with the renamed milestone folder (`m-E21-04-goal-seek`); all other surfaces reflect the new title. + + ## Coverage Notes + + See `m-E21-04-goal-seek-tracking.md` sections "Phase 1 — Branch-coverage audit" (backend) and "Coverage Notes → UI pass" (frontend) for the full line-by-line audit. Each reachable branch is matched to a named xUnit / vitest / Playwright test; defensive / unreachable branches are enumerated with rationale. + - kind: milestone + id: M-042 + frontmatter: + title: Optimize Surface + status: done + parent: E-21 + body: | + **Created:** 2026-04-21 (split from m-E21-04) + **Started:** 2026-04-22 + **Completed:** 2026-04-22 + ## Goal + + Wire the `/analysis` Optimize tab to live `/v1/optimize` so N-parameter Nelder-Mead optimization under bounds is usable from the Svelte workbench. 
Consume the shared `AnalysisResultCard` + `ConvergenceChart` components delivered by m-E21-04 and the already-landed `trace` field on the optimize response (commit `29ac3e9` of m-E21-04's branch). Deliver a per-param result table with mini range bars so the user sees where each optimized parameter landed inside its bound. + + ## Context + + This milestone was split out of the original `m-E21-04-goal-seek-optimize` on 2026-04-21 (16 ACs was too large; "Phase 1 / Phase 2" sub-phasing was the smell). The preconditions for Optimize to land cheaply are all complete before this milestone starts: + + - **Backend `trace` on `/v1/optimize`** — landed in m-E21-04 commit `29ac3e9` under D-2026-04-21-034. `OptimizeResponse` already carries `IReadOnlyList Trace` with pre-loop + per-iteration best-vertex entries; maximize runs emit unsigned `metricMean`. `OptimizeEndpointsTraceTests` + `OptimizerTests` already lock every reachable branch. + - **Shared UI components** — `ui/src/lib/components/analysis-result-card.svelte` and `ui/src/lib/components/convergence-chart.svelte` land in m-E21-04 (goal-seek is the first consumer). Their geometry siblings (`convergence-chart-geometry.ts`, `interval-bar-geometry.ts`) also land there with vitest coverage. This milestone consumes them directly — no further component extraction is needed. + - **Analysis route shell** — tab bar, scenario picker, inline-error pattern, session form state model, and the `optimize` tab placeholder (updated in m-E21-04 to reference this milestone) are all in place from m-E21-03 / m-E21-04. + + What remains is: the Optimize tab surface (param multi-select + bounds table + direction toggle + Advanced), the `flowtime.optimize(...)` API client method, the per-param result table with mini range bars, and the Playwright / vitest coverage for the optimize-specific pieces. 
+ + ### API contract + + **POST /v1/optimize** — `src/FlowTime.API/Endpoints/OptimizeEndpoints.cs` + + Request (unchanged): + ```json + { + "yaml": "...", + "paramIds": ["arrivals", "capacity"], + "metricSeriesId": "queue.queueTimeMs", + "objective": "minimize", + "searchRanges": { + "arrivals": { "lo": 5, "hi": 50 }, + "capacity": { "lo": 10, "hi": 200 } + }, + "tolerance": 1e-4, + "maxIterations": 200 + } + ``` + + Response (already extended in m-E21-04 commit `29ac3e9`): + ```json + { + "paramValues": { "arrivals": 17.3, "capacity": 74.2 }, + "achievedMetricMean": 0.042, + "converged": true, + "iterations": 87, + "trace": [ + { "iteration": 0, "paramValues": { "arrivals": 27.5, "capacity": 105 }, "metricMean": 0.31 }, + { "iteration": 1, "paramValues": { "arrivals": 25.8, "capacity": 112 }, "metricMean": 0.27 }, + { "iteration": 2, "paramValues": { "arrivals": 22.4, "capacity": 98 }, "metricMean": 0.19 } + /* ... */ + ] + } + ``` + + Trace semantics (as delivered by m-E21-04's backend AC2): + - One entry per Nelder-Mead iteration, recorded **after** the per-iteration `Sort` so `paramValues` is the current best vertex (`simplex[0]`) and `metricMean` is the unsigned mean at that vertex. + - `iteration: 0` is the initial simplex's best vertex after the pre-loop sort. `iteration: 1..N` are the post-iteration bests. + - When the search converges in 0 iterations (initial simplex already satisfies tolerance), the trace contains only the `iteration: 0` entry. + - Maximize runs emit unsigned `metricMean` on the trace (same convention as `achievedMetricMean`). The internal sign-flip is not leaked. + - The per-iteration best is canonical; the raw per-evaluation log (reflection / expansion / contraction / shrink probes) is intentionally not exposed. + + 400 / 503 behaviour unchanged. + + ## Acceptance Criteria + + 1. 
**Optimize placeholder replaced.** The `optimize` tab panel in `ui/src/routes/analysis/+page.svelte` renders live content (not the m-E21-04-era "coming in m-E21-05" stub). `TAB_INFO` copy for Optimize stands as-is — "convergence history" now accurately describes what the UI renders. + + 2. **Param multi-select with bounds — layout.** Chip-bar of all discovered const params at the top (toggle to include in the optimization; same chip styling and toggle interaction as Sensitivity). Below the chip-bar, a **compact table** with one row per selected param and columns `param id`, `baseline`, `lo`, `hi`. The `lo` / `hi` cells are inline numeric inputs with defaults `0.5 × baseline` / `2 × baseline`; the table appears only when at least one chip is active. Rationale: keeps the chip-bar a pure selector (matches Sensitivity muscle memory) and groups the bounds into one aligned grid, which reads cleanly for 1–5 params (the realistic scale for a hand-driven Nelder-Mead session). **Empty state** when no const params are discoverable on the current model: render the Sweep/Goal-Seek shape string `"No const-kind parameters in this model to optimize over."` in the same `
empty-state
` wrapper used by the Sweep (line 678) and Goal Seek (line 1004) surfaces in `ui/src/routes/analysis/+page.svelte`, with the Run button disabled. **No-params-selected state**: when the chip-bar has rendered but zero chips are toggled on, the bounds table is hidden and the Run button is disabled with an inline hint ("select at least one parameter"). Inline validation: at least one param selected; for every selected param, both bounds required and `lo < hi`. + + 3. **Objective metric + direction + advanced inputs.** Free-text `metricSeriesId` with the same chip shortcuts used by Sensitivity (`served`, `queue`, `flowLatencyMs`, `utilization`). A two-option toggle for direction (`minimize` / `maximize`), defaulting to `minimize` on first render and after every scenario-change reset. A collapsed "Advanced" disclosure exposes `tolerance` (default 1e-4) and `maxIterations` (default 200). All required fields must be valid before the Run button enables. + + 4. **Run optimize and render results.** "Run optimize" button calls `flowtime.optimize(...)` (new API method — see Technical Notes). While running, show a spinner (`Loader2Icon`) and disable the button. On success, render: + - The **shared** result card (delivered in m-E21-04) showing the objective metric + direction, final `achievedMetricMean`, converged badge, iteration count. + - A per-param table: `paramId`, final value, `[lo, hi]` bound (printed as `[lo, hi]` in a text cell), and a **separate** column for the mini "range bar" (SVG, reuses `interval-bar-geometry.ts` from m-E21-04) showing where the final value landed inside its bound. The range bar is its own column — do not overlay it on the `[lo, hi]` text cell, so the text stays selectable/copyable and the bar's width is not coupled to text length. + - The **shared** convergence chart (delivered in m-E21-04) plotting `metricMean` vs `iteration` over the full trace. 
No target reference line (there is no target for optimize) — the y-axis label reflects the direction ("minimizing X" / "maximizing X"). + - 400 and 503 errors surfaced as inline messages using the existing analysis-page error pattern. + + 5. **Not-converged state.** When the API returns `converged: false` with `iterations == maxIterations`, the shared result card shows an amber "did not converge" badge (same pattern as Goal Seek's max-iterations case from m-E21-04 AC9) and the convergence chart is drawn over the full trace. When `converged: false` with `iterations == 0` (initial simplex failed to satisfy tolerance in 0 iterations — a degenerate max-iterations case), the single `iteration: 0` trace point is plotted and the amber badge still shows. The per-param table renders whatever final `paramValues` the response carries. + + 6. **Session form state.** The Optimize form retains its last input values (selected param chips, per-param bounds, metric, direction, advanced fields) across tab switches within the same page session (in-memory is sufficient). Form values reset when the scenario (run / sample model) changes. Mirrors the Sweep + Goal Seek tab behaviour. + + 7. **Vitest coverage for pure logic.** Optimize-specific pure helpers live in a new sibling file `ui/src/lib/utils/optimize-helpers.ts` (with `optimize-helpers.test.ts` alongside it). Do **not** pile them into `analysis-helpers.ts` — keep the optimize surface's helpers modular and scoped to the surface, mirroring how each analysis surface owns its own component files. Branch-covered tests: + - `validateOptimizeForm({ selectedParams, bounds, metricSeriesId, objective })` — per-field error map. Exercises: no params selected; missing lo or hi on any selected param; `lo >= hi`; non-finite bounds; empty metric string; invalid objective. + - Any per-param range-bar geometry helper extracted from `interval-bar-geometry.ts` for the table's mini bars. 
(If the existing `intervalMarkerGeometry` from m-E21-04 covers this unchanged, no new tests are required beyond a call-site test.) + - Shared cross-surface helpers (e.g. `discoverConstParams`) stay in `analysis-helpers.ts`; optimize-only helpers stay in `optimize-helpers.ts`. + - No mocks; no DOM. + + 8. **Playwright coverage.** Extend `tests/ui/specs/svelte-analysis.spec.ts` (preferred) or add `svelte-analysis-optimize.spec.ts`: + - Optimize happy path: page loads, param chip-bar populates, multi-select toggles work, bounds inputs render per selected param, direction toggle works, Run button disabled until form is complete, run against a real engine returns the shared result card **with a converged badge**, a **per-param result table with one row per selected param (id, final value, `[lo, hi]` bound, and a rendered range bar)**, and a rendered convergence chart with multiple iterations plotted. Uses the deterministic tuple recorded in the tracking doc's Notes section (≥ 2 const params from a named bundled sample, bounds that reliably converge inside `maxIterations`). + - No-params-selected state: when the user opens the Optimize tab with no chips toggled on, the bounds table is hidden, the Run button is disabled, and the inline hint renders. + - Graceful skip when Engine API (8081) or Svelte dev server (5173) is down, matching the existing probe-and-skip pattern in `svelte-analysis.spec.ts`. + + 9. **Line-by-line branch audit** before the commit-approval prompt — the new UI components / helpers only (backend audit is complete from m-E21-04). Enumerate every reachable branch in `validateOptimizeForm` (and any sibling helpers in `optimize-helpers.ts`), the per-param range-bar call sites, and the Optimize tab's render conditions (happy-path / empty / no-params-selected / not-converged), matching each to a test (vitest / Playwright). Record unreachable / defensive-default branches in the tracking doc's Coverage Notes, following m-E21-03's pattern. 
+ + ## Technical Notes + + - **API client addition** (`ui/src/lib/api/flowtime.ts`): + + ```ts + async optimize(body: { + yaml: string; + paramIds: string[]; + metricSeriesId: string; + objective: 'minimize' | 'maximize'; + searchRanges: Record<string, { lo: number; hi: number }>; + tolerance?: number; + maxIterations?: number; + }) { + return post<{ + paramValues: Record<string, number>; + achievedMetricMean: number; + converged: boolean; + iterations: number; + trace: { + iteration: number; + paramValues: Record<string, number>; + metricMean: number; + }[]; + }>(`${API}/optimize`, body); + } + ``` + + - **Trace adaptation.** Convert the optimize trace into the chart's normalized `Array<{ iteration, metricMean }>` shape at the call site: `trace.map(p => ({ iteration: p.iteration, metricMean: p.metricMean }))`. The chart does not branch on surface type — it receives the same normalized shape Goal Seek passes. + + - **Chart y-axis label.** Reflects the direction — e.g. `minimizing ${metricSeriesId}` / `maximizing ${metricSeriesId}`. No target reference line (no target for optimize). Line colour reflects converged state (teal vs amber), same as Goal Seek. Exact prop names match whatever m-E21-04's extraction settled on. + + - **Per-param result table.** One row per paramId in `paramValues`. Columns (in order): id, final value (monospace, fixed precision), `[lo, hi]` bound as a text cell, mini SVG range bar as its own column (reuses `interval-bar-geometry.ts` from m-E21-04). Keeping the range bar in a dedicated column preserves text-cell copyability and decouples the bar's rendered width from the `[lo, hi]` string length. + + - **Form state.** Co-located in the route component using `$state` runes, mirroring Goal Seek's pattern from m-E21-04. If readability degrades with the multi-select + per-param bounds structure, promote to `optimize-state.svelte.ts` (sibling file, same modularization philosophy as `optimize-helpers.ts`).
+ + - **Helper module layout.** `ui/src/lib/utils/optimize-helpers.ts` owns optimize-specific pure helpers (form validation, trace→chart normalization, per-param table-row construction). Cross-surface helpers that were shared across Sweep / Sensitivity / Goal Seek / Optimize (e.g. `discoverConstParams`) stay in `analysis-helpers.ts`. The test file `optimize-helpers.test.ts` sits alongside the helper file; do not extend `analysis-helpers.test.ts`. + + - **Scenario-change reset.** Wire into the same reactivity that already drives scenario changes in `/analysis` (Sweep, Sensitivity, Goal Seek all do this). + + - **Error messaging.** Reuse the existing error surface pattern from m-E21-03 / m-E21-04; do not introduce a new toast or modal system. + + - **Density / styling.** Small inputs, tight gutters, 8–12 px steps. Use the analysis page's existing typography scale; no new font sizes. + + ## Out of Scope + + - Per-evaluation probe log for optimize (raw reflection/expansion/contraction/shrink intermediate vertices). The exposed trace is per-iteration best only. + - Multi-objective / Pareto optimization (not in the engine). + - Constraints on optimization (deferred — tracked in `work/gaps.md`). + - History panel for past optimize runs. + - Exporting results (CSV, JSON download). + - Persisting form values to `localStorage` across browser sessions. + - Keyboard shortcuts beyond what the analysis page already supports. + - Server-side parameter discovery (browser-side `discoverConstParams` still owns this). + - Backend trace / endpoint changes — complete in m-E21-04. + - Extraction of shared `AnalysisResultCard` / `ConvergenceChart` components — complete in m-E21-04. + + ## Dependencies + + - **m-E21-04 (complete before this milestone starts)** — delivers shared `AnalysisResultCard`, `ConvergenceChart`, `convergence-chart-geometry.ts`, `interval-bar-geometry.ts`, and the goal-seek surface baseline. 
+ - **Backend trace on `/v1/optimize`** — already landed in m-E21-04 commit `29ac3e9` under D-2026-04-21-034. No backend work required in this milestone. + - `POST /v1/optimize` — available on port 8081 against `RustEngine:Enabled=true`. + - Sample models bundled at `ui/src/lib/utils/sample-models.ts` — at least one with ≥ 2 const nodes and a metric that changes monotonically with them (so the Nelder-Mead simplex has room to move during the Playwright happy path). See the candidate tuple in Notes. + + ## Notes + + - The original combined milestone `m-E21-04-goal-seek-optimize` was split on 2026-04-21 after the shared backend trace landed but before any UI work began. The split preserves the decision record (D-2026-04-21-034 covers both endpoints) and preserves commit `29ac3e9` on the m-E21-04 branch. + + - **Candidate Playwright happy-path tuple (to verify at milestone start):** + - Model id: `coffee-shop` (first entry in `SAMPLE_MODELS`, `ui/src/lib/utils/sample-models.ts:47`) — same sample used by the m-E21-04 goal-seek not-bracketed Playwright case, chosen for continuity and because it ships with multiple const nodes. + - `paramIds`: the first two discoverable const params via `discoverConstParams` (expected to include `customers_per_hour`; confirm the second at milestone start). + - `searchRanges`: for each selected param, `{ lo: 0.5 × baseline, hi: 2 × baseline }` — mirrors the default-bounds rule in AC2. + - `metricSeriesId`: `served` (Sensitivity chip shortcut — verify the exact engine-emitted id at authoring time; if the actual series is namespaced, e.g. `Register.served`, update this tuple + the AC8 assertion together). + - `objective`: `minimize`; `tolerance`: `1e-4`; `maxIterations`: `200`. + - Expected: `converged: true` within the iteration budget, trace length ≥ 2, `paramValues` populated for both selected params, per-param table renders one row per param with a visible range-bar marker inside `[lo, hi]`. 
+ - **Verification gate at milestone start**: if `coffee-shop` lacks a second usable const param, or the chosen metric does not move monotonically under these bounds in the Rust engine's output, **swap the sample** (pick an alternate from `SAMPLE_MODELS` whose metric is monotonic under its default bounds, record the replacement tuple here) before writing the Playwright spec. Do **not** soften AC8 to a not-converged assertion — the converged-badge happy path is what AC8 is proving; AC5 already owns the not-converged rendering. A silently-flaky Playwright test is the failure mode to avoid. + + ## Coverage Notes + + (Filled at wrap — follow m-E21-03's structure: pure-logic tests, component rendering via Playwright, defensive / unreachable branches enumerated with rationale.) + - kind: milestone + id: M-043 + frontmatter: + title: Heatmap View + status: done + parent: E-21 + body: | + **Created:** 2026-04-23 + **Started:** 2026-04-23 + **Completed:** 2026-04-24 + ## Goal + + Deliver a nodes-x-bins heatmap view as a **sibling of topology** under `/time-travel/topology`, sharing the toolbar, class filter, metric selector, timeline scrubber, workbench sidebar, and pin state. Heatmap reuses the existing `GET /v1/runs/{runId}/state_window` endpoint — **zero backend changes**. Introduce a typed `ViewSwitcher` component, a shared view-state store that both views consume, and a shared full-window color-scale normalization so "bright red at (N, T)" on the heatmap matches "bright red on node N" on topology when the scrubber is at bin T. + + ## Context + + E-21's first five milestones delivered the workbench paradigm (m-E21-01 foundation + click-to-pin cards), the metric selector + edge cards + class filter (m-E21-02), and the `/analysis` analysis surfaces (m-E21-03 sweep/sensitivity, m-E21-04 goal-seek, m-E21-05 optimize). What remains on the "views around the data" side of E-21 is: + + 1. 
A second view of the same model — the heatmap — that reveals temporal patterns a single-bin topology snapshot cannot (a node that is fine in bins 1–4 but saturated in bins 5–8 is invisible on topology; the heatmap makes that obvious).
+ 2. A reusable view-switcher shape so later views (decomposition, comparison, flow-balance — all out of E-21 scope) can slot in without structural refactoring.
+
+ The heatmap's data need (`state_window` per-node per-bin series) is already served by the Engine API. `ui/src/lib/api/flowtime.ts:101` already exposes `getStateWindow(runId, startBin, endBin)`. The topology page (`ui/src/routes/time-travel/topology/+page.svelte`) already calls it to populate sparklines. The heatmap calls it once per scenario, identically.
+
+ ### Design decisions settled at planning (2026-04-23 Q&A)
+
+ The 14-question Q&A on 2026-04-23 locked every design decision below. The key shape:
+
+ - **View location (Q1):** Heatmap is a sibling of topology under `/time-travel/topology`, behind a view switcher on the canvas. Not an `/analysis` tab. Not its own route.
+ - **Workbench integration (Q2):** Heatmap replaces the **canvas** when selected; toolbar + scrubber + workbench sidebar persist unchanged across view switches. Pin state and scrubber position survive view switches in both directions.
+ - **Shared normalization (Q3):** Shared full-window color scale with 99th-percentile clipping. **Topology's per-bin normalization changes to match** — this is explicitly a cross-view parity change, captured under ADR-m-E21-06-02 below.
+ - **Axis orientation (Q4):** Nodes as rows (Y, labels on left), bins as columns (X, left-to-right).
+ - **View switcher (Q5, Q13):** Horizontal tabs above the canvas (`[ Topology | Heatmap ]`), shadcn-style underline, `Alt+1` / `Alt+2` shortcuts. Typed `ViewSwitcher` component with inline view array on the topology page — **no manifest registry, no Svelte context API**. Captured under ADR-m-E21-06-01. 
+ - **Class filter (Q6):** Full parity with topology — hides rows AND restricts metric computation AND domain computation. Adds a **row-stability toggle** that dims filtered rows in place (off by default). + - **Row sort (Q7):** Topological order is the default; modes = topological / node id / max desc / mean desc / variance desc. Pin position is natural within the active sort; the pin glyph (AC10) is the pinned-row indicator. (Amended mid-implementation 2026-04-23 — see Confirmations item #5 below: the original "pinned-first modifier always-on" language was dropped in favor of pin-agnostic sort.) + - **Cell states (Q8):** Three states — observed (colored), no-data-for-bin (neutral grey + subtle hatch), metric-undefined-for-node (row-level muted). Tooltip always disambiguates. + - **Scrubber coupling (Q9):** Two-way. Scrubber position highlights the current-bin column in the heatmap; clicking a cell jumps the scrubber and pins the node. + - **Pinned row markers (Q10):** Pin glyph in the row-label gutter, click-to-unpin. (Amended 2026-04-23: the glyph is the sole pinned-row indicator — there is no positional float; rows keep their natural sort position.) + - **Bin-axis labels (Q11):** Sparse human time labels on the top axis; stride chosen by column pixel width × bin size. Absolute time when `StateWindowResponse.timestampsUtc` is populated (backend sends per-bin wall-clock timestamps), offset-from-start otherwise. Tooltip always shows both bin index and time. + - **Accessibility baseline (Q12):** Keyboard nav, ARIA grid structure, focus ring, tooltip-on-focus, one keyboard Playwright spec. Pattern encoding / high-contrast / screen-reader polish deferred to m-E21-08. + - **Testing (Q14):** 13 Playwright critical-path specs + 5 vitest pure-logic suites. + - **Node-mode toggle (Q15, added 2026-04-23 after Q14):** Shared toolbar toggle `[ Operational | Full ]` controlling the `mode` parameter on `GET /v1/runs/{runId}/state_window`. 
Operational (default) hides `expr`/`const`/`pmf` computed nodes — matches the Blazor UI's "operational nodes" toggle. Full exposes them; they render as row-level-muted rows under operational metrics (utilization, queue depth) per AC4, and as colored rows under metrics that are defined for them (value / output). Toggle state lives in the shared view-state store and applies to **both** topology and heatmap (re-fetches `state_window` on change).
+
+ ### API contract
+
+ **GET /v1/runs/{runId}/state_window** — `src/FlowTime.API/Program.cs:1028`
+
+ Query: `startBin`, `endBin` (required); optional `mode` (`operational` default, `full` available), `edgeIds`, `edgeMetrics`, `classIds`. Heatmap calls this **once per scenario** with `startBin=0`, `endBin=binCount-1`, no edge/class filter in the request (class filter is applied client-side to allow the toggle-in-place behaviour from Q6). `mode` is passed from the shared view-state store (AC15 node-mode toggle) and drives which node kinds appear in the response. Response is the same `StateWindowResponse` shape topology already consumes. No new endpoints, no additive response fields, no carve-outs needed.
+
+ Client surface: `getStateWindow` gains a `mode?: 'operational' | 'full'` parameter (default `operational`, backward-compatible for existing call sites). The heatmap and topology both read `mode` from the shared store and pass it through.
+
+ ## Acceptance Criteria
+
+ 1. **View switcher renders above the canvas.** `/time-travel/topology` shows a horizontal tab bar `[ Topology | Heatmap ]` above the DAG/heatmap area, implemented as a new `ViewSwitcher` component at `ui/src/lib/components/view-switcher.svelte`. Views are listed inline in the route's `