TimBeyer · TimBeyer · Mar 18, 2026 · Mar 18, 2026 · Mar 18, 2026 · Mar 18, 2026
diff --git a/CLAUDE.md b/CLAUDE.md
@@ -132,11 +132,14 @@ clawctl-dev create --config ./vm-bootstrap.json
 - Name: `YYYY-MM-DD_hhmm_descriptive-kebab-case` (e.g., `2026-02-24_1337_log-coloring`)
 - **Get the timestamp from the OS** (`date +%Y-%m-%d_%H%M`) — do not guess or make up a time
 - A task is a concrete, completable unit of work — not an epic or a backlog
-- Include a TASK.md with: scope, plan, steps, current status
+- Include a TASK.md with: scope, context, plan, steps, current status
 - Keep TASK.md updated as work progresses
 - When coding work is done: mark TASK.md status as **Resolved** — the task directory stays in `tasks/`
 - `tasks/archive/` is for periodic manual cleanup, not part of the PR workflow
 - Commit task+plan first, before implementation code
+- Since we usually clear the context before implementation when using plan mode,
+  your plan MUST include any context that should end up in `TASK.md` —
+  especially user input and feedback, and explicit design choices
 
 ### TASK.md Structure
 
@@ -151,26 +154,49 @@ Every TASK.md should have these sections:
 
 What this task covers and — just as importantly — what it does not.
 
+## Context
+
+The motivation and background behind this task. Capture:
+
+- Why we're doing this — the problem, constraint, or goal that triggered it
+- Relevant background the user provided (domain knowledge, prior decisions,
+  architectural constraints) that shaped the approach
+- Key requirements or invariants that must hold
+
+This section is written at the start of the task, drawn from the initial
+prompt and early discussion. It's the "why" behind the "what".
+
 ## Plan
 
-Numbered high-level steps.
+The design and approach, not just a numbered list of steps. Capture:
+
+- The chosen approach and _why_ it was chosen
+- Alternatives that were considered and why they were rejected
+- Pushback or refinements from discussion — if the initial idea was
+  changed, record what changed and why
+- Trade-offs acknowledged (e.g., "simpler but less flexible", "more work
+  now but avoids X later")
+
+The plan should read as a record of the design process, not just its
+output. A future reader should understand not only what we decided to do,
+but what we decided _not_ to do and why.
 
 ## Steps
 
 Checkbox list (- [x] / - [ ]) of concrete work items.
 
 ## Notes
 
-Running log of observations, questions, and decisions made during the work.
+Running log of observations and decisions made _during implementation_.
 Write these as you go — not after the fact. Include:
 
-- Design decisions and _why_ (not just what)
-- Alternatives you considered and why they were rejected
+- Implementation-time discoveries that affected the approach
 - Anything a future reader would look at in the code and wonder "why?"
 - Links to relevant docs, issues, or conversations
 
 Don't log routine fixes (type errors, lint fixes, minor API quirks) —
 only things where the reasoning isn't obvious from the code itself.
+Design-level reasoning belongs in Context and Plan, not here.
 
 ## Outcome
 
@@ -182,9 +208,11 @@ Written when marking the task as Resolved. A short summary of:
 ```
 
 **Why this matters**: Task documents are the project's decision log. When
-someone later asks "why did we do X?", the answer should be findable by
-scanning task Notes and Outcomes — not locked in someone's head or lost
-in a chat transcript.
+someone later asks "why did we do X?" or "why didn't we do Y?", the
+answer should be findable by scanning task Context, Plan, and Notes —
+not locked in someone's head or lost in a chat transcript. Recording
+the design process (not just the result) means we don't re-litigate
+the same trade-offs when revisiting a decision later.
 
 ## Committing
 

diff --git a/bun.lock b/bun.lock
diff --git a/docs/capabilities.md b/docs/capabilities.md
@@ -304,8 +304,50 @@ migrations: [
 ],
 ```
 
-If no migration path exists, the runner falls through to re-provision
-(since steps are idempotent, this is safe).
+### Versioning and migrations
+
+There are two contexts where version drift is handled:
+
+**Full provisioning** (`claw provision`) — the runner checks for
+migrations, and if none exist (no chain declared, or a gap), falls
+through to re-running the capability's provision steps. Since steps are
+idempotent this is safe, though it may be slower than a targeted
+migration.
+
+**Binary updates** (`claw migrate`) — runs after a `clawctl update`
+pushes a new claw binary. Only explicit migration chains are executed.
+If no migration path exists, the version is bumped with no VM-side
+action — the assumption is that the version bump only needed new binary
+code, not VM state changes.
+
+This means a version bump without a migration is fine when:
+
+- The change is in claw binary code only (new command, bug fix, better
+  config interface)
+- The change is in provision steps that only matter for fresh installs
+
+A migration is required when the update needs VM-side action on existing
+instances:
+
+- Config file format changes
+- New files that must be written (SKILL.md, wrapper scripts)
+- Package installs or removals
+- systemd unit changes
+
+### Divergence risk
+
+Fresh provisioning always produces the "current version" state. Migrations
+produce it incrementally. There is an inherent risk that a migration
+chain doesn't exactly reproduce what a clean install would produce — for
+example, a migration might forget to remove an old config key that a clean
+install never creates.
+
+The escape hatch is re-provisioning from scratch: delete the VM and
+re-create it. This is always safe and produces a known-good state.
+
+If we ever need to trigger a full re-provision during an update (without
+deleting the VM), it should be built as an explicit capability hook —
+not as a silent fallback from a missing migration chain.
 
 ## Core vs optional capabilities
 

diff --git a/package.json b/package.json
@@ -82,6 +82,7 @@
     "@release-it/conventional-changelog": "^10.0.5",
     "@types/bun": "latest",
     "@types/react": "^19.0.0",
+    "@types/semver": "^7.7.1",
     "eslint": "^10.0.2",
     "prettier": "^3.8.1",
     "release-it": "^19.2.4",

diff --git a/packages/cli/bin/cli.tsx b/packages/cli/bin/cli.tsx
@@ -25,8 +25,10 @@ import {
   runDaemonStatus,
   runDaemonLogs,
   runDaemonRun,
+  runUpdate,
 } from "../src/commands/index.js";
 import { ensureDaemon } from "@clawctl/daemon";
+import { checkAndPromptUpdate } from "../src/update-hook.js";
 
 const driver = new LimaDriver();
 
@@ -238,4 +240,38 @@ completionsCmd
     await runCompletionsUpdateOc(driver, opts);
   });
 
+program
+  .command("update")
+  .description("Check for and apply clawctl updates")
+  .option("--apply-vm", "Apply VM updates after binary replacement (internal)")
+  .action(async (opts: { applyVm?: boolean }) => {
+    try {
+      await runUpdate(opts);
+    } catch (err) {
+      console.error(err instanceof Error ? `Error: ${err.message}` : err);
+      process.exit(1);
+    }
+  });
+
+// Commands exempt from the pre-command update check: they update themselves or are non-interactive
+const SKIP_UPDATE_COMMANDS = new Set(["update", "daemon", "completions"]);
+
+program.hook("preAction", async (_thisCommand, actionCommand) => {
+  // Resolve nested subcommands to the command registered directly on `program`
+  let topLevel = actionCommand;
+  for (let up = topLevel.parent; up && up !== program; up = up.parent) {
+    topLevel = up;
+  }
+  if (SKIP_UPDATE_COMMANDS.has(topLevel.name())) return;
+
+  try {
+    const outcome = await checkAndPromptUpdate(pkg.version);
+    if (outcome === "updated") process.exit(0);
+  } catch (err) {
+    // Best effort: a failed check or apply must never stop the requested command
+    const reason = err instanceof Error ? err.message : err;
+    console.error(`Warning: update check failed: ${reason}`);
+  }
+});
+
 await program.parseAsync();
diff --git a/packages/cli/src/commands/index.ts b/packages/cli/src/commands/index.ts
@@ -18,3 +18,4 @@ export {
   runDaemonLogs,
   runDaemonRun,
 } from "./daemon.js";
+export { runUpdate } from "./update.js";
diff --git a/packages/cli/src/commands/start.ts b/packages/cli/src/commands/start.ts
@@ -1,6 +1,14 @@
 import type { VMDriver } from "@clawctl/host-core";
-import { requireInstance } from "@clawctl/host-core";
+import {
+  requireInstance,
+  deployClaw,
+  clawPath,
+  loadRegistry,
+  saveRegistry,
+} from "@clawctl/host-core";
+import { CLAW_BIN_PATH } from "@clawctl/types";
 import { notifyDaemon } from "@clawctl/daemon";
+import pkg from "../../../../package.json";
 
 export async function runStart(driver: VMDriver, opts: { instance?: string }): Promise<void> {
   const entry = await requireInstance(opts);
@@ -14,5 +22,36 @@ export async function runStart(driver: VMDriver, opts: { instance?: string }): P
   console.log(`Starting "${entry.name}"...`);
   await driver.start(entry.vmName);
   console.log(`Instance "${entry.name}" started.`);
+
+  // Apply pending claw update if the binary was replaced while VM was stopped
+  if (entry.pendingClawUpdate) {
+    console.log("Applying pending claw update...");
+    try {
+      await deployClaw(driver, entry.vmName, clawPath);
+      const migrateResult = await driver.exec(entry.vmName, `${CLAW_BIN_PATH} migrate --json`);
+
+      const registry = await loadRegistry();
+      const current = registry.instances[entry.name];
+
+      if (migrateResult.exitCode !== 0) {
+        // Don't clear the flag — retry on next start. A future clawctl
+        // update may ship a fixed claw that unblocks it.
+        console.error(
+          `Warning: claw migrate failed (exit ${migrateResult.exitCode}). Will retry on next start.`,
+        );
+      } else if (current) {
+        current.pendingClawUpdate = false;
+        current.clawVersion = pkg.version;
+        await saveRegistry(registry);
+        console.log("Claw update applied.");
+      }
+    } catch (err) {
+      // deployClaw failed — flag stays set, will retry next start
+      console.error(
+        `Warning: pending claw update failed: ${err instanceof Error ? err.message : err}`,
+      );
+    }
+  }
+
   await notifyDaemon();
 }
diff --git a/packages/cli/src/commands/update.ts b/packages/cli/src/commands/update.ts
@@ -0,0 +1,47 @@
+import { execa } from "execa";
+import { checkForUpdate, downloadAndReplace, applyVmUpdates } from "@clawctl/host-core";
+import pkg from "../../../../package.json";
+
+/** Implements `clawctl update`; `applyVm` selects the internal VM-update-only mode. */
+export async function runUpdate(opts: { applyVm?: boolean }): Promise<void> {
+  if (opts.applyVm) {
+    // Internal mode: called by the NEW binary after self-replacement
+    console.log("Updating VMs with new claw binary...");
+    const results = await applyVmUpdates(pkg.version);
+    for (const r of results) {
+      const icon = r.status === "updated" ? "\u2713" : r.status === "pending" ? "\u25cb" : "\u00d7";
+      console.log(`  ${icon} ${r.name}: ${r.detail ?? r.status}`);
+    }
+    if (results.length === 0) {
+      console.log("No instances registered.");
+    } else {
+      const count = (status: string) => results.filter((r) => r.status === status).length;
+      console.log(`\n${count("updated")} updated, ${count("pending")} pending.`);
+    }
+    return;
+  }
+
+  // Dev mode: running via bun, not a compiled binary — can't self-update
+  if (process.execPath.endsWith("/bun")) {
+    console.log("Dev mode detected — self-update is not available.");
+    console.log("Build a release binary with `bun run build` to use auto-update.");
+    return;
+  }
+
+  // Normal mode: check + download + re-exec
+  console.log(`Current version: v${pkg.version}`);
+  const update = await checkForUpdate(pkg.version);
+  if (!update || !update.available) {
+    console.log(`clawctl is up to date (v${pkg.version}).`);
+    return;
+  }
+
+  console.log(`New version available: v${update.version}`);
+  // Fail with a readable message instead of handing `undefined` to the downloader
+  if (!update.assetUrl) throw new Error(`No downloadable binary found for v${update.version}.`);
+  console.log("Downloading...");
+  await downloadAndReplace(update.assetUrl);
+  console.log("Binary updated. Applying VM updates...");
+  // Spawn the NEW binary to handle VM updates (it has the new embedded claw)
+  await execa(process.execPath, ["update", "--apply-vm"], { stdio: "inherit" });
+}
diff --git a/packages/cli/src/update-hook.ts b/packages/cli/src/update-hook.ts
@@ -0,0 +1,60 @@
+import { createInterface } from "readline";
+import { execa } from "execa";
+import {
+  checkForUpdate,
+  loadUpdateState,
+  saveUpdateState,
+  downloadAndReplace,
+} from "@clawctl/host-core";
+
+/**
+ * Pre-command hook that checks for updates and prompts the user.
+ *
+ * Returns:
+ * - "updated": binary was replaced and VM updates spawned — caller should exit
+ * - "skipped": user declined the update
+ * - "none": nothing to apply (dev mode, no TTY, no update or asset, dismissed, no answer)
+ */
+export async function checkAndPromptUpdate(
+  currentVersion: string,
+): Promise<"updated" | "skipped" | "none"> {
+  // Skip in dev mode (`bun cli.tsx`, not a compiled binary) and when stdin is not a terminal
+  if (process.execPath.endsWith("/bun") || !process.stdin.isTTY) return "none";
+
+  const update = await checkForUpdate(currentVersion);
+  if (!update || !update.available || !update.version || !update.assetUrl) return "none";
+
+  // Check if this version was already dismissed
+  const state = await loadUpdateState();
+  if (state.dismissedVersion === update.version) return "none";
+
+  const answer = await prompt(
+    `clawctl v${update.version} is available (you have v${currentVersion}). Update? [Y/n] `,
+  );
+  if (answer === null) return "none"; // prompt closed unanswered: don't update, don't dismiss
+
+  if (answer.toLowerCase() === "n" || answer.toLowerCase() === "no") {
+    await saveUpdateState({ ...state, dismissedVersion: update.version });
+    return "skipped";
+  }
+
+  console.log("Downloading update...");
+  await downloadAndReplace(update.assetUrl);
+  console.log("Updated. Applying VM updates...");
+
+  // Spawn the NEW binary for VM updates
+  await execa(process.execPath, ["update", "--apply-vm"], { stdio: "inherit" });
+
+  return "updated";
+}
+
+function prompt(question: string): Promise<string | null> {
+  return new Promise((resolve) => {
+    const rl = createInterface({ input: process.stdin, output: process.stdout });
+    rl.once("close", () => resolve(null)); // input closed (e.g. EOF) before any answer
+    rl.question(question, (answer) => {
+      resolve(answer.trim()); // settle first so the "close" handler becomes a no-op
+      rl.close();
+    });
+  });
+}
diff --git a/packages/host-core/package.json b/packages/host-core/package.json
@@ -10,6 +10,7 @@
   "dependencies": {
     "@clawctl/types": "workspace:*",
     "@clawctl/templates": "workspace:*",
-    "execa": "^9.0.0"
+    "execa": "^9.0.0",
+    "semver": "^7.7.4"
   }
 }