diff --git a/docs/superpowers/plans/2026-05-20-phase3-workspace-backends.md b/docs/superpowers/plans/2026-05-20-phase3-workspace-backends.md new file mode 100644 index 00000000..83242342 --- /dev/null +++ b/docs/superpowers/plans/2026-05-20-phase3-workspace-backends.md @@ -0,0 +1,2005 @@ +# Phase 3 — Workspace Capability + Pluggable Backends Implementation Plan + +> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. + +**Goal:** Refactor workspace tools (`readFile`, `writeFile`, `listDir`, `runBash`) from per-route hand-rolled files into a built-in capability auto-wired by the `workspace/` directory convention, with a pluggable filesystem/exec backend pair configurable in `dawn.config.ts` and shipping defaults plus functional composition primitives in a new `@dawn-ai/workspace` package. + +**Architecture:** New `@dawn-ai/workspace` package (types + `localFilesystem`/`localExec` defaults + `compose`/`withLogging` helpers). New `createWorkspaceMarker()` capability in `@dawn-ai/core` that contributes the four tools wired to the configured (or default-local) backends. `dawn.config.ts` loader switches from a hand-rolled string-only parser to a `tsx`-evaluated import so callable backends can be expressed naturally. Path-jail enforcement lives in the capability; backends receive already-resolved absolute paths. Chat example's hand-rolled tool files delete. + +**Tech Stack:** TypeScript, pnpm workspaces, vitest, zod, `node:fs/promises`, `node:child_process`, `tsx/esm/api` (already a dep, used for route discovery). 
+ +**Spec:** `docs/superpowers/specs/2026-05-20-phase3-workspace-backends-design.md` + +--- + +## File Structure (locked in here, used by all tasks below) + +### New package: `packages/workspace/` + +| Path | Responsibility | +|---|---| +| `packages/workspace/package.json` | `@dawn-ai/workspace` manifest | +| `packages/workspace/tsconfig.json` | TS config (extends `@dawn-ai/config-typescript`) | +| `packages/workspace/vitest.config.ts` | Vitest config (mirror `@dawn-ai/core` shape) | +| `packages/workspace/src/index.ts` | Barrel re-exports | +| `packages/workspace/src/types.ts` | `FilesystemBackend`, `ExecBackend`, `BackendContext`, middleware types | +| `packages/workspace/src/local-filesystem.ts` | `localFilesystem()` factory | +| `packages/workspace/src/local-exec.ts` | `localExec()` factory | +| `packages/workspace/src/compose.ts` | `compose()` helper | +| `packages/workspace/src/with-logging.ts` | `withLogging()` middleware | +| `packages/workspace/test/local-filesystem.test.ts` | Unit tests | +| `packages/workspace/test/local-exec.test.ts` | Unit tests | +| `packages/workspace/test/compose.test.ts` | Unit tests | +| `packages/workspace/test/with-logging.test.ts` | Unit tests | + +### New files in existing packages + +| Path | Responsibility | +|---|---| +| `packages/core/src/capabilities/built-in/workspace.ts` | `createWorkspaceMarker()` | +| `packages/core/test/capabilities/workspace.test.ts` | Marker unit tests | + +### Modified files + +| Path | Change | +|---|---| +| `packages/core/src/config.ts` | Replace hand-rolled parser with `tsx`-evaluated import | +| `packages/core/test/config.test.ts` | Rewrite tests for the new loader | +| `packages/core/src/types.ts` | Add `backends?` to `DawnConfig` | +| `packages/core/src/capabilities/types.ts` | Add `backends?` to `CapabilityMarkerContext` | +| `packages/core/src/index.ts` | Export `createWorkspaceMarker` | +| `packages/cli/src/lib/runtime/execute-route.ts` | Register marker; thread backends from loaded 
`dawn.config` | +| `packages/cli/src/lib/runtime/check-tool-name-uniqueness.ts` | Accept overridable tool names | +| `packages/cli/test/tool-name-uniqueness.test.ts` | Add overridable case | +| `packages/cli/src/lib/typegen/run-typegen.ts` | Add `WORKSPACE_EXTRA_TOOLS` gated on `hasWorkspace(routeDir)` | +| `pnpm-workspace.yaml` | Add `packages/workspace` (verify already covers `packages/*`) | +| `memory/project_phase_status.md` | Mark sub-project 4 in progress, then complete | + +### Deleted files (chat example) + +| Path | Why | +|---|---| +| `examples/chat/server/src/app/chat/tools/readFile.ts` | Capability provides this | +| `examples/chat/server/src/app/chat/tools/writeFile.ts` | Capability provides this | +| `examples/chat/server/src/app/chat/tools/listDir.ts` | Capability provides this | +| `examples/chat/server/src/app/chat/tools/runBash.ts` | Capability provides this | +| `examples/chat/server/src/app/chat/workspace-path.ts` | No longer referenced | +| `examples/chat/server/src/app/coordinator/subagents/research/tools/readFile.ts` | Capability provides this | +| `examples/chat/server/src/app/coordinator/subagents/research/tools/listDir.ts` | Capability provides this | +| `examples/chat/server/src/app/coordinator/subagents/research/workspace-path.ts` | No longer referenced | + +--- + +# Phase A — `@dawn-ai/workspace` package + +### Task 1: Scaffold the workspace package + +**Files:** +- Create: `packages/workspace/package.json` +- Create: `packages/workspace/tsconfig.json` +- Create: `packages/workspace/tsconfig.build.json` +- Create: `packages/workspace/vitest.config.ts` +- Create: `packages/workspace/src/index.ts` (empty barrel for now) +- Verify: `pnpm-workspace.yaml` already covers `packages/*` (should — confirm with `grep packages pnpm-workspace.yaml`) + +- [ ] **Step 1: Inspect a sibling package's manifest pattern** + +Run: `cd /Users/blove/repos/dawn && cat packages/sdk/package.json | head -40` +Expected: see the conventional `name`, `version`, 
`type: "module"`, `exports`, `scripts`, `devDependencies` pattern. Note the version (likely `0.1.x`). + +- [ ] **Step 2: Write `packages/workspace/package.json`** + +```json +{ + "name": "@dawn-ai/workspace", + "version": "0.1.8", + "type": "module", + "main": "./dist/index.js", + "types": "./dist/index.d.ts", + "exports": { + ".": { + "types": "./dist/index.d.ts", + "import": "./dist/index.js" + } + }, + "files": ["dist"], + "scripts": { + "build": "tsc -p tsconfig.build.json", + "test": "vitest run", + "typecheck": "tsc -p . --noEmit", + "lint": "biome check --config-path ../config-biome/biome.json package.json src test tsconfig.json tsconfig.build.json vitest.config.ts" + }, + "devDependencies": { + "@dawn-ai/config-typescript": "workspace:*", + "@dawn-ai/config-biome": "workspace:*", + "@biomejs/biome": "catalog:", + "typescript": "catalog:", + "vitest": "catalog:" + } +} +``` + +(Verify the `version`, catalog references, and `@dawn-ai/config-*` package names against an existing sibling — adjust if Dawn uses different names like `@dawn-ai/tsconfig`.) + +- [ ] **Step 3: Write `packages/workspace/tsconfig.json`** + +```json +{ + "extends": "@dawn-ai/config-typescript/base.json", + "include": ["src", "test"] +} +``` + +(Match exactly what `packages/core/tsconfig.json` or `packages/sdk/tsconfig.json` does — adjust the extends path if those use a different shape.) + +- [ ] **Step 4: Write `packages/workspace/tsconfig.build.json`** + +```json +{ + "extends": "./tsconfig.json", + "compilerOptions": { + "outDir": "./dist", + "rootDir": "./src", + "declaration": true, + "declarationMap": true, + "sourceMap": true, + "noEmit": false + }, + "include": ["src"] +} +``` + +(Compare against `packages/sdk/tsconfig.build.json` — match exactly.) 
+ +- [ ] **Step 5: Write `packages/workspace/vitest.config.ts`** + +```ts +import { defineConfig } from "vitest/config" + +export default defineConfig({ + test: { + include: ["test/**/*.test.ts"], + }, +}) +``` + +- [ ] **Step 6: Write `packages/workspace/src/index.ts` (empty barrel for now)** + +```ts +// Re-exports will be added as types and impls land in subsequent tasks. +export {} +``` + +- [ ] **Step 7: Install + verify scaffolding** + +Run from repo root: +```bash +cd /Users/blove/repos/dawn && pnpm install 2>&1 | tail -5 +``` +Expected: `Done in Ns`. The new `@dawn-ai/workspace` package is symlinked. + +Run: `cd /Users/blove/repos/dawn && pnpm --filter @dawn-ai/workspace build 2>&1 | tail -5` +Expected: build succeeds (empty package builds fine). + +- [ ] **Step 8: Commit** + +```bash +cd /Users/blove/repos/dawn +git add packages/workspace/ +git commit -m "$(cat <<'EOF' +scaffold(workspace): empty @dawn-ai/workspace package + +Adds the package skeleton (manifest, tsconfig, vitest config) for the +upcoming pluggable workspace backends. No exports yet — types, defaults, +and helpers land in subsequent commits. + +Co-Authored-By: Claude Opus 4.7 +EOF +)" +``` + +--- + +### Task 2: Type interfaces + +**Files:** +- Create: `packages/workspace/src/types.ts` +- Modify: `packages/workspace/src/index.ts` + +- [ ] **Step 1: Write the type file** + +Create `packages/workspace/src/types.ts`: + +```ts +/** + * Workspace backend type interfaces. + * + * Backends are plain objects implementing these interfaces. The + * workspace capability calls into them to perform filesystem reads, + * writes, listings, and shell command execution. Defaults + * (`localFilesystem`, `localExec`) ship in this package; users can + * provide their own implementations via dawn.config.ts. + */ + +export interface BackendContext { + /** Aborts when the parent agent run is cancelled. */ + readonly signal: AbortSignal + /** Absolute filesystem path of the route's workspace directory. 
*/ + readonly workspaceRoot: string +} + +export interface FilesystemBackend { + /** + * Read a UTF-8 file. `path` is an already-resolved absolute path + * inside `ctx.workspaceRoot` — the capability has done the path-jail. + */ + readFile(path: string, ctx: BackendContext): Promise<string> + + /** Write a UTF-8 file. Returns the byte count of `content`. */ + writeFile( + path: string, + content: string, + ctx: BackendContext, + ): Promise<{ readonly bytesWritten: number }> + + /** List entries in a directory. Returns leaf names (not full paths). */ + listDir(path: string, ctx: BackendContext): Promise<readonly string[]> +} + +export interface ExecBackend { + /** + * Run a shell command. `args.cwd`, if provided, is already-resolved + * to an absolute path inside `ctx.workspaceRoot`. + */ + runCommand( + args: { + readonly command: string + readonly cwd?: string + readonly env?: Readonly<Record<string, string>> + }, + ctx: BackendContext, + ): Promise<{ + readonly stdout: string + readonly stderr: string + readonly exitCode: number + }> +} + +/** + * A filesystem middleware is a function that wraps a backend to add + * cross-cutting behavior (logging, caching, etc.). Compose multiple + * middlewares via `compose()`. + */ +export type FilesystemMiddleware = (next: FilesystemBackend) => FilesystemBackend + +/** See FilesystemMiddleware. */ +export type ExecMiddleware = (next: ExecBackend) => ExecBackend +``` + +- [ ] **Step 2: Re-export from the barrel** + +Edit `packages/workspace/src/index.ts`: + +```ts +export type { + BackendContext, + ExecBackend, + ExecMiddleware, + FilesystemBackend, + FilesystemMiddleware, +} from "./types.js" +``` + +- [ ] **Step 3: Build + typecheck** + +Run: `cd /Users/blove/repos/dawn && pnpm --filter @dawn-ai/workspace build 2>&1 | tail -5` +Expected: success. + +Run: `cd /Users/blove/repos/dawn && pnpm --filter @dawn-ai/workspace typecheck 2>&1 | tail -5` +Expected: success. 
+ +- [ ] **Step 4: Commit** + +```bash +cd /Users/blove/repos/dawn +git add packages/workspace/src/types.ts packages/workspace/src/index.ts +git commit -m "feat(workspace): type interfaces for filesystem + exec backends + +Co-Authored-By: Claude Opus 4.7 " +``` + +--- + +### Task 3: `localFilesystem()` factory + +**Files:** +- Create: `packages/workspace/src/local-filesystem.ts` +- Create: `packages/workspace/test/local-filesystem.test.ts` +- Modify: `packages/workspace/src/index.ts` + +- [ ] **Step 1: Write the failing test** + +Create `packages/workspace/test/local-filesystem.test.ts`: + +```ts +import { describe, expect, it, beforeEach, afterEach } from "vitest" +import { mkdtempSync, rmSync, writeFileSync, mkdirSync } from "node:fs" +import { tmpdir } from "node:os" +import { join } from "node:path" +import { localFilesystem } from "../src/local-filesystem.js" + +function ctx(workspaceRoot: string) { + return { signal: new AbortController().signal, workspaceRoot } +} + +describe("localFilesystem", () => { + let root: string + beforeEach(() => { + root = mkdtempSync(join(tmpdir(), "dawn-localfs-")) + }) + afterEach(() => { + rmSync(root, { recursive: true, force: true }) + }) + + it("readFile returns UTF-8 contents", async () => { + writeFileSync(join(root, "hello.txt"), "hi", "utf8") + const fs = localFilesystem() + expect(await fs.readFile(join(root, "hello.txt"), ctx(root))).toBe("hi") + }) + + it("readFile rejects files larger than maxFileBytes", async () => { + writeFileSync(join(root, "big.txt"), "x".repeat(2048), "utf8") + const fs = localFilesystem({ maxFileBytes: 1024 }) + await expect(fs.readFile(join(root, "big.txt"), ctx(root))).rejects.toThrow(/too large/i) + }) + + it("writeFile returns the byte count", async () => { + const fs = localFilesystem() + const res = await fs.writeFile(join(root, "out.txt"), "abc", ctx(root)) + expect(res.bytesWritten).toBe(3) + }) + + it("listDir returns directory entries (leaf names only)", async () => { + 
writeFileSync(join(root, "a.txt"), "", "utf8") + mkdirSync(join(root, "sub")) + const fs = localFilesystem() + const entries = await fs.listDir(root, ctx(root)) + expect([...entries].sort()).toEqual(["a.txt", "sub"]) + }) + + it("readFile on missing file raises ENOENT", async () => { + const fs = localFilesystem() + await expect(fs.readFile(join(root, "ghost.txt"), ctx(root))).rejects.toThrow(/ENOENT/) + }) +}) +``` + +- [ ] **Step 2: Run test to verify it fails** + +Run: `cd /Users/blove/repos/dawn && pnpm --filter @dawn-ai/workspace test 2>&1 | tail -10` +Expected: FAIL with `Cannot find module '../src/local-filesystem.js'`. + +- [ ] **Step 3: Implement** + +Create `packages/workspace/src/local-filesystem.ts`: + +```ts +import { readFile, readdir, stat, writeFile } from "node:fs/promises" +import type { BackendContext, FilesystemBackend } from "./types.js" + +const DEFAULT_MAX_FILE_BYTES = 256 * 1024 + +export interface LocalFilesystemOptions { + /** + * Reject `readFile` when the target file exceeds this size. + * Default: 256 KiB. + */ + readonly maxFileBytes?: number +} + +export function localFilesystem(opts: LocalFilesystemOptions = {}): FilesystemBackend { + const maxBytes = opts.maxFileBytes ?? 
DEFAULT_MAX_FILE_BYTES + return { + async readFile(path: string, _ctx: BackendContext): Promise<string> { + const s = await stat(path) + if (s.size > maxBytes) { + throw new Error(`File too large: ${s.size} bytes (max ${maxBytes}) at ${path}`) + } + return await readFile(path, "utf8") + }, + async writeFile( + path: string, + content: string, + _ctx: BackendContext, + ): Promise<{ readonly bytesWritten: number }> { + await writeFile(path, content, "utf8") + return { bytesWritten: Buffer.byteLength(content, "utf8") } + }, + async listDir(path: string, _ctx: BackendContext): Promise<readonly string[]> { + return await readdir(path) + }, + } +} +``` + +- [ ] **Step 4: Re-export from barrel** + +Edit `packages/workspace/src/index.ts`, append: + +```ts +export { localFilesystem, type LocalFilesystemOptions } from "./local-filesystem.js" +``` + +- [ ] **Step 5: Run tests to verify pass** + +Run: `cd /Users/blove/repos/dawn && pnpm --filter @dawn-ai/workspace test 2>&1 | tail -10` +Expected: PASS (5 tests). + +- [ ] **Step 6: Commit** + +```bash +cd /Users/blove/repos/dawn +git add packages/workspace/src/local-filesystem.ts \ + packages/workspace/test/local-filesystem.test.ts \ + packages/workspace/src/index.ts +git commit -m "feat(workspace): localFilesystem default backend + +Co-Authored-By: Claude Opus 4.7 " +``` + +--- + +### Task 4: `localExec()` factory + +**Files:** +- Create: `packages/workspace/src/local-exec.ts` +- Create: `packages/workspace/test/local-exec.test.ts` +- Modify: `packages/workspace/src/index.ts` + +- [ ] **Step 1: Write the failing test** + +Create `packages/workspace/test/local-exec.test.ts`: + +```ts +import { describe, expect, it } from "vitest" +import { mkdtempSync, rmSync } from "node:fs" +import { tmpdir } from "node:os" +import { join } from "node:path" +import { localExec } from "../src/local-exec.js" + +function ctx(workspaceRoot: string) { + return { signal: new AbortController().signal, workspaceRoot } +} + +describe("localExec", () => { + it("runCommand 
captures stdout, stderr, exitCode", async () => { + const root = mkdtempSync(join(tmpdir(), "dawn-localexec-")) + try { + const exec = localExec() + const out = await exec.runCommand({ command: "echo hello" }, ctx(root)) + expect(out.stdout.trim()).toBe("hello") + expect(out.exitCode).toBe(0) + } finally { + rmSync(root, { recursive: true, force: true }) + } + }) + + it("runCommand returns non-zero exitCode on failure", async () => { + const root = mkdtempSync(join(tmpdir(), "dawn-localexec-")) + try { + const exec = localExec() + const out = await exec.runCommand({ command: "exit 7" }, ctx(root)) + expect(out.exitCode).toBe(7) + } finally { + rmSync(root, { recursive: true, force: true }) + } + }) + + it("runCommand enforces timeout", async () => { + const root = mkdtempSync(join(tmpdir(), "dawn-localexec-")) + try { + const exec = localExec({ timeout: 100 }) + await expect( + exec.runCommand({ command: "sleep 1" }, ctx(root)), + ).rejects.toThrow(/timeout/i) + } finally { + rmSync(root, { recursive: true, force: true }) + } + }) + + it("runCommand respects allowedCommands regex allowlist", async () => { + const root = mkdtempSync(join(tmpdir(), "dawn-localexec-")) + try { + const exec = localExec({ allowedCommands: [/^echo\b/, /^ls\b/] }) + const ok = await exec.runCommand({ command: "echo allowed" }, ctx(root)) + expect(ok.stdout.trim()).toBe("allowed") + await expect( + exec.runCommand({ command: "rm -rf /" }, ctx(root)), + ).rejects.toThrow(/not allowed/i) + } finally { + rmSync(root, { recursive: true, force: true }) + } + }) +}) +``` + +- [ ] **Step 2: Run to verify failure** + +Run: `cd /Users/blove/repos/dawn && pnpm --filter @dawn-ai/workspace test -- local-exec 2>&1 | tail -10` +Expected: FAIL (module not found). 
+ +- [ ] **Step 3: Implement** + +Create `packages/workspace/src/local-exec.ts`: + +```ts +import { exec as cpExec } from "node:child_process" +import { promisify } from "node:util" +import type { BackendContext, ExecBackend } from "./types.js" + +const execAsync = promisify(cpExec) +const DEFAULT_TIMEOUT_MS = 30_000 + +export interface LocalExecOptions { + /** Kill the command if it runs longer than this. Default 30 seconds. */ + readonly timeout?: number + /** + * Optional allowlist of command-line patterns. When non-empty, every + * command must match at least one regex or `runCommand` throws before + * spawning anything. Use to deny dangerous commands in production. + */ + readonly allowedCommands?: readonly RegExp[] +} + +export function localExec(opts: LocalExecOptions = {}): ExecBackend { + const timeout = opts.timeout ?? DEFAULT_TIMEOUT_MS + const allowed = opts.allowedCommands + return { + async runCommand(args, ctx: BackendContext) { + if (allowed && allowed.length > 0 && !allowed.some((re) => re.test(args.command))) { + throw new Error(`Command not allowed by allowedCommands policy: ${args.command}`) + } + try { + const result = await execAsync(args.command, { + cwd: args.cwd ?? ctx.workspaceRoot, + env: args.env ?? process.env, + timeout, + signal: ctx.signal, + }) + return { stdout: result.stdout, stderr: result.stderr, exitCode: 0 } + } catch (err) { + const e = err as NodeJS.ErrnoException & { + code?: number | string + stdout?: string + stderr?: string + killed?: boolean + } + if (e.killed && typeof e.code !== "number") { + throw new Error(`Command timeout after ${timeout}ms: ${args.command}`) + } + return { + stdout: e.stdout ?? "", + stderr: e.stderr ?? "", + exitCode: typeof e.code === "number" ? 
e.code : 1, + } + } + }, + } +} +``` + +- [ ] **Step 4: Re-export from barrel** + +Edit `packages/workspace/src/index.ts`, append: + +```ts +export { localExec, type LocalExecOptions } from "./local-exec.js" +``` + +- [ ] **Step 5: Run tests** + +Run: `cd /Users/blove/repos/dawn && pnpm --filter @dawn-ai/workspace test 2>&1 | tail -10` +Expected: PASS (9 tests: 5 fs + 4 exec). + +- [ ] **Step 6: Commit** + +```bash +cd /Users/blove/repos/dawn +git add packages/workspace/src/local-exec.ts \ + packages/workspace/test/local-exec.test.ts \ + packages/workspace/src/index.ts +git commit -m "feat(workspace): localExec default backend + +Co-Authored-By: Claude Opus 4.7 " +``` + +--- + +### Task 5: `compose()` helper + +**Files:** +- Create: `packages/workspace/src/compose.ts` +- Create: `packages/workspace/test/compose.test.ts` +- Modify: `packages/workspace/src/index.ts` + +- [ ] **Step 1: Write the failing test** + +Create `packages/workspace/test/compose.test.ts`: + +```ts +import { describe, expect, it } from "vitest" +import { compose } from "../src/compose.js" +import type { FilesystemBackend, FilesystemMiddleware } from "../src/types.js" + +const base: FilesystemBackend = { + async readFile() { return "BASE" }, + async writeFile() { return { bytesWritten: 0 } }, + async listDir() { return [] }, +} + +describe("compose", () => { + it("with zero middlewares returns the base unchanged", () => { + expect(compose()(base)).toBe(base) + }) + + it("with one middleware wraps the base", async () => { + const lower: FilesystemMiddleware = (next) => ({ + ...next, + readFile: async (p, c) => (await next.readFile(p, c)).toLowerCase(), + }) + const wrapped = compose(lower)(base) + expect(await wrapped.readFile("x", { signal: new AbortController().signal, workspaceRoot: "/" })).toBe("base") + }) + + it("applies middlewares right-to-left (outermost first)", async () => { + const trace: string[] = [] + const a: FilesystemMiddleware = (next) => ({ + ...next, + readFile: async (p, c) 
=> { trace.push("a:before"); const r = await next.readFile(p, c); trace.push("a:after"); return r }, + }) + const b: FilesystemMiddleware = (next) => ({ + ...next, + readFile: async (p, c) => { trace.push("b:before"); const r = await next.readFile(p, c); trace.push("b:after"); return r }, + }) + await compose(a, b)(base).readFile("x", { signal: new AbortController().signal, workspaceRoot: "/" }) + // `compose(a, b)` reads "a wraps b wraps base", so order is a:before, b:before, b:after, a:after + expect(trace).toEqual(["a:before", "b:before", "b:after", "a:after"]) + }) +}) +``` + +- [ ] **Step 2: Run to verify failure** + +Run: `cd /Users/blove/repos/dawn && pnpm --filter @dawn-ai/workspace test -- compose 2>&1 | tail -10` +Expected: FAIL (module not found). + +- [ ] **Step 3: Implement** + +Create `packages/workspace/src/compose.ts`: + +```ts +/** + * Compose middleware functions into a single wrapper. + * + * Order: the LEFTMOST middleware is the OUTERMOST. Given + * `compose(a, b, c)(base)`, the call order is `a -> b -> c -> base`, + * mirroring how function call stacks read top-down. + * + * With zero middlewares, returns the base unchanged (no wrapper object). + */ +export function compose<T>(...middlewares: ReadonlyArray<(next: T) => T>): (base: T) => T { + if (middlewares.length === 0) return (base) => base + return (base) => middlewares.reduceRight((acc, mw) => mw(acc), base) +} +``` + +- [ ] **Step 4: Re-export** + +Edit `packages/workspace/src/index.ts`, append: + +```ts +export { compose } from "./compose.js" +``` + +- [ ] **Step 5: Run tests** + +Run: `cd /Users/blove/repos/dawn && pnpm --filter @dawn-ai/workspace test 2>&1 | tail -10` +Expected: PASS (12 tests). 
+ +- [ ] **Step 6: Commit** + +```bash +cd /Users/blove/repos/dawn +git add packages/workspace/src/compose.ts \ + packages/workspace/test/compose.test.ts \ + packages/workspace/src/index.ts +git commit -m "feat(workspace): compose() middleware helper + +Co-Authored-By: Claude Opus 4.7 " +``` + +--- + +### Task 6: `withLogging()` middleware + +**Files:** +- Create: `packages/workspace/src/with-logging.ts` +- Create: `packages/workspace/test/with-logging.test.ts` +- Modify: `packages/workspace/src/index.ts` + +- [ ] **Step 1: Write the failing test** + +Create `packages/workspace/test/with-logging.test.ts`: + +```ts +import { describe, expect, it } from "vitest" +import { withFilesystemLogging } from "../src/with-logging.js" +import type { FilesystemBackend } from "../src/types.js" + +const base: FilesystemBackend = { + async readFile() { return "ok" }, + async writeFile() { return { bytesWritten: 5 } }, + async listDir() { return ["a"] }, +} + +const ctx = { signal: new AbortController().signal, workspaceRoot: "/r" } + +describe("withFilesystemLogging", () => { + it("invokes the destination callback for each method", async () => { + const log: Array<{ method: string; args: unknown[] }> = [] + const wrapped = withFilesystemLogging({ destination: (e) => log.push(e) })(base) + await wrapped.readFile("a.md", ctx) + await wrapped.writeFile("b.md", "hi", ctx) + await wrapped.listDir("/r", ctx) + expect(log.map((e) => e.method)).toEqual(["readFile", "writeFile", "listDir"]) + expect(log[0]!.args).toEqual(["a.md"]) + expect(log[1]!.args).toEqual(["b.md", "hi"]) + }) + + it("forwards return values unchanged", async () => { + const wrapped = withFilesystemLogging({ destination: () => undefined })(base) + expect(await wrapped.readFile("a.md", ctx)).toBe("ok") + expect(await wrapped.writeFile("b.md", "hi", ctx)).toEqual({ bytesWritten: 5 }) + expect([...(await wrapped.listDir("/r", ctx))]).toEqual(["a"]) + }) + + it("defaults destination to console.error when not provided", 
async () => { + const original = console.error + const logged: string[] = [] + console.error = ((msg: string) => logged.push(msg)) as typeof console.error + try { + const wrapped = withFilesystemLogging()(base) + await wrapped.readFile("a.md", ctx) + } finally { + console.error = original + } + expect(logged.length).toBe(1) + expect(logged[0]).toContain("readFile") + }) +}) +``` + +- [ ] **Step 2: Run to verify failure** + +Run: `cd /Users/blove/repos/dawn && pnpm --filter @dawn-ai/workspace test -- with-logging 2>&1 | tail -10` +Expected: FAIL. + +- [ ] **Step 3: Implement** + +Create `packages/workspace/src/with-logging.ts`: + +```ts +import type { ExecMiddleware, FilesystemBackend, FilesystemMiddleware } from "./types.js" + +export interface LoggingOptions { + /** + * Where to send log lines. Default: `console.error`. + * + * Pass a function for structured logging. The argument is + * `{ method, args }` so the function can format however it wants. + */ + readonly destination?: ((entry: { method: string; args: unknown[] }) => void) +} + +function emit(opts: LoggingOptions, method: string, args: unknown[]): void { + if (opts.destination) { + opts.destination({ method, args }) + return + } + console.error(`[dawn:workspace] ${method}(${args.map((a) => JSON.stringify(a)).join(", ")})`) +} + +export function withFilesystemLogging(opts: LoggingOptions = {}): FilesystemMiddleware { + return (next: FilesystemBackend) => ({ + readFile: async (path, ctx) => { + emit(opts, "readFile", [path]) + return next.readFile(path, ctx) + }, + writeFile: async (path, content, ctx) => { + emit(opts, "writeFile", [path, content]) + return next.writeFile(path, content, ctx) + }, + listDir: async (path, ctx) => { + emit(opts, "listDir", [path]) + return next.listDir(path, ctx) + }, + }) +} + +export function withExecLogging(opts: LoggingOptions = {}): ExecMiddleware { + return (next) => ({ + runCommand: async (args, ctx) => { + emit(opts, "runCommand", [args.command, args.cwd]) + return 
next.runCommand(args, ctx) + }, + }) +} +``` + +(Two named functions, one per interface. Cleaner than the conditional-type approach floated in the spec — explicit type signatures, no inference magic.) + +- [ ] **Step 4: Re-export** + +Edit `packages/workspace/src/index.ts`, append: + +```ts +export { withExecLogging, withFilesystemLogging, type LoggingOptions } from "./with-logging.js" +``` + +- [ ] **Step 5: Run tests** + +Run: `cd /Users/blove/repos/dawn && pnpm --filter @dawn-ai/workspace test 2>&1 | tail -10` +Expected: PASS (15 tests). + +- [ ] **Step 6: Verify full repo still builds** + +Run: `cd /Users/blove/repos/dawn && pnpm build 2>&1 | tail -8` +Expected: success across all packages. + +- [ ] **Step 7: Commit** + +```bash +cd /Users/blove/repos/dawn +git add packages/workspace/src/with-logging.ts \ + packages/workspace/test/with-logging.test.ts \ + packages/workspace/src/index.ts +git commit -m "feat(workspace): withFilesystemLogging + withExecLogging middlewares + +Co-Authored-By: Claude Opus 4.7 " +``` + +--- + +# Phase B — Config loader switch + +### Task 7: Replace hand-rolled config parser with `tsx`-evaluated import + +**Files:** +- Modify: `packages/core/src/config.ts` +- Modify: `packages/core/test/config.test.ts` + +- [ ] **Step 1: Read the current loader + tests** + +Run: `cd /Users/blove/repos/dawn && wc -l packages/core/src/config.ts packages/core/test/config.test.ts` +Read both to understand: +- The current parser supports `const FOO = "x"` + `export default { appDir }` + `export default { appDir: "..." }`. Nothing else. +- Existing tests verify successful parses + rejection of unsupported syntax. 
+ +- [ ] **Step 2: Rewrite `packages/core/src/config.ts`** + +Replace the entire file with: + +```ts +import { constants } from "node:fs" +import { access } from "node:fs/promises" +import { join } from "node:path" +import { pathToFileURL } from "node:url" + +import type { DawnConfig, LoadDawnConfigOptions, LoadedDawnConfig } from "./types.js" + +export const DAWN_CONFIG_FILE = "dawn.config.ts" + +let loaderPromise: Promise<void> | undefined + +async function registerTsxLoader(): Promise<void> { + loaderPromise ??= (async () => { + const { register } = (await import("tsx/esm/api")) as { + readonly register: () => unknown + } + register() + })() + await loaderPromise +} + +export async function loadDawnConfig(options: LoadDawnConfigOptions): Promise<LoadedDawnConfig> { + const configPath = join(options.appRoot, DAWN_CONFIG_FILE) + await access(configPath, constants.F_OK) + await registerTsxLoader() + + const mod = (await import(pathToFileURL(configPath).href)) as { + readonly default?: unknown + } + + if (!mod.default || typeof mod.default !== "object") { + throw new Error( + `${DAWN_CONFIG_FILE} must export default an object. 
Got: ${typeof mod.default}`, + ) + } + + return { + appRoot: options.appRoot, + config: mod.default as DawnConfig, + configPath, + } +} +``` + +- [ ] **Step 3: Rewrite `packages/core/test/config.test.ts`** + +Replace with: + +```ts +import { afterEach, beforeEach, describe, expect, it } from "vitest" +import { mkdtempSync, rmSync } from "node:fs" +import { writeFile } from "node:fs/promises" +import { tmpdir } from "node:os" +import { join } from "node:path" + +import { DAWN_CONFIG_FILE, loadDawnConfig } from "../src/config.js" + +describe("loadDawnConfig", () => { + let appRoot: string + + beforeEach(() => { + appRoot = mkdtempSync(join(tmpdir(), "dawn-config-")) + }) + + afterEach(() => { + rmSync(appRoot, { recursive: true, force: true }) + }) + + async function writeConfig(source: string): Promise { + await writeFile(join(appRoot, DAWN_CONFIG_FILE), source, "utf8") + } + + it("loads a config with just appDir", async () => { + await writeConfig(`export default { appDir: "src/app" }\n`) + const loaded = await loadDawnConfig({ appRoot }) + expect(loaded.config).toMatchObject({ appDir: "src/app" }) + expect(loaded.configPath).toBe(join(appRoot, DAWN_CONFIG_FILE)) + }) + + it("loads a config with no fields (empty object)", async () => { + await writeConfig(`export default {}\n`) + const loaded = await loadDawnConfig({ appRoot }) + expect(loaded.config).toEqual({}) + }) + + it("loads a config that imports from another module", async () => { + // Note: this test mostly verifies the tsx loader is registered — the + // existence of an importable file is enough; the import doesn't have + // to be a real package. 
+ await writeConfig(` + import { posix } from "node:path" + const APP_DIR = posix.join("src", "app") + export default { appDir: APP_DIR } + `) + const loaded = await loadDawnConfig({ appRoot }) + expect(loaded.config).toMatchObject({ appDir: "src/app" }) + }) + + it("rejects missing default export", async () => { + await writeConfig(`export const named = { appDir: "x" }\n`) + await expect(loadDawnConfig({ appRoot })).rejects.toThrow(/must export default/i) + }) + + it("rejects non-object default export", async () => { + await writeConfig(`export default "hello"\n`) + await expect(loadDawnConfig({ appRoot })).rejects.toThrow(/must export default an object/i) + }) + + it("propagates TS syntax errors from the imported module", async () => { + await writeConfig(`export default { appDir:\n`) // syntactically invalid + await expect(loadDawnConfig({ appRoot })).rejects.toThrow() + }) +}) +``` + +- [ ] **Step 4: Run config tests** + +Run: `cd /Users/blove/repos/dawn && pnpm --filter @dawn-ai/core test -- config.test 2>&1 | tail -10` +Expected: PASS (6 tests). + +- [ ] **Step 5: Run the full repo tests to catch unrelated regressions** + +Run: `cd /Users/blove/repos/dawn && pnpm test 2>&1 | tail -10` +Expected: all tests pass. (One file in `packages/core/test/discover-routes.test.ts` writes a `dawn.config.ts` with `export default { appDir: "src/app" }` — that's a valid TS module under the new loader too, so should still work.) + +- [ ] **Step 6: Commit** + +```bash +cd /Users/blove/repos/dawn +git add packages/core/src/config.ts packages/core/test/config.test.ts +git commit -m "$(cat <<'EOF' +feat(core): switch dawn.config.ts loader from hand-rolled parser to tsx import + +The hand-rolled parser supported only string-literal property values +and const string bindings. The upcoming workspace capability needs to +express callable backend values in dawn.config.ts, which strings can't +express. Switch to a tsx-evaluated dynamic import (same loader Dawn +already uses for route discovery and tool execution). 
+ +Existing dawn.config.ts files (just { appDir }) remain valid TS +modules and continue to load without modification. + +Co-Authored-By: Claude Opus 4.7 +EOF +)" +``` + +--- + +# Phase C — Capability marker + +### Task 8: Extend `DawnConfig` + `CapabilityMarkerContext` with `backends?` + +**Files:** +- Modify: `packages/core/src/types.ts` +- Modify: `packages/core/src/capabilities/types.ts` +- Modify: `packages/core/package.json` (add `@dawn-ai/workspace` peer/dep — type-only) + +- [ ] **Step 1: Add the workspace package as a type-only dependency on @dawn-ai/core** + +Edit `packages/core/package.json`. Add to `devDependencies` (type-only — no runtime dep): + +```json +"@dawn-ai/workspace": "workspace:*" +``` + +- [ ] **Step 2: Extend `DawnConfig`** + +Edit `packages/core/src/types.ts`. Find the `DawnConfig` interface (around line 5) and update: + +```ts +import type { ExecBackend, FilesystemBackend } from "@dawn-ai/workspace" +import type { RouteKind } from "@dawn-ai/sdk" + +export type { RouteKind } + +export interface DawnConfig { + readonly appDir?: string + readonly backends?: { + readonly filesystem?: FilesystemBackend + readonly exec?: ExecBackend + } +} +``` + +- [ ] **Step 3: Extend `CapabilityMarkerContext`** + +Edit `packages/core/src/capabilities/types.ts`. Add to the imports at the top: + +```ts +import type { ExecBackend, FilesystemBackend } from "@dawn-ai/workspace" +``` + +Update the `CapabilityMarkerContext` interface: + +```ts +export interface CapabilityMarkerContext { + readonly routeManifest: RouteManifest + readonly descriptor: DawnAgent | undefined + readonly descriptorRouteMap?: ReadonlyMap + readonly backends?: { + readonly filesystem?: FilesystemBackend + readonly exec?: ExecBackend + } +} +``` + +- [ ] **Step 4: Install + verify** + +Run: `cd /Users/blove/repos/dawn && pnpm install --silent 2>&1 | tail -3` +Expected: workspace package is symlinked into core. 
+ +Run: `cd /Users/blove/repos/dawn && pnpm --filter @dawn-ai/core typecheck 2>&1 | tail -5` +Expected: 0 errors. + +- [ ] **Step 5: Run full repo tests** + +Run: `cd /Users/blove/repos/dawn && pnpm test 2>&1 | tail -10` +Expected: green. + +- [ ] **Step 6: Commit** + +```bash +cd /Users/blove/repos/dawn +git add packages/core/package.json \ + packages/core/src/types.ts \ + packages/core/src/capabilities/types.ts +git commit -m "feat(core): add backends field to DawnConfig + CapabilityMarkerContext + +Type-only edge: @dawn-ai/core now imports FilesystemBackend/ExecBackend +types from @dawn-ai/workspace via 'import type'. No runtime weight. + +Co-Authored-By: Claude Opus 4.7 " +``` + +--- + +### Task 9: Implement `createWorkspaceMarker` + +**Files:** +- Create: `packages/core/src/capabilities/built-in/workspace.ts` +- Create: `packages/core/test/capabilities/workspace.test.ts` +- Modify: `packages/core/src/index.ts` +- Modify: `packages/core/package.json` (`@dawn-ai/workspace` is now a runtime dep too — for the default backends) + +- [ ] **Step 1: Promote workspace from devDep to dep in `@dawn-ai/core`** + +Edit `packages/core/package.json`. Move `@dawn-ai/workspace` from `devDependencies` to `dependencies`. The marker needs `localFilesystem()` and `localExec()` at runtime as defaults. 
+ +- [ ] **Step 2: Write the failing test** + +Create `packages/core/test/capabilities/workspace.test.ts`: + +```ts +import { afterEach, beforeEach, describe, expect, it, vi } from "vitest" +import { mkdtempSync, mkdirSync, rmSync, writeFileSync } from "node:fs" +import { tmpdir } from "node:os" +import { join } from "node:path" + +import { createWorkspaceMarker } from "../../src/capabilities/built-in/workspace.js" +import type { CapabilityMarkerContext } from "../../src/capabilities/types.js" + +function emptyManifest() { + return { appRoot: "/app", routes: [] } +} + +function ctx(extras: Partial<CapabilityMarkerContext> = {}): CapabilityMarkerContext { + return { + routeManifest: emptyManifest(), + descriptor: undefined, + ...extras, + } +} + +describe("createWorkspaceMarker — detect", () => { + let routeDir: string + beforeEach(() => { routeDir = mkdtempSync(join(tmpdir(), "dawn-workspace-cap-")) }) + afterEach(() => { rmSync(routeDir, { recursive: true, force: true }) }) + + it("returns false when no workspace/ directory exists", async () => { + const detected = await createWorkspaceMarker().detect(routeDir, ctx()) + expect(detected).toBe(false) + }) + + it("returns true when workspace/ exists", async () => { + mkdirSync(join(routeDir, "workspace")) + const detected = await createWorkspaceMarker().detect(routeDir, ctx()) + expect(detected).toBe(true) + }) +}) + +describe("createWorkspaceMarker — load", () => { + let routeDir: string + beforeEach(() => { + routeDir = mkdtempSync(join(tmpdir(), "dawn-workspace-cap-")) + mkdirSync(join(routeDir, "workspace")) + }) + afterEach(() => { rmSync(routeDir, { recursive: true, force: true }) }) + + it("contributes exactly four tools when workspace/ exists", async () => { + const contribution = await createWorkspaceMarker().load(routeDir, ctx()) + const names = (contribution.tools ?? 
[]).map((t) => t.name).sort() + expect(names).toEqual(["listDir", "readFile", "runBash", "writeFile"]) + }) + + it("contributes no tools when workspace/ is absent", async () => { + rmSync(join(routeDir, "workspace"), { recursive: true }) + const contribution = await createWorkspaceMarker().load(routeDir, ctx()) + expect(contribution.tools).toBeUndefined() + }) + + it("readFile tool calls the configured backend with an absolute path inside the jail", async () => { + writeFileSync(join(routeDir, "workspace", "hello.txt"), "hi", "utf8") + const fakeBackend = { + readFile: vi.fn().mockResolvedValue("hi"), + writeFile: vi.fn(), + listDir: vi.fn(), + } + const contribution = await createWorkspaceMarker().load( + routeDir, + ctx({ backends: { filesystem: fakeBackend } }), + ) + const readTool = contribution.tools!.find((t) => t.name === "readFile")! + const result = await readTool.run({ path: "hello.txt" }, { signal: new AbortController().signal }) + expect(result).toBe("hi") + expect(fakeBackend.readFile).toHaveBeenCalledOnce() + const [absPath] = fakeBackend.readFile.mock.calls[0]! + expect(absPath).toBe(join(routeDir, "workspace", "hello.txt")) + }) + + it("rejects path-jail escapes with a clear error", async () => { + const contribution = await createWorkspaceMarker().load(routeDir, ctx()) + const readTool = contribution.tools!.find((t) => t.name === "readFile")! + await expect( + readTool.run({ path: "../../etc/passwd" }, { signal: new AbortController().signal }), + ).rejects.toThrow(/outside workspace/i) + }) + + it("uses the default local backends when none configured", async () => { + writeFileSync(join(routeDir, "workspace", "ok.txt"), "ok", "utf8") + const contribution = await createWorkspaceMarker().load(routeDir, ctx()) + const readTool = contribution.tools!.find((t) => t.name === "readFile")! 
+ const result = await readTool.run({ path: "ok.txt" }, { signal: new AbortController().signal }) + expect(result).toBe("ok") + }) + + it("runBash tool calls the configured exec backend", async () => { + const fakeExec = { + runCommand: vi.fn().mockResolvedValue({ stdout: "world", stderr: "", exitCode: 0 }), + } + const contribution = await createWorkspaceMarker().load( + routeDir, + ctx({ backends: { exec: fakeExec } }), + ) + const runBash = contribution.tools!.find((t) => t.name === "runBash")! + const result = await runBash.run( + { command: "echo world" }, + { signal: new AbortController().signal }, + ) + expect(result).toMatchObject({ stdout: "world", exitCode: 0 }) + expect(fakeExec.runCommand).toHaveBeenCalledWith( + expect.objectContaining({ command: "echo world" }), + expect.any(Object), + ) + }) + + it("marks all four tools as overridable", async () => { + const contribution = await createWorkspaceMarker().load(routeDir, ctx()) + for (const t of contribution.tools ?? []) { + // Overridable tools carry a flag the uniqueness check reads; see Task 10. + expect((t as unknown as { overridable?: boolean }).overridable).toBe(true) + } + }) +}) +``` + +- [ ] **Step 3: Run tests to verify failure** + +Run: `cd /Users/blove/repos/dawn && pnpm --filter @dawn-ai/core test -- workspace.test 2>&1 | tail -15` +Expected: FAIL — `Cannot find module '../../src/capabilities/built-in/workspace.js'`. 
+ +- [ ] **Step 4: Implement the marker** + +Create `packages/core/src/capabilities/built-in/workspace.ts`: + +```ts +import { existsSync } from "node:fs" +import { join, resolve, sep } from "node:path" +import { z } from "zod" + +import { localExec, localFilesystem } from "@dawn-ai/workspace" +import type { BackendContext, ExecBackend, FilesystemBackend } from "@dawn-ai/workspace" + +import type { CapabilityMarker, DawnToolDefinition } from "../types.js" + +const WORKSPACE_DIRNAME = "workspace" + +const READ_FILE_INPUT = z.object({ path: z.string().min(1) }) +const WRITE_FILE_INPUT = z.object({ path: z.string().min(1), content: z.string() }) +const LIST_DIR_INPUT = z.object({ path: z.string().default(".") }) +const RUN_BASH_INPUT = z.object({ command: z.string().min(1) }) + +function pathJail(userPath: string, workspaceRoot: string): string { + const resolved = resolve(workspaceRoot, userPath) + if (resolved !== workspaceRoot && !resolved.startsWith(workspaceRoot + sep)) { + throw new Error(`Path is outside workspace: ${userPath}`) + } + return resolved +} + +function backendContext(workspaceRoot: string, signal: AbortSignal): BackendContext { + return { signal, workspaceRoot } +} + +interface OverridableTool extends DawnToolDefinition { + readonly overridable: true +} + +function buildWorkspaceTools( + workspaceRoot: string, + fs: FilesystemBackend, + exec: ExecBackend, +): readonly OverridableTool[] { + const readFile: OverridableTool = { + name: "readFile", + description: "Read a UTF-8 file from the workspace.", + schema: READ_FILE_INPUT, + overridable: true, + run: async (input, ctx) => { + const { path } = READ_FILE_INPUT.parse(input) + const safe = pathJail(path, workspaceRoot) + return fs.readFile(safe, backendContext(workspaceRoot, ctx.signal)) + }, + } + const writeFile: OverridableTool = { + name: "writeFile", + description: "Write a UTF-8 file inside the workspace.", + schema: WRITE_FILE_INPUT, + overridable: true, + run: async (input, ctx) => { + const 
{ path, content } = WRITE_FILE_INPUT.parse(input) + const safe = pathJail(path, workspaceRoot) + const result = await fs.writeFile(safe, content, backendContext(workspaceRoot, ctx.signal)) + return `wrote ${result.bytesWritten} bytes to ${path}` + }, + } + const listDir: OverridableTool = { + name: "listDir", + description: "List entries in a workspace directory.", + schema: LIST_DIR_INPUT, + overridable: true, + run: async (input, ctx) => { + const { path } = LIST_DIR_INPUT.parse(input) + const safe = pathJail(path, workspaceRoot) + const entries = await fs.listDir(safe, backendContext(workspaceRoot, ctx.signal)) + return [...entries] + }, + } + const runBash: OverridableTool = { + name: "runBash", + description: "Run a shell command inside the workspace.", + schema: RUN_BASH_INPUT, + overridable: true, + run: async (input, ctx) => { + const { command } = RUN_BASH_INPUT.parse(input) + return exec.runCommand({ command }, backendContext(workspaceRoot, ctx.signal)) + }, + } + return [readFile, writeFile, listDir, runBash] +} + +export function createWorkspaceMarker(): CapabilityMarker { + return { + name: "workspace", + detect: async (routeDir, _context) => existsSync(join(routeDir, WORKSPACE_DIRNAME)), + load: async (routeDir, context) => { + const workspaceRoot = join(routeDir, WORKSPACE_DIRNAME) + if (!existsSync(workspaceRoot)) return {} + const fs = context.backends?.filesystem ?? localFilesystem() + const exec = context.backends?.exec ?? localExec() + return { tools: buildWorkspaceTools(workspaceRoot, fs, exec) } + }, + } +} +``` + +- [ ] **Step 5: Export from the core barrel** + +Edit `packages/core/src/index.ts`, add (next to the other `createXxxMarker` exports): + +```ts +export { createWorkspaceMarker } from "./capabilities/built-in/workspace.js" +``` + +- [ ] **Step 6: Run tests** + +Run: `cd /Users/blove/repos/dawn && pnpm --filter @dawn-ai/core test 2>&1 | tail -10` +Expected: PASS (existing tests + 9 new workspace tests). 
+ +- [ ] **Step 7: Commit** + +```bash +cd /Users/blove/repos/dawn +git add packages/core/package.json \ + packages/core/src/capabilities/built-in/workspace.ts \ + packages/core/test/capabilities/workspace.test.ts \ + packages/core/src/index.ts +git commit -m "feat(core): createWorkspaceMarker capability + +Auto-detects a route's workspace/ directory and contributes four tools +(readFile/writeFile/listDir/runBash) routed through configurable +backends. Defaults to localFilesystem + localExec when no backends are +configured in dawn.config.ts. Path-jail enforced in the capability; +backends receive resolved absolute paths. + +Tools carry an 'overridable: true' flag so the uniqueness-check +inversion in the next commit can let user-authored tools/<name>.ts +files supersede them. + +Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>" +``` + +--- + +### Task 10: Invert tool-name uniqueness check for overridable tools + +**Files:** +- Modify: `packages/cli/src/lib/runtime/check-tool-name-uniqueness.ts` +- Modify: `packages/cli/test/tool-name-uniqueness.test.ts` +- Modify: `packages/cli/src/lib/runtime/execute-route.ts` (the call site uses the new behavior) + +- [ ] **Step 1: Add the failing test** + +Append to `packages/cli/test/tool-name-uniqueness.test.ts`: + +```ts +describe("checkToolNameUniqueness — overridable", () => { + it("when a capability tool is overridable, a user tool with the same name does NOT error and replaces it", () => { + const result = checkToolNameUniqueness({ + userTools: [{ name: "readFile" }], + capabilityTools: [{ name: "readFile", overridable: true }], + reservedNames: new Set(), + }) + expect(result.ok).toBe(true) + if (!result.ok) return + // The returned `effectiveCapabilityTools` drops the overridden tool. 
+ expect(result.effectiveCapabilityTools).toEqual([]) + }) + + it("when a capability tool is NOT overridable, a user tool with the same name still errors", () => { + const result = checkToolNameUniqueness({ + userTools: [{ name: "writeTodos" }], + capabilityTools: [{ name: "writeTodos" }], // no overridable flag = false + reservedNames: new Set(), + }) + expect(result.ok).toBe(false) + }) +}) +``` + +- [ ] **Step 2: Run to verify failure** + +Run: `cd /Users/blove/repos/dawn && pnpm --filter @dawn-ai/cli test -- tool-name-uniqueness 2>&1 | tail -10` +Expected: FAIL — `result.effectiveCapabilityTools` doesn't exist yet; and the overridable case errors. + +- [ ] **Step 3: Update the check** + +Edit `packages/cli/src/lib/runtime/check-tool-name-uniqueness.ts`: + +```ts +export interface ToolNameCheckInput { + readonly userTools: ReadonlyArray<{ readonly name: string }> + readonly capabilityTools: ReadonlyArray<{ readonly name: string; readonly overridable?: boolean }> + readonly reservedNames: ReadonlySet<string> +} + +export type ToolNameCheckResult = + | { + readonly ok: true + /** + * Capability tools with the overridable ones removed when shadowed by + * a user tool. The runtime should use THIS list when composing the + * final tool set, not the input `capabilityTools`. + */ + readonly effectiveCapabilityTools: ReadonlyArray<{ readonly name: string; readonly overridable?: boolean }> + } + | { readonly ok: false; readonly message: string } + +export function checkToolNameUniqueness(input: ToolNameCheckInput): ToolNameCheckResult { + const userNames = new Set(input.userTools.map((t) => t.name)) + const effective: Array<(typeof input.capabilityTools)[number]> = [] + + for (const cap of input.capabilityTools) { + if (userNames.has(cap.name)) { + if (cap.overridable) { + // Drop from the effective list; user tool wins. + continue + } + return { + ok: false, + message: `Capability conflict: tool name "${cap.name}" is contributed by a capability and also defined in tools/. 
Remove the user tool or remove the capability marker file.`, + } + } + effective.push(cap) + } + + for (const t of input.userTools) { + if (input.reservedNames.has(t.name)) { + return { + ok: false, + message: `Reserved tool name: "${t.name}" is reserved by the Dawn harness and cannot be used as a user tool name.`, + } + } + } + + return { ok: true, effectiveCapabilityTools: effective } +} +``` + +- [ ] **Step 4: Update the callsite in `execute-route.ts`** + +In `packages/cli/src/lib/runtime/execute-route.ts`, find the existing block that calls `checkToolNameUniqueness` (around line 305 — the area introduced in PR #155). The current code throws on collision and otherwise concatenates `tools = [...tools, ...capTools]`. Adjust to use the new `effectiveCapabilityTools`: + +```ts +const RESERVED_TOOL_NAMES = new Set(["task"]) +const check = checkToolNameUniqueness({ + userTools: tools.map((t) => ({ name: t.name })), + capabilityTools: capTools.map((t) => ({ + name: t.name, + ...((t as unknown as { overridable?: boolean }).overridable ? { overridable: true } : {}), + })), + reservedNames: RESERVED_TOOL_NAMES, +}) +if (!check.ok) { + return { message: check.message, ok: false } +} + +// Use the effective set so overridden tools are dropped before merging. +const effectiveCapNames = new Set(check.effectiveCapabilityTools.map((t) => t.name)) +const filteredCapTools = capTools.filter((t) => effectiveCapNames.has(t.name)) +tools = [...tools, ...filteredCapTools] +``` + +(The existing state-field collision check below stays unchanged.) + +- [ ] **Step 5: Run all the relevant tests** + +Run: `cd /Users/blove/repos/dawn && pnpm --filter @dawn-ai/cli test 2>&1 | tail -10` +Expected: PASS (existing tests + 2 new uniqueness tests). + +Run: `cd /Users/blove/repos/dawn && pnpm test 2>&1 | tail -10` +Expected: full repo green. 
+ +- [ ] **Step 6: Commit** + +```bash +cd /Users/blove/repos/dawn +git add packages/cli/src/lib/runtime/check-tool-name-uniqueness.ts \ + packages/cli/test/tool-name-uniqueness.test.ts \ + packages/cli/src/lib/runtime/execute-route.ts +git commit -m "feat(cli): support overridable capability tools + +Tools marked overridable on a capability contribution can be shadowed +by a user-authored tool with the same name. Used by the workspace +capability so authors can override readFile/writeFile/listDir/runBash +by dropping a file in tools/. Non-overridable capability tools +(writeTodos, readSkill, task) retain the collision error. + +Co-Authored-By: Claude Opus 4.7 " +``` + +--- + +# Phase D — Runtime wiring + +### Task 11: Register `createWorkspaceMarker` + thread backends from config + +**Files:** +- Modify: `packages/cli/src/lib/runtime/execute-route.ts` + +- [ ] **Step 1: Add imports + the marker to the registry** + +Edit `packages/cli/src/lib/runtime/execute-route.ts`. Add to the existing imports from `@dawn-ai/core`: + +```ts +import { + // ...existing + createWorkspaceMarker, + loadDawnConfig, +} from "@dawn-ai/core" +``` + +Find the `createCapabilityRegistry([...])` block and add the marker: + +```ts +const registry = createCapabilityRegistry([ + createPlanningMarker(), + createAgentsMdMarker(), + createSkillsMarker(), + createSubagentsMarker(), + createWorkspaceMarker(), +]) +``` + +- [ ] **Step 2: Load `dawn.config.ts` once + thread backends into `applyCapabilities` context** + +Before the `applyCapabilities` call (around the block that builds `descriptorRouteMap`), load the config: + +```ts +let configBackends: { filesystem?: FilesystemBackend; exec?: ExecBackend } | undefined +try { + const loaded = await loadDawnConfig({ appRoot }) + configBackends = loaded.config.backends +} catch { + // No dawn.config.ts (or unreadable) — the workspace capability falls + // back to its defaults (localFilesystem + localExec). 
+} + +const applied = await applyCapabilities(registry, routeDir, { + routeManifest, + descriptor, + descriptorRouteMap, + backends: configBackends, +}) +``` + +Add the type imports at the top: + +```ts +import type { ExecBackend, FilesystemBackend } from "@dawn-ai/workspace" +``` + +And add `@dawn-ai/workspace` to `packages/cli/package.json` dependencies (`pnpm add @dawn-ai/workspace --filter @dawn-ai/cli --workspace`). + +- [ ] **Step 3: Run all tests** + +Run: `cd /Users/blove/repos/dawn && pnpm test 2>&1 | tail -10` +Expected: all green. + +Run: `cd /Users/blove/repos/dawn && pnpm build 2>&1 | tail -10` +Expected: all packages build. + +- [ ] **Step 4: Commit** + +```bash +cd /Users/blove/repos/dawn +git add packages/cli/src/lib/runtime/execute-route.ts packages/cli/package.json +git commit -m "feat(cli): register workspace capability + thread backends from dawn.config + +Co-Authored-By: Claude Opus 4.7 " +``` + +--- + +### Task 12: Typegen — surface workspace tools + +**Files:** +- Modify: `packages/cli/src/lib/typegen/run-typegen.ts` +- Modify or create: `packages/cli/test/run-typegen.test.ts` (the existing test from sub-project 3) + +- [ ] **Step 1: Read the existing pattern** + +Read `packages/cli/src/lib/typegen/run-typegen.ts`. Note the `PLANNING_EXTRA_TOOL`, `SKILLS_EXTRA_TOOL`, `SUBAGENTS_EXTRA_TOOL` declarations and their `hasX(routeDir)` gates around line 21-100. + +- [ ] **Step 2: Add a failing test** + +Append to `packages/cli/test/run-typegen.test.ts`: + +```ts +describe("typegen — workspace capability", () => { + // Use the existing temp-dir + manifest helpers in this test file. + it("includes readFile/writeFile/listDir/runBash for routes with a workspace/ directory", async () => { + // Set up a tmp app with src/app/foo/{index.ts, workspace/}. + // Run runTypegen and read .dawn/dawn.generated.d.ts. + // Assert all four tool names appear in foo's tool union. + // (Mirror the existing readSkill/task assertions in this file.) 
+ }) + + it("does NOT include the four tools when workspace/ is absent", async () => { + // Same setup minus workspace/. + // Assert none of readFile/writeFile/listDir/runBash appear. + }) +}) +``` + +Read the existing `task` typegen test (added in PR #156 Task 12) and mirror its structure exactly. Same helpers, same temp-dir pattern. + +- [ ] **Step 3: Run to verify failure** + +Run: `cd /Users/blove/repos/dawn && pnpm --filter @dawn-ai/cli test -- run-typegen 2>&1 | tail -10` +Expected: FAIL. + +- [ ] **Step 4: Add the workspace typegen entries** + +Edit `packages/cli/src/lib/typegen/run-typegen.ts`. Add after `SUBAGENTS_EXTRA_TOOL`: + +```ts +const WORKSPACE_EXTRA_TOOLS: readonly ExtractedToolType[] = [ + { + name: "readFile", + description: "Read a UTF-8 file from the workspace.", + inputType: `{ path: string }`, + outputType: `string`, + }, + { + name: "writeFile", + description: "Write a UTF-8 file inside the workspace.", + inputType: `{ path: string; content: string }`, + outputType: `string`, + }, + { + name: "listDir", + description: "List entries in a workspace directory.", + inputType: `{ path?: string }`, + outputType: `string[]`, + }, + { + name: "runBash", + description: "Run a shell command inside the workspace.", + inputType: `{ command: string }`, + outputType: `{ stdout: string; stderr: string; exitCode: number }`, + }, +] + +function hasWorkspace(routeDir: string): boolean { + return existsSync(join(routeDir, "workspace")) +} +``` + +In the `extraTools` build block (the one with the existing `hasSubagents` gate), add: + +```ts +if (hasWorkspace(route.routeDir)) { + extraTools.push(...WORKSPACE_EXTRA_TOOLS) +} +``` + +- [ ] **Step 5: Run tests + verify** + +Run: `cd /Users/blove/repos/dawn && pnpm --filter @dawn-ai/cli test 2>&1 | tail -10` +Expected: PASS. 
+ +- [ ] **Step 6: Commit** + +```bash +cd /Users/blove/repos/dawn +git add packages/cli/src/lib/typegen/run-typegen.ts packages/cli/test/run-typegen.test.ts +git commit -m "feat(cli): typegen surfaces workspace tools for routes with workspace/ + +Co-Authored-By: Claude Opus 4.7 " +``` + +--- + +# Phase E — Chat example migration + smoke + +### Task 13: Delete hand-rolled workspace tools from the chat example + +**Files:** +- Delete: 4 files under `examples/chat/server/src/app/chat/tools/` +- Delete: 2 files under `examples/chat/server/src/app/coordinator/subagents/research/tools/` +- Delete: 2 `workspace-path.ts` helpers if unreferenced after the above + +- [ ] **Step 1: Delete chat route's workspace tool files** + +```bash +cd /Users/blove/repos/dawn +git rm examples/chat/server/src/app/chat/tools/readFile.ts +git rm examples/chat/server/src/app/chat/tools/writeFile.ts +git rm examples/chat/server/src/app/chat/tools/listDir.ts +git rm examples/chat/server/src/app/chat/tools/runBash.ts +``` + +- [ ] **Step 2: Check if `chat/workspace-path.ts` is still referenced** + +Run: `cd /Users/blove/repos/dawn && grep -rn "workspace-path\b" examples/chat/server/src/app/chat/ --include="*.ts" 2>/dev/null` +Expected: no matches (only the deleted tool files referenced it). If any remain, leave the helper in place. + +If no remaining references, delete: +```bash +git rm examples/chat/server/src/app/chat/workspace-path.ts +``` + +- [ ] **Step 3: Delete research subagent's workspace tools + helper** + +```bash +git rm examples/chat/server/src/app/coordinator/subagents/research/tools/readFile.ts +git rm examples/chat/server/src/app/coordinator/subagents/research/tools/listDir.ts +``` + +Run: `cd /Users/blove/repos/dawn && grep -rn "workspace-path\b" examples/chat/server/src/app/coordinator/ --include="*.ts" 2>/dev/null` +Expected: no matches. 
Then: +```bash +git rm examples/chat/server/src/app/coordinator/subagents/research/workspace-path.ts +``` + +- [ ] **Step 4: Build the chat example** + +Run: `cd /Users/blove/repos/dawn/examples/chat/server && pnpm build 2>&1 | tail -10` +Expected: `4 route(s) compiled` (chat, coordinator, coordinator/subagents/research, coordinator/subagents/summarizer). Build succeeds. + +- [ ] **Step 5: Verify typegen surfaces the workspace tools on the routes that have a workspace/ dir** + +Run: +```bash +cd /Users/blove/repos/dawn/examples/chat/server +grep -A 6 'route "/chat"' .dawn/dawn.generated.d.ts | head -20 +grep -A 6 'route "/coordinator/subagents/research"' .dawn/dawn.generated.d.ts | head -20 +``` +Expected: +- `/chat` route's tool union contains `readFile`, `writeFile`, `listDir`, `runBash`, plus existing `writeTodos` and `readSkill`. +- `/coordinator/subagents/research` route's tool union contains the 4 workspace tools (the subagent has `workspace/` via its own dir, OR inherits the workspace convention — verify behavior matches the spec's intent. If the research subagent route doesn't have its own `workspace/` directory, ADD one before this test or accept that those tools no longer show — the spec migration assumes the workspace dir is set up correctly for each route). + +If the research subagent didn't have its own workspace dir originally and was working through path manipulation, create one: + +```bash +mkdir -p examples/chat/server/src/app/coordinator/subagents/research/workspace +touch examples/chat/server/src/app/coordinator/subagents/research/workspace/.gitkeep +``` + +(The spec convention is: each route that wants workspace tools has its own `workspace/` directory. If the existing arrangement pointed all routes at a single shared workspace, that needs review — but most likely the migration just needs the per-route dir, even if it's empty or symlinked.) 
+ +- [ ] **Step 6: Commit** + +```bash +cd /Users/blove/repos/dawn +git add -A +git commit -m "$(cat <<'EOF' +refactor(examples/chat): migrate to workspace capability + +Delete the hand-rolled readFile/writeFile/listDir/runBash tool files +(and their workspace-path helpers) from both the /chat route and the +research subagent. The workspace capability auto-contributes these +tools when the route has a workspace/ directory. + +Co-Authored-By: Claude Opus 4.7 +EOF +)" +``` + +--- + +### Task 14: Smoke test via Chrome MCP web client + +**Files:** none modified; this is verification only. + +- [ ] **Step 1: Start dev servers** + +```bash +cd /Users/blove/repos/dawn/examples/chat/server && OPENAI_API_KEY="$(grep OPENAI_API_KEY /Users/blove/repos/dawn/.env | cut -d= -f2-)" pnpm dev & +cd /Users/blove/repos/dawn/examples/chat/web && pnpm dev & +# Wait for both: "Dawn dev ready at http://127.0.0.1:3001" and "Ready in Nms" +``` + +- [ ] **Step 2: Drive `/chat` through the web picker** + +Navigate Chrome MCP to `http://localhost:3000`. Click the `/chat` radio (it's the default). Type "Briefly list the files in the workspace." Click Send. Wait ~30 seconds. + +Verify via DOM inspection (JavaScript in the page) that the SSE log contains: +- `event: tool_call data: {"name":"listDir"}` — the workspace capability's listDir tool fired +- `event: tool_result` with the listing +- `event: chunk` events streaming the agent's natural-language response +- `event: done` +- 0 errors (`subagent_failed`, recursion, etc.) + +- [ ] **Step 3: Drive `/coordinator` through the picker** + +Click the `/coordinator` radio. Type "Use research to read AGENTS.md and list its camelCase tool names. Then ask summarizer for a 2-bullet TL;DR." Click Send. Wait ~60 seconds. 
+ +Verify via DOM inspection: +- 2 `subagent.start` events, 2 `subagent.end` events (one each for research, summarizer) +- `subagent.tool_call` events for `readFile` (research's workspace capability) +- `subagent.message` events streaming the children's tokens +- 0 paired duplicates (raw `chunk` whose data matches a `subagent.message` chunk) — sub-project 3's bubbling fix should still hold +- `event: done` with non-empty final assistant text + +- [ ] **Step 4: Kill dev servers** + +```bash +pkill -f "dawn.*dev" +pkill -f "next dev -p 3000" +``` + +- [ ] **Step 5: If anything failed** + +Debug per the failure. Likely candidates: +- A route is missing its `workspace/` dir → the capability didn't activate → no tools were contributed → the agent has nothing to call. +- The capability's path-jail rejected a path the old tool used to accept → may indicate a behavior delta from the old hand-rolled tool. + +Iterate until both probes pass. Do not move on to Task 15 until the smoke test is clean. + +--- + +### Task 15: Update phase status memory + open PR + +**Files:** +- Modify: `/Users/blove/.claude/projects/-Users-blove-repos-dawn/memory/project_phase_status.md` + +- [ ] **Step 1: Update the memory note** + +Edit `project_phase_status.md`. Find: + +``` +4. Pluggable filesystem / exec backends (`dawn.config.ts`). +``` + +Replace with: + +``` +4. ✅ **Workspace capability + pluggable backends** — shipped in [PR #TBD](https://github.com/cacheplane/dawnai/pull/TBD). Workspace tools (readFile/writeFile/listDir/runBash) auto-contributed by a capability triggered by `<route>/workspace/`. New `@dawn-ai/workspace` package ships `FilesystemBackend`/`ExecBackend` interfaces + `localFilesystem`/`localExec` defaults + `compose`/`withFilesystemLogging`/`withExecLogging` helpers. `dawn.config.ts` switched from hand-rolled string-only parser to `tsx`-evaluated import so callable backends can be expressed. Path-jail enforced in the capability; backends receive resolved absolute paths. 
Tool override pathway: write `tools/<name>.ts` to shadow a capability-contributed tool. Chat example's hand-rolled workspace tools deleted. HITL permission gating for jail escapes deferred to sub-project 4.5. +``` + +Update the top summary to show 6/7 sub-projects shipped (still in Phase 3). + +- [ ] **Step 2: Push the branch + open the PR** + +```bash +cd /Users/blove/repos/dawn +git push -u origin claude/phase3-workspace +``` + +```bash +gh pr create --title "feat(core,cli,workspace): phase 3 — workspace capability + pluggable backends (sub-project 4)" --body "$(cat <<'EOF' +## Summary + +Sub-project 4 of the Dawn opinionated agent harness. The workspace +tools (readFile/writeFile/listDir/runBash) move from hand-rolled +per-route files into a built-in capability auto-wired by the +`workspace/` directory convention. Filesystem and exec implementations +become pluggable via a new `@dawn-ai/workspace` package; defaults +preserve existing behavior so apps that don't touch `dawn.config.ts` +keep working unchanged. + +`dawn.config.ts` loader switches from a hand-rolled string-only +parser to a `tsx`-evaluated import so callable backend values can be +expressed naturally. + +## Changes + +- New `@dawn-ai/workspace` package: `FilesystemBackend` / `ExecBackend` + type interfaces, `localFilesystem()` and `localExec()` defaults, + `compose()` helper for middleware composition, demonstration + `withFilesystemLogging` / `withExecLogging` middlewares. +- New `createWorkspaceMarker()` capability in `@dawn-ai/core`. Detects + the `workspace/` directory under a route; contributes four tools + routed through the configured backends; enforces path-jail before + calling the backend so backends receive trusted absolute paths. +- `DawnConfig` and `CapabilityMarkerContext` gain an optional + `backends: { filesystem?, exec? }` field. When omitted, the + capability falls back to `localFilesystem()` + `localExec()`. 
+- Tool-name uniqueness check supports overridable capability tools: + user-authored `tools/readFile.ts` (etc.) replaces the workspace + capability's contribution; non-overridable capability tools + (writeTodos, readSkill, task) retain the collision error. +- Typegen surfaces the four workspace tools on routes with a + `workspace/` directory. +- Chat example's hand-rolled tool files delete from `/chat` and from + `/coordinator/subagents/research`. + +## Test plan + +- [x] `@dawn-ai/workspace` unit tests: types + localFilesystem (5) + + localExec (4) + compose (3) + with-logging (3) = 15 cases +- [x] `createWorkspaceMarker` unit tests: detect, load, tool wiring, + path-jail, default backends, override flag (8 cases) +- [x] `checkToolNameUniqueness` overridable cases (2 new cases) +- [x] Config loader rewrite: 6 cases including syntax-error + propagation +- [x] Typegen: workspace tools appear when `workspace/` exists, absent + otherwise (2 cases) +- [x] Full repo green; build + typecheck + lint clean +- [x] Manual Chrome MCP smoke: `/chat` and `/coordinator` both produce + clean SSE streams; 0 duplicates; 0 errors; `done` event fires + +## Deferred / known limitations + +- **HITL permission system (sub-project 4.5)** — the capability hard- + refuses jail escapes today. A future PR introduces an `interrupt()` + flow so the user can grant per-path permissions, with persistence + to a yet-to-be-decided location (likely `.dawn/permissions.json`). +- **Per-route backend override** — currently global only. Add via + descriptor field if a real use case surfaces. +- **OS-level isolation** — out of scope; documented as deployment + guidance. The path-jail in the capability is a correctness boundary, + not a security boundary against hostile agents. +- **Backend method extensibility** — adding methods beyond the four + standard ones does NOT auto-contribute tools. Authors write + additional tools in `tools/` as today. 
+ +🤖 Generated with [Claude Code](https://claude.com/claude-code) +EOF +)" +``` + +- [ ] **Step 3: Update the memory note with the real PR number** + +Once the PR is created, replace `[PR #TBD]` with the actual URL. + +- [ ] **Step 4: Auto-merge on green** + +```bash +gh pr merge --squash --delete-branch --auto +``` + +Wait for validate-green. Once merged, the sub-project is complete. + +--- + +## Self-review notes + +- **Spec coverage:** every section of the spec maps to a task. New package (T1-6). Config loader switch (T7). DawnConfig + CapabilityMarkerContext extension (T8). Marker implementation (T9). Tool override inversion (T10). Runtime wiring (T11). Typegen (T12). Chat example migration (T13). Smoke (T14). Memory update + PR (T15). +- **Placeholders:** none. Every step has actual code or actual commands. The `hasWorkspace` typegen test's body is intentionally outlined rather than fully written because it mirrors the existing `task` typegen test in the same file — the implementer should copy that test's structure (which I've called out explicitly in Step 1). +- **Type consistency:** `FilesystemBackend` / `ExecBackend` / `BackendContext` signatures stable from T2 through T9. `OverridableTool` shape locked in T9. `effectiveCapabilityTools` from T10 used in T11 (implicitly via the existing call site). Path-jail signature stable. +- **One known sharp edge:** T11's `effectiveCapabilityTools` usage requires `execute-route.ts`'s tool-merge to filter `capTools` by the names in the effective set rather than iterating `capTools` directly. The plan calls this out explicitly. If T10's API doesn't end up returning `effectiveCapabilityTools`, T11's implementation needs adjustment. 
diff --git a/docs/superpowers/plans/2026-05-21-phase3-permissions.md b/docs/superpowers/plans/2026-05-21-phase3-permissions.md new file mode 100644 index 00000000..b2d803b7 --- /dev/null +++ b/docs/superpowers/plans/2026-05-21-phase3-permissions.md @@ -0,0 +1,1953 @@ +# Phase 3 — HITL Permissions Implementation Plan (sub-project 4.5) + +> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. + +**Goal:** Replace the workspace capability's hard-refuse-on-path-jail-escape behavior with a human-in-the-loop interrupt flow, and add the same prompt-for-approval gating to `runBash` and all path-touching operations outside the workspace. Three operating modes (interactive / non-interactive / bypass) configurable in `dawn.config.ts` and overridable via `DAWN_PERMISSIONS_MODE` env var. Persisted decisions live in `.dawn/permissions.json` (project-local, gitignored). + +**Architecture:** New `@dawn-ai/permissions` package ships `PermissionsStore` (file I/O + pattern matching + write queue), public types, smart-default pattern inference. Workspace capability adds a permission check between path-jail / bash invocation and the backend call. Dawn's HTTP dev server (`packages/cli/src/lib/dev/runtime-server.ts`) gains a `POST /threads/:thread_id/resume` endpoint. Agent adapter propagates LangGraph `interrupt()` events as `event: interrupt` SSE envelopes. Chat-web client renders an inline permission panel and proxies resume calls. + +**Tech Stack:** TypeScript, pnpm workspaces, vitest, zod, LangGraph 1.x `interrupt()` + `Command({resume})`, native `node:http`, Next.js 16. 
+ +**Spec:** `docs/superpowers/specs/2026-05-21-phase3-permissions-design.md` + +--- + +## File Structure (locked here, used by all tasks) + +### New package + +| Path | Purpose | +|---|---| +| `packages/permissions/package.json` | `@dawn-ai/permissions` manifest | +| `packages/permissions/tsconfig.json` | TS config (mirror sibling packages) | +| `packages/permissions/vitest.config.ts` | Vitest config | +| `packages/permissions/src/index.ts` | Barrel re-exports | +| `packages/permissions/src/types.ts` | `PermissionsFile`, `PermissionMode`, `PermissionRequest`, `PermissionDecision`, `PermissionsStore` interface | +| `packages/permissions/src/pattern-matching.ts` | `match(tool, candidate, allowMap, denyMap)` → `"allow" | "deny" | "unknown"` | +| `packages/permissions/src/suggested-pattern.ts` | `suggestedCommandPattern(cmd)` + `suggestedPathPattern(path)` | +| `packages/permissions/src/permissions-store.ts` | `createPermissionsStore({appRoot, config, mode})` — load, match, addAllow, gitignore handling, write queue | +| `packages/permissions/test/*.test.ts` | Unit tests per file | + +### New + modified in existing packages + +| Path | Change | +|---|---| +| `packages/core/package.json` | Add `@dawn-ai/permissions` to dependencies | +| `packages/core/src/types.ts` | Extend `DawnConfig` with `permissions?: { mode, allow, deny }` | +| `packages/core/src/capabilities/types.ts` | Extend `CapabilityMarkerContext` with `permissions?: PermissionsStore` | +| `packages/core/src/capabilities/built-in/workspace.ts` | Gate every tool's `run()` through the permissions store; mode-aware path-jail (bypass disables) | +| `packages/core/test/capabilities/workspace.test.ts` | Add interrupt-flow tests | +| `packages/cli/src/lib/runtime/execute-route.ts` | Construct `PermissionsStore` from loaded config + env-var override; thread into `CapabilityMarkerContext` | +| `packages/cli/src/lib/dev/runtime-server.ts` | Register `POST /threads/:thread_id/resume` route + thread-state map | +| 
`packages/cli/test/resume-endpoint.test.ts` | New — endpoint tests | +| `packages/langchain/src/agent-adapter.ts` | Detect LangGraph `interrupt` events in `streamEvents` v2 output → yield `{type: "interrupt", data: ...}` chunks; handle `Command({resume})` re-invocation path | +| `packages/langchain/test/agent-adapter-interrupt.test.ts` | New — interrupt propagation test | +| `examples/chat/server/dawn.config.ts` | Seeded `permissions.allow` for demo (`bash: ["ls"]`) and `permissions.deny` (`bash: ["rm -rf", "sudo"]`) | +| `examples/chat/web/app/api/permission-resume/route.ts` | New — proxy to Dawn's resume endpoint | +| `examples/chat/web/app/page.tsx` | Inline permission panel + button handlers + resume POST | +| `memory/project_phase_status.md` | Mark sub-project 4.5 | + +--- + +## Phase A — `@dawn-ai/permissions` package + +### Task 1: Scaffold the permissions package + +**Files:** +- Create: `packages/permissions/package.json` +- Create: `packages/permissions/tsconfig.json` +- Create: `packages/permissions/vitest.config.ts` +- Create: `packages/permissions/src/index.ts` + +- [ ] **Step 1: Inspect sibling pattern** + +Run: `cd /Users/blove/repos/dawn && cat packages/workspace/package.json` +Note the exact catalog references and devDeps (workspace is the most-recent sibling and the closest template). + +- [ ] **Step 2: Write `packages/permissions/package.json`** + +Mirror `packages/workspace/package.json` exactly, substituting: +- `name`: `"@dawn-ai/permissions"` +- `version`: match siblings (likely `0.1.8`) + +No runtime deps yet. Same scripts (`build`, `test`, `typecheck`, `lint`). + +- [ ] **Step 3: Write `packages/permissions/tsconfig.json`** + +```json +{ + "extends": "@dawn-ai/config-typescript/node.json", + "include": ["src", "test"], + "compilerOptions": { "outDir": "dist", "rootDir": "." } +} +``` + +(Match `packages/workspace/tsconfig.json` exactly — copy-paste then adjust paths.) 
+ +- [ ] **Step 4: Write `packages/permissions/vitest.config.ts`** + +```ts +import { defineConfig } from "vitest/config" +export default defineConfig({ + test: { + include: ["test/**/*.test.ts"], + passWithNoTests: true, + }, +}) +``` + +- [ ] **Step 5: Write `packages/permissions/src/index.ts`** + +```ts +// Re-exports will be added as types and impls land in subsequent tasks. +export {} +``` + +- [ ] **Step 6: Verify** + +```bash +cd /Users/blove/repos/dawn && pnpm install 2>&1 | tail -3 +cd /Users/blove/repos/dawn && pnpm --filter @dawn-ai/permissions build 2>&1 | tail -5 +cd /Users/blove/repos/dawn && pnpm --filter @dawn-ai/permissions test 2>&1 | tail -5 +cd /Users/blove/repos/dawn && pnpm --filter @dawn-ai/permissions lint 2>&1 | tail -5 +``` + +All should succeed (test passes with no test files). + +- [ ] **Step 7: Commit** + +```bash +cd /Users/blove/repos/dawn +git add packages/permissions/ +git commit -m "scaffold(permissions): empty @dawn-ai/permissions package + +Adds the package skeleton for the upcoming HITL permissions system. +No exports yet — types, pattern matching, and store land in subsequent +commits. + +Co-Authored-By: Claude Opus 4.7 " +``` + +--- + +### Task 2: Public types + +**Files:** +- Create: `packages/permissions/src/types.ts` +- Modify: `packages/permissions/src/index.ts` + +- [ ] **Step 1: Write `packages/permissions/src/types.ts`** + +```ts +/** + * Public types for the Dawn HITL permissions system. + * + * The workspace capability calls into a `PermissionsStore` before + * invoking its filesystem/exec backends. The store consults the + * runtime file at .dawn/permissions.json plus the config-seeded + * allow/deny lists and returns one of three decisions: "allow", + * "deny", or "unknown". On "unknown" in interactive mode the + * capability emits LangGraph's `interrupt()` with a `PermissionRequest` + * payload; the resume mechanism returns a `PermissionDecision`. 
+ */ + +export type PermissionMode = "interactive" | "non-interactive" | "bypass" + +export type PermissionDecision = "once" | "always" | "deny" + +export interface PermissionsFile { + readonly version: 1 + readonly allow: Readonly<Record<string, readonly string[]>> + readonly deny: Readonly<Record<string, readonly string[]>> +} + +export interface CommandDetail { + readonly command: string + readonly suggestedPattern: string +} + +export interface PathDetail { + readonly path: string + readonly operation: "readFile" | "writeFile" | "listDir" + readonly suggestedPattern: string +} + +export interface PermissionRequest { + readonly interruptId: string + readonly kind: "command" | "path" + readonly detail: CommandDetail | PathDetail + readonly threadId: string + readonly callId?: string +} + +export interface PermissionsStore { + /** Loaded once at construction; subsequent loads not exposed in v1. */ + match(tool: string, candidate: string): "allow" | "deny" | "unknown" + /** Persists an allow entry to disk and updates the in-memory cache. */ + addAllow(tool: string, pattern: string): Promise<void> + /** Active mode (resolved from config + env at construction). */ + readonly mode: PermissionMode +} +``` + +- [ ] **Step 2: Re-export from barrel** + +Edit `packages/permissions/src/index.ts`. Replace with: + +```ts +export type { + CommandDetail, + PathDetail, + PermissionDecision, + PermissionMode, + PermissionRequest, + PermissionsFile, + PermissionsStore, +} from "./types.js" +``` + +- [ ] **Step 3: Verify** + +```bash +cd /Users/blove/repos/dawn && pnpm --filter @dawn-ai/permissions build 2>&1 | tail -3 +cd /Users/blove/repos/dawn && pnpm --filter @dawn-ai/permissions typecheck 2>&1 | tail -3 +cd /Users/blove/repos/dawn && pnpm --filter @dawn-ai/permissions lint 2>&1 | tail -3 +``` + +Expect: all clean. 
+ +- [ ] **Step 4: Commit** + +```bash +cd /Users/blove/repos/dawn +git add packages/permissions/src/types.ts packages/permissions/src/index.ts +git commit -m "feat(permissions): public types + +Co-Authored-By: Claude Opus 4.7 " +``` + +--- + +### Task 3: Suggested-pattern helpers + +**Files:** +- Create: `packages/permissions/src/suggested-pattern.ts` +- Create: `packages/permissions/test/suggested-pattern.test.ts` +- Modify: `packages/permissions/src/index.ts` + +- [ ] **Step 1: Write failing tests** + +Create `packages/permissions/test/suggested-pattern.test.ts`: + +```ts +import { describe, expect, it } from "vitest" +import { + suggestedCommandPattern, + suggestedPathPattern, +} from "../src/suggested-pattern.js" + +describe("suggestedCommandPattern", () => { + it("returns the first two tokens for a multi-word command", () => { + expect(suggestedCommandPattern("npm install react")).toBe("npm install") + }) + + it("returns the single token for a one-word command", () => { + expect(suggestedCommandPattern("ls")).toBe("ls") + }) + + it("returns first two tokens even when the second is short", () => { + expect(suggestedCommandPattern("git status")).toBe("git status") + expect(suggestedCommandPattern("git push origin main")).toBe("git push") + }) + + it("strips leading/trailing whitespace before tokenizing", () => { + expect(suggestedCommandPattern(" npm install react ")).toBe("npm install") + }) + + it("handles empty input as empty pattern", () => { + expect(suggestedCommandPattern("")).toBe("") + expect(suggestedCommandPattern(" ")).toBe("") + }) +}) + +describe("suggestedPathPattern", () => { + it("returns the parent directory with trailing slash", () => { + expect(suggestedPathPattern("/Users/blove/.zshrc")).toBe("/Users/blove/") + expect(suggestedPathPattern("/var/log/app.log")).toBe("/var/log/") + }) + + it("returns the dir itself with trailing slash when input ends with slash", () => { + 
expect(suggestedPathPattern("/Users/blove/Documents/")).toBe("/Users/blove/Documents/") + }) + + it("returns root when input is a top-level file", () => { + expect(suggestedPathPattern("/etc")).toBe("/") + }) + + it("handles relative paths", () => { + expect(suggestedPathPattern("notes/agenda.md")).toBe("notes/") + }) +}) +``` + +- [ ] **Step 2: Run to verify failure** + +```bash +cd /Users/blove/repos/dawn && pnpm --filter @dawn-ai/permissions test 2>&1 | tail -10 +``` +Expect: FAIL — module not found. + +- [ ] **Step 3: Implement** + +Create `packages/permissions/src/suggested-pattern.ts`: + +```ts +import { dirname } from "node:path" + +/** + * Default suggested pattern for a shell command. + * + * Returns the first two whitespace-separated tokens. `npm install react` + * → `npm install`. `ls` → `ls`. This is the sweet spot — covers + * `npm install <pkg>` and `npm test` as distinct patterns, vs lumping + * everything under `npm`. + */ +export function suggestedCommandPattern(command: string): string { + const trimmed = command.trim() + if (trimmed.length === 0) return "" + const tokens = trimmed.split(/\s+/) + return tokens.slice(0, 2).join(" ") +} + +/** + * Default suggested pattern for a filesystem path. + * + * Returns the parent directory of the path with a trailing slash. + * `/Users/blove/.zshrc` → `/Users/blove/`. A path that already ends + * with `/` is treated as a directory and returned unchanged. Trailing + * slash makes prefix matching unambiguous (so `/var/log/` does not match + * `/var/logger/app.log`). + */ +export function suggestedPathPattern(path: string): string { + if (path.endsWith("/")) return path + const parent = dirname(path) + return parent === "/" ? "/" : `${parent}/` +} +``` + +- [ ] **Step 4: Re-export** + +Edit `packages/permissions/src/index.ts`. 
Append: + +```ts +export { suggestedCommandPattern, suggestedPathPattern } from "./suggested-pattern.js" +``` + +- [ ] **Step 5: Run tests** + +```bash +cd /Users/blove/repos/dawn && pnpm --filter @dawn-ai/permissions test 2>&1 | tail -5 +``` +Expect: PASS (9 tests). 
+ +- [ ] **Step 6: Commit** + +```bash +cd /Users/blove/repos/dawn +git add packages/permissions/src/suggested-pattern.ts \ + packages/permissions/test/suggested-pattern.test.ts \ + packages/permissions/src/index.ts +git commit -m "feat(permissions): suggested-pattern helpers for commands and paths + +Co-Authored-By: Claude Opus 4.7 " +``` + +--- + +### Task 4: Pattern matching + +**Files:** +- Create: `packages/permissions/src/pattern-matching.ts` +- Create: `packages/permissions/test/pattern-matching.test.ts` +- Modify: `packages/permissions/src/index.ts` + +- [ ] **Step 1: Write failing tests** + +Create `packages/permissions/test/pattern-matching.test.ts`: + +```ts +import { describe, expect, it } from "vitest" +import { matchPermission } from "../src/pattern-matching.js" + +describe("matchPermission", () => { + it("returns unknown when no entries match", () => { + expect( + matchPermission("bash", "npm install", {}, {}), + ).toBe("unknown") + }) + + it("returns allow when candidate matches an allow prefix", () => { + expect( + matchPermission("bash", "npm install react", { bash: ["npm install"] }, {}), + ).toBe("allow") + }) + + it("returns deny when candidate matches a deny prefix", () => { + expect( + matchPermission("bash", "rm -rf /tmp", {}, { bash: ["rm -rf"] }), + ).toBe("deny") + }) + + it("deny wins over allow when both match", () => { + expect( + matchPermission( + "bash", + "rm -rf /tmp", + { bash: ["rm"] }, // would match "rm -rf /tmp" as prefix? 
Yes — "rm" is a prefix of it, so allow and deny both match and deny must win + { bash: ["rm -rf"] }, + ), + ).toBe("deny") + }) + + it("does NOT match an allow entry that is not a prefix", () => { + expect( + matchPermission("bash", "npm test", { bash: ["npm install"] }, {}), + ).toBe("unknown") + }) + + it("treats path candidates with absolute prefixes", () => { + expect( + matchPermission( + "readFile", + "/Users/blove/.zshrc", + { readFile: ["/Users/blove/"] }, + {}, + ), + ).toBe("allow") + }) + + it("does not cross directory boundary without trailing slash", () => { + // With allow=/var/log (no trailing slash), /var/logger/app.log WOULD match, + // because "/var/log" is a string prefix of "/var/logger". The trailing + // slash in "/var/log/" prevents that, so the result here is "unknown". + expect( + matchPermission( + "readFile", + "/var/logger/app.log", + { readFile: ["/var/log/"] }, + {}, + ), + ).toBe("unknown") + }) + + it("returns unknown for a tool with no entries in either list", () => { + expect( + matchPermission( + "runUnknownTool", + "anything", + { bash: ["ls"] }, + { writeFile: ["/tmp/"] }, + ), + ).toBe("unknown") + }) +}) +``` + +- [ ] **Step 2: Run to verify failure** + +```bash +cd /Users/blove/repos/dawn && pnpm --filter @dawn-ai/permissions test 2>&1 | tail -8 +``` +Expect: FAIL — module not found. + +- [ ] **Step 3: Implement** + +Create `packages/permissions/src/pattern-matching.ts`: + +```ts +type PatternMap = Readonly<Record<string, readonly string[]>> + +/** + * Match a tool+candidate against allow + deny pattern maps. 
+ * + * Semantics: + * - deny wins over allow (a candidate that matches both returns "deny") + * - prefix matching: `candidate.startsWith(pattern)` + * - no entries for tool in either map → "unknown" + * + * Patterns are expected to encode any required boundary themselves (e.g., + * path patterns should end with "/" to prevent crossing directory + * boundaries; command patterns are first-N tokens already). 
+ */ +export function matchPermission( + tool: string, + candidate: string, + allow: PatternMap, + deny: PatternMap, +): "allow" | "deny" | "unknown" { + const denyList = deny[tool] ?? [] + for (const pattern of denyList) { + if (candidate.startsWith(pattern)) return "deny" + } + const allowList = allow[tool] ?? [] + for (const pattern of allowList) { + if (candidate.startsWith(pattern)) return "allow" + } + return "unknown" +} +``` + +- [ ] **Step 4: Re-export** + +Edit `packages/permissions/src/index.ts`. Append: + +```ts +export { matchPermission } from "./pattern-matching.js" +``` + +- [ ] **Step 5: Run tests** + +```bash +cd /Users/blove/repos/dawn && pnpm --filter @dawn-ai/permissions test 2>&1 | tail -5 +``` +Expect: PASS (17 total: 9 + 8 new). + +- [ ] **Step 6: Commit** + +```bash +cd /Users/blove/repos/dawn +git add packages/permissions/src/pattern-matching.ts \ + packages/permissions/test/pattern-matching.test.ts \ + packages/permissions/src/index.ts +git commit -m "feat(permissions): pattern-matching engine (allow/deny/unknown) + +Co-Authored-By: Claude Opus 4.7 " +``` + +--- + +### Task 5: PermissionsStore + +**Files:** +- Create: `packages/permissions/src/permissions-store.ts` +- Create: `packages/permissions/test/permissions-store.test.ts` +- Modify: `packages/permissions/src/index.ts` + +- [ ] **Step 1: Write failing tests** + +Create `packages/permissions/test/permissions-store.test.ts`: + +```ts +import { afterEach, beforeEach, describe, expect, it } from "vitest" +import { mkdtempSync, readFileSync, rmSync, existsSync, writeFileSync } from "node:fs" +import { tmpdir } from "node:os" +import { join } from "node:path" + +import { createPermissionsStore } from "../src/permissions-store.js" +import type { PermissionsFile } from "../src/types.js" + +describe("createPermissionsStore — load + match", () => { + let appRoot: string + beforeEach(() => { + appRoot = mkdtempSync(join(tmpdir(), "dawn-perms-")) + }) + afterEach(() => { + rmSync(appRoot, { 
recursive: true, force: true }) + }) + + it("returns unknown when no file or config", async () => { + const store = createPermissionsStore({ appRoot, config: undefined, mode: "interactive" }) + await store.load() + expect(store.match("bash", "npm install")).toBe("unknown") + }) + + it("matches entries from .dawn/permissions.json", async () => { + writeFileSync( + join(appRoot, ".dawn", "permissions.json"), + JSON.stringify({ + version: 1, + allow: { bash: ["npm install"] }, + deny: {}, + }), + { encoding: "utf8", flag: "w" }, + ) + // mkdir is needed before writeFileSync — adjust: + // Actually the test should use mkdirSync first + }) +}) +``` + +(Do NOT use the draft above: it is incomplete — it writes `.dawn/permissions.json` without first creating the `.dawn/` directory, and its second test has no assertions. The implementer should follow the corrected version below.) + +Use this corrected test file: + +```ts +import { afterEach, beforeEach, describe, expect, it } from "vitest" +import { mkdirSync, mkdtempSync, readFileSync, rmSync, writeFileSync } from "node:fs" +import { tmpdir } from "node:os" +import { join } from "node:path" + +import { createPermissionsStore } from "../src/permissions-store.js" + +describe("createPermissionsStore — load + match", () => { + let appRoot: string + beforeEach(() => { + appRoot = mkdtempSync(join(tmpdir(), "dawn-perms-")) + }) + afterEach(() => { + rmSync(appRoot, { recursive: true, force: true }) + }) + + it("returns unknown when no file and no config", async () => { + const store = createPermissionsStore({ appRoot, config: undefined, mode: "interactive" }) + await store.load() + expect(store.match("bash", "npm install")).toBe("unknown") + }) + + it("matches entries from .dawn/permissions.json", async () => { + mkdirSync(join(appRoot, ".dawn"), { recursive: true }) + writeFileSync( + join(appRoot, ".dawn", "permissions.json"), + JSON.stringify({ + version: 1, + allow: { bash: ["npm install"] }, + deny: {}, + }), + ) + const store = 
createPermissionsStore({ appRoot, config: undefined, mode: "interactive" }) + await store.load() + 
expect(store.match("bash", "npm install react")).toBe("allow") + expect(store.match("bash", "rm -rf /")).toBe("unknown") + }) + + it("merges config + runtime file (both contribute allows)", async () => { + mkdirSync(join(appRoot, ".dawn"), { recursive: true }) + writeFileSync( + join(appRoot, ".dawn", "permissions.json"), + JSON.stringify({ version: 1, allow: { bash: ["ls"] }, deny: {} }), + ) + const store = createPermissionsStore({ + appRoot, + config: { version: 1, allow: { bash: ["npm install"] }, deny: {} }, + mode: "interactive", + }) + await store.load() + expect(store.match("bash", "ls -la")).toBe("allow") + expect(store.match("bash", "npm install react")).toBe("allow") + }) + + it("deny from config wins over allow from runtime file", async () => { + mkdirSync(join(appRoot, ".dawn"), { recursive: true }) + writeFileSync( + join(appRoot, ".dawn", "permissions.json"), + JSON.stringify({ version: 1, allow: { bash: ["rm"] }, deny: {} }), + ) + const store = createPermissionsStore({ + appRoot, + config: { version: 1, allow: {}, deny: { bash: ["rm -rf"] } }, + mode: "interactive", + }) + await store.load() + expect(store.match("bash", "rm -rf /tmp")).toBe("deny") + }) + + it("ignores the runtime file in non-interactive mode", async () => { + mkdirSync(join(appRoot, ".dawn"), { recursive: true }) + writeFileSync( + join(appRoot, ".dawn", "permissions.json"), + JSON.stringify({ version: 1, allow: { bash: ["npm install"] }, deny: {} }), + ) + const store = createPermissionsStore({ + appRoot, + config: { version: 1, allow: { bash: ["ls"] }, deny: {} }, + mode: "non-interactive", + }) + await store.load() + expect(store.match("bash", "npm install react")).toBe("unknown") + expect(store.match("bash", "ls -la")).toBe("allow") + }) + + it("ignores everything in bypass mode", async () => { + mkdirSync(join(appRoot, ".dawn"), { recursive: true }) + writeFileSync( + join(appRoot, ".dawn", "permissions.json"), + JSON.stringify({ version: 1, allow: {}, deny: { bash: ["rm"] } 
}), + ) + const store = createPermissionsStore({ + appRoot, + config: { version: 1, allow: {}, deny: { bash: ["rm"] } }, + mode: "bypass", + }) + await store.load() + // bypass mode: store always returns "unknown" (which the capability interprets as "go ahead") + expect(store.match("bash", "rm -rf /")).toBe("unknown") + }) + + it("throws on malformed JSON in the runtime file", async () => { + mkdirSync(join(appRoot, ".dawn"), { recursive: true }) + writeFileSync(join(appRoot, ".dawn", "permissions.json"), "{ not valid json") + const store = createPermissionsStore({ appRoot, config: undefined, mode: "interactive" }) + await expect(store.load()).rejects.toThrow(/permissions\.json/i) + }) +}) + +describe("createPermissionsStore — addAllow", () => { + let appRoot: string + beforeEach(() => { + appRoot = mkdtempSync(join(tmpdir(), "dawn-perms-")) + }) + afterEach(() => { + rmSync(appRoot, { recursive: true, force: true }) + }) + + it("persists an allow entry and updates the in-memory cache atomically", async () => { + const store = createPermissionsStore({ appRoot, config: undefined, mode: "interactive" }) + await store.load() + expect(store.match("bash", "npm install")).toBe("unknown") + await store.addAllow("bash", "npm install") + expect(store.match("bash", "npm install react")).toBe("allow") + const raw = readFileSync(join(appRoot, ".dawn", "permissions.json"), "utf8") + const parsed = JSON.parse(raw) + expect(parsed.allow.bash).toContain("npm install") + }) + + it("appends .dawn/ to .gitignore on first write (idempotent)", async () => { + writeFileSync(join(appRoot, ".gitignore"), "node_modules/\n.next/\n") + const store = createPermissionsStore({ appRoot, config: undefined, mode: "interactive" }) + await store.load() + await store.addAllow("bash", "ls") + const gi = readFileSync(join(appRoot, ".gitignore"), "utf8") + expect(gi).toContain(".dawn/") + expect(gi).toContain("node_modules/") // preserved existing + }) + + it("creates .gitignore with .dawn/ when none 
exists", async () => { + const store = createPermissionsStore({ appRoot, config: undefined, mode: "interactive" }) + await store.load() + await store.addAllow("bash", "ls") + const gi = readFileSync(join(appRoot, ".gitignore"), "utf8") + expect(gi).toBe(".dawn/\n") + }) + + it("does not duplicate .dawn/ if already in .gitignore", async () => { + writeFileSync(join(appRoot, ".gitignore"), "node_modules/\n.dawn/\n") + const store = createPermissionsStore({ appRoot, config: undefined, mode: "interactive" }) + await store.load() + await store.addAllow("bash", "ls") + const gi = readFileSync(join(appRoot, ".gitignore"), "utf8") + expect(gi.match(/\.dawn\//g)?.length).toBe(1) + }) + + it("serializes concurrent addAllow calls", async () => { + const store = createPermissionsStore({ appRoot, config: undefined, mode: "interactive" }) + await store.load() + await Promise.all([ + store.addAllow("bash", "ls"), + store.addAllow("bash", "pwd"), + store.addAllow("bash", "cat"), + ]) + const raw = readFileSync(join(appRoot, ".dawn", "permissions.json"), "utf8") + const parsed = JSON.parse(raw) + expect([...parsed.allow.bash].sort()).toEqual(["cat", "ls", "pwd"]) + }) + + it("exposes the resolved mode", () => { + const store = createPermissionsStore({ appRoot, config: undefined, mode: "non-interactive" }) + expect(store.mode).toBe("non-interactive") + }) +}) +``` + +- [ ] **Step 2: Run to verify failure** + +```bash +cd /Users/blove/repos/dawn && pnpm --filter @dawn-ai/permissions test 2>&1 | tail -10 +``` +Expect: FAIL — `Cannot find module '../src/permissions-store.js'`. 
+ +- [ ] **Step 3: Implement** + +Create `packages/permissions/src/permissions-store.ts`: + +```ts +import { existsSync } from "node:fs" +import { mkdir, readFile, writeFile } from "node:fs/promises" +import { join } from "node:path" + +import { matchPermission } from "./pattern-matching.js" +import type { + PermissionMode, + PermissionsFile, + PermissionsStore, +} from "./types.js" + +const PERMISSIONS_DIR = ".dawn" +const PERMISSIONS_FILE = "permissions.json" + +interface CreateOptions { + readonly appRoot: string + readonly config: PermissionsFile | undefined + readonly mode: PermissionMode +} + +type MutableMap = Record<string, string[]> + +interface State { + configAllow: MutableMap + configDeny: MutableMap + runtimeAllow: MutableMap + runtimeDeny: MutableMap +} + +function emptyState(): State { + return { configAllow: {}, configDeny: {}, runtimeAllow: {}, runtimeDeny: {} } +} + +function cloneMap(src: Readonly<Record<string, readonly string[]>>): MutableMap { + const out: MutableMap = {} + for (const [k, v] of Object.entries(src)) out[k] = [...v] + return out +} + +function effectiveAllow(state: State, mode: PermissionMode): Record<string, string[]> { + if (mode === "bypass") return {} + const out: Record<string, string[]> = {} + for (const [k, v] of Object.entries(state.configAllow)) out[k] = [...v] + if (mode === "interactive") { + for (const [k, v] of Object.entries(state.runtimeAllow)) { + out[k] = [...(out[k] ?? []), ...v] + } + } + return out +} + +function effectiveDeny(state: State, mode: PermissionMode): Record<string, string[]> { + if (mode === "bypass") return {} + const out: Record<string, string[]> = {} + for (const [k, v] of Object.entries(state.configDeny)) out[k] = [...v] + if (mode === "interactive") { + for (const [k, v] of Object.entries(state.runtimeDeny)) { + out[k] = [...(out[k] ?? 
[]), ...v] + } + } + return out +} + +export function createPermissionsStore(opts: CreateOptions): PermissionsStore { + const { appRoot, config, mode } = opts + const state = emptyState() + if (config) { + state.configAllow = cloneMap(config.allow) + state.configDeny = cloneMap(config.deny) + } + + let writeQueue: Promise<void> = Promise.resolve() + + async function loadRuntimeFile(): Promise<void> { + const filePath = join(appRoot, PERMISSIONS_DIR, PERMISSIONS_FILE) + if (!existsSync(filePath)) return + let raw: string + try { + raw = await readFile(filePath, "utf8") + } catch (err) { + throw new Error(`Failed to read permissions.json: ${(err as Error).message}`) + } + let parsed: unknown + try { + parsed = JSON.parse(raw) + } catch (err) { + throw new Error(`Malformed permissions.json: ${(err as Error).message}`) + } + const p = parsed as Partial<PermissionsFile> + if (p.allow && typeof p.allow === "object") state.runtimeAllow = cloneMap(p.allow as Record<string, string[]>) + if (p.deny && typeof p.deny === "object") state.runtimeDeny = cloneMap(p.deny as Record<string, string[]>) + } + + async function persistRuntimeFile(): Promise<void> { + const dir = join(appRoot, PERMISSIONS_DIR) + await mkdir(dir, { recursive: true }) + const file: PermissionsFile = { + version: 1, + allow: state.runtimeAllow, + deny: state.runtimeDeny, + } + await writeFile(join(dir, PERMISSIONS_FILE), `${JSON.stringify(file, null, 2)}\n`, "utf8") + } + + async function ensureGitignoreEntry(): Promise<void> { + const gitignorePath = join(appRoot, ".gitignore") + let content = "" + if (existsSync(gitignorePath)) { + content = await readFile(gitignorePath, "utf8") + if (content.split("\n").some((line) => line.trim() === ".dawn/")) return + if (!content.endsWith("\n") && content.length > 0) content += "\n" + content += ".dawn/\n" + } else { + content = ".dawn/\n" + } + await writeFile(gitignorePath, content, "utf8") + } + + return { + mode, + match(tool: string, candidate: string) { + return matchPermission(tool, candidate, effectiveAllow(state, mode), 
effectiveDeny(state, mode)) + }, + async load() { + if (mode === "interactive") { + await loadRuntimeFile() + } + }, + async addAllow(tool: string, pattern: string) { + const job = async () => { + const list = state.runtimeAllow[tool] ?? [] + if (!list.includes(pattern)) list.push(pattern) + state.runtimeAllow[tool] = list + await persistRuntimeFile() + await ensureGitignoreEntry() + } + writeQueue = writeQueue.then(job, job) + await writeQueue + }, + } +} +``` + +- [ ] **Step 4: Re-export** + +Edit `packages/permissions/src/index.ts`. Append: + +```ts +export { createPermissionsStore } from "./permissions-store.js" +``` + +- [ ] **Step 5: Run tests** + +```bash +cd /Users/blove/repos/dawn && pnpm --filter @dawn-ai/permissions test 2>&1 | tail -10 +cd /Users/blove/repos/dawn && pnpm --filter @dawn-ai/permissions build 2>&1 | tail -3 +cd /Users/blove/repos/dawn && pnpm --filter @dawn-ai/permissions lint 2>&1 | tail -3 +``` +Expect: PASS (~28 tests total), build + lint clean. + +- [ ] **Step 6: Commit** + +```bash +cd /Users/blove/repos/dawn +git add packages/permissions/src/permissions-store.ts \ + packages/permissions/test/permissions-store.test.ts \ + packages/permissions/src/index.ts +git commit -m "feat(permissions): PermissionsStore with file I/O + write queue + gitignore handling + +Co-Authored-By: Claude Opus 4.7 " +``` + +--- + +## Phase B — Config + capability changes + +### Task 6: Extend DawnConfig + CapabilityMarkerContext + +**Files:** +- Modify: `packages/core/package.json` — add `@dawn-ai/permissions` to devDependencies (type-only for now) +- Modify: `packages/core/src/types.ts` — extend `DawnConfig` +- Modify: `packages/core/src/capabilities/types.ts` — extend `CapabilityMarkerContext` + +- [ ] **Step 1: Add permissions package as type-only dep** + +Edit `packages/core/package.json`. 
Add to `devDependencies`: + +```json +"@dawn-ai/permissions": "workspace:*" +``` + +Run: `cd /Users/blove/repos/dawn && pnpm install --silent 2>&1 | tail -3` + +- [ ] **Step 2: Extend `DawnConfig`** + +Edit `packages/core/src/types.ts`. Add to the existing imports: + +```ts +import type { PermissionMode } from "@dawn-ai/permissions" +``` + +Find the `DawnConfig` interface and extend: + +```ts +export interface DawnConfig { + readonly appDir?: string + readonly backends?: { + readonly filesystem?: FilesystemBackend + readonly exec?: ExecBackend + } + readonly permissions?: { + readonly mode?: PermissionMode + readonly allow?: Readonly<Record<string, readonly string[]>> + readonly deny?: Readonly<Record<string, readonly string[]>> + } +} +``` + +- [ ] **Step 3: Extend `CapabilityMarkerContext`** + +Edit `packages/core/src/capabilities/types.ts`. Add to imports: + +```ts +import type { PermissionsStore } from "@dawn-ai/permissions" +``` + +Find `CapabilityMarkerContext` and extend: + +```ts +export interface CapabilityMarkerContext { + // ... existing fields + readonly permissions?: PermissionsStore +} +``` + +- [ ] **Step 4: Verify** + +```bash +cd /Users/blove/repos/dawn && pnpm --filter @dawn-ai/core typecheck 2>&1 | tail -3 +cd /Users/blove/repos/dawn && pnpm --filter @dawn-ai/core lint 2>&1 | tail -3 +cd /Users/blove/repos/dawn && pnpm test 2>&1 | tail -8 +``` +Expect: clean, full repo still green. + +- [ ] **Step 5: Commit** + +```bash +cd /Users/blove/repos/dawn +git add packages/core/package.json packages/core/src/types.ts packages/core/src/capabilities/types.ts +git commit -m "feat(core): extend DawnConfig + CapabilityMarkerContext with permissions + +Type-only edge to @dawn-ai/permissions. Workspace capability will read +context.permissions in a subsequent commit. 
+ +Co-Authored-By: Claude Opus 4.7 " +``` + +--- + +### Task 7: Workspace capability gates through PermissionsStore + +**Files:** +- Modify: `packages/core/src/capabilities/built-in/workspace.ts` +- Modify: `packages/core/test/capabilities/workspace.test.ts` +- Modify: `packages/core/package.json` (promote permissions from devDep to dep — runtime use) + +- [ ] **Step 1: Promote permissions package to runtime dep** + +Edit `packages/core/package.json`. Move `@dawn-ai/permissions` from `devDependencies` to `dependencies`. Run `pnpm install`. + +- [ ] **Step 2: Add failing tests** + +Append to `packages/core/test/capabilities/workspace.test.ts`: + +```ts +import { createPermissionsStore } from "@dawn-ai/permissions" +import type { PermissionsStore } from "@dawn-ai/permissions" + +describe("createWorkspaceMarker — permissions gating", () => { + let routeDir: string + let appRoot: string + beforeEach(() => { + appRoot = mkdtempSync(join(tmpdir(), "dawn-perm-cap-")) + routeDir = appRoot + mkdirSync(join(appRoot, "workspace")) + process.chdir(appRoot) + }) + afterEach(() => { + process.chdir(originalCwd) + rmSync(appRoot, { recursive: true, force: true }) + }) + + async function makeStore(mode: "interactive" | "non-interactive" | "bypass", config?: { allow?: Record<string, string[]>; deny?: Record<string, string[]> }): Promise<PermissionsStore> { + const store = createPermissionsStore({ + appRoot, + config: config + ? { version: 1, allow: config.allow ?? {}, deny: config.deny ?? {} } + : undefined, + mode, + }) + await store.load() + return store + } + + it("calls the backend normally when path matches allow", async () => { + writeFileSync(join(appRoot, "workspace", "ok.txt"), "ok", "utf8") + const permissions = await makeStore("non-interactive", { + allow: { readFile: [join(appRoot, "workspace") + "/"] }, + }) + const contribution = await createWorkspaceMarker().load(routeDir, ctx({ permissions })) + const readTool = contribution.tools!.find((t) => t.name === "readFile")! 
+ const result = await readTool.run( + { path: "ok.txt" }, + { signal: new AbortController().signal }, + ) + expect(result).toBe("ok") + }) + + it("returns a deny error to the agent when path matches deny", async () => { + // The file lives OUTSIDE workspace/ on purpose: in-workspace paths are allowed + // silently and never reach the gate, so a deny rule can only fire out here. + writeFileSync(join(appRoot, "blocked.txt"), "x", "utf8") + const permissions = await makeStore("non-interactive", { + deny: { readFile: [join(appRoot, "blocked")] }, + }) + const contribution = await createWorkspaceMarker().load(routeDir, ctx({ permissions })) + const readTool = contribution.tools!.find((t) => t.name === "readFile")! + await expect( + readTool.run({ path: "../blocked.txt" }, { signal: new AbortController().signal }), + ).rejects.toThrow(/permission denied/i) + }) + + it("in non-interactive mode, unknown commands hard-refuse", async () => { + const permissions = await makeStore("non-interactive") + const contribution = await createWorkspaceMarker().load(routeDir, ctx({ permissions })) + const runBash = contribution.tools!.find((t) => t.name === "runBash")! + await expect( + runBash.run({ command: "ls" }, { signal: new AbortController().signal }), + ).rejects.toThrow(/permission denied|fail-closed/i) + }) + + it("in bypass mode, every operation proceeds (path-jail disabled)", async () => { + const permissions = await makeStore("bypass") + const contribution = await createWorkspaceMarker().load(routeDir, ctx({ permissions })) + const readTool = contribution.tools!.find((t) => t.name === "readFile")! + // In bypass mode, reading outside the workspace should NOT raise "outside workspace" + // (it might raise ENOENT instead because the file doesn't exist). + await expect( + readTool.run({ path: "../../etc/some-fake-file" }, { signal: new AbortController().signal }), + ).rejects.not.toThrow(/outside workspace/i) + }) +}) +``` + +(Note: this assumes `process.chdir`, `originalCwd`, and the `ctx()` helper are already in the existing `workspace.test.ts` from sub-project 4 — verify by reading the file. Adjust the new tests to share the same setup.) 
+ +- [ ] **Step 3: Run to verify failure** + +```bash +cd /Users/blove/repos/dawn && pnpm --filter @dawn-ai/core test -- workspace 2>&1 | tail -15 +``` +Expect: FAIL — capability ignores `context.permissions`. + +- [ ] **Step 4: Update the capability** + +Edit `packages/core/src/capabilities/built-in/workspace.ts`. Add imports: + +```ts +import type { PermissionsStore } from "@dawn-ai/permissions" +``` + +Change `buildWorkspaceTools` signature to accept the optional store: + +```ts +function buildWorkspaceTools( + workspaceRoot: string, + fs: FilesystemBackend, + exec: ExecBackend, + permissions: PermissionsStore | undefined, +): readonly OverridableTool[] { /* ... */ } +``` + +Add a helper for gating: + +```ts +async function gate( + permissions: PermissionsStore | undefined, + tool: string, + candidate: string, +): Promise<"allow" | "deny" | "unknown"> { + if (!permissions) return "allow" // capability used without permissions context = legacy behavior, allow + if (permissions.mode === "bypass") return "allow" + return permissions.match(tool, candidate) +} +``` + +In each tool's `run`: + +- For path tools (`readFile`/`writeFile`/`listDir`): resolve the path first, then check, in this order: + - If `permissions` is undefined (legacy callers) and the path is OUTSIDE the workspace: keep the original jail and throw the existing "outside workspace" error. `gate()`'s legacy `"allow"` is meant for bash and must not widen file access. + - If the path is INSIDE the workspace and the mode is not `"bypass"`: allow silently (the workspace is the trusted area; no need to gate every read of `workspace/notes.md`). + - Otherwise (the path is OUTSIDE the workspace, or `permissions.mode === "bypass"`): skip the jail check and consult `gate()`. `"deny"` → throw `Permission denied by user: ${path}`. `"allow"` → proceed. `"unknown"` → in `"interactive"` mode emit an interrupt; in `"non-interactive"` mode throw `Permission denied (fail-closed)`. 
+ +```ts +// readFile (rewritten): +run: async (input, ctx) => { + const { path } = READ_FILE_INPUT.parse(input) + const absPath = resolve(workspaceRoot, path) + const insideWorkspace = + absPath === workspaceRoot || absPath.startsWith(workspaceRoot + sep) + + if (!insideWorkspace && !permissions) { + // No permissions context (legacy callers): keep the original path jail. + // Reuse the message pathJail() raises today so existing tests matching + // /outside workspace/i stay green. + throw new Error(`Path is outside workspace: ${path}`) + } + + if (!insideWorkspace || permissions?.mode === "bypass") { + // Consult permissions for the operation + const decision = await gate(permissions, "readFile", absPath) + if (decision === "deny") { + throw new Error(`Permission denied by user: ${path}`) + } + if (decision === "unknown") { + if (permissions?.mode === "non-interactive") { + throw new Error(`Permission denied (fail-closed): ${path}`) + } + // interactive: emit LangGraph interrupt() — handled by requestPermissionInterrupt (defined below) + const result = await requestPermissionInterrupt({ + kind: "path", + operation: "readFile", + path: absPath, + permissions, + }) + if (result === "deny") { + throw new Error(`Permission denied by user: ${path}`) + } + // "allow" — proceed + } + } + + return fs.readFile(absPath, backendContext(workspaceRoot, ctx.signal)) +} +``` + +`requestPermissionInterrupt` cannot be imported from `@dawn-ai/langchain` — that would create a core→langchain dep. One alternative: the helper lives in `@dawn-ai/permissions` and uses LangGraph's `interrupt()` directly (which is available via `import { interrupt } from "@langchain/langgraph"`), adding `@langchain/langgraph` to `@dawn-ai/permissions` as a peerDependency (or dependency). + +Simpler, and the approach used below: have the workspace capability import `interrupt` from `@langchain/langgraph` directly (it's the LangGraph primitive). Add `@langchain/langgraph` to `@dawn-ai/core` as a peerDependency if not already present. + +Verify: `cd /Users/blove/repos/dawn && grep "@langchain/langgraph" packages/core/package.json` + +If not present: add to peerDependencies. If `@dawn-ai/core` shouldn't take a runtime dep on langgraph, do the interrupt logic in `@dawn-ai/langchain` and pass it via the resolver — but that's heavier. 
**For v1, accept the core→langgraph dep**. + +```ts +import { interrupt } from "@langchain/langgraph" + +async function requestPermissionInterrupt(args: { + kind: "command" | "path" + command?: string + operation?: "readFile" | "writeFile" | "listDir" + path?: string + permissions: PermissionsStore | undefined +}): Promise<"allow" | "deny"> { + const interruptId = `perm-${Date.now()}-${Math.random().toString(36).slice(2, 8)}` + const suggestedPattern = + args.kind === "command" + ? suggestedCommandPattern(args.command!) + : suggestedPathPattern(args.path!) + const payload = { + interruptId, + type: "permission-request" as const, + kind: args.kind, + detail: + args.kind === "command" + ? { command: args.command!, suggestedPattern } + : { operation: args.operation!, path: args.path!, suggestedPattern }, + } + // LangGraph's interrupt() pauses the graph and yields the payload on the stream. + // The resume value comes back here when the resume endpoint fires. + const decision = interrupt(payload) as "once" | "always" | "deny" + if (decision === "deny") return "deny" + if (decision === "always" && args.permissions) { + const tool = args.kind === "command" ? "bash" : args.operation! + await args.permissions.addAllow(tool, suggestedPattern) + } + return "allow" +} +``` + +Apply the same pattern to `writeFile`, `listDir`, and `runBash`. 
For `runBash`, EVERY command is gated (no inside/outside-workspace short-circuit): + +```ts +// runBash (rewritten): +run: async (input, ctx) => { + const { command } = RUN_BASH_INPUT.parse(input) + const decision = await gate(permissions, "bash", command) + if (decision === "deny") { + throw new Error(`Permission denied by user: ${command}`) + } + if (decision === "unknown") { + if (permissions?.mode === "non-interactive") { + throw new Error(`Permission denied (fail-closed): ${command}`) + } + const result = await requestPermissionInterrupt({ + kind: "command", + command, + permissions, + }) + if (result === "deny") { + throw new Error(`Permission denied by user: ${command}`) + } + } + return exec.runCommand({ command }, backendContext(workspaceRoot, ctx.signal)) +} +``` + +Bypass mode for path-jail: the existing `pathJail()` call needs to be skipped when `permissions?.mode === "bypass"`. Wrap the jail call in a check OR remove the jail (since the gate handles the bypass case). + +Actually the cleanest restructure: the capability no longer calls `pathJail()` at all — it resolves the path with `resolve(workspaceRoot, path)`, checks "is inside workspace?" itself, and gates if not (or if bypass). The "Path is outside workspace" error becomes part of the deny path for non-interactive mode. + +- [ ] **Step 5: Pass permissions through `load()`** + +In the `load()` of the marker: + +```ts +load: async (_routeDir, context) => { + const root = workspaceRoot() + if (!existsSync(root)) return {} + const fs = context.backends?.filesystem ?? localFilesystem() + const exec = context.backends?.exec ?? localExec() + const permissions = context.permissions + // Warn on bypass mode + if (permissions?.mode === "bypass") { + console.warn( + "[dawn:permissions] mode=bypass — path-jail disabled, all bash unrestricted. 
Do not use in production.", + ) + } + return { tools: buildWorkspaceTools(root, fs, exec, permissions) } +} +``` + +- [ ] **Step 6: Run tests** + +```bash +cd /Users/blove/repos/dawn && pnpm --filter @dawn-ai/core test 2>&1 | tail -10 +cd /Users/blove/repos/dawn && pnpm test 2>&1 | tail -10 +cd /Users/blove/repos/dawn && pnpm build 2>&1 | tail -5 +cd /Users/blove/repos/dawn && pnpm lint 2>&1 | tail -5 +``` +Expect: all green. + +Note: the interrupt-flow tests (interactive mode → emits interrupt) cannot run in isolation because `interrupt()` is a LangGraph primitive that requires a live graph runtime. These tests should mock `interrupt()` to return a canned value, or be deferred to integration tests in `@dawn-ai/langchain`. + +Pragmatic approach for THIS task: only test the non-interactive + bypass paths in unit tests; defer interactive-flow testing to Task 8's agent-adapter integration test. + +- [ ] **Step 7: Commit** + +```bash +cd /Users/blove/repos/dawn +git add packages/core/package.json \ + packages/core/src/capabilities/built-in/workspace.ts \ + packages/core/test/capabilities/workspace.test.ts +git commit -m "feat(core): workspace capability gates through PermissionsStore + +Each of readFile/writeFile/listDir/runBash now consults the optional +PermissionsStore in CapabilityMarkerContext before invoking the +backend. Three modes: + +- interactive: unknown ops emit LangGraph interrupt() and pause the run +- non-interactive: unknown ops hard-refuse (fail-closed) +- bypass: path-jail disabled, every op proceeds (warn on capability load) + +Path-touching operations short-circuit (no gate) for paths INSIDE the +workspace. runBash gates every command regardless. 
+ +Co-Authored-By: Claude Opus 4.7 " +``` + +--- + +## Phase C — Runtime: agent-adapter + resume endpoint + +### Task 8: Propagate interrupt events through the SSE stream + +**Files:** +- Modify: `packages/langchain/src/agent-adapter.ts` +- Create: `packages/langchain/test/agent-adapter-interrupt.test.ts` + +- [ ] **Step 1: Inspect existing streamFromRunnable** + +Read `packages/langchain/src/agent-adapter.ts` lines around `streamFromRunnable` and `streamEvents`. Note the case statements for `on_chat_model_stream`, `on_tool_start`, etc. The interrupt event from LangGraph v2 streamEvents has `event: "on_interrupt"` (verify by checking LangGraph 1.x docs or a quick smoke test). + +If `on_interrupt` doesn't exist in LangGraph's stream events, LangGraph 1.x surfaces interrupts as a special return value from `graph.invoke()` rather than a stream event. In that case the propagation happens differently — at the graph-return level rather than mid-stream. Verify and adjust. + +- [ ] **Step 2: Write a failing test** + +Create `packages/langchain/test/agent-adapter-interrupt.test.ts`: + +```ts +import { describe, expect, it, vi } from "vitest" +import { streamAgent } from "../src/agent-adapter.js" + +describe("streamAgent — interrupt propagation", () => { + it("yields {type: 'interrupt', data: ...} when the graph emits a LangGraph interrupt", async () => { + // Mock a graph that interrupts on its first tool call + const mockGraph = { + invoke: vi.fn(), + streamEvents: async function* () { + yield { + event: "on_chain_start", + name: "LangGraph", + data: { input: {} }, + } + // Simulate the LangGraph interrupt envelope shape + yield { + event: "on_interrupt", // or whatever LangGraph v2 actually emits + data: { value: { interruptId: "perm-x", type: "permission-request", kind: "command", detail: { command: "ls", suggestedPattern: "ls" } } }, + } + }, + } + + const chunks: unknown[] = [] + for await (const c of streamAgent({ + entry: mockGraph, + input: { messages: [{ role: 
"user", content: "x" }] }, + routeParamNames: [], + signal: new AbortController().signal, + tools: [], + })) { + chunks.push(c) + } + + const interruptChunk = chunks.find((c) => (c as { type: string }).type === "interrupt") + expect(interruptChunk).toBeDefined() + }) +}) +``` + +(Note: this test may need adjustment based on LangGraph's actual event shape. The implementer should investigate LangGraph 1.x's interrupt-related stream-events output before writing the assertion.) + +- [ ] **Step 3: Run to verify failure** + +```bash +cd /Users/blove/repos/dawn && pnpm --filter @dawn-ai/langchain test -- agent-adapter-interrupt 2>&1 | tail -10 +``` +Expect: FAIL — `streamFromRunnable` does not currently emit `interrupt` chunks. + +- [ ] **Step 4: Add the interrupt case** + +In `streamFromRunnable`, add a new case in the for-await switch: + +```ts +case "on_interrupt": { + hasYielded = true + yield { + type: "interrupt" as const, + data: (event.data as { value?: unknown }).value, + } + break +} +``` + +(If LangGraph emits interrupts via a different event name, use that.) + +- [ ] **Step 5: Run + verify** + +```bash +cd /Users/blove/repos/dawn && pnpm --filter @dawn-ai/langchain test 2>&1 | tail -10 +``` +Expect: PASS. + +- [ ] **Step 6: Commit** + +```bash +cd /Users/blove/repos/dawn +git add packages/langchain/src/agent-adapter.ts packages/langchain/test/agent-adapter-interrupt.test.ts +git commit -m "feat(langchain): propagate LangGraph interrupt events to the SSE stream + +When the graph emits an interrupt (via LangGraph's interrupt() primitive), +the agent-adapter yields a {type: 'interrupt', data: payload} chunk so +the SSE serializer can render it as 'event: interrupt' to clients. 
+ +Co-Authored-By: Claude Opus 4.7 " +``` + +--- + +### Task 9: Resume endpoint in the dev HTTP server + +**Files:** +- Modify: `packages/cli/src/lib/dev/runtime-server.ts` +- Create: `packages/cli/test/resume-endpoint.test.ts` +- Possibly modify: `packages/cli/src/lib/runtime/execute-route.ts` (build PermissionsStore + thread it; also: maintain a per-thread "pending interrupt" map) + +- [ ] **Step 1: Inspect the existing dev server** + +Read `packages/cli/src/lib/dev/runtime-server.ts`. Note how `/runs/stream` is implemented. The new `/threads/:thread_id/resume` route follows the same pattern. + +- [ ] **Step 2: Build the in-memory thread-state map** + +In `runtime-server.ts` (or a new sibling file), maintain: + +```ts +interface PendingInterrupt { + interruptId: string + // Resume function bound to the parked graph; called when resume arrives. + resolve: (decision: "once" | "always" | "deny") => void +} + +const pendingByThread = new Map() +``` + +When the agent emits an interrupt during a streamed run, the runtime registers the pending interrupt: + +```ts +pendingByThread.set(threadId, { interruptId: payload.interruptId, resolve }) +``` + +The `resolve` function is the callback that, when invoked, returns the decision to the LangGraph `interrupt()` call (via `Command({resume})`). + +- [ ] **Step 3: Implement the resume route** + +In the request dispatch of `runtime-server.ts`, add a match for `POST /threads/:thread_id/resume`: + +```ts +if (request.method === "POST" && /^\/threads\/[^/]+\/resume$/.test(url.pathname)) { + const threadId = url.pathname.split("/")[2]! 
+ let body: unknown + try { + body = await readJsonBody(request) + } catch { + // assumes readJsonBody rejects on malformed JSON — confirm against the existing helper + response.writeHead(400, { "content-type": "application/json" }) + response.end(JSON.stringify({ error: "invalid JSON body" })) + return + } + const { interrupt_id, decision } = (body ?? {}) as { interrupt_id?: unknown; decision?: unknown } + if ( + typeof interrupt_id !== "string" || + (decision !== "once" && decision !== "always" && decision !== "deny") + ) { + response.writeHead(400, { "content-type": "application/json" }) + response.end(JSON.stringify({ error: "interrupt_id and decision (once | always | deny) are required" })) + return + } + + const pending = pendingByThread.get(threadId) + if (!pending) { + response.writeHead(400, { "content-type": "application/json" }) + response.end(JSON.stringify({ error: "no parked interrupt for thread" })) + return + } + if (pending.interruptId !== interrupt_id) { + response.writeHead(409, { "content-type": "application/json" }) + response.end(JSON.stringify({ error: "stale interrupt_id" })) + return + } + pending.resolve(decision) + pendingByThread.delete(threadId) + response.writeHead(200, { "content-type": "application/json" }) + response.end(JSON.stringify({ ok: true })) + return +} +``` + +(Helper `readJsonBody` collects the request body into a buffer + parses it as JSON; it should exist already from the existing `/runs/stream` POST handling — reuse it.) + +- [ ] **Step 4: Write endpoint tests** + +Create `packages/cli/test/resume-endpoint.test.ts`. The test should: + +1. Start the runtime server in isolation (find the existing test pattern in `dev-command.test.ts` or similar). +2. Pre-populate the `pendingByThread` map with a known thread + interrupt id. +3. POST a valid resume — expect 200, expect the resolve callback to fire with the right decision. +4. POST with stale `interrupt_id` — expect 409. +5. POST without a pending interrupt — expect 400. +6. Invalid JSON body — expect 400. + +(Mirror the existing dev-server test scaffolding.) + +- [ ] **Step 5: Run + verify** + +```bash +cd /Users/blove/repos/dawn && pnpm --filter @dawn-ai/cli test -- resume-endpoint 2>&1 | tail -10 +cd /Users/blove/repos/dawn && pnpm test 2>&1 | tail -10 +``` +Expect: PASS. + +- [ ] **Step 6: Wire PermissionsStore + interrupt-bridging into execute-route.ts** + +In `packages/cli/src/lib/runtime/execute-route.ts`, after loading `dawn.config.ts`: + +```ts +import { createPermissionsStore } from "@dawn-ai/permissions" + +// ... 
existing config load logic +const permissionsConfig = loaded?.config.permissions +const envMode = process.env.DAWN_PERMISSIONS_MODE as + | "interactive" + | "non-interactive" + | "bypass" + | undefined +const mode = envMode ?? permissionsConfig?.mode ?? "interactive" + +const permissionsStore = createPermissionsStore({ + appRoot, + config: permissionsConfig + ? { + version: 1, + allow: permissionsConfig.allow ?? {}, + deny: permissionsConfig.deny ?? {}, + } + : undefined, + mode, +}) +await permissionsStore.load() + +// Thread into capability context: +const applied = await applyCapabilities(registry, routeDir, { + routeManifest, + descriptor, + descriptorRouteMap, + ...(configBackends ? { backends: configBackends } : {}), + permissions: permissionsStore, +}) +``` + +The interrupt bridge — connecting `interrupt()` (in the capability) to the SSE stream + the `pendingByThread` map — requires that the streamed run's `interrupt` chunks (Task 8) get translated into `pendingByThread.set(...)` calls inside the SSE serializer. The serializer is in the same file region as the existing SSE event emitter. Add: + +```ts +// in the stream chunk loop, for an "interrupt" chunk: +if (chunk.type === "interrupt") { + pendingByThread.set(threadId, { + interruptId: chunk.data.interruptId, + resolve: (decision) => { + // This callback resumes the parked LangGraph. + // Implementation: re-invoke the graph with Command({resume: decision}). + // The complexity here is connecting the resolve function back to the + // LangGraph that's currently parked. Approach: when the graph emits + // an interrupt, the `interrupt()` call resolves to whatever value is + // passed in via Command({resume}) on the next invocation. So `resolve` + // here needs to trigger a SECOND graph invocation with the resume value. + // + // For v1: simplest is to keep a Deferred that the original + // graph.invoke() awaits inside its tool's run(). 
When resolve fires, + // it settles the Deferred, the tool's run() returns to LangGraph, and + // the graph continues. + // + // This means the agent's run() function in the capability needs to + // wrap interrupt() in a way that integrates with this Deferred pattern. + // See implementer note below. + }, + }) + // Forward the interrupt event to the SSE stream: + yield { type: "interrupt", data: chunk.data } +} +``` + +**Implementer note:** the actual mechanism by which `resolve` translates into a LangGraph resume is the trickiest piece of this task. The LangGraph `interrupt()` primitive expects to be re-invoked via `Command({resume})` on the SAME graph instance with the SAME thread_id. The runtime server needs to keep the graph instance alive between the initial `streamEvents` and the resume call. + +If this proves intractable for v1, the fallback design is: +1. The initial run's stream ends when interrupt fires (return early). +2. The next call to `/runs/stream` with the same `thread_id` includes `{ resume_value: decision }` in the payload. +3. The runtime constructs the graph fresh, calls `graph.invoke(Command({resume: decision}), {configurable: {thread_id}})`, and resumes from the checkpoint. + +This fallback requires LangGraph's checkpointer to be enabled (it should be, for thread continuity). Verify that the existing `/runs/stream` payload already accepts a `resume_value` field, or add one. + +If the fallback is too disruptive, mark this task as DONE_WITH_CONCERNS and document the limitation: "Resume mechanism functional for path-jail/bash interrupts in the same process; multi-process resume requires the future Agent Protocol implementation (sub-project 7)." + +- [ ] **Step 7: Commit** + +```bash +cd /Users/blove/repos/dawn +git add -A +git commit -m "$(cat <<'EOF' +feat(cli): resume endpoint + PermissionsStore wiring + +Adds POST /threads/:thread_id/resume to the dev HTTP server. Maintains +an in-memory pendingByThread map of parked interrupts. 
The resume +handler validates the interrupt_id, invokes the resolver bound to the +parked graph, and returns 200. + +execute-route.ts constructs the PermissionsStore from dawn.config.ts ++ DAWN_PERMISSIONS_MODE env var and threads it into the +CapabilityMarkerContext. The workspace capability reads it. + +The interrupt-to-resume bridging is the trickiest piece; v1 uses a +Deferred-per-pending-interrupt pattern that requires the graph to stay +alive between the initial /runs/stream and the resume POST. + +Co-Authored-By: Claude Opus 4.7 +EOF +)" +``` + +--- + +## Phase D — Chat demo + +### Task 10: Seed permissions in chat demo's dawn.config.ts + +**Files:** +- Modify: `examples/chat/server/dawn.config.ts` + +- [ ] **Step 1: Update the config** + +Replace contents with: + +```ts +export default { + appDir: "src/app", + permissions: { + // Default mode (omit means "interactive") + // Seed a few obviously-safe commands so prompt fatigue is reasonable on first run. + allow: { + bash: ["ls", "pwd", "cat", "echo", "head", "tail", "wc"], + }, + // Block obviously-destructive patterns even when interactive. + deny: { + bash: ["rm -rf", "sudo", "chmod 777"], + }, + }, +} +``` + +- [ ] **Step 2: Verify the example builds** + +```bash +cd /Users/blove/repos/dawn/examples/chat/server && pnpm build 2>&1 | tail -5 +``` +Expect: `4 route(s) compiled`. 
+ +- [ ] **Step 3: Commit** + +```bash +cd /Users/blove/repos/dawn +git add examples/chat/server/dawn.config.ts +git commit -m "feat(examples/chat): seed permissions allow/deny in dawn.config.ts + +Co-Authored-By: Claude Opus 4.7 " +``` + +--- + +### Task 11: Chat-web inline permission panel + resume proxy + +**Files:** +- Create: `examples/chat/web/app/api/permission-resume/route.ts` +- Modify: `examples/chat/web/app/page.tsx` + +- [ ] **Step 1: Write the resume proxy** + +Create `examples/chat/web/app/api/permission-resume/route.ts`: + +```ts +import { NextRequest } from "next/server" + +export const runtime = "nodejs" +export const dynamic = "force-dynamic" + +export async function POST(req: NextRequest): Promise<Response> { + const serverUrl = process.env.DAWN_SERVER_URL ?? "http://127.0.0.1:3001" + const body = (await req.json()) as { + threadId: string + interruptId: string + decision: "once" | "always" | "deny" + } + + const upstream = await fetch(`${serverUrl}/threads/${encodeURIComponent(body.threadId)}/resume`, { + method: "POST", + headers: { "content-type": "application/json" }, + body: JSON.stringify({ + interrupt_id: body.interruptId, + decision: body.decision, + }), + }) + + return new Response(upstream.body, { + status: upstream.status, + headers: { "content-type": "application/json" }, + }) +} +``` + +- [ ] **Step 2: Add the inline panel to page.tsx** + +Edit `examples/chat/web/app/page.tsx`. Add state for pending interrupt + handlers; render an inline panel when present: + +```tsx +const [pendingInterrupt, setPendingInterrupt] = useState<{ + interruptId: string + kind: "command" | "path" + detail: any // shape from SSE +} | null>(null) + +// Inside the SSE read loop, parse "event: interrupt" lines: +// Detection: lines.match(/^event: interrupt$/) then read the following "data: ..." line. +// Parse the JSON, setPendingInterrupt(parsedData). 
+ +async function resolveInterrupt(decision: "once" | "always" | "deny") { + if (!pendingInterrupt || !threadId) return + await fetch("/api/permission-resume", { + method: "POST", + headers: { "content-type": "application/json" }, + body: JSON.stringify({ + threadId, + interruptId: pendingInterrupt.interruptId, + decision, + }), + }) + setPendingInterrupt(null) +} + +// Render — above the event log, when pendingInterrupt is non-null: +{pendingInterrupt && ( +
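+  <div>
+    <strong>⚠️ Permission request</strong>
+    <p>
+      The agent wants to {pendingInterrupt.kind === "command" ? "run command:" : `${pendingInterrupt.detail.operation}:`}
+    </p>
+    <pre>
+      <code>
+        {pendingInterrupt.kind === "command" ? pendingInterrupt.detail.command : pendingInterrupt.detail.path}
+      </code>
+    </pre>
+    <div>
+      <button onClick={() => resolveInterrupt("once")}>Allow once</button>
+      <button onClick={() => resolveInterrupt("always")}>
+        Allow always for <code>{pendingInterrupt.detail.suggestedPattern}</code>
+      </button>
+      <button onClick={() => resolveInterrupt("deny")}>Deny</button>
+    </div>
+  </div>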
+)} +``` + +- [ ] **Step 3: Verify** + +```bash +cd /Users/blove/repos/dawn && pnpm --filter @dawn-example/chat-web typecheck 2>&1 | tail -5 +cd /Users/blove/repos/dawn && pnpm --filter @dawn-example/chat-web build 2>&1 | tail -5 +``` +Expect: clean typecheck + build. + +- [ ] **Step 4: Commit** + +```bash +cd /Users/blove/repos/dawn +git add examples/chat/web/ +git commit -m "feat(examples/chat-web): inline permission panel + resume proxy + +Co-Authored-By: Claude Opus 4.7 " +``` + +--- + +## Phase E — Smoke + PR + +### Task 12: Manual Chrome MCP smoke + +**Files:** none modified. + +- [ ] **Step 1: Start both dev servers** + +```bash +cd /Users/blove/repos/dawn/examples/chat/server && OPENAI_API_KEY="$(grep OPENAI_API_KEY /Users/blove/repos/dawn/.env | cut -d= -f2-)" pnpm dev & +cd /Users/blove/repos/dawn/examples/chat/web && pnpm dev & +``` + +Wait for both ("Dawn dev ready" + "Ready in Nms"). + +- [ ] **Step 2: Drive `/chat` with a prompt that triggers bash gating** + +Navigate Chrome MCP to `http://localhost:3000`, ensure `/chat` is selected, send: ``Run `ls -la` in the workspace.`` + +Expected behavior: +- An `event: interrupt` envelope arrives on the SSE stream. +- The inline panel renders: "The agent wants to run command: `ls -la`", with three buttons. +- The "Allow always" button is labeled with the suggested pattern (`ls -la` → first two tokens, so `ls -la`). +- Clicking "Allow once" sends the resume; the SSE log resumes streaming; the agent's tool call completes. +- Send the SAME prompt again → another interrupt fires (Once didn't persist). +- Re-send and click "Allow always for `ls -la`" → SSE log resumes; verify `.dawn/permissions.json` now has `allow.bash: ["ls -la"]`. +- Re-send a THIRD time → no interrupt; runs silently. + +- [ ] **Step 3: Trigger a denied command** + +Prompt: ``Run `rm -rf /tmp/` in the workspace.`` + +Expected: NO interrupt fires (config.deny has `rm -rf`). 
The tool returns the deny error; the agent responds with something like "I cannot run that command, it's blocked." + +- [ ] **Step 4: Trigger a path-outside-workspace prompt** + +Prompt: `Read /etc/hostname please.` + +Expected: interrupt fires with `{kind: "path", operation: "readFile", path: "/etc/hostname", suggestedPattern: "/etc/"}`. Click "Deny". The tool returns the deny error; the agent acknowledges. + +- [ ] **Step 5: Switch to bypass mode + verify** + +Edit `examples/chat/server/dawn.config.ts` to set `permissions: { mode: "bypass" }`. Restart the chat-server. Re-run: `Read /etc/hostname please.` + +Expected: NO interrupt. The tool actually reads `/etc/hostname` and returns its contents. (The path-jail is disabled.) + +Restore `dawn.config.ts` to interactive mode before continuing. + +- [ ] **Step 6: Kill dev servers** + +```bash +pkill -f "dawn.*dev" +pkill -f "next dev -p 3000" +``` + +- [ ] **Step 7: If any step failed** + +Debug. Likely candidates: +- The interrupt envelope isn't appearing on the SSE stream → check Task 8's propagation. +- The resume endpoint returns 200 but the run doesn't resume → check the Deferred/Command-resume mechanism from Task 9. +- `.dawn/permissions.json` doesn't get written on "always" → check the PermissionsStore.addAllow path. + +Iterate until smoke is clean. Do not move on to Task 13 until all 5 substeps succeed. + +--- + +### Task 13: Update phase memory + open PR + +**Files:** +- Modify: `/Users/blove/.claude/projects/-Users-blove-repos-dawn/memory/project_phase_status.md` + +- [ ] **Step 1: Update phase status memory** + +Edit `project_phase_status.md`. Find the section for sub-project 4 (recently shipped) and ADD a new entry beneath it for 4.5: + +``` +4.5. ✅ **HITL permissions** — shipped in [PR #TBD](https://github.com/cacheplane/0/pull/TBD). +Three modes (interactive default / non-interactive / bypass) in +dawn.config.ts. 
Path-jail escapes + every first-occurrence bash command +trigger an interrupt prompt with three approval scopes (Once / +Always-for-pattern / Deny). Smart-default pattern inference (first 2 +tokens for commands, parent dir for paths). Persisted decisions live +in .dawn/permissions.json (project-local, gitignored, auto-appended to +.gitignore). New @dawn-ai/permissions package ships types + pattern- +matching + PermissionsStore. SSE envelope shape is Agent-Protocol- +compatible. +``` + +Also bump the top summary if applicable. + +- [ ] **Step 2: Push the branch + open the PR** + +```bash +cd /Users/blove/repos/dawn +git push -u origin claude/phase3-permissions +gh pr create --title "feat: phase 3 — HITL permissions (sub-project 4.5)" --body "$(cat <<'EOF' +## Summary + +Sub-project 4.5 of the Dawn opinionated agent harness. Builds on +sub-project 4 (workspace capability, PR #170): replaces the hard-refuse- +on-path-jail-escape with an interrupt prompt; adds the same gating to +runBash. Three modes: interactive (default), non-interactive +(production / CI), bypass (explicit trust). Persisted "always" +decisions live in `.dawn/permissions.json` (project-local, gitignored). + +Spec: `docs/superpowers/specs/2026-05-21-phase3-permissions-design.md` +Plan: `docs/superpowers/plans/2026-05-21-phase3-permissions.md` + +## Changes + +- New `@dawn-ai/permissions` package: types + pattern-matching + suggested-pattern + PermissionsStore. +- Workspace capability gates every tool's run() through PermissionsStore. + - readFile/writeFile/listDir: gate only when path is outside the workspace. + - runBash: gate every command on first occurrence. + - bypass mode disables the path-jail entirely. +- DawnConfig + CapabilityMarkerContext extend with permissions/PermissionsStore. +- Dev HTTP server adds POST /threads/:thread_id/resume. +- Agent-adapter propagates LangGraph interrupt() as `event: interrupt` SSE envelopes. 
+- Chat demo seeds permissions in dawn.config.ts; web client renders inline permission panel. + +## Test plan + +- [x] Unit tests across @dawn-ai/permissions (suggested-pattern, matching, store) +- [x] Workspace capability tests covering interactive/non-interactive/bypass paths +- [x] Resume endpoint tests +- [x] Agent-adapter interrupt propagation test +- [x] Manual Chrome MCP smoke (5 scenarios) + +🤖 Generated with [Claude Code](https://claude.com/claude-code) +EOF +)" +``` + +- [ ] **Step 3: Update memory with the real PR number** + +After PR URL prints, replace `#TBD` with the real number in the memory note. + +- [ ] **Step 4: Enable auto-merge** + +```bash +gh pr merge --squash --delete-branch --auto +``` + +Wait for validate-green. + +--- + +## Self-review notes + +- **Spec coverage:** Every section maps to a task. Architecture (T1–T7). Modes (T6, T7). Persistence (T5). SSE envelope (T8). Resume endpoint (T9). Web client UX (T11). Config seeding (T10). Smoke (T12). +- **Known sharp edge in T9:** the interrupt-to-resume bridging mechanism is the most uncertain piece — depends on LangGraph 1.x checkpointer behavior and how `interrupt()` interacts with `streamEvents()`. The plan documents a fallback if the in-process Deferred pattern doesn't work cleanly. The implementer may need to investigate LangGraph 1.x's actual interrupt semantics empirically before locking in the design. +- **Placeholder scan:** clean. The `LangGraph interrupt event name` (`on_interrupt` vs other) is flagged as something to verify, not a placeholder. +- **Type consistency:** `PermissionMode`, `PermissionDecision`, `PermissionsFile`, `PermissionsStore` consistent throughout. The capability's `gate()` helper signature stable. The SSE envelope payload shape stable across the spec + plan. 
diff --git a/docs/superpowers/specs/2026-05-20-phase3-workspace-backends-design.md b/docs/superpowers/specs/2026-05-20-phase3-workspace-backends-design.md new file mode 100644 index 00000000..4343a7d1 --- /dev/null +++ b/docs/superpowers/specs/2026-05-20-phase3-workspace-backends-design.md @@ -0,0 +1,357 @@ +# Phase 3 — Workspace Capability + Pluggable Backends Design + +**Sub-project:** 4 of 7 in the Dawn opinionated agent harness. +**Status:** Spec +**Date:** 2026-05-20 + +## Goal + +Refactor the workspace tools (`readFile`, `writeFile`, `listDir`, `runBash`) from per-route hand-rolled files into a single built-in capability, and introduce a pluggable backend interface so the underlying filesystem and exec implementations can be swapped at the app level. Default behavior is unchanged: existing apps using local-fs + local-exec keep working without touching configuration. Pluggability unlocks in-memory storage for tests, remote sandboxes for production, and middleware composition for cross-cutting concerns like logging. + +## Architecture + +A new built-in capability marker `createWorkspaceMarker()` joins the existing built-in markers, making five in total (planning, agents-md, skills, subagents, and this one). It auto-discovers the `workspace/` directory under a route (the same convention AGENTS.md uses) and contributes four tools wired to a configurable filesystem + exec backend pair. + +A new pnpm workspace package `@dawn-ai/workspace` ships the backend type interfaces (`FilesystemBackend`, `ExecBackend`, `BackendContext`), the two default implementations (`localFilesystem`, `localExec`), and a small set of functional composition primitives (`compose`, one demonstration middleware `withLogging`). Apps configure backends via `dawn.config.ts`, which switches from the existing hand-rolled string-only parser to a `tsx`-evaluated import so callable values can be expressed naturally. + +The capability owns path-jail enforcement. Backends receive already-resolved absolute paths and trust them. 
Authors can override the entire workspace tool set at the filesystem-convention layer (a user-authored `tools/readFile.ts` replaces the capability's contribution) or replace specific backend methods via plain spread-and-closure JS / middleware composition. + +Human-in-the-loop permission gating (interrupt the run to ask the user about paths outside the jail) is deliberately deferred to a future sub-project. The capability hard-refuses jail escapes for now; the future permission system will replace that with an interrupt-and-resume flow without changing the backend contract. + +## Design Decisions + +### Sub-project boundary + +This sub-project ships pluggable backends and the workspace capability only. Concretely: + +- Refactor: workspace tools move from per-route user-authored files into a capability that calls into a backend. +- New package: `@dawn-ai/workspace` exports backend types + defaults + composition helpers. +- Config-loader switch: `dawn.config.ts` parsed via `tsx` import instead of the existing restricted parser. + +Deferred to sub-project 4.5 (separate brainstorm + spec + plan cycle): + +- LangGraph `interrupt()` plumbed through Dawn's SSE stream as `event: interrupt` envelopes. +- HTTP resume endpoint + client-side resume UI. +- Permission persistence model (`.dawn/permissions.json` vs. AGENTS.md vs. thread state — to be decided in 4.5). +- "Always allow this path" / "always deny this command" decision flow. + +OS-level isolation (running Dawn under a restricted user, containerization, macOS sandbox profiles) is documented as deployment guidance and never claimed as a security boundary the framework provides. + +### Package name: `@dawn-ai/workspace` + +Chosen over `backends`, `harness`, `system`, `host`, `io`. The capability is named `workspace`; the trigger is the `workspace/` directory; the tools are workspace tools. The package's purpose is self-evident from its name. 
Future pluggable-defaults packages get domain-specific names (e.g., `@dawn-ai/tracing` if a tracing capability ever lands), matching the Next.js `next/cache` / `next/server` split rather than the LangChain integration-name convention. + +### Path-jail in the capability, not the backend + +The workspace capability resolves the user-supplied relative path against the route's `workspace/` directory and validates that the resolution stays inside before calling the backend. Backends receive an already-resolved absolute path they can trust. Backends do not re-validate. + +Rejected alternative: defense in depth (backend re-checks the jail). Real defense against hostile agents is OS-level isolation (restricted user, container). The capability check is sufficient for correctness against well-behaved agents and avoids duplicating the resolver in every backend. + +When a future HITL permission system lands (sub-project 4.5), the capability's hard-refuse on jail escape becomes a hard-refuse-unless-allowed branch. The backend contract is unchanged by that addition. + +### Workspace capability opt-in: convention only + +A route opts in by having a `workspace/` subdirectory. No descriptor flag. Same trigger AGENTS.md already uses; the AGENTS.md capability and the workspace capability share the same filesystem signal. + +When no `workspace/` exists, the capability contributes nothing — no tools, no prompt fragment, no overhead. + +### Default backends when `dawn.config.ts` omits `backends` + +When the route has a `workspace/` directory but `dawn.config.ts` declares no `backends` field (or `dawn.config.ts` doesn't exist), the capability defaults to `localFilesystem()` + `localExec()`. This preserves existing chat-example behavior: apps that don't touch their config keep working unchanged. 
+ +Explicit config in `dawn.config.ts` always wins: + +```ts +// dawn.config.ts +import { localFilesystem, localExec } from "@dawn-ai/workspace" +export default { + appDir: "src/app", + backends: { + filesystem: localFilesystem({ maxFileBytes: 256 * 1024 }), + exec: localExec({ timeout: 30_000 }), + }, +} +``` + +### Tool set: fixed four, extensible by convention + +The capability contributes exactly four tools: `readFile`, `writeFile`, `listDir`, `runBash`. This matches the deepagents/Claude Code workspace tool set authors already expect. + +Authors who want additional tools (e.g., `runPython`, `httpGet`) author them in `tools/` as today — orthogonal to the workspace capability. Authors who want to override one of the standard four write a `tools/readFile.ts` file (etc.) that replaces the capability's contribution. This requires inverting the existing capability-vs-user-tool collision check introduced in PR #155: user tools win. + +### Config loader: switch from hand-rolled parser to `tsx` import + +The existing `packages/core/src/config.ts` defines a hand-rolled tokenizer + parser that supports only `{ appDir }` and `const FOO = "string"` bindings. It explicitly refuses imports, function values, and nested objects. This was originally a security-conscious choice (don't execute user TS at config-load time). + +The choice now blocks `dawn.config.ts` from expressing callable backends. Switch to a `tsx`-evaluated dynamic import using the same loader Dawn already uses for route discovery. Dawn already executes user TS during route discovery, tool execution, and capability application — there is no new attack surface introduced by also executing the config file. + +Existing `dawn.config.ts` files in the wild (just `{ appDir }`) remain valid TS modules and continue to work without modification. The new loader is ~30 lines net (the parser deletes; the loader is small). 
+ +### Backends are plain objects; composition is functional + +Backends are plain objects implementing the typed interfaces. No classes, no inheritance, no DI container. + +Three layers of extensibility, each progressively more powerful: + +1. **Spread + closure** — vanilla JS for overriding a single method: + ```ts + const base = localFilesystem() + const fs: FilesystemBackend = { + ...base, + readFile: async (path, ctx) => { + if (path.endsWith(".secret")) throw new Error("nope") + return base.readFile(path, ctx) + }, + } + ``` + No new API. Authors who know JS know how to do this. + +2. **Middleware composition** — `compose(...)` helper for stacking concerns: + ```ts + import { compose, localFilesystem, withLogging } from "@dawn-ai/workspace" + const fs = compose(withLogging({ destination: "stderr" }))(localFilesystem()) + ``` + A middleware is a function `(next: FilesystemBackend) => FilesystemBackend`. Same pattern as Vercel AI SDK `wrapLanguageModel`, Express middleware, LangChain callback wrapping. + +3. **Filesystem-convention tool override** — author a `tools/readFile.ts` to replace the capability's contribution entirely. Useful when the override is so different that wrapping the standard backend would be awkward. 
+ +### What ships in `@dawn-ai/workspace` v1 + +```ts +// type interfaces (workspace-specific — not in @dawn-ai/core to keep core free of node:child_process etc) +export interface FilesystemBackend { + readFile(path: string, ctx: BackendContext): Promise + writeFile(path: string, content: string, ctx: BackendContext): Promise<{ bytesWritten: number }> + listDir(path: string, ctx: BackendContext): Promise +} + +export interface ExecBackend { + runCommand( + args: { command: string; cwd?: string; env?: Record }, + ctx: BackendContext, + ): Promise<{ stdout: string; stderr: string; exitCode: number }> +} + +export interface BackendContext { + readonly signal: AbortSignal + readonly workspaceRoot: string +} + +// default impls +export function localFilesystem(opts?: { maxFileBytes?: number }): FilesystemBackend +export function localExec(opts?: { + timeout?: number + allowedCommands?: readonly RegExp[] +}): ExecBackend + +// composition primitives +export type FilesystemMiddleware = (next: FilesystemBackend) => FilesystemBackend +export type ExecMiddleware = (next: ExecBackend) => ExecBackend +export function compose(...middlewares: ReadonlyArray<(next: T) => T>): (base: T) => T + +// one demonstration middleware that ships in v1 +export function withLogging(opts?: { + destination?: "stderr" | ((entry: { method: string; args: unknown[] }) => void) +}): T extends FilesystemBackend ? FilesystemMiddleware : ExecMiddleware +``` + +Resist shipping `withMaxFileSize` / `withPathRestriction` as standalone middlewares — those fit better as options on `localFilesystem()` itself. One demonstration middleware (logging) proves the pattern; community middlewares grow organically. 
+ +## Component Contracts + +### `createWorkspaceMarker` + +```ts +// packages/core/src/capabilities/built-in/workspace.ts +export function createWorkspaceMarker(): CapabilityMarker { + return { + name: "workspace", + detect: async (routeDir) => existsSync(join(routeDir, "workspace")), + load: async (routeDir, context) => { + const workspaceRoot = join(routeDir, "workspace") + const fs = context.backends?.filesystem ?? defaultLocalFilesystem() + const exec = context.backends?.exec ?? defaultLocalExec() + return { tools: buildWorkspaceTools(workspaceRoot, fs, exec) } + }, + } +} +``` + +The four tools share a single path-jail helper: + +```ts +function pathJail(userPath: string, workspaceRoot: string): string { + const resolved = resolve(workspaceRoot, userPath) + if (!resolved.startsWith(workspaceRoot + sep) && resolved !== workspaceRoot) { + throw new Error(`Path is outside workspace: ${userPath}`) + } + return resolved +} +``` + +Each tool's `run` resolves the path, calls the backend, returns the result: + +```ts +const readFileTool: DawnToolDefinition = { + name: "readFile", + description: "Read a UTF-8 file from the workspace.", + schema: z.object({ path: z.string() }), + run: async (input, ctx) => { + const { path } = z.object({ path: z.string() }).parse(input) + const safe = pathJail(path, workspaceRoot) + return await fs.readFile(safe, { signal: ctx.signal, workspaceRoot }) + }, +} +// writeFile, listDir, runBash same shape +``` + +### `CapabilityMarkerContext` extension + +```ts +// packages/core/src/capabilities/types.ts (modify) +export interface CapabilityMarkerContext { + readonly routeManifest: RouteManifest + readonly descriptor: DawnAgent | undefined + readonly descriptorRouteMap?: ReadonlyMap + readonly backends?: { // NEW + readonly filesystem?: FilesystemBackend + readonly exec?: ExecBackend + } +} +``` + +The CLI's `execute-route.ts` loads `dawn.config.ts`, extracts `config.backends`, and threads it into the marker context. 
+ +### `DawnConfig` extension + +```ts +// packages/core/src/types.ts (modify) +export interface DawnConfig { + readonly appDir?: string + readonly backends?: { // NEW + readonly filesystem?: FilesystemBackend + readonly exec?: ExecBackend + } +} +``` + +Importing `FilesystemBackend` / `ExecBackend` into `@dawn-ai/core` creates a new edge: `core` depends on `@dawn-ai/workspace`'s type exports. This is acceptable because the workspace package's type-only entry has no runtime weight (no `node:child_process` etc.) — only the concrete `localFilesystem` / `localExec` factories pull in those deps. The interfaces live in `@dawn-ai/workspace/src/types.ts` (the package that owns the domain); `@dawn-ai/core` imports them via `import type`. + +### Tool-vs-capability collision check inversion + +Current behavior (PR #155, `packages/cli/src/lib/runtime/check-tool-name-uniqueness.ts`): a user-authored tool in `tools/` whose name matches a capability-contributed tool is a build error. + +New behavior: for **the workspace capability only**, a user-authored tool with a matching name **silently replaces** the capability's contribution. Other capabilities (planning's `writeTodos`, skills' `readSkill`, subagents' `task`) retain the collision error — those aren't meant to be replaceable. + +Implementation: the capability declares which of its contributed tools are "overridable." The uniqueness check skips overridable tools when both are present and removes the capability's version, keeping the user's. + +## Out of scope (deferred) + +- **HITL permission system** — `interrupt()` for jail-escape attempts. Sub-project 4.5. +- **Per-route backend override** — currently global only. Add via descriptor field non-breakingly later if a real use case surfaces. +- **OS-level sandboxing** — operator responsibility; Dawn documents deployment guidance. 
+- **Backend method extensibility** — adding methods beyond the standard four (e.g., custom `runPython` on a backend) does NOT auto-contribute extra tools. Authors who want additional tools write them in `tools/` as today. +- **Non-workspace backends** (tracing, secret resolution, etc.) — separate packages, separate sub-projects. + +## File Structure + +### New package + +``` +packages/workspace/ +├── package.json # @dawn-ai/workspace +├── tsconfig.json +├── vitest.config.ts +├── src/ +│ ├── index.ts # re-exports +│ ├── types.ts # FilesystemBackend, ExecBackend, BackendContext, middleware types +│ ├── local-filesystem.ts # localFilesystem() factory +│ ├── local-exec.ts # localExec() factory +│ ├── compose.ts # compose() helper +│ └── with-logging.ts # withLogging() middleware +└── test/ + ├── local-filesystem.test.ts + ├── local-exec.test.ts + ├── compose.test.ts + └── with-logging.test.ts +``` + +### New files in existing packages + +``` +packages/core/src/capabilities/built-in/workspace.ts # createWorkspaceMarker +packages/core/test/capabilities/workspace.test.ts # marker unit tests +``` + +### Modified files + +``` +packages/core/src/config.ts # rewrite loader to use tsx import +packages/core/test/config.test.ts # rewrite tests for new loader +packages/core/src/types.ts # extend DawnConfig with backends? +packages/core/src/capabilities/types.ts # extend CapabilityMarkerContext with backends? 
+packages/core/src/index.ts # export createWorkspaceMarker +packages/cli/src/lib/runtime/execute-route.ts # register createWorkspaceMarker, thread backends from config +packages/cli/src/lib/runtime/check-tool-name-uniqueness.ts # support overridable tool names +packages/cli/src/lib/typegen/run-typegen.ts # extra-tool entries for readFile/writeFile/listDir/runBash gated on hasWorkspace +memory/project_phase_status.md # mark sub-project 4 in progress +``` + +### Deleted files (chat example) + +``` +examples/chat/server/src/app/chat/tools/readFile.ts +examples/chat/server/src/app/chat/tools/writeFile.ts +examples/chat/server/src/app/chat/tools/listDir.ts +examples/chat/server/src/app/chat/tools/runBash.ts +examples/chat/server/src/app/chat/workspace-path.ts # if no longer referenced +examples/chat/server/src/app/coordinator/subagents/research/tools/readFile.ts +examples/chat/server/src/app/coordinator/subagents/research/tools/listDir.ts +examples/chat/server/src/app/coordinator/subagents/research/workspace-path.ts # if no longer referenced +``` + +### Notable: pnpm workspace config + +``` +pnpm-workspace.yaml # add "packages/workspace" +turbo.json # verify pipeline picks up the new package +``` + +## Testing strategy + +### Unit (no LLM) + +- `local-filesystem.test.ts` — backend impl reads/writes/lists against a `mkdtempSync` directory; respects `maxFileBytes`; rejects nothing (capability's job). +- `local-exec.test.ts` — `runCommand` executes `echo` and `ls`, captures stdout/stderr/exit; respects `timeout`; respects `allowedCommands` regex allowlist when configured. +- `compose.test.ts` — composes 0, 1, 2 middlewares correctly. Each middleware sees the next one in line. +- `with-logging.test.ts` — captures each method invocation with args; supports stderr and custom destination. 
+- `workspace.test.ts` (capability) — contributes 4 tools when `workspace/` exists; contributes nothing when absent; tool `run`s call the right backend method with the right args; path-jail rejects `../` escapes with the documented error; falls back to the default `localFilesystem` + `localExec` when `backends` is absent from the context; uses configured backends when provided. +- `config.test.ts` rewrite — import-evaluated loader handles `{ appDir }`, `{ backends: { filesystem, exec } }`, omitted file (returns empty config), syntax errors surface as TS errors rather than custom messages. +- `check-tool-name-uniqueness.test.ts` extension — overridable workspace tool names are NOT collision errors when a user tool shadows them. + +### Integration / chat example + +- The chat example's hand-rolled `tools/` files are deleted. After the migration, `pnpm dev` and a Chrome MCP smoke against both `/chat` and `/coordinator` must produce identical behavior to current main: + - `/chat`: planning + skills + AGENTS.md + workspace tools all work. Same SSE event shape. + - `/coordinator`: research subagent's `listDir` + `readFile` work via the capability. Subagent envelopes still fire correctly. + +No new LLM-driven CI tests; manual smoke is the same policy as existing capabilities. + +### Override pathway + +- A test fixture under `packages/cli/test/fixtures/workspace-tool-override/` defines a custom `tools/readFile.ts` alongside a `workspace/` directory. Verify the build picks the user tool and drops the capability's contribution. + +## Known Risks + +- **Config-loader switch is observable.** Apps with intentionally-restricted `dawn.config.ts` syntax assumptions will discover they can now write arbitrary TS. Mitigation: this is mostly upside; the restriction was already pierceable by any other route file in the app. Document the change in the PR description and CHANGELOG. 
+- **Tool-override inversion is a behavior change.** Currently a user `tools/readFile.ts` next to a workspace capability would be a build error. After this PR, the user tool silently wins. Mitigation: capability marks specific tools as overridable; the error stays for non-overridable capability tools (planning, skills, subagents). +- **The path-jail still surfaces as an error to the agent** when it tries paths outside the workspace. With no HITL permission system, the agent has to learn from the error message and adjust. Mitigation: the error message is informative ("Path is outside workspace: ../etc/passwd"). When 4.5 lands, this becomes an interactive flow. +- **`@dawn-ai/core` gaining a type-only edge to `@dawn-ai/workspace`** introduces a package-graph consideration. Mitigation: workspace's types are zero-runtime (no `node:` imports in `types.ts`); only the concrete factory functions pull in platform deps. +- **gpt-5 has learned the standard tool shapes by name.** Renaming `runBash` to `runCommand` would normalize but cost familiarity. Keep `runBash` to preserve trained behavior; revisit if a behavior delta shows up in smoke. + +## What we're explicitly NOT changing + +- `agent({ description, subagents, ... })` descriptor stays the same. +- Capability marker contract (`detect`, `load`) stays the same except for the new `backends?` field on `CapabilityMarkerContext`. +- SSE event shape stays the same; no new event types. +- Subagents work continues to work (the `coordinator/subagents/research` route's tools are deleted because they're now provided by the workspace capability — that's the only subagents-related change). 
diff --git a/docs/superpowers/specs/2026-05-21-phase3-permissions-design.md b/docs/superpowers/specs/2026-05-21-phase3-permissions-design.md new file mode 100644 index 00000000..49b1c78c --- /dev/null +++ b/docs/superpowers/specs/2026-05-21-phase3-permissions-design.md @@ -0,0 +1,347 @@ +# Phase 3 — HITL Permissions Design (sub-project 4.5) + +**Status:** Spec +**Date:** 2026-05-21 +**Builds on:** sub-project 4 (workspace capability + pluggable backends, PR #170) + +## Goal + +Replace the workspace capability's hard-refuse-on-path-jail-escape behavior with a human-in-the-loop permission flow, and add the same prompt-for-approval gating to `runBash`. The user sees a permission prompt on first occurrence of any non-pre-approved bash command or out-of-workspace path operation; they can grant once, always-for-pattern, or deny. Persisted "always" decisions live in a project-local `.dawn/permissions.json` that's gitignored by default. Production deployments run in non-interactive or bypass mode with a curated allow/deny list. + +## Architecture + +A new `@dawn-ai/permissions` package ships the pattern-matching engine, the persistence store, and the public types. The existing workspace capability gains a permission check between the path-jail / bash invocation and the actual backend call. When a check returns "unknown", the capability emits LangGraph's `interrupt()` with a `PermissionRequest` payload; the parent run pauses; the SSE stream surfaces `event: interrupt` to the client; the client resolves the prompt and POSTs to `/threads/{thread_id}/resume`; the runtime resumes the graph with `Command({resume})` and the capability acts on the decision. + +The three operating modes — `"interactive"`, `"non-interactive"`, `"bypass"` — encode the realistic deployment shapes (interactive dev, production with config-only enforcement, intentional bypass for trusted environments). Mode comes from `dawn.config.ts`'s new `permissions` field or the `DAWN_PERMISSIONS_MODE` env var. 
+ +The persistence file format mirrors the runtime API: a tool-keyed `{allow, deny}` object. Same shape in `.dawn/permissions.json` (runtime additions, per-developer, gitignored) and in `dawn.config.ts`'s `permissions.allow` / `permissions.deny` (design-time baseline, checked in). Effective permissions = config + runtime, with deny always winning. + +## Design Decisions + +### Scope: path-jail escapes + every bash command + +Matches the industry-standard tool-call-level gating used by Claude Code, OpenAI Codex CLI, and Cursor. Bash gets prompted on every first occurrence (Claude Code parity); path escapes that would previously hard-refuse now prompt. Other tools (`readFile`, `writeFile`, `listDir`) only prompt when the resolved path is outside the workspace — staying inside is silent, matching today's behavior. + +Rejected: "risky-pattern only" gating for bash. The set of risky commands is impossible to enumerate completely; missing patterns become silent failures of judgment. Prompting every first-occurrence command and relying on prefix-matched "always" persistence is the industry-validated approach. + +Rejected: generalized capability-driven gating (any tool can declare itself gateable). Premature surface design without empirical signal on what authors need. Build on 4.5's interrupt/resume/persistence infrastructure later if real demand surfaces (sub-project 4.6 territory). + +### Three approval scopes: Once / Always-for-pattern / Deny + +The user sees three buttons on every prompt: + +- **Once** — allows this single call. Next equivalent call prompts again. +- **Always** — persists an allow entry using the suggested pattern (prefix-matched). Future matching calls are silent. +- **Deny** — refuses this call. The tool returns an error to the agent. The agent can recover (apologize, try a different approach). No persistent deny entry — that's deferred to 4.6. + +Rejected "for-session" as a fourth scope. Adds cognitive load with marginal value. 
If users need transient approvals, they can grant Once repeatedly. + +### Pattern matching: smart defaults, no DSL + +- **Bash:** suggested pattern is the first 1–2 whitespace-separated tokens. `npm install react` → `npm install`. `ls` → `ls`. `git status` → `git status`. (Two tokens is the sweet spot — covers `npm install ` and `npm test` separately, vs lumping them as `npm`.) +- **Path:** suggested pattern is the parent directory of the requested path, ending with `/`. `/Users/blove/.zshrc` → `/Users/blove/`. `/var/log/app.log` → `/var/log/`. +- **Matching:** candidate is a prefix-match against the stored pattern. A bash candidate matches if its first tokens equal the pattern; a path candidate matches if it starts with the pattern. + +Rejected: a glob / regex DSL. Industry standard is prefix-matching with smart defaults; complex pattern editors add surface without proportional value at this stage. + +Rejected: an interactive pattern-editor in the prompt UI (Claude Code does this). For Dawn's smoke client (throwaway), the suggested pattern is fixed. Power users can edit `.dawn/permissions.json` directly if they need a narrower or broader pattern. + +### Persistence: `.dawn/permissions.json`, project-local, gitignored + +```json +{ + "version": 1, + "allow": { + "bash": ["npm install", "ls", "git status"], + "readFile": ["/Users/blove/"], + "writeFile": ["/tmp/dawn-scratch/"], + "listDir": ["/Users/blove/Documents/"] + }, + "deny": {} +} +``` + +Tool-keyed top-level structure. Arrays of prefix patterns per tool. Forward-compatible (new tool category = new key, zero migration). More concise than Claude Code's `Tool(pattern)` notation; trivially parseable; easy to hand-edit. + +The store appends `.dawn/` to the project's `.gitignore` on first write (idempotent). Manual edits to `.dawn/permissions.json` while the dev server is running require a server restart — the store does not live-watch the file. 
+ +### Three modes: `interactive` / `non-interactive` / `bypass` + +```ts +permissions: { + mode: "interactive" | "non-interactive" | "bypass" // default: "interactive" + allow: { bash: ["npm install"], readFile: ["/Users/blove/"] } + deny: { bash: ["rm -rf", "sudo"] } +} +``` + +| Mode | Prompts? | `config.allow` | `config.deny` | `.dawn/permissions.json` | Unknown commands | Path-jail | +|---|---|---|---|---|---|---| +| `interactive` (default) | Yes | Auto-allow | Hard-refuse | Auto-allow | Prompt | Triggers prompt on escape | +| `non-interactive` | No | Auto-allow | Hard-refuse | Ignored | Hard-refuse (fail-closed) | Intact, hard-refuse on escape | +| `bypass` | No | Ignored | Ignored | Ignored | Run unchecked | Disabled | + +Production should use `non-interactive` with a curated `config.allow` and `config.deny`. CI should use `non-interactive` as well. Local development uses the default `interactive`. `bypass` is for explicit "operator knows what they're doing" scenarios (screencast, internal admin tools) — using it disables Dawn's safety boundary entirely; the mode name + docs make that obvious. + +### Config-seeded baseline + runtime additions + +`config.allow` and `config.deny` form the design-time baseline (committed to git, shared across developers). `.dawn/permissions.json` is the per-developer runtime additive (gitignored, accumulated by clicking "Always"). Effective permissions: + +``` +effective.allow[tool] = (config.allow[tool] ?? []) ∪ (runtime.allow[tool] ?? []) +effective.deny[tool] = (config.deny[tool] ?? []) ∪ (runtime.deny[tool] ?? []) +``` + +Both files use the same shape — runtime entries can be promoted to config by hand-copying. + +### Env-var escape hatch: `DAWN_PERMISSIONS_MODE` + +Setting `DAWN_PERMISSIONS_MODE=non-interactive` (or `=bypass`, `=interactive`) overrides `dawn.config.ts`'s `permissions.mode` for the session. Useful for ad-hoc switching without editing config (e.g., `DAWN_PERMISSIONS_MODE=bypass pnpm dev` during a demo). 
+ +### SSE envelope shape (forward-compatible with Agent Protocol) + +``` +event: interrupt +data: { + "interrupt_id": "perm-1779200000-x7y2z", + "type": "permission-request", + "kind": "command" | "path", + "detail": { + // for kind=="command": + "command": "npm install react", + "suggestedPattern": "npm install" + // for kind=="path": + "operation": "readFile" | "writeFile" | "listDir", + "path": "/Users/blove/.zshrc", + "suggestedPattern": "/Users/blove/" + }, + "thread_id": "smoke-coord-1", + "call_id": "task-abc" // present when the interrupt fires inside a subagent +} +``` + +`interrupt_id` correlates prompt-to-resume. `suggestedPattern` is what the capability will persist if the user clicks "Always" — surfaced in the envelope so the client can render transparent button labels (e.g., "Allow always for `npm install`"). + +### Resume endpoint + +``` +POST /threads/{thread_id}/resume +content-type: application/json + +{ + "interrupt_id": "perm-1779200000-x7y2z", + "decision": "once" | "always" | "deny" +} +``` + +Runtime invokes `graph.invoke(Command({resume: decision}), {configurable: {thread_id}})`. The parked graph resumes, the capability acts on the decision, downstream SSE events continue normally. + +**Failure modes:** + +- Client closes SSE stream before resuming → run stays parked in the LangGraph checkpoint. Next invocation of the thread re-surfaces the interrupt. +- Stale `interrupt_id` → 409 with `{ error: "no pending interrupt with that id" }`. +- Mismatched `thread_id` → 400. + +This shape is **Agent-Protocol-compatible** — sub-project 7 will implement the spec on top of this without refactoring 4.5. + +### Web client UX (chat demo only) + +The chat-web smoke client is throwaway, so the UX bar is just "make the prompt usable." When `event: interrupt` with `type: "permission-request"` arrives: + +1. Pause auto-scroll. +2. Render an inline panel above the event log showing the operation + three buttons (Once / Always for `<suggestedPattern>` / Deny). +3. 
On click, POST to `/api/permission-resume` (a new Next.js route proxy) which forwards to Dawn's resume endpoint. +4. Hide the panel; event log resumes streaming. + +Multiple pending interrupts (e.g., subagent emits an interrupt while parent is parked): queue one at a time, oldest first. Subagent interrupts include the subagent name in the panel header ("research subagent wants to..."). + +### Path-jail in bypass mode + +`mode: "bypass"` disables the workspace capability's path-jail entirely. `readFile("/etc/passwd")` proceeds, `writeFile("/etc/hosts", ...)` writes. This is intentional — bypass mode means "I trust the agent fully" — but it's also dangerous, so: + +- The mode name + docs make the implication explicit +- A console.warn fires on capability load: `[dawn:permissions] mode=bypass — path-jail disabled, all bash unrestricted. Do not use in production.` + +## Component Contracts + +### `@dawn-ai/permissions` types + +```ts +export interface PermissionsFile { + readonly version: 1 + readonly allow: Readonly<Record<string, readonly string[]>> + readonly deny: Readonly<Record<string, readonly string[]>> +} + +export type PermissionMode = "interactive" | "non-interactive" | "bypass" + +export interface PermissionRequest { + readonly interruptId: string + readonly kind: "command" | "path" + readonly detail: CommandDetail | PathDetail + readonly threadId: string + readonly callId?: string // when emitted from inside a subagent +} + +export interface CommandDetail { + readonly command: string + readonly suggestedPattern: string // first 1-2 tokens +} + +export interface PathDetail { + readonly path: string + readonly operation: "readFile" | "writeFile" | "listDir" + readonly suggestedPattern: string // parent dir, trailing slash +} + +export type PermissionDecision = "once" | "always" | "deny" + +export interface PermissionsStore { + load(): Promise<void> + match(tool: string, candidate: string): "allow" | "deny" | "unknown" + addAllow(tool: string, pattern: string): Promise<void> + mode: PermissionMode +} + +export function 
createPermissionsStore(opts: { + readonly appRoot: string + readonly config: PermissionsFile | undefined + readonly mode: PermissionMode +}): PermissionsStore +``` + +### `CapabilityMarkerContext` extension + +```ts +export interface CapabilityMarkerContext { + // ... existing fields + readonly permissions?: PermissionsStore // present when workspace capability is active +} +``` + +### `DawnConfig` extension + +```ts +export interface DawnConfig { + readonly appDir?: string + readonly backends?: { /* unchanged */ } + readonly permissions?: { + readonly mode?: PermissionMode + readonly allow?: Readonly<Record<string, readonly string[]>> + readonly deny?: Readonly<Record<string, readonly string[]>> + } +} +``` + +### Workspace capability changes + +For each of the three path-taking tools (`readFile`, `writeFile`, `listDir`), the `run()` function takes the following shape (shown here for `readFile`): + +```ts +async (input, ctx) => { + const { path } = SCHEMA.parse(input) + + // 1. Resolve + jail + let safe: string + try { + safe = pathJail(path, workspaceRoot) + } catch { + // Jail escape. In bypass mode, proceed anyway. Otherwise, gate. + if (permissions.mode === "bypass") { + safe = resolve(workspaceRoot, path) // absolute, but outside workspace + } else { + const decision = await requestPermission(permissions, "readFile", path, ctx) + if (decision === "deny") { + throw new Error(`Permission denied by user: ${path}`) + } + safe = resolve(workspaceRoot, path) + } + } + + // 2. Backend call + return fs.readFile(safe, backendContext(workspaceRoot, ctx.signal)) +} +``` + +For `runBash`, the gate fires unconditionally before invoking the backend (every bash command is gated when mode is interactive). + +The `requestPermission` helper handles: matching against the store first (allow/deny short-circuits); emitting `interrupt()` on unknown; receiving the resume; calling `addAllow` on "always"; returning the final decision. + +### Resume endpoint registration + +Dawn's CLI dev server registers `POST /threads/:thread_id/resume` alongside the existing `/runs/stream`. 
Handler: + +```ts +async function handleResume(req): Promise<Response> { + const { thread_id } = req.params + const { interrupt_id, decision } = await req.json() + const result = await runtime.resume({ threadId: thread_id, interruptId: interrupt_id, decision }) + if (result.kind === "stale") return new Response(JSON.stringify({ error: "no pending interrupt with that id" }), { status: 409 }) + if (result.kind === "no-thread") return new Response(null, { status: 400 }) + return new Response(null, { status: 200 }) +} +``` + +The runtime maintains an in-memory `Map` for active interrupts so it can validate `interrupt_id` and forward the `Command({resume})` to the right graph. + +## File structure + +### New package + +``` +packages/permissions/ +├── package.json # @dawn-ai/permissions +├── tsconfig.json +├── vitest.config.ts +├── src/ +│ ├── index.ts +│ ├── types.ts +│ ├── permissions-store.ts +│ ├── pattern-matching.ts +│ └── suggested-pattern.ts +└── test/ + ├── permissions-store.test.ts + ├── pattern-matching.test.ts + └── suggested-pattern.test.ts +``` + +### New + modified in existing packages + +``` +packages/core/src/capabilities/built-in/workspace.ts # modified — adds permission check; supports bypass mode +packages/core/src/capabilities/types.ts # adds `permissions` field to CapabilityMarkerContext +packages/core/src/types.ts # extends DawnConfig with permissions field +packages/core/test/capabilities/workspace.test.ts # adds interrupt-flow tests +packages/cli/src/lib/runtime/execute-route.ts # constructs PermissionsStore + threads into context +packages/cli/src/lib/runtime/resume-endpoint.ts # new — HTTP handler +packages/cli/src/lib/server/ # registers the resume route +packages/cli/test/resume-endpoint.test.ts # new +packages/langchain/src/agent-adapter.ts # propagates interrupt() → `event: interrupt`; handles Command({resume}) +examples/chat/server/dawn.config.ts # demo: seeded allow + deny +examples/chat/web/app/api/permission-resume/route.ts # new — proxy 
+examples/chat/web/app/page.tsx # adds inline permission panel +memory/project_phase_status.md # mark sub-project 4.5 in progress +``` + +## Testing strategy + +Per Section 7 of the brainstorm — unit tests for pattern matching, suggested-pattern, store; integration test for resume endpoint; extended workspace capability tests for interrupt flow; manual Chrome MCP smoke covering interactive prompts (once / always / deny), config-only mode, bypass mode, subagent-emitted interrupts. + +No new LLM-driven CI tests — same policy as existing capabilities. + +## Out of scope (deferred) + +- **Persistent "deny always" entries** (sub-project 4.6) — schema accommodates a `deny` array but no UI yet for setting one. Today's deny path is per-call. +- **Generalized capability-driven gating** (sub-project 4.6) — any capability or user tool can declare "this operation needs confirmation." Builds on 4.5's interrupt/resume/persistence infrastructure. +- **Interactive pattern editor in the prompt UI** — power users edit `.dawn/permissions.json` directly. Pattern-editing in the web client is throwaway-demo territory. +- **Two-tier config** (project + user-global `~/.dawn/permissions.json`) — single project-local file is sufficient until someone asks for it. +- **Per-route permission overrides** — global per-app for v1. +- **Polished web client** — current chat-web is throwaway. The eventual polished client (separate sub-project) will have a true modal, optimistic UI, etc. + +## Known risks + +- **`bypass` mode disables the path-jail.** This is the explicit semantic but it's also load-bearing safety. Mitigation: warn loudly on capability load when bypass is active; document the implication in every reference to the mode. +- **Concurrent `addAllow` calls** could race on disk write. Mitigation: single-flight write queue in `PermissionsStore`. 
+- **The resume endpoint requires a stable `thread_id`.** If the chat-web client generates a new `thread_id` per page load (current behavior), then closing the tab loses the parked run. Mitigation: document the limitation; sub-project 7's Agent Protocol implementation introduces thread persistence properly. +- **Pattern matching false-positives.** Approving `npm install` once allows `npm install --global some-malicious-package`. Mitigation: docs explicitly call this out; users who want strict matching add exact patterns to `dawn.config.ts`'s `allow` (no `:*` semantics yet — every entry is prefix). Future schema extension could add exact-match syntax. +- **Production deployments forgetting to switch from `interactive` to `non-interactive`** would block forever waiting for prompts no one sees. Mitigation: docs strongly recommend `non-interactive` for production; `DAWN_PERMISSIONS_MODE` env var lets infra set it without touching code. diff --git a/examples/chat/README.md b/examples/chat/README.md index 4ab77e19..7772c331 100644 --- a/examples/chat/README.md +++ b/examples/chat/README.md @@ -1,13 +1,17 @@ # Chat — canonical Dawn harness example -> **Status:** foundational harness primitives (filesystem + bash) + the **planning** and -> **skills** capabilities. Subagents, sandbox isolation, and auto-summarization are still -> deferred — see "Deferred" below. +> **Status:** foundational harness primitives (filesystem + bash) plus the **planning**, +> **skills**, **subagents**, and **workspace** capabilities. Pluggable backend +> implementations (in-memory, remote sandbox) are available — see `dawn.config.ts`. HITL +> permission gating and auto-summarization are still deferred — see "Deferred" below. 
## What this shows - Dawn route discovery and the `tools/` convention -- Filesystem tools (read/write/list) + bash, path-jailed to `./workspace` +- **Workspace capability** — when a route's working directory contains `workspace/`, Dawn + auto-contributes `readFile`/`writeFile`/`listDir`/`runBash` tools wired through pluggable + backends. The filesystem and exec backends default to local node:fs / child_process; swap + them in `dawn.config.ts` for in-memory storage, remote sandboxes, etc. - `AGENTS.md` memory autoload — Dawn auto-injects `workspace/AGENTS.md` into the system prompt on every turn; the agent updates it via `writeFile` - **Planning** — `plan.md` in the route directory opts the agent into the built-in `writeTodos` tool, a `todos` state channel, and a `plan_update` SSE event. Open the @@ -17,6 +21,10 @@ the agent's system prompt (name + description). The agent calls `readSkill({ name })` to load a skill's full body on demand. Two example skills ship with the demo: `workspace-conventions` and `recover-from-failure`. +- **Subagents** — `/coordinator` dispatches to specialist subagents (`research`, + `summarizer`) via an auto-generated `task({ subagent, input })` tool. Subagent runs + bubble `subagent.*` SSE events with `call_id` correlation. Pick the `/coordinator` route + in the smoke client to drive it. 
- End-to-end streaming from a Next.js client over SSE ## Model choice @@ -39,17 +47,24 @@ pnpm dev ``` examples/chat/ -├── server/ # @dawn-example/chat-server (Dawn route + tools) -│ └── src/app/chat/ -│ ├── index.ts # agent({ model, systemPrompt }) -│ ├── state.ts -│ ├── system-prompt.ts -│ ├── workspace-path.ts -│ ├── plan.md # presence enables planning; seeds initial todos -│ └── tools/ # listDir, readFile, writeFile, runBash +├── server/ # @dawn-example/chat-server (Dawn routes) +│ ├── dawn.config.ts # appDir + optional backends config +│ ├── workspace/ # shared workspace (AGENTS.md lives here) +│ └── src/app/ +│ ├── chat/ # /chat route +│ │ ├── index.ts # agent({ model, systemPrompt }) +│ │ ├── state.ts +│ │ ├── system-prompt.ts +│ │ ├── plan.md # presence enables planning +│ │ └── skills/ # SKILL.md files per skill +│ └── coordinator/ # /coordinator route + subagents +│ ├── index.ts +│ └── subagents/ +│ ├── research/index.ts +│ └── summarizer/index.ts └── web/ # @dawn-example/chat-web (Next.js smoke client) └── app/ - ├── page.tsx # textarea + Send + raw event log + ├── page.tsx # route picker + textarea + Send + raw event log └── api/chat/route.ts # SSE proxy ``` @@ -63,8 +78,8 @@ shell expansion — all possible. Do not point untrusted users at this example. 
These v1 deferrals are the explicit forcing function for Dawn's opinionated harness work: -- Subagent delegation (`task`-style tool) — needs first-class subagent declarations -- Real sandbox isolation for `runBash` — needs pluggable execution backends +- HITL permission gating — interrupt the run when a path is outside the workspace or a + command is high-risk, ask the user, persist the decision - Tool-output offloading and context summarization — needs lifecycle hooks - Nested-object tool inputs (e.g., `edit_file({ edits: [{ old, new }] })`) — typegen extension - Polished web UI — wait for harness primitives to stabilize diff --git a/examples/chat/server/dawn.config.ts b/examples/chat/server/dawn.config.ts index b1c6ea43..16c12f85 100644 --- a/examples/chat/server/dawn.config.ts +++ b/examples/chat/server/dawn.config.ts @@ -1 +1,14 @@ -export default {} +export default { + appDir: "src/app", + permissions: { + // Default mode (omitted) is "interactive" — the demo shows the permission flow. + // Seed a few obviously-safe commands so prompt fatigue is reasonable on first run. + allow: { + bash: ["ls", "pwd", "cat", "echo", "head", "tail", "wc"], + }, + // Block obviously-destructive patterns even when interactive. + deny: { + bash: ["rm -rf", "sudo", "chmod 777"], + }, + }, +} diff --git a/examples/chat/server/src/app/chat/system-prompt.ts b/examples/chat/server/src/app/chat/system-prompt.ts index 50aef8a9..54fe483b 100644 --- a/examples/chat/server/src/app/chat/system-prompt.ts +++ b/examples/chat/server/src/app/chat/system-prompt.ts @@ -5,7 +5,7 @@ You operate in a sandboxed \`workspace/\` directory. You have four tools: - \`listDir({ path })\` — list directory contents. Pass "." for the workspace root. - \`readFile({ path })\` — read a UTF-8 text file (max 256 KiB). - \`writeFile({ path, content })\` — create or overwrite a text file. -- \`runBash({ command, timeoutSeconds })\` — run a shell command in the workspace. 
Use \`timeoutSeconds: 30\` unless the task clearly needs longer (max 120). +- \`runBash({ command })\` — run a shell command in the workspace. Returns \`{ stdout, stderr, exitCode }\`. Commands time out after 30 seconds by default. Memory convention: when you complete meaningful work, update \`AGENTS.md\` (via \`writeFile\`) so future-you remembers what mattered. Dawn auto-injects the current contents of \`workspace/AGENTS.md\` into your system prompt on every turn under the "# Memory" heading — you don't need to read or list it manually. diff --git a/examples/chat/server/src/app/chat/tools/listDir.ts b/examples/chat/server/src/app/chat/tools/listDir.ts deleted file mode 100644 index 02e2ea7a..00000000 --- a/examples/chat/server/src/app/chat/tools/listDir.ts +++ /dev/null @@ -1,16 +0,0 @@ -import { readdirSync, statSync } from "node:fs" -import { resolveWorkspacePath, workspaceRoot } from "../workspace-path.js" - -/** - * List the entries in a directory inside the workspace. - * Pass "." to list the workspace root. Subdirectories are suffixed with "/". - */ -export default async (input: { readonly path: string }): Promise => { - const dir = resolveWorkspacePath(workspaceRoot(), input.path) - const entries = readdirSync(dir) - entries.sort() - return entries.map((name) => { - const isDir = statSync(`${dir}/${name}`).isDirectory() - return isDir ? `${name}/` : name - }) -} diff --git a/examples/chat/server/src/app/chat/tools/readFile.ts b/examples/chat/server/src/app/chat/tools/readFile.ts deleted file mode 100644 index effb5871..00000000 --- a/examples/chat/server/src/app/chat/tools/readFile.ts +++ /dev/null @@ -1,16 +0,0 @@ -import { readFileSync, statSync } from "node:fs" -import { resolveWorkspacePath, workspaceRoot } from "../workspace-path.js" - -const MAX_BYTES = 256 * 1024 - -/** - * Read a UTF-8 text file from the workspace. Rejects files larger than 256 KiB. 
- */ -export default async (input: { readonly path: string }): Promise => { - const file = resolveWorkspacePath(workspaceRoot(), input.path) - const size = statSync(file).size - if (size > MAX_BYTES) { - throw new Error(`File too large: ${size} bytes (limit ${MAX_BYTES})`) - } - return readFileSync(file, "utf8") -} diff --git a/examples/chat/server/src/app/chat/tools/runBash.ts b/examples/chat/server/src/app/chat/tools/runBash.ts deleted file mode 100644 index 09a3c5c3..00000000 --- a/examples/chat/server/src/app/chat/tools/runBash.ts +++ /dev/null @@ -1,37 +0,0 @@ -import { spawn } from "node:child_process" -import { workspaceRoot } from "../workspace-path.js" - -const MAX_TIMEOUT_SECONDS = 120 - -/** - * Run a bash command in the workspace directory. Captures stdout and stderr, - * enforces a hard timeout, and returns the combined output with an exit-code - * footer. NOT a sandbox — do not run untrusted commands. - */ -export default async ( - input: { readonly command: string; readonly timeoutSeconds: number }, -): Promise => { - const timeout = Math.min(Math.max(1, input.timeoutSeconds), MAX_TIMEOUT_SECONDS) - const cwd = workspaceRoot() - - return new Promise((resolveResult) => { - const child = spawn("bash", ["-c", input.command], { cwd }) - let output = "" - child.stdout.on("data", (chunk) => { - output += chunk.toString() - }) - child.stderr.on("data", (chunk) => { - output += chunk.toString() - }) - - const timer = setTimeout(() => { - child.kill("SIGKILL") - output += `\n[killed: exceeded ${timeout}s timeout]` - }, timeout * 1000) - - child.on("close", (code) => { - clearTimeout(timer) - resolveResult(`${output}\n[exit ${code ?? 
"?"}]`) - }) - }) -} diff --git a/examples/chat/server/src/app/chat/tools/writeFile.ts b/examples/chat/server/src/app/chat/tools/writeFile.ts deleted file mode 100644 index dd180cab..00000000 --- a/examples/chat/server/src/app/chat/tools/writeFile.ts +++ /dev/null @@ -1,17 +0,0 @@ -import { mkdirSync, writeFileSync } from "node:fs" -import { dirname } from "node:path" -import { resolveWorkspacePath, workspaceRoot } from "../workspace-path.js" - -/** - * Write a UTF-8 text file to the workspace. Overwrites existing files. - * Creates parent directories as needed. Returns a one-line summary. - */ -export default async ( - input: { readonly path: string; readonly content: string }, -): Promise => { - const file = resolveWorkspacePath(workspaceRoot(), input.path) - mkdirSync(dirname(file), { recursive: true }) - writeFileSync(file, input.content, "utf8") - const bytes = Buffer.byteLength(input.content, "utf8") - return `wrote ${bytes} bytes to ${input.path}` -} diff --git a/examples/chat/server/src/app/chat/workspace-path.test.ts b/examples/chat/server/src/app/chat/workspace-path.test.ts deleted file mode 100644 index 6d4926e4..00000000 --- a/examples/chat/server/src/app/chat/workspace-path.test.ts +++ /dev/null @@ -1,54 +0,0 @@ -import { mkdtempSync, mkdirSync, rmSync, symlinkSync, writeFileSync } from "node:fs" -import { tmpdir } from "node:os" -import { join } from "node:path" -import { afterEach, beforeEach, describe, expect, it } from "vitest" -import { resolveWorkspacePath } from "./workspace-path.js" - -describe("resolveWorkspacePath", () => { - let root: string - let workspace: string - - beforeEach(() => { - root = mkdtempSync(join(tmpdir(), "dawn-chat-")) - workspace = join(root, "workspace") - mkdirSync(workspace, { recursive: true }) - }) - - afterEach(() => { - rmSync(root, { recursive: true, force: true }) - }) - - it("resolves a simple relative path inside the workspace", () => { - const resolved = resolveWorkspacePath(workspace, "notes.md") - 
expect(resolved).toBe(join(workspace, "notes.md")) - }) - - it("resolves nested paths", () => { - const resolved = resolveWorkspacePath(workspace, "a/b/c.txt") - expect(resolved).toBe(join(workspace, "a/b/c.txt")) - }) - - it("treats '.' as the workspace root", () => { - expect(resolveWorkspacePath(workspace, ".")).toBe(workspace) - }) - - it("rejects absolute paths", () => { - expect(() => resolveWorkspacePath(workspace, "/etc/passwd")).toThrow(/absolute/i) - }) - - it("rejects paths that escape via ..", () => { - expect(() => resolveWorkspacePath(workspace, "../escape.txt")).toThrow(/outside workspace/i) - }) - - it("rejects paths that escape after normalization", () => { - expect(() => resolveWorkspacePath(workspace, "a/../../escape.txt")).toThrow(/outside workspace/i) - }) - - it("rejects symlinks that point outside the workspace", () => { - const outside = join(root, "outside.txt") - writeFileSync(outside, "secret") - const link = join(workspace, "link.txt") - symlinkSync(outside, link) - expect(() => resolveWorkspacePath(workspace, "link.txt")).toThrow(/outside workspace/i) - }) -}) diff --git a/examples/chat/server/src/app/chat/workspace-path.ts b/examples/chat/server/src/app/chat/workspace-path.ts deleted file mode 100644 index d775a415..00000000 --- a/examples/chat/server/src/app/chat/workspace-path.ts +++ /dev/null @@ -1,49 +0,0 @@ -import { existsSync, mkdirSync, realpathSync } from "node:fs" -import { isAbsolute, normalize, relative, resolve } from "node:path" - -/** - * Resolve a user-supplied path against a workspace root, rejecting anything - * that would escape the workspace. - * - * Rules: - * - Absolute paths are rejected outright. - * - The path is normalized; any `..` segment that escapes the workspace is rejected. - * - If the resolved path (or any ancestor) is a symlink, its real path must - * also be inside the workspace. - * - * The workspace directory is created if it does not exist. 
- */ -export function resolveWorkspacePath(workspaceRoot: string, userPath: string): string { - if (!existsSync(workspaceRoot)) { - mkdirSync(workspaceRoot, { recursive: true }) - } - - if (isAbsolute(userPath)) { - throw new Error(`Path is absolute: ${userPath}`) - } - - const normalized = normalize(userPath) - const resolved = resolve(workspaceRoot, normalized) - const rel = relative(workspaceRoot, resolved) - if (rel.startsWith("..")) { - throw new Error(`Path is outside workspace: ${userPath}`) - } - - // Symlink check: if the path exists and resolves outside, reject. - if (existsSync(resolved)) { - const real = realpathSync(resolved) - const realRel = relative(realpathSync(workspaceRoot), real) - if (realRel.startsWith("..")) { - throw new Error(`Path resolves outside workspace via symlink: ${userPath}`) - } - } - - return resolved -} - -/** - * Resolve the workspace root for the example. Lives at `/workspace`. - */ -export function workspaceRoot(): string { - return resolve(process.cwd(), "workspace") -} diff --git a/examples/chat/server/src/app/coordinator/subagents/research/tools/listDir.ts b/examples/chat/server/src/app/coordinator/subagents/research/tools/listDir.ts deleted file mode 100644 index 02e2ea7a..00000000 --- a/examples/chat/server/src/app/coordinator/subagents/research/tools/listDir.ts +++ /dev/null @@ -1,16 +0,0 @@ -import { readdirSync, statSync } from "node:fs" -import { resolveWorkspacePath, workspaceRoot } from "../workspace-path.js" - -/** - * List the entries in a directory inside the workspace. - * Pass "." to list the workspace root. Subdirectories are suffixed with "/". - */ -export default async (input: { readonly path: string }): Promise => { - const dir = resolveWorkspacePath(workspaceRoot(), input.path) - const entries = readdirSync(dir) - entries.sort() - return entries.map((name) => { - const isDir = statSync(`${dir}/${name}`).isDirectory() - return isDir ? 
`${name}/` : name - }) -} diff --git a/examples/chat/server/src/app/coordinator/subagents/research/tools/readFile.ts b/examples/chat/server/src/app/coordinator/subagents/research/tools/readFile.ts deleted file mode 100644 index effb5871..00000000 --- a/examples/chat/server/src/app/coordinator/subagents/research/tools/readFile.ts +++ /dev/null @@ -1,16 +0,0 @@ -import { readFileSync, statSync } from "node:fs" -import { resolveWorkspacePath, workspaceRoot } from "../workspace-path.js" - -const MAX_BYTES = 256 * 1024 - -/** - * Read a UTF-8 text file from the workspace. Rejects files larger than 256 KiB. - */ -export default async (input: { readonly path: string }): Promise => { - const file = resolveWorkspacePath(workspaceRoot(), input.path) - const size = statSync(file).size - if (size > MAX_BYTES) { - throw new Error(`File too large: ${size} bytes (limit ${MAX_BYTES})`) - } - return readFileSync(file, "utf8") -} diff --git a/examples/chat/server/src/app/coordinator/subagents/research/workspace-path.ts b/examples/chat/server/src/app/coordinator/subagents/research/workspace-path.ts deleted file mode 100644 index d775a415..00000000 --- a/examples/chat/server/src/app/coordinator/subagents/research/workspace-path.ts +++ /dev/null @@ -1,49 +0,0 @@ -import { existsSync, mkdirSync, realpathSync } from "node:fs" -import { isAbsolute, normalize, relative, resolve } from "node:path" - -/** - * Resolve a user-supplied path against a workspace root, rejecting anything - * that would escape the workspace. - * - * Rules: - * - Absolute paths are rejected outright. - * - The path is normalized; any `..` segment that escapes the workspace is rejected. - * - If the resolved path (or any ancestor) is a symlink, its real path must - * also be inside the workspace. - * - * The workspace directory is created if it does not exist. 
- */ -export function resolveWorkspacePath(workspaceRoot: string, userPath: string): string { - if (!existsSync(workspaceRoot)) { - mkdirSync(workspaceRoot, { recursive: true }) - } - - if (isAbsolute(userPath)) { - throw new Error(`Path is absolute: ${userPath}`) - } - - const normalized = normalize(userPath) - const resolved = resolve(workspaceRoot, normalized) - const rel = relative(workspaceRoot, resolved) - if (rel.startsWith("..")) { - throw new Error(`Path is outside workspace: ${userPath}`) - } - - // Symlink check: if the path exists and resolves outside, reject. - if (existsSync(resolved)) { - const real = realpathSync(resolved) - const realRel = relative(realpathSync(workspaceRoot), real) - if (realRel.startsWith("..")) { - throw new Error(`Path resolves outside workspace via symlink: ${userPath}`) - } - } - - return resolved -} - -/** - * Resolve the workspace root for the example. Lives at `/workspace`. - */ -export function workspaceRoot(): string { - return resolve(process.cwd(), "workspace") -} diff --git a/examples/chat/web/app/api/permission-resume/route.ts b/examples/chat/web/app/api/permission-resume/route.ts new file mode 100644 index 00000000..be0681bf --- /dev/null +++ b/examples/chat/web/app/api/permission-resume/route.ts @@ -0,0 +1,31 @@ +import { NextRequest } from "next/server" + +export const runtime = "nodejs" +export const dynamic = "force-dynamic" + +export async function POST(req: NextRequest): Promise { + const serverUrl = process.env.DAWN_SERVER_URL ?? 
"http://127.0.0.1:3001" + const body = (await req.json()) as { + threadId: string + interruptId: string + decision: "once" | "always" | "deny" + } + + const upstream = await fetch( + `${serverUrl}/threads/${encodeURIComponent(body.threadId)}/resume`, + { + method: "POST", + headers: { "content-type": "application/json" }, + body: JSON.stringify({ + interrupt_id: body.interruptId, + decision: body.decision, + }), + }, + ) + + const text = await upstream.text() + return new Response(text, { + status: upstream.status, + headers: { "content-type": "application/json" }, + }) +} diff --git a/examples/chat/web/app/page.tsx b/examples/chat/web/app/page.tsx index 67c48672..bfd77d98 100644 --- a/examples/chat/web/app/page.tsx +++ b/examples/chat/web/app/page.tsx @@ -8,12 +8,39 @@ function newThreadId(): string { type RouteId = "chat" | "coordinator" +type PendingInterrupt = { + interruptId: string + type: string + kind: "command" | "path" + detail: { + command?: string + operation?: string + path?: string + suggestedPattern: string + } +} + export default function Page() { const [threadId, setThreadId] = useState(null) const [input, setInput] = useState("") const [events, setEvents] = useState([]) const [busy, setBusy] = useState(false) const [route, setRoute] = useState("chat") + const [pendingInterrupt, setPendingInterrupt] = useState(null) + + async function resolveInterrupt(decision: "once" | "always" | "deny") { + if (!pendingInterrupt || !threadId) return + await fetch("/api/permission-resume", { + method: "POST", + headers: { "content-type": "application/json" }, + body: JSON.stringify({ + threadId, + interruptId: pendingInterrupt.interruptId, + decision, + }), + }) + setPendingInterrupt(null) + } function switchRoute(next: RouteId) { if (next === route) return @@ -47,6 +74,7 @@ export default function Page() { const reader = res.body.getReader() const decoder = new TextDecoder() let buf = "" + let nextLineIsInterruptData = false while (true) { const { value, done } = 
await reader.read() if (done) break @@ -54,7 +82,27 @@ export default function Page() { const lines = buf.split("\n") buf = lines.pop() ?? "" for (const line of lines) { - if (line.trim()) setEvents((e) => [...e, line]) + if (!line.trim()) continue + if (line === "event: interrupt") { + nextLineIsInterruptData = true + setEvents((e) => [...e, line]) + continue + } + if (nextLineIsInterruptData && line.startsWith("data: ")) { + try { + const payload = JSON.parse(line.slice("data: ".length)) + setPendingInterrupt({ + interruptId: payload.interruptId, + type: payload.type, + kind: payload.kind, + detail: payload.detail, + }) + } catch { + /* ignore parse errors */ + } + nextLineIsInterruptData = false + } + setEvents((e) => [...e, line]) } } if (buf.trim()) setEvents((e) => [...e, buf]) @@ -111,6 +159,52 @@ export default function Page() { > {busy ? "Streaming…" : "Send"} + {pendingInterrupt && ( +
+ ⚠️ Permission request +

+ {pendingInterrupt.kind === "command" + ? "The agent wants to run command:" + : `The agent wants to ${pendingInterrupt.detail.operation}:`} +

+ + {pendingInterrupt.kind === "command" + ? pendingInterrupt.detail.command + : pendingInterrupt.detail.path} + +
+ + + +
+
+ )}
 {
+  const { request, response, threadId } = options
+
+  if (!threadId) {
+    sendJson(response, 400, createRequestErrorBody("Missing thread_id in resume URL"))
+    return
+  }
+
+  const rawBody = await readRequestBody(request)
+  const parsedBody = parseJson(rawBody)
+  if (!parsedBody.ok || !isRecord(parsedBody.value)) {
+    sendJson(response, 400, createRequestErrorBody("Malformed resume request body"))
+    return
+  }
+
+  const body = parsedBody.value
+  const interruptId = typeof body.interrupt_id === "string" ? body.interrupt_id : undefined
+  const decision = body.decision
+  if (!interruptId) {
+    sendJson(response, 400, createRequestErrorBody("Missing interrupt_id"))
+    return
+  }
+  if (decision !== "once" && decision !== "always" && decision !== "deny") {
+    sendJson(response, 400, createRequestErrorBody("decision must be 'once', 'always', or 'deny'"))
+    return
+  }
+
+  const pending = getPending(threadId)
+  if (!pending) {
+    sendJson(response, 400, createRequestErrorBody("No parked interrupt for thread"))
+    return
+  }
+  if (pending.interruptId !== interruptId) {
+    sendJson(response, 409, createRequestErrorBody("Stale interrupt_id"))
+    return
+  }
+
+  pending.resolve(decision)
+  clearPending(threadId)
+  sendJson(response, 200, { ok: true })
+}
+
 const SHUTDOWN_ABORTED = Symbol("shutdown-aborted")
 
 async function raceRequestAgainstShutdown(
@@ -423,6 +493,7 @@ interface RunsWaitRequest {
       readonly mode: "agent" | "chain" | "graph" | "workflow"
       readonly route_id: string
       readonly route_path: string
+      readonly thread_id?: string
     }
   }
   readonly on_completion: "delete"
diff --git a/packages/cli/src/lib/runtime/check-tool-name-uniqueness.ts b/packages/cli/src/lib/runtime/check-tool-name-uniqueness.ts
index bfa0f00f..b817a02a 100644
--- a/packages/cli/src/lib/runtime/check-tool-name-uniqueness.ts
+++ b/packages/cli/src/lib/runtime/check-tool-name-uniqueness.ts
@@ -1,22 +1,46 @@
 export interface ToolNameCheckInput {
   readonly userTools: ReadonlyArray<{ readonly name: string }>
-  readonly capabilityTools: ReadonlyArray<{ readonly name: string }>
+  readonly capabilityTools: ReadonlyArray<{
+    readonly name: string
+    readonly overridable?: boolean // true: a same-named user tool replaces this tool instead of erroring
+  }>
   readonly reservedNames: ReadonlySet<string>
 }
 
 export type ToolNameCheckResult =
-  | { readonly ok: true }
+  | {
+      readonly ok: true
+      /**
+       * Capability tools with overridable entries removed when shadowed by
+       * a user tool. The runtime should use THIS list when composing the
+       * final tool set, not the input `capabilityTools`.
+       */
+      readonly effectiveCapabilityTools: ReadonlyArray<{
+        readonly name: string
+        readonly overridable?: boolean
+      }>
+    }
   | { readonly ok: false; readonly message: string }
 
 export function checkToolNameUniqueness(input: ToolNameCheckInput): ToolNameCheckResult {
-  const capNames = new Set(input.capabilityTools.map((t) => t.name))
-  for (const t of input.userTools) {
-    if (capNames.has(t.name)) {
+  const userNames = new Set(input.userTools.map((t) => t.name))
+  const effective: Array<{ readonly name: string; readonly overridable?: boolean }> = [] // capability tools that survive
+  // Pass 1: a capability tool whose name a user tool also uses is dropped if overridable, otherwise it is an error.
+  for (const cap of input.capabilityTools) {
+    if (userNames.has(cap.name)) {
+      if (cap.overridable) {
+        // User tool wins; drop the capability tool from the effective set.
+        continue
+      }
       return {
         ok: false,
-        message: `Capability conflict: tool name "${t.name}" is contributed by a capability and also defined in tools/. Remove the user tool or remove the capability marker file.`,
+        message: `Capability conflict: tool name "${cap.name}" is contributed by a capability and also defined in tools/. Remove the user tool or remove the capability marker file.`,
       }
     }
+    effective.push(cap)
+  }
+  // Pass 2: a user tool may not use a reserved name.
+  for (const t of input.userTools) {
     if (input.reservedNames.has(t.name)) {
       return {
         ok: false,
@@ -24,5 +48,6 @@ export function checkToolNameUniqueness(input: ToolNameCheckInput): ToolNameChec
       }
     }
   }
-  return { ok: true }
+  // Success: hand back the filtered capability list alongside ok.
+  return { ok: true, effectiveCapabilityTools: effective }
 }
diff --git a/packages/cli/src/lib/runtime/execute-route.ts b/packages/cli/src/lib/runtime/execute-route.ts
index e643ea85..90ff4276 100644
--- a/packages/cli/src/lib/runtime/execute-route.ts
+++ b/packages/cli/src/lib/runtime/execute-route.ts
@@ -10,15 +10,23 @@ import {
   createPlanningMarker,
   createSkillsMarker,
   createSubagentsMarker,
+  createWorkspaceMarker,
   discoverRoutes,
   findDawnApp,
+  loadDawnConfig,
   type ResolvedStateField,
   type RouteDefinition,
   type RouteManifest,
   resolveStateFields,
 } from "@dawn-ai/core"
 import { executeAgent, type SubagentResolver, streamAgent } from "@dawn-ai/langchain"
+import {
+  createPermissionsStore,
+  type PermissionMode,
+  type PermissionsStore,
+} from "@dawn-ai/permissions"
 import { type DawnAgent, isDawnAgent } from "@dawn-ai/sdk"
+import type { ExecBackend, FilesystemBackend } from "@dawn-ai/workspace"
 import { checkToolNameUniqueness } from "./check-tool-name-uniqueness.js"
 import { createDawnContext } from "./dawn-context.js"
 import { normalizeRouteModule } from "./load-route-kind.js"
@@ -132,6 +140,14 @@ export async function* streamResolvedRoute(options: {
   readonly routeId: string
   readonly routePath: string
   readonly signal?: AbortSignal
+  /**
+   * Stable per-conversation identifier forwarded to the agent-adapter as
+   * LangGraph's `thread_id`. When set, `interrupt()` calls park graph
+   * state in the checkpointer and the `/threads/:thread_id/resume`
+   * endpoint can replay them by handing a `PermissionDecision` back to the
+   * adapter via the pending-interrupts map.
+   */
+  readonly threadId?: string
 }): AsyncGenerator {
   const prepared = await prepareRouteExecution(options)
 
@@ -168,6 +184,7 @@ export async function* streamResolvedRoute(options: {
     ...(promptFragments && promptFragments.length > 0 ? { promptFragments } : {}),
     ...(streamTransformers && streamTransformers.length > 0 ? { streamTransformers } : {}),
     ...(subagentResolver ? { subagentResolver } : {}),
+    ...(options.threadId ? { threadId: options.threadId } : {}),
   })) {
     switch (chunk.type) {
       case "token":
@@ -186,6 +203,14 @@ export async function* streamResolvedRoute(options: {
       case "done":
         yield { type: "done", output: chunk.data }
         break
+      case "interrupt": {
+        // The agent-adapter registers the pending entry in
+        // pending-interrupts so the /threads/:thread_id/resume endpoint
+        // can correlate the POST. We just forward the chunk to the SSE
+        // consumer.
+        yield { type: "interrupt", data: chunk.data }
+        break
+      }
       default: {
         // Capability-contributed event types (e.g. plan_update from the planning capability).
         // The langchain layer widened AgentStreamChunk["type"] to allow arbitrary strings;
@@ -274,6 +299,7 @@ async function prepareRouteExecution(options: {
       createAgentsMdMarker(),
       createSkillsMarker(),
       createSubagentsMarker(),
+      createWorkspaceMarker(),
     ])
     const routeManifest = await discoverRoutes({ appRoot: options.appRoot })
     const descriptor =
@@ -286,10 +312,51 @@ async function prepareRouteExecution(options: {
     // invalidated in dev when the runtime rebuilds the manifest.
     const descriptorRouteMap = await getCachedDescriptorRouteMap(routeManifest)
 
+    let configBackends:
+      | { readonly filesystem?: FilesystemBackend; readonly exec?: ExecBackend }
+      | undefined
+    let permissionsConfig:
+      | {
+          readonly mode?: PermissionMode
+          readonly allow?: Readonly>
+          readonly deny?: Readonly>
+        }
+      | undefined
+    try {
+      const loaded = await loadDawnConfig({ appRoot: options.appRoot })
+      configBackends = loaded.config.backends
+      permissionsConfig = loaded.config.permissions
+    } catch {
+      // No dawn.config.ts (or unreadable). The workspace capability falls
+      // back to its defaults (localFilesystem + localExec); permissions
+      // defaults to "interactive" with empty allow/deny.
+    }
+
+    const envMode = process.env.DAWN_PERMISSIONS_MODE
+    const mode: PermissionMode =
+      envMode === "interactive" || envMode === "non-interactive" || envMode === "bypass"
+        ? envMode
+        : (permissionsConfig?.mode ?? "interactive")
+
+    const permissionsStore: PermissionsStore = createPermissionsStore({
+      appRoot: options.appRoot,
+      config: permissionsConfig
+        ? {
+            version: 1,
+            allow: permissionsConfig.allow ?? {},
+            deny: permissionsConfig.deny ?? {},
+          }
+        : undefined,
+      mode,
+    })
+    await permissionsStore.load()
+
     const applied = await applyCapabilities(registry, routeDir, {
       routeManifest,
       descriptor,
       descriptorRouteMap,
+      ...(configBackends ? { backends: configBackends } : {}),
+      permissions: permissionsStore,
     })
 
     if (applied.errors.length > 0) {
@@ -311,14 +378,16 @@ async function prepareRouteExecution(options: {
         for (const t of contribution.tools) {
           // Adapt capability-contributed tools (which lack filePath/scope)
           // into the DiscoveredToolDefinition shape used by the runtime.
+          const overridable = (t as unknown as { overridable?: boolean }).overridable
           capTools.push({
             ...(t.description !== undefined ? { description: t.description } : {}),
             filePath: ``,
             name: t.name,
+            ...(overridable ? { overridable: true } : {}),
             run: t.run,
             ...(t.schema !== undefined ? { schema: t.schema } : {}),
             scope: "route-local",
-          })
+          } as DiscoveredToolDefinition)
         }
       }
       if (contribution.stateFields) capStateFields.push(...contribution.stateFields)
@@ -331,13 +400,20 @@ async function prepareRouteExecution(options: {
     const RESERVED_TOOL_NAMES = new Set(["task"]) // names auto-generated by capabilities
     const check = checkToolNameUniqueness({
       userTools: tools.map((t) => ({ name: t.name })),
-      capabilityTools: capTools.map((t) => ({ name: t.name })),
+      capabilityTools: capTools.map((t) => ({
+        name: t.name,
+        ...((t as unknown as { overridable?: boolean }).overridable ? { overridable: true } : {}),
+      })),
       reservedNames: RESERVED_TOOL_NAMES,
     })
     if (!check.ok) {
       return { message: check.message, ok: false }
     }
 
+    // Use the effective set so overridden tools are dropped before merging.
+    const effectiveCapNames = new Set(check.effectiveCapabilityTools.map((t) => t.name))
+    const filteredCapTools = capTools.filter((t) => effectiveCapNames.has(t.name))
+
     const userStateNames = new Set((stateFields ?? []).map((f) => f.name))
     for (const f of capStateFields) {
       if (userStateNames.has(f.name)) {
@@ -348,7 +424,7 @@ async function prepareRouteExecution(options: {
       }
     }
 
-    tools = [...tools, ...capTools]
+    tools = [...tools, ...filteredCapTools]
     stateFields = stateFields ? [...stateFields, ...capStateFields] : capStateFields
     promptFragments = capPromptFragments
     streamTransformers = capStreamTransformers
diff --git a/packages/cli/src/lib/runtime/pending-interrupts.ts b/packages/cli/src/lib/runtime/pending-interrupts.ts
new file mode 100644
index 00000000..e716a15e
--- /dev/null
+++ b/packages/cli/src/lib/runtime/pending-interrupts.ts
@@ -0,0 +1,16 @@
+/**
+ * Re-exports the pending-interrupts registry from `@dawn-ai/langchain`.
+ *
+ * The map itself lives in the langchain package so the agent-adapter (which
+ * parks the stream on interrupt) and the CLI's resume endpoint (which
+ * dispatches the user's decision) share the same module-level state without
+ * introducing a circular dep cli <-> langchain.
+ */
+
+export type { PendingInterrupt, ResumeDecision } from "@dawn-ai/langchain"
+export {
+  __resetPendingForTests,
+  clearPending,
+  getPending,
+  setPending,
+} from "@dawn-ai/langchain"
diff --git a/packages/cli/src/lib/typegen/run-typegen.ts b/packages/cli/src/lib/typegen/run-typegen.ts
index baf69445..116d43b0 100644
--- a/packages/cli/src/lib/typegen/run-typegen.ts
+++ b/packages/cli/src/lib/typegen/run-typegen.ts
@@ -39,6 +39,37 @@ const SUBAGENTS_EXTRA_TOOL: ExtractedToolType = {
   outputType: `string`,
 }
 
+const WORKSPACE_EXTRA_TOOLS: readonly ExtractedToolType[] = [ // type stubs added to a route's extra tools when hasWorkspace() is true
+  {
+    name: "readFile",
+    description: "Read a UTF-8 file from the workspace.",
+    inputType: `{ path: string }`,
+    outputType: `string`,
+  },
+  {
+    name: "writeFile",
+    description: "Write a UTF-8 file inside the workspace.",
+    inputType: `{ path: string; content: string }`,
+    outputType: `string`,
+  },
+  {
+    name: "listDir",
+    description: "List entries in a workspace directory.",
+    inputType: `{ path?: string }`,
+    outputType: `string[]`,
+  },
+  {
+    name: "runBash",
+    description: "Run a shell command inside the workspace.",
+    inputType: `{ command: string }`,
+    outputType: `{ stdout: string; stderr: string; exitCode: number }`,
+  },
+] // NOTE(review): presumably mirrors the tools contributed by createWorkspaceMarker — keep the shapes in sync; TODO confirm
+
+function hasWorkspace(_routeDir: string): boolean { // NOTE(review): _routeDir is unused, so the answer is the same for every route
+  return existsSync(join(process.cwd(), "workspace")) // probes <cwd>/workspace; presumably cwd is the app root when typegen runs — TODO confirm
+}
+
 const SKILL_DIR_NAME_RE = /^[A-Za-z0-9][A-Za-z0-9_-]*$/
 
 function hasSkills(routeDir: string): boolean {
@@ -121,6 +152,9 @@ export async function runTypegen(options: {
     if (hasSubagents(route.routeDir)) {
       extraTools.push(SUBAGENTS_EXTRA_TOOL)
     }
+    if (hasWorkspace(route.routeDir)) {
+      extraTools.push(...WORKSPACE_EXTRA_TOOLS)
+    }
 
     routeToolTypes.push({
       pathname: route.pathname,
diff --git a/packages/cli/test/dev-command.test.ts b/packages/cli/test/dev-command.test.ts
index 25c2a823..8eacd05f 100644
--- a/packages/cli/test/dev-command.test.ts
+++ b/packages/cli/test/dev-command.test.ts
@@ -496,26 +496,22 @@ describe("dawn dev lifecycle", () => {
     expect(await response.json()).toMatchObject({ version: "healthy" })
   })
 
-  test("terminates the session for fatal appDir changes outside the discovered app root", {
+  test("terminates the session when configured appDir falls outside the discovered app root", {
     timeout: 15_000,
   }, async () => {
     const appRoot = await createFixtureApp({
-      "dawn.config.ts": "export default {};\n",
+      "dawn.config.ts": 'const appDir = "../outside";\nexport default { appDir };\n',
       "package.json": "{}\n",
       "src/app/support/[tenant]/index.ts": `export const graph = async () => ({ version: "healthy" });\n`,
     })
-    const configPath = join(appRoot, "dawn.config.ts")
+    // Ensure the configured appDir target (one level above appRoot) actually
+    // exists so we exercise the appRoot containment check rather than the
+    // "missing routes directory" check.
+    await mkdir(join(appRoot, "..", "outside"), { recursive: true })
 
     const dev = await startDevProcess({ cwd: appRoot })
     devProcesses.push(dev)
 
-    await dev.waitForReady()
-    await writeFile(
-      configPath,
-      'const appDir = "../outside";\nexport default { appDir };\n',
-      "utf8",
-    )
-
     const exitCode = await dev.waitForExit()
 
     expect(exitCode).toBe(1)
diff --git a/packages/cli/test/resume-endpoint.test.ts b/packages/cli/test/resume-endpoint.test.ts
new file mode 100644
index 00000000..092b3965
--- /dev/null
+++ b/packages/cli/test/resume-endpoint.test.ts
@@ -0,0 +1,132 @@
+import { mkdir, mkdtemp, rm, writeFile } from "node:fs/promises"
+import { tmpdir } from "node:os"
+import { join } from "node:path"
+
+import { afterEach, beforeEach, describe, expect, test } from "vitest"
+
+import { startRuntimeServer } from "../src/lib/dev/runtime-server.js"
+import { __resetPendingForTests, setPending } from "../src/lib/runtime/pending-interrupts.js"
+
+const tempDirs: string[] = []
+const servers: Array<{ close: () => Promise<void> }> = []
+// Reset the pending-interrupts registry before each test.
+beforeEach(() => {
+  __resetPendingForTests()
+})
+// Stop the servers first, then delete the fixture app roots they were started with.
+afterEach(async () => {
+  await Promise.all(servers.splice(0).map((server) => server.close()))
+  await Promise.all(tempDirs.splice(0).map((dir) => rm(dir, { force: true, recursive: true })))
+})
+
+describe("POST /threads/:thread_id/resume", () => { // entries are parked with setPending() directly; the noop route is never invoked
+  test("returns 200 and invokes resolve when interrupt_id matches", async () => {
+    const appRoot = await createFixtureApp({
+      "dawn.config.ts": "export default {};\n",
+      "package.json": "{}\n",
+      "src/app/noop/index.ts": "export const graph = async () => ({ ok: true });\n",
+    })
+    const server = await startRuntimeServer({ appRoot })
+    servers.push(server)
+
+    let resolvedWith: string | undefined
+    setPending("thread-1", {
+      interruptId: "perm-abc",
+      resolve: (decision) => {
+        resolvedWith = decision
+      },
+    })
+
+    const response = await fetch(new URL("/threads/thread-1/resume", server.url), {
+      body: JSON.stringify({ interrupt_id: "perm-abc", decision: "once" }),
+      headers: { "content-type": "application/json" },
+      method: "POST",
+    })
+
+    expect(response.status).toBe(200)
+    expect(await response.json()).toEqual({ ok: true })
+    expect(resolvedWith).toBe("once")
+  })
+
+  test("returns 409 when interrupt_id is stale", async () => {
+    const appRoot = await createFixtureApp({
+      "dawn.config.ts": "export default {};\n",
+      "package.json": "{}\n",
+      "src/app/noop/index.ts": "export const graph = async () => ({ ok: true });\n",
+    })
+    const server = await startRuntimeServer({ appRoot })
+    servers.push(server)
+
+    setPending("thread-2", { // parked id differs from the one posted below
+      interruptId: "perm-current",
+      resolve: () => {
+        throw new Error("resolve should not fire for stale interrupt_id")
+      },
+    })
+
+    const response = await fetch(new URL("/threads/thread-2/resume", server.url), {
+      body: JSON.stringify({ interrupt_id: "perm-old", decision: "once" }),
+      headers: { "content-type": "application/json" },
+      method: "POST",
+    })
+
+    expect(response.status).toBe(409)
+    const body = (await response.json()) as { error?: { message?: string } }
+    expect(body.error?.message).toMatch(/stale/i)
+  })
+
+  test("returns 400 when no pending interrupt exists for the thread", async () => {
+    const appRoot = await createFixtureApp({
+      "dawn.config.ts": "export default {};\n",
+      "package.json": "{}\n",
+      "src/app/noop/index.ts": "export const graph = async () => ({ ok: true });\n",
+    })
+    const server = await startRuntimeServer({ appRoot })
+    servers.push(server)
+
+    const response = await fetch(new URL("/threads/missing-thread/resume", server.url), { // nothing was parked for this thread id
+      body: JSON.stringify({ interrupt_id: "perm-x", decision: "deny" }),
+      headers: { "content-type": "application/json" },
+      method: "POST",
+    })
+
+    expect(response.status).toBe(400)
+    const body = (await response.json()) as { error?: { message?: string } }
+    expect(body.error?.message).toMatch(/no parked interrupt/i)
+  })
+
+  test("returns 400 when decision is not one of once/always/deny", async () => {
+    const appRoot = await createFixtureApp({
+      "dawn.config.ts": "export default {};\n",
+      "package.json": "{}\n",
+      "src/app/noop/index.ts": "export const graph = async () => ({ ok: true });\n",
+    })
+    const server = await startRuntimeServer({ appRoot })
+    servers.push(server)
+
+    setPending("thread-3", { interruptId: "p1", resolve: () => undefined }) // id matches below, so only the decision is invalid
+
+    const response = await fetch(new URL("/threads/thread-3/resume", server.url), {
+      body: JSON.stringify({ interrupt_id: "p1", decision: "bogus" }),
+      headers: { "content-type": "application/json" },
+      method: "POST",
+    })
+
+    expect(response.status).toBe(400)
+  })
+})
+
+async function createFixtureApp(files: Readonly<Record<string, string>>): Promise<string> {
+  const appRoot = await mkdtemp(join(tmpdir(), "dawn-cli-resume-"))
+  tempDirs.push(appRoot)
+  // Write each entry (app-relative path -> UTF-8 contents), creating parent directories as needed.
+  await Promise.all(
+    Object.entries(files).map(async ([relativePath, source]) => {
+      const filePath = join(appRoot, relativePath)
+      await mkdir(join(filePath, ".."), { recursive: true }) // join(file, "..") is the file's parent directory
+      await writeFile(filePath, source, "utf8")
+    }),
+  )
+  // Return the temp app root; it is registered in tempDirs above so afterEach can delete it.
+  return appRoot
+}
diff --git a/packages/cli/test/run-typegen.test.ts b/packages/cli/test/run-typegen.test.ts
index c044dc3a..68b0d1f7 100644
--- a/packages/cli/test/run-typegen.test.ts
+++ b/packages/cli/test/run-typegen.test.ts
@@ -8,8 +8,10 @@ import { afterEach, describe, expect, test } from "vitest"
 import { runTypegen } from "../src/lib/typegen/run-typegen.js"
 
 const tempDirs: string[] = []
+const originalCwd = process.cwd()
 
 afterEach(async () => {
+  process.chdir(originalCwd)
   await Promise.all(tempDirs.splice(0).map((dir) => rm(dir, { force: true, recursive: true })))
 })
 
@@ -151,6 +153,39 @@ describe("runTypegen", () => {
     expect(content).not.toContain("Dispatch a sub-task")
   })
 
+  test("includes workspace tools in generated types when workspace/ directory exists", async () => {
+    const { appRoot } = await setupApp()
+    await mkdir(join(appRoot, "workspace"), { recursive: true })
+    process.chdir(appRoot) // typegen looks for workspace/ under process.cwd(); afterEach restores the original cwd
+
+    const manifest = await discoverRoutes({ appRoot })
+    await runTypegen({ appRoot, manifest })
+
+    const dtsPath = join(appRoot, ".dawn", "dawn.generated.d.ts")
+    const content = await readFile(dtsPath, "utf8")
+
+    expect(content).toContain("readFile")
+    expect(content).toContain("writeFile")
+    expect(content).toContain("listDir")
+    expect(content).toContain("runBash")
+    expect(content).toContain("greet") // presumably the fixture's own tool from setupApp — verify it is still emitted alongside
+  })
+
+  test("omits workspace tools when workspace/ directory is absent", async () => {
+    const { appRoot } = await setupApp()
+    process.chdir(appRoot) // typegen looks for workspace/ under process.cwd(), so point cwd at the app without one
+    const manifest = await discoverRoutes({ appRoot })
+    await runTypegen({ appRoot, manifest })
+
+    const dtsPath = join(appRoot, ".dawn", "dawn.generated.d.ts")
+    const content = await readFile(dtsPath, "utf8")
+
+    expect(content).not.toContain("Read a UTF-8 file from the workspace") // asserts on the tool descriptions, not the tool names
+    expect(content).not.toContain("Write a UTF-8 file inside the workspace")
+    expect(content).not.toContain("List entries in a workspace directory")
+    expect(content).not.toContain("Run a shell command inside the workspace")
+  })
+
   test("writes state.json when state.ts exists", async () => {
     const { appRoot } = await setupApp({ withState: true })
     const manifest = await discoverRoutes({ appRoot })
diff --git a/packages/cli/test/tool-name-uniqueness.test.ts b/packages/cli/test/tool-name-uniqueness.test.ts
index af25e075..4ffcfc53 100644
--- a/packages/cli/test/tool-name-uniqueness.test.ts
+++ b/packages/cli/test/tool-name-uniqueness.test.ts
@@ -35,3 +35,40 @@ describe("checkToolNameUniqueness", () => {
     expect(result.message).toContain("task")
   })
 })
+
+describe("checkToolNameUniqueness — overridable", () => {
+  it("when a capability tool is overridable, a user tool with the same name does NOT error and replaces it", () => {
+    const result = checkToolNameUniqueness({
+      userTools: [{ name: "readFile" }],
+      capabilityTools: [{ name: "readFile", overridable: true }],
+      reservedNames: new Set(),
+    })
+    expect(result.ok).toBe(true)
+    if (!result.ok) return // redundant at runtime; presumably narrows the result type so effectiveCapabilityTools is accessible
+    // The only capability tool was shadowed by the user tool, so nothing is left.
+    expect(result.effectiveCapabilityTools).toEqual([])
+  })
+
+  it("when a capability tool is NOT overridable, a user tool with the same name still errors", () => {
+    const result = checkToolNameUniqueness({
+      userTools: [{ name: "writeTodos" }],
+      capabilityTools: [{ name: "writeTodos" }], // no overridable flag = not overridable
+      reservedNames: new Set(),
+    })
+    expect(result.ok).toBe(false)
+  })
+
+  it("returns the un-shadowed capability tools in effectiveCapabilityTools", () => {
+    const result = checkToolNameUniqueness({
+      userTools: [{ name: "readFile" }],
+      capabilityTools: [
+        { name: "readFile", overridable: true },
+        { name: "writeFile", overridable: true },
+      ],
+      reservedNames: new Set(),
+    })
+    expect(result.ok).toBe(true)
+    if (!result.ok) return // same narrowing guard as above
+    expect(result.effectiveCapabilityTools.map((t) => t.name)).toEqual(["writeFile"])
+  })
+})
diff --git a/packages/cli/test/typegen-command.test.ts b/packages/cli/test/typegen-command.test.ts
index e279a437..b89c7387 100644
--- a/packages/cli/test/typegen-command.test.ts
+++ b/packages/cli/test/typegen-command.test.ts
@@ -186,7 +186,9 @@ describe("dawn typegen", () => {
     const coreTarball = await packPackage("@dawn-ai/core", packsRoot)
     const langchainTarball = await packPackage("@dawn-ai/langchain", packsRoot)
     const langgraphTarball = await packPackage("@dawn-ai/langgraph", packsRoot)
+    const permissionsTarball = await packPackage("@dawn-ai/permissions", packsRoot)
     const sdkTarball = await packPackage("@dawn-ai/sdk", packsRoot)
+    const workspaceTarball = await packPackage("@dawn-ai/workspace", packsRoot)
 
     await writeFile(
       join(installerRoot, "package.json"),
@@ -206,7 +208,9 @@ describe("dawn typegen", () => {
               "@dawn-ai/core": `file:${coreTarball}`,
               "@dawn-ai/langchain": `file:${langchainTarball}`,
               "@dawn-ai/langgraph": `file:${langgraphTarball}`,
+              "@dawn-ai/permissions": `file:${permissionsTarball}`,
               "@dawn-ai/sdk": `file:${sdkTarball}`,
+              "@dawn-ai/workspace": `file:${workspaceTarball}`,
             },
           },
         },
diff --git a/packages/cli/test/verify-command.test.ts b/packages/cli/test/verify-command.test.ts
index f5782991..0e4831d3 100644
--- a/packages/cli/test/verify-command.test.ts
+++ b/packages/cli/test/verify-command.test.ts
@@ -146,8 +146,7 @@ describe("dawn verify", () => {
       checks: [
         {
           error: {
-            message:
-              'Unsupported dawn.config.ts syntax: unexpected token "(". Supported subset: optional const string declarations followed by export default { appDir } or export default { appDir: "..." }.',
+            message: "defineConfig is not defined",
           },
           name: "app",
           status: "failed",
diff --git a/packages/cli/tsconfig.build.json b/packages/cli/tsconfig.build.json
index bfb89bb1..1278b9df 100644
--- a/packages/cli/tsconfig.build.json
+++ b/packages/cli/tsconfig.build.json
@@ -8,7 +8,8 @@
       "@dawn-ai/core": ["../core/src/index.ts"],
       "@dawn-ai/langchain": ["../langchain/src/index.ts"],
       "@dawn-ai/langgraph": ["../langgraph/src/index.ts"],
-      "@dawn-ai/langgraph/*": ["../langgraph/src/*.ts"]
+      "@dawn-ai/langgraph/*": ["../langgraph/src/*.ts"],
+      "@dawn-ai/permissions": ["../permissions/src/index.ts"]
     },
     "rootDir": "src"
   },
@@ -22,6 +23,9 @@
     },
     {
       "path": "../langgraph"
+    },
+    {
+      "path": "../permissions"
     }
   ]
 }
diff --git a/packages/core/package.json b/packages/core/package.json
index 034732ea..5c33687f 100644
--- a/packages/core/package.json
+++ b/packages/core/package.json
@@ -36,7 +36,10 @@
     "typecheck": "tsc --noEmit"
   },
   "dependencies": {
+    "@dawn-ai/permissions": "workspace:*",
     "@dawn-ai/sdk": "workspace:*",
+    "@dawn-ai/workspace": "workspace:*",
+    "@langchain/langgraph": "^1.3.0",
     "tsx": "^4.8.1",
     "typescript": "5.8.3",
     "zod": "^4.4.3"
diff --git a/packages/core/src/capabilities/built-in/workspace.ts b/packages/core/src/capabilities/built-in/workspace.ts
new file mode 100644
index 00000000..752aebd4
--- /dev/null
+++ b/packages/core/src/capabilities/built-in/workspace.ts
@@ -0,0 +1,228 @@
+import { existsSync } from "node:fs"
+import { join, resolve, sep } from "node:path"
+import type { PermissionsStore } from "@dawn-ai/permissions"
+import { suggestedCommandPattern, suggestedPathPattern } from "@dawn-ai/permissions"
+import type { BackendContext, ExecBackend, FilesystemBackend } from "@dawn-ai/workspace"
+import { localExec, localFilesystem } from "@dawn-ai/workspace"
+import { interrupt } from "@langchain/langgraph"
+import { z } from "zod"
+
+import type { CapabilityMarker, DawnToolDefinition } from "../types.js"
+
+const WORKSPACE_DIRNAME = "workspace"
+
+/**
+ * Return the workspace root: the "workspace" directory directly under
+ * process.cwd(), read at call time (so a later chdir changes the result).
+ * Intended to match the AGENTS.md capability so memory and tools share one directory.
+ */
+function workspaceRoot(): string {
+  return join(process.cwd(), WORKSPACE_DIRNAME)
+}
+
+const READ_FILE_INPUT = z.object({ path: z.string().min(1) })
+const WRITE_FILE_INPUT = z.object({ path: z.string().min(1), content: z.string() })
+const LIST_DIR_INPUT = z.object({ path: z.string().default(".") })
+const RUN_BASH_INPUT = z.object({ command: z.string().min(1) })
+
+function backendContext(workspaceRoot: string, signal: AbortSignal): BackendContext {
+  return { signal, workspaceRoot } // per-call context passed as the last argument to every backend method
+}
+
+type GateResult = { allowed: true } | { allowed: false; reason: string }
+
+async function gatePathOp(
+  permissions: PermissionsStore | undefined,
+  operation: "readFile" | "writeFile" | "listDir",
+  absPath: string,
+  workspaceRoot: string,
+): Promise<GateResult> {
+  // No store: nothing is gated, not even paths outside the workspace (legacy behavior without a permissions context).
+  if (!permissions) return { allowed: true }
+  if (permissions.mode === "bypass") return { allowed: true }
+
+  const insideWorkspace = absPath === workspaceRoot || absPath.startsWith(workspaceRoot + sep) // lexical prefix check; symlinks are not resolved
+
+  // Inside workspace: always allow silently.
+  if (insideWorkspace) return { allowed: true }
+
+  // Outside workspace: consult the store.
+  const decision = permissions.match(operation, absPath)
+  if (decision === "allow") return { allowed: true }
+  if (decision === "deny") {
+    return { allowed: false, reason: `Permission denied by user: ${absPath}` }
+  }
+  // decision === "unknown"
+  if (permissions.mode === "non-interactive") {
+    return { allowed: false, reason: `Permission denied (fail-closed): ${absPath}` }
+  }
+  // Interactive: emit LangGraph interrupt and await user decision.
+  const result = await emitPermissionInterrupt({
+    kind: "path",
+    operation,
+    path: absPath,
+    permissions,
+  })
+  if (result === "deny") {
+    return { allowed: false, reason: `Permission denied by user: ${absPath}` }
+  }
+  return { allowed: true }
+}
+
+async function gateBashOp(
+  permissions: PermissionsStore | undefined,
+  command: string,
+): Promise<GateResult> {
+  if (!permissions) return { allowed: true } // no store: commands run ungated
+  if (permissions.mode === "bypass") return { allowed: true }
+
+  const decision = permissions.match("bash", command) // unlike paths, commands get no "inside workspace" free pass
+  if (decision === "allow") return { allowed: true }
+  if (decision === "deny") {
+    return { allowed: false, reason: `Permission denied by user: ${command}` }
+  }
+  if (permissions.mode === "non-interactive") {
+    return { allowed: false, reason: `Permission denied (fail-closed): ${command}` } // no rule matched and nobody to ask
+  }
+  const result = await emitPermissionInterrupt({
+    kind: "command",
+    command,
+    permissions,
+  })
+  if (result === "deny") {
+    return { allowed: false, reason: `Permission denied by user: ${command}` }
+  }
+  return { allowed: true }
+}
+
+interface InterruptArgs {
+  kind: "command" | "path"
+  command?: string // supplied by gateBashOp (kind "command")
+  operation?: "readFile" | "writeFile" | "listDir" // supplied by gatePathOp (kind "path")
+  path?: string // supplied by gatePathOp (kind "path")
+  permissions: PermissionsStore // addAllow is called on it when the user answers "always"
+}
+
+/**
+ * Raise a LangGraph interrupt describing one gated operation and translate the
+ * resume value into a decision. Only an explicit "once" or "always" allows;
+ * "always" also persists suggestedPattern through permissions.addAllow.
+ */
+async function emitPermissionInterrupt(args: InterruptArgs): Promise<"allow" | "deny"> {
+  const interruptId = `perm-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`
+  const suggestedPattern =
+    args.kind === "command"
+      ? suggestedCommandPattern(args.command ?? "")
+      : suggestedPathPattern(args.path ?? "")
+  const detail =
+    args.kind === "command"
+      ? { command: args.command ?? "", suggestedPattern }
+      : { operation: args.operation ?? "readFile", path: args.path ?? "", suggestedPattern }
+  const payload = { interruptId, type: "permission-request" as const, kind: args.kind, detail }
+  // interrupt() hands back whatever value the graph is resumed with. That value is
+  // not checked at runtime, so anything other than an explicit approval is treated
+  // as a denial (fail closed) instead of falling through to "allow".
+  const decision: unknown = interrupt(payload)
+  if (decision !== "once" && decision !== "always") return "deny"
+  if (decision === "always") {
+    const tool = args.kind === "command" ? "bash" : (args.operation ?? "readFile")
+    await args.permissions.addAllow(tool, suggestedPattern)
+  }
+  return "allow"
+}
+
+interface OverridableTool extends DawnToolDefinition {
+  readonly overridable: true // set on all four workspace tools; the checkToolNameUniqueness tests show a same-named user tool replacing such a tool
+}
+
+function buildWorkspaceTools(
+  workspaceRoot: string,
+  fs: FilesystemBackend,
+  exec: ExecBackend,
+  permissions: PermissionsStore | undefined,
+): readonly OverridableTool[] {
+  const readFile: OverridableTool = {
+    name: "readFile",
+    description: "Read a UTF-8 file from the workspace.",
+    schema: READ_FILE_INPUT,
+    overridable: true,
+    run: async (input, ctx) => {
+      const { path } = READ_FILE_INPUT.parse(input)
+      const absPath = resolve(workspaceRoot, path) // relative input resolves against the workspace root; an absolute input is used as-is
+      const gate = await gatePathOp(permissions, "readFile", absPath, workspaceRoot)
+      if (!gate.allowed) {
+        throw new Error(gate.reason)
+      }
+      return fs.readFile(absPath, backendContext(workspaceRoot, ctx.signal)) // backend always receives the resolved absolute path
+    },
+  }
+  const writeFile: OverridableTool = {
+    name: "writeFile",
+    description: "Write a UTF-8 file inside the workspace.",
+    schema: WRITE_FILE_INPUT,
+    overridable: true,
+    run: async (input, ctx) => {
+      const { path, content } = WRITE_FILE_INPUT.parse(input)
+      const absPath = resolve(workspaceRoot, path)
+      const gate = await gatePathOp(permissions, "writeFile", absPath, workspaceRoot)
+      if (!gate.allowed) {
+        throw new Error(gate.reason)
+      }
+      const result = await fs.writeFile(absPath, content, backendContext(workspaceRoot, ctx.signal))
+      return `wrote ${result.bytesWritten} bytes to ${path}` // reports the path as the caller supplied it, not the resolved one
+    },
+  }
+  const listDir: OverridableTool = {
+    name: "listDir",
+    description: "List entries in a workspace directory.",
+    schema: LIST_DIR_INPUT,
+    overridable: true,
+    run: async (input, ctx) => {
+      const { path } = LIST_DIR_INPUT.parse(input) // schema defaults path to ".", i.e. the workspace root itself
+      const absPath = resolve(workspaceRoot, path)
+      const gate = await gatePathOp(permissions, "listDir", absPath, workspaceRoot)
+      if (!gate.allowed) {
+        throw new Error(gate.reason)
+      }
+      const entries = await fs.listDir(absPath, backendContext(workspaceRoot, ctx.signal))
+      return [...entries] // hand back a fresh array rather than the backend's own value
+    },
+  }
+  const runBash: OverridableTool = {
+    name: "runBash",
+    description: "Run a shell command inside the workspace.",
+    schema: RUN_BASH_INPUT,
+    overridable: true,
+    run: async (input, ctx) => {
+      const { command } = RUN_BASH_INPUT.parse(input)
+      const gate = await gateBashOp(permissions, command) // gated on the raw command string; no path resolution is involved
+      if (!gate.allowed) {
+        throw new Error(gate.reason)
+      }
+      return exec.runCommand({ command }, backendContext(workspaceRoot, ctx.signal))
+    },
+  }
+  return [readFile, writeFile, listDir, runBash]
+}
+
+export function createWorkspaceMarker(): CapabilityMarker {
+  return {
+    name: "workspace",
+    detect: async (_routeDir, _context) => existsSync(workspaceRoot()), // route dir is ignored; detection depends only on <cwd>/workspace
+    load: async (_routeDir, context) => {
+      const root = workspaceRoot()
+      if (!existsSync(root)) return {} // empty contribution (no tools) when workspace/ is missing
+      const fs = context.backends?.filesystem ?? localFilesystem() // configured backend wins; local default otherwise
+      const exec = context.backends?.exec ?? localExec()
+      const permissions = context.permissions
+
+      if (permissions?.mode === "bypass") {
+        console.warn(
+          "[dawn:permissions] mode=bypass — path-jail disabled, all bash unrestricted. Do not use in production.",
+        )
+      }
+
+      return { tools: buildWorkspaceTools(root, fs, exec, permissions) }
+    },
+  }
+}
diff --git a/packages/core/src/capabilities/types.ts b/packages/core/src/capabilities/types.ts
index 4fb25c92..1f366bdf 100644
--- a/packages/core/src/capabilities/types.ts
+++ b/packages/core/src/capabilities/types.ts
@@ -1,10 +1,17 @@
+import type { PermissionsStore } from "@dawn-ai/permissions"
 import type { DawnAgent } from "@dawn-ai/sdk"
+import type { ExecBackend, FilesystemBackend } from "@dawn-ai/workspace"
 import type { ResolvedStateField, RouteManifest } from "../types.js"
 
 export interface CapabilityMarkerContext {
   readonly routeManifest: RouteManifest
   readonly descriptor: DawnAgent | undefined
   readonly descriptorRouteMap?: ReadonlyMap
+  readonly backends?: {
+    readonly filesystem?: FilesystemBackend // createWorkspaceMarker falls back to localFilesystem() when unset
+    readonly exec?: ExecBackend // createWorkspaceMarker falls back to localExec() when unset
+  }
+  readonly permissions?: PermissionsStore // when undefined, the workspace tools skip permission gating entirely
 }
 
 export interface DawnToolDefinition {
diff --git a/packages/core/src/config.ts b/packages/core/src/config.ts
index a2f50454..bfd5b714 100644
--- a/packages/core/src/config.ts
+++ b/packages/core/src/config.ts
@@ -1,322 +1,40 @@
 import { constants } from "node:fs"
-import { access, readFile } from "node:fs/promises"
+import { access } from "node:fs/promises"
 import { join } from "node:path"
+import { pathToFileURL } from "node:url"
 
 import type { DawnConfig, LoadDawnConfigOptions, LoadedDawnConfig } from "./types.js"
 
 export const DAWN_CONFIG_FILE = "dawn.config.ts"
 
-type Token =
-  | {
-      readonly type:
-        | "const"
-        | "default"
-        | "export"
-        | "eof"
-        | "equals"
-        | "lbrace"
-        | "rbrace"
-        | "colon"
-        | "comma"
-        | "semicolon"
-    }
-  | { readonly type: "identifier"; readonly value: string }
-  | { readonly type: "string"; readonly value: string }
+let loaderPromise: Promise<void> | undefined // memoizes the one-time tsx loader registration
 
-type TokenType = Token["type"]
-type TokenOfType = Extract
+async function registerTsxLoader(): Promise<void> {
+  loaderPromise ??= (async () => {
+    const { register } = (await import("tsx/esm/api")) as {
+      readonly register: () => unknown
+    }
+    register() // return value is intentionally discarded
+  })()
+  await loaderPromise // every caller awaits the same memoized registration (a failed first attempt is also remembered)
+}
 
 export async function loadDawnConfig(options: LoadDawnConfigOptions): Promise<LoadedDawnConfig> {
   const configPath = join(options.appRoot, DAWN_CONFIG_FILE)
-
   await access(configPath, constants.F_OK)
+  await registerTsxLoader() // must be in place before the .ts config file is dynamically imported
 
-  const source = await readFile(configPath, "utf8")
-
-  return {
-    appRoot: options.appRoot,
-    config: parseDawnConfig(source),
-    configPath,
-  }
-}
-
-function parseDawnConfig(source: string): DawnConfig {
-  const parser = new DawnConfigParser(source)
-
-  return parser.parse()
-}
-
-class DawnConfigParser {
-  private readonly tokens: Token[]
-  private currentIndex = 0
-  private readonly stringBindings = new Map()
-
-  constructor(source: string) {
-    this.tokens = tokenize(source)
-  }
-
-  parse(): DawnConfig {
-    while (this.match("const")) {
-      this.parseConstDeclaration()
-      this.consumeOptional("semicolon")
-    }
-
-    this.consume("export")
-    this.consume("default")
-
-    const config = this.parseConfigObject()
-
-    this.consumeOptional("semicolon")
-    this.consume("eof")
-
-    return config
-  }
-
-  private parseConstDeclaration(): void {
-    const identifier = this.consume("identifier")
-    this.consume("equals")
-    const value = this.consume("string")
-    this.stringBindings.set(identifier.value, value.value)
-  }
-
-  private parseConfigObject(): DawnConfig {
-    this.consume("lbrace")
-
-    let appDir: string | undefined
-
-    while (!this.check("rbrace")) {
-      const property = this.consume("identifier")
-
-      if (property.value !== "appDir") {
-        throw unsupportedConfig(`unsupported property "${property.value}"`)
-      }
-
-      const resolvedValue = this.match("colon")
-        ? this.parsePropertyValue()
-        : this.resolveIdentifier(property.value)
-
-      appDir = resolvedValue
-
-      if (!this.match("comma")) {
-        break
-      }
-    }
-
-    this.consume("rbrace")
-
-    return appDir ? { appDir } : {}
-  }
-
-  private parsePropertyValue(): string {
-    if (this.check("string")) {
-      return this.consume("string").value
-    }
-
-    if (this.check("identifier")) {
-      return this.resolveIdentifier(this.consume("identifier").value)
-    }
-
-    throw unsupportedConfig("property values must be string literals or const identifiers")
-  }
-
-  private resolveIdentifier(identifier: string): string {
-    const resolved = this.stringBindings.get(identifier)
-
-    if (!resolved) {
-      throw unsupportedConfig(`unknown identifier "${identifier}"`)
-    }
-
-    return resolved
-  }
-
-  private match(type: TokenType): boolean {
-    if (!this.check(type)) {
-      return false
-    }
-
-    this.currentIndex += 1
-    return true
-  }
-
-  private consume(type: TType): TokenOfType {
-    const token = this.peek()
-
-    if (token.type !== type) {
-      throw unsupportedConfig(`expected ${type} but found ${describeToken(token)}`)
-    }
-
-    this.currentIndex += 1
-    return token as TokenOfType
-  }
-
-  private consumeOptional(type: TokenType): void {
-    this.match(type)
-  }
-
-  private check(type: TokenType): boolean {
-    return this.peek().type === type
-  }
-
-  private peek(): Token {
-    return this.tokens[this.currentIndex] ?? { type: "eof" }
+  const mod = (await import(pathToFileURL(configPath).href)) as {
+    readonly default?: unknown
   }
-}
-
-function tokenize(source: string): Token[] {
-  const tokens: Token[] = []
-  let index = source.startsWith("\uFEFF") ? 1 : 0
-
-  while (index < source.length) {
-    const character = source[index]
-
-    if (!character) {
-      break
-    }
-
-    if (isWhitespace(character)) {
-      index += 1
-      continue
-    }
-
-    if (character === "/" && source[index + 1] === "/") {
-      index += 2
-      while (index < source.length && source[index] !== "\n") {
-        index += 1
-      }
-      continue
-    }
-
-    if (character === "/" && source[index + 1] === "*") {
-      const commentEnd = source.indexOf("*/", index + 2)
-
-      if (commentEnd === -1) {
-        throw unsupportedConfig("unterminated block comment")
-      }
-
-      index = commentEnd + 2
-      continue
-    }
-
-    if (character === "{") {
-      tokens.push({ type: "lbrace" })
-      index += 1
-      continue
-    }
-
-    if (character === "}") {
-      tokens.push({ type: "rbrace" })
-      index += 1
-      continue
-    }
-
-    if (character === ":") {
-      tokens.push({ type: "colon" })
-      index += 1
-      continue
-    }
-
-    if (character === ",") {
-      tokens.push({ type: "comma" })
-      index += 1
-      continue
-    }
-
-    if (character === "=") {
-      tokens.push({ type: "equals" })
-      index += 1
-      continue
-    }
-
-    if (character === ";") {
-      tokens.push({ type: "semicolon" })
-      index += 1
-      continue
-    }
-
-    if (character === '"' || character === "'") {
-      const [value, nextIndex] = readStringLiteral(source, index, character)
-      tokens.push({ type: "string", value })
-      index = nextIndex
-      continue
-    }
-
-    if (isIdentifierStart(character)) {
-      const [identifier, nextIndex] = readIdentifier(source, index)
-      index = nextIndex
-
-      if (identifier === "const" || identifier === "export" || identifier === "default") {
-        tokens.push({ type: identifier })
-      } else {
-        tokens.push({ type: "identifier", value: identifier })
-      }
-
-      continue
-    }
 
-    throw unsupportedConfig(`unexpected token "${character}"`)
+  if (!mod.default || typeof mod.default !== "object") {
+    throw new Error(`${DAWN_CONFIG_FILE} must export default an object. Got: ${typeof mod.default}`)
   }
 
-  tokens.push({ type: "eof" })
-
-  return tokens
-}
-
-function readStringLiteral(source: string, startIndex: number, quote: '"' | "'"): [string, number] {
-  let index = startIndex + 1
-  let value = ""
-
-  while (index < source.length) {
-    const character = source[index]
-
-    if (!character) {
-      break
-    }
-
-    if (character === "\\") {
-      throw unsupportedConfig("escaped string literals are not supported")
-    }
-
-    if (character === quote) {
-      return [value, index + 1]
-    }
-
-    value += character
-    index += 1
-  }
-
-  throw unsupportedConfig("unterminated string literal")
-}
-
-function readIdentifier(source: string, startIndex: number): [string, number] {
-  let index = startIndex + 1
-
-  while (index < source.length && isIdentifierPart(source[index] ?? "")) {
-    index += 1
+  return {
+    appRoot: options.appRoot,
+    config: mod.default as DawnConfig,
+    configPath,
   }
-
-  return [source.slice(startIndex, index), index]
-}
-
-function isIdentifierStart(character: string): boolean {
-  return /[A-Za-z_$]/.test(character)
-}
-
-function isIdentifierPart(character: string): boolean {
-  return /[A-Za-z0-9_$]/.test(character)
-}
-
-function isWhitespace(character: string): boolean {
-  return /\s/.test(character)
-}
-
-function describeToken(token: Token): string {
-  return token.type === "identifier" || token.type === "string"
-    ? `${token.type} "${token.value}"`
-    : token.type
-}
-
-function unsupportedConfig(reason: string): Error {
-  return new Error(
-    `Unsupported dawn.config.ts syntax: ${reason}. Supported subset: optional const string declarations followed by export default { appDir } or export default { appDir: "..." }.`,
-  )
 }
diff --git a/packages/core/src/index.ts b/packages/core/src/index.ts
index 311e71ef..9b226532 100644
--- a/packages/core/src/index.ts
+++ b/packages/core/src/index.ts
@@ -3,6 +3,7 @@ export type { RuntimeTodo } from "./capabilities/built-in/planning.js"
 export { createPlanningMarker } from "./capabilities/built-in/planning.js"
 export { createSkillsMarker } from "./capabilities/built-in/skills.js"
 export { createSubagentsMarker } from "./capabilities/built-in/subagents.js"
+export { createWorkspaceMarker } from "./capabilities/built-in/workspace.js"
 export type {
   AppliedContribution,
   ApplyResult,
diff --git a/packages/core/src/types.ts b/packages/core/src/types.ts
index fce1b8c1..672dcc07 100644
--- a/packages/core/src/types.ts
+++ b/packages/core/src/types.ts
@@ -1,9 +1,20 @@
+import type { PermissionMode } from "@dawn-ai/permissions"
 import type { RouteKind } from "@dawn-ai/sdk"
+import type { ExecBackend, FilesystemBackend } from "@dawn-ai/workspace"
 
 export type { RouteKind }
 
 export interface DawnConfig {
   readonly appDir?: string
+  readonly backends?: {
+    readonly filesystem?: FilesystemBackend
+    readonly exec?: ExecBackend
+  }
+  readonly permissions?: {
+    readonly mode?: PermissionMode
+    readonly allow?: Readonly<Record<string, readonly string[]>> // NOTE(review): assumed shape (tool name → patterns) — confirm
+    readonly deny?: Readonly<Record<string, readonly string[]>> // NOTE(review): assumed to mirror allow — confirm
+  }
 }
 
 export type RouteSegment =
diff --git a/packages/core/test/capabilities/workspace.test.ts b/packages/core/test/capabilities/workspace.test.ts
new file mode 100644
index 00000000..3d2d7852
--- /dev/null
+++ b/packages/core/test/capabilities/workspace.test.ts
@@ -0,0 +1,204 @@
+import { mkdirSync, mkdtempSync, rmSync, writeFileSync } from "node:fs"
+import { tmpdir } from "node:os"
+import { join } from "node:path"
+import { createPermissionsStore } from "@dawn-ai/permissions"
+import { afterEach, beforeEach, describe, expect, it, vi } from "vitest"
+
+import { createWorkspaceMarker } from "../../src/capabilities/built-in/workspace.js"
+import type { CapabilityMarkerContext, DawnToolDefinition } from "../../src/capabilities/types.js"
+
+const originalCwd = process.cwd()
+
+function emptyManifest() {
+  return { appRoot: "/app", routes: [] }
+}
+
+function ctx(extras: Partial<CapabilityMarkerContext> = {}): CapabilityMarkerContext {
+  return {
+    routeManifest: emptyManifest(),
+    descriptor: undefined,
+    ...extras, // spread last so a test can override any default field
+  }
+}
+
+function findTool(
+  tools: ReadonlyArray<DawnToolDefinition> | undefined,
+  name: string,
+): DawnToolDefinition {
+  const tool = (tools ?? []).find((t) => t.name === name)
+  if (!tool) throw new Error(`Tool ${name} not found`) // fail loudly so callers never handle undefined
+  return tool
+}
+
+describe("createWorkspaceMarker — detect", () => {
+  let appRoot: string
+  let routeDir: string
+  beforeEach(() => {
+    appRoot = mkdtempSync(join(tmpdir(), "dawn-workspace-cap-"))
+    routeDir = join(appRoot, "route")
+    mkdirSync(routeDir)
+    process.chdir(appRoot) // detection is keyed on process.cwd(), so each test runs from its own temp app root
+  })
+  afterEach(() => {
+    process.chdir(originalCwd) // leave the temp dir before deleting it
+    rmSync(appRoot, { recursive: true, force: true })
+  })
+
+  it("returns false when no workspace/ directory exists at cwd", async () => {
+    const detected = await createWorkspaceMarker().detect(routeDir, ctx())
+    expect(detected).toBe(false)
+  })
+
+  it("returns true when workspace/ exists at cwd", async () => {
+    mkdirSync(join(appRoot, "workspace"))
+    const detected = await createWorkspaceMarker().detect(routeDir, ctx())
+    expect(detected).toBe(true)
+  })
+})
+
+describe("createWorkspaceMarker — load", () => {
+  let appRoot: string
+  let routeDir: string
+  let workspaceDir: string
+  beforeEach(() => {
+    appRoot = mkdtempSync(join(tmpdir(), "dawn-workspace-cap-"))
+    routeDir = join(appRoot, "route")
+    mkdirSync(routeDir)
+    workspaceDir = join(appRoot, "workspace")
+    mkdirSync(workspaceDir)
+    process.chdir(appRoot) // the marker derives its root from process.cwd()
+  })
+  afterEach(() => {
+    process.chdir(originalCwd) // leave the temp dir before deleting it
+    rmSync(appRoot, { recursive: true, force: true })
+  })
+
+  it("contributes exactly four tools when workspace/ exists", async () => {
+    const contribution = await createWorkspaceMarker().load(routeDir, ctx())
+    const names = (contribution.tools ?? []).map((t) => t.name).sort()
+    expect(names).toEqual(["listDir", "readFile", "runBash", "writeFile"])
+  })
+
+  it("contributes no tools when workspace/ is absent", async () => {
+    rmSync(workspaceDir, { recursive: true }) // undo the beforeEach setup for this one case
+    const contribution = await createWorkspaceMarker().load(routeDir, ctx())
+    expect(contribution.tools).toBeUndefined()
+  })
+
+  it("readFile tool calls the configured backend with an absolute path inside the jail", async () => {
+    writeFileSync(join(workspaceDir, "hello.txt"), "hi", "utf8")
+    const fakeBackend = {
+      readFile: vi.fn().mockResolvedValue("hi"),
+      writeFile: vi.fn(),
+      listDir: vi.fn(),
+    }
+    const contribution = await createWorkspaceMarker().load(
+      routeDir,
+      ctx({ backends: { filesystem: fakeBackend } }),
+    )
+    const readTool = findTool(contribution.tools, "readFile")
+    const result = await readTool.run(
+      { path: "hello.txt" },
+      { signal: new AbortController().signal },
+    )
+    expect(result).toBe("hi")
+    expect(fakeBackend.readFile).toHaveBeenCalledOnce()
+    const firstCall = fakeBackend.readFile.mock.calls[0]
+    if (!firstCall) throw new Error("readFile was not called")
+    expect(firstCall[0]).toBe(join(process.cwd(), "workspace", "hello.txt")) // built from process.cwd(), mirroring how the marker derives the root
+  })
+
+  it("rejects path-jail escapes when permissions store is present (non-interactive mode)", async () => {
+    const permissions = createPermissionsStore({
+      appRoot,
+      config: undefined,
+      mode: "non-interactive",
+    })
+    await permissions.load()
+    const contribution = await createWorkspaceMarker().load(routeDir, ctx({ permissions }))
+    const readTool = findTool(contribution.tools, "readFile")
+    await expect(
+      readTool.run({ path: "../../etc/passwd" }, { signal: new AbortController().signal }),
+    ).rejects.toThrow(/permission denied/i)
+  })
+
+  it("in bypass mode, every operation proceeds (path-jail disabled)", async () => {
+    const permissions = createPermissionsStore({
+      appRoot,
+      config: undefined,
+      mode: "bypass",
+    })
+    await permissions.load()
+    const contribution = await createWorkspaceMarker().load(routeDir, ctx({ permissions }))
+    const readTool = findTool(contribution.tools, "readFile")
+    // The target is outside workspace/ and does not exist: the rejection should come from the backend (e.g. ENOENT), not from the permission gate.
+    await expect(
+      readTool.run({ path: "../../etc/some-fake-file" }, { signal: new AbortController().signal }),
+    ).rejects.not.toThrow(/outside workspace|permission denied/i)
+  })
+
+  it("in non-interactive mode, unknown bash commands hard-refuse", async () => {
+    const permissions = createPermissionsStore({
+      appRoot,
+      config: undefined,
+      mode: "non-interactive",
+    })
+    await permissions.load()
+    const contribution = await createWorkspaceMarker().load(routeDir, ctx({ permissions }))
+    const runBash = findTool(contribution.tools, "runBash")
+    await expect(
+      runBash.run({ command: "ls" }, { signal: new AbortController().signal }),
+    ).rejects.toThrow(/permission denied|fail-closed/i)
+  })
+
+  it("config-seeded allow lets a bash command through in non-interactive mode", async () => {
+    const permissions = createPermissionsStore({
+      appRoot,
+      config: { version: 1, allow: { bash: ["echo"] }, deny: {} },
+      mode: "non-interactive",
+    })
+    await permissions.load()
+    const contribution = await createWorkspaceMarker().load(routeDir, ctx({ permissions }))
+    const runBash = findTool(contribution.tools, "runBash")
+    const result = await runBash.run(
+      { command: "echo hi" },
+      { signal: new AbortController().signal },
+    )
+    expect((result as { stdout: string }).stdout.trim()).toBe("hi") // no exec backend configured, so this goes through the default localExec()
+  })
+
+  it("uses the default local backends when none configured", async () => {
+    writeFileSync(join(workspaceDir, "ok.txt"), "ok", "utf8")
+    const contribution = await createWorkspaceMarker().load(routeDir, ctx())
+    const readTool = findTool(contribution.tools, "readFile")
+    const result = await readTool.run({ path: "ok.txt" }, { signal: new AbortController().signal })
+    expect(result).toBe("ok")
+  })
+
+  it("runBash tool calls the configured exec backend", async () => {
+    const fakeExec = {
+      runCommand: vi.fn().mockResolvedValue({ stdout: "world", stderr: "", exitCode: 0 }),
+    }
+    const contribution = await createWorkspaceMarker().load(
+      routeDir,
+      ctx({ backends: { exec: fakeExec } }),
+    )
+    const runBash = findTool(contribution.tools, "runBash")
+    const result = await runBash.run(
+      { command: "echo world" },
+      { signal: new AbortController().signal },
+    )
+    expect(result).toMatchObject({ stdout: "world", exitCode: 0 })
+    expect(fakeExec.runCommand).toHaveBeenCalledWith(
+      expect.objectContaining({ command: "echo world" }),
+      expect.any(Object),
+    )
+  })
+
+  it("marks all four tools as overridable", async () => {
+    const contribution = await createWorkspaceMarker().load(routeDir, ctx())
+    for (const t of contribution.tools ?? []) {
+      expect((t as unknown as { overridable?: boolean }).overridable).toBe(true) // cast needed: the flag is read through a type that does not declare it
+    }
+  })
+})
diff --git a/packages/core/test/config.test.ts b/packages/core/test/config.test.ts
index 301aa6c0..9b9b5169 100644
--- a/packages/core/test/config.test.ts
+++ b/packages/core/test/config.test.ts
@@ -1,77 +1,60 @@
-import { mkdir, mkdtemp, rm, writeFile } from "node:fs/promises"
+import { mkdtempSync, rmSync } from "node:fs"
+import { writeFile } from "node:fs/promises"
 import { tmpdir } from "node:os"
 import { join } from "node:path"
-import { fileURLToPath } from "node:url"
-import { afterEach, describe, expect, test } from "vitest"
+import { afterEach, beforeEach, describe, expect, it } from "vitest"
 
-import { loadDawnConfig } from "../src/config"
-
-const CONTRACT_FIXTURES_DIR = fileURLToPath(
-  new URL("../../../test/fixtures/contracts/", import.meta.url),
-)
-const tempDirs: string[] = []
-
-afterEach(async () => {
-  await Promise.all(tempDirs.splice(0).map((dir) => rm(dir, { force: true, recursive: true })))
-})
-
-function fixtureRoot(name: string) {
-  return join(CONTRACT_FIXTURES_DIR, name)
-}
-
-async function createConfigFixture(source: string) {
-  const appRoot = await mkdtemp(join(tmpdir(), "dawn-core-config-"))
-  tempDirs.push(appRoot)
-
-  await writeFile(join(appRoot, "package.json"), "{}\n")
-  await writeFile(join(appRoot, "dawn.config.ts"), source)
-  await mkdir(join(appRoot, "src", "app"), { recursive: true })
-
-  return appRoot
-}
+import { DAWN_CONFIG_FILE, loadDawnConfig } from "../src/config.js"
 
 describe("loadDawnConfig", () => {
-  test("loads appDir from an inline string literal", async () => {
-    const appRoot = await createConfigFixture('export default { appDir: "src/custom-app" }\n')
+  let appRoot: string
 
-    await expect(loadDawnConfig({ appRoot })).resolves.toMatchObject({
-      appRoot,
-      config: { appDir: "src/custom-app" },
-      configPath: join(appRoot, "dawn.config.ts"),
-    })
+  beforeEach(() => {
+    appRoot = mkdtempSync(join(tmpdir(), "dawn-config-"))
   })
 
-  test("loads appDir from the checked-in custom appDir fixture", async () => {
-    const appRoot = fixtureRoot("valid-custom-app-dir")
-
-    await expect(loadDawnConfig({ appRoot })).resolves.toMatchObject({
-      appRoot,
-      config: { appDir: "src/dawn-app" },
-      configPath: join(appRoot, "dawn.config.ts"),
-    })
+  afterEach(() => {
+    rmSync(appRoot, { recursive: true, force: true })
   })
 
-  test("rejects the checked-in invalid config fixture with a Dawn-specific parser error", async () => {
-    const appRoot = fixtureRoot("invalid-config")
+  async function writeConfig(source: string): Promise<void> {
+    await writeFile(join(appRoot, DAWN_CONFIG_FILE), source, "utf8")
+  }
+
+  it("loads a config with just appDir", async () => {
+    await writeConfig(`export default { appDir: "src/app" }\n`)
+    const loaded = await loadDawnConfig({ appRoot })
+    expect(loaded.config).toMatchObject({ appDir: "src/app" })
+    expect(loaded.configPath).toBe(join(appRoot, DAWN_CONFIG_FILE))
+  })
 
-    await expect(loadDawnConfig({ appRoot })).rejects.toThrow("Unsupported dawn.config.ts syntax")
+  it("loads a config with no fields (empty object)", async () => {
+    await writeConfig(`export default {}\n`)
+    const loaded = await loadDawnConfig({ appRoot })
+    expect(loaded.config).toEqual({})
   })
 
-  test("rejects unsupported config properties with a stable parser error", async () => {
-    const appRoot = await createConfigFixture('export default { appDir: "src/app", mode: "dev" }\n')
+  it("loads a config that uses a const binding for appDir", async () => {
+    await writeConfig(`
+      const APP_DIR = "src/app"
+      export default { appDir: APP_DIR }
+    `)
+    const loaded = await loadDawnConfig({ appRoot })
+    expect(loaded.config).toMatchObject({ appDir: "src/app" })
+  })
 
-    await expect(loadDawnConfig({ appRoot })).rejects.toThrow(
-      'Unsupported dawn.config.ts syntax: unsupported property "mode". Supported subset: optional const string declarations followed by export default { appDir } or export default { appDir: "..." }.',
-    )
+  it("rejects missing default export", async () => {
+    await writeConfig(`export const named = { appDir: "x" }\n`)
+    await expect(loadDawnConfig({ appRoot })).rejects.toThrow(/must export default/i)
   })
 
-  test("rejects non-string const appDir bindings with a stable parser error", async () => {
-    const appRoot = await createConfigFixture(
-      "const appDir = getAppDir()\nexport default { appDir }\n",
-    )
+  it("rejects non-object default export", async () => {
+    await writeConfig(`export default "hello"\n`)
+    await expect(loadDawnConfig({ appRoot })).rejects.toThrow(/must export default an object/i)
+  })
 
-    await expect(loadDawnConfig({ appRoot })).rejects.toThrow(
-      'Unsupported dawn.config.ts syntax: unexpected token "(". Supported subset: optional const string declarations followed by export default { appDir } or export default { appDir: "..." }.',
-    )
+  it("propagates TS syntax errors from the imported module", async () => {
+    await writeConfig(`export default { appDir:\n`)
+    await expect(loadDawnConfig({ appRoot })).rejects.toThrow()
   })
 })
diff --git a/packages/create-dawn-app/src/index.ts b/packages/create-dawn-app/src/index.ts
index ea72469c..d5bbc4f1 100644
--- a/packages/create-dawn-app/src/index.ts
+++ b/packages/create-dawn-app/src/index.ts
@@ -178,7 +178,9 @@ function createTemplateReplacements(
   readonly dawnCoreSpecifier: string
   readonly dawnLangchainSpecifier: string
   readonly dawnLanggraphSpecifier: string
+  readonly dawnPermissionsSpecifier: string
   readonly dawnSdkSpecifier: string
+  readonly dawnWorkspaceSpecifier: string
 } {
   if (options.mode === "internal") {
     return {
@@ -190,7 +192,11 @@ function createTemplateReplacements(
       dawnCoreSpecifier: createAbsoluteFileSpecifier(resolve(repoRoot, "packages/core")),
       dawnLangchainSpecifier: createAbsoluteFileSpecifier(resolve(repoRoot, "packages/langchain")),
       dawnLanggraphSpecifier: createAbsoluteFileSpecifier(resolve(repoRoot, "packages/langgraph")),
+      dawnPermissionsSpecifier: createAbsoluteFileSpecifier(
+        resolve(repoRoot, "packages/permissions"),
+      ),
       dawnSdkSpecifier: createAbsoluteFileSpecifier(resolve(repoRoot, "packages/sdk")),
+      dawnWorkspaceSpecifier: createAbsoluteFileSpecifier(resolve(repoRoot, "packages/workspace")),
     }
   }
 
@@ -201,7 +207,9 @@ function createTemplateReplacements(
     dawnCoreSpecifier: options.distTag,
     dawnLangchainSpecifier: options.distTag,
     dawnLanggraphSpecifier: options.distTag,
+    dawnPermissionsSpecifier: options.distTag,
     dawnSdkSpecifier: options.distTag,
+    dawnWorkspaceSpecifier: options.distTag,
   }
 }
 
@@ -227,7 +235,9 @@ async function applyInternalModePackageOverrides(
       "@dawn-ai/core": replacements.dawnCoreSpecifier,
       "@dawn-ai/langchain": replacements.dawnLangchainSpecifier,
       "@dawn-ai/langgraph": replacements.dawnLanggraphSpecifier,
+      "@dawn-ai/permissions": replacements.dawnPermissionsSpecifier,
       "@dawn-ai/sdk": replacements.dawnSdkSpecifier,
+      "@dawn-ai/workspace": replacements.dawnWorkspaceSpecifier,
     },
   }
 
diff --git a/packages/langchain/src/agent-adapter.ts b/packages/langchain/src/agent-adapter.ts
index 11148d8a..5e46619e 100644
--- a/packages/langchain/src/agent-adapter.ts
+++ b/packages/langchain/src/agent-adapter.ts
@@ -2,8 +2,15 @@ import type { PromptFragment, StreamTransformer } from "@dawn-ai/core"
 import type { DawnAgent, RetryConfig } from "@dawn-ai/sdk"
 import { isDawnAgent } from "@dawn-ai/sdk"
 import { type BaseMessageLike, HumanMessage } from "@langchain/core/messages"
+import { Command, MemorySaver } from "@langchain/langgraph"
 import { createChatModel } from "./chat-model-factory.js"
 import { resolveProvider } from "./model-provider-resolver.js"
+import {
+  clearPending,
+  type PendingInterrupt,
+  type ResumeDecision,
+  setPending,
+} from "./pending-interrupts.js"
 import { isRetryableError, withRetry } from "./retry.js"
 import { materializeStateSchema, type ResolvedStateField } from "./state-adapter.js"
 import {
@@ -50,6 +57,20 @@ function assertAgentLike(entry: unknown): asserts entry is AgentLike {
 // changes, the cache key must include a hash of the fragments/transformers.
 const materializedAgents = new WeakMap()
 
+/**
+ * Process-level checkpointer shared by every materialized agent. LangGraph
+ * requires a checkpointer + a stable `thread_id` for `interrupt()` to park
+ * graph state and for `new Command({resume})` to replay from the parked
+ * step. The dev/runtime server passes the client-supplied
+ * `metadata.dawn.thread_id` through to `streamAgent`, which forwards it to
+ * `config.configurable.thread_id`.
+ *
+ * Single shared instance is fine for in-process runtimes; revisit if the
+ * runtime ever runs across processes (each would have its own saver and
+ * resume would need a distributed checkpointer like SQLite/Postgres).
+ */
+const sharedCheckpointer = new MemorySaver()
+
 export function composePromptMessages(
   systemPrompt: string,
   promptFragments: readonly PromptFragment[],
@@ -104,6 +125,9 @@ async function materializeAgent(
         ? (state: Record) =>
             composePromptMessages(descriptor.systemPrompt, fragments, state)
         : descriptor.systemPrompt,
+    // Required so `interrupt()` can park graph state and `Command({resume})`
+    // can replay it. Paired with `config.configurable.thread_id`.
+    checkpointer: sharedCheckpointer,
   }
 
   if (stateFields && stateFields.length > 0) {
@@ -135,10 +159,124 @@ export async function materializeAgentGraph(options: {
 }
 
 export interface AgentStreamChunk {
-  readonly type: "token" | "tool_call" | "tool_result" | "done" | (string & {})
+  readonly type: "token" | "tool_call" | "tool_result" | "interrupt" | "done" | (string & {})
   readonly data: unknown
 }
 
+/**
+ * LangGraph 1.x's `interrupt()` throws a `GraphInterrupt` from inside the tool
+ * node. Under `streamEvents` v2 this surfaces as an `on_tool_error` whose
+ * `event.data.error` describes that `GraphInterrupt` — usually already
+ * stringified, with the `{ id, value }` entries we need serialized as a
+ * leading JSON array. The top-level `on_chain_end` for `LangGraph` does NOT
+ * include `__interrupt__` in this code path (that key appears only on the
+ * `invoke`/`stream` return value), so detection must happen at the tool error.
+ *
+ * We still keep the `__interrupt__` extractor for `on_chain_end` as a
+ * defensive fallback in case a future LangGraph version surfaces interrupts
+ * via the chain output too.
+ */
+const INTERRUPT_KEY = "__interrupt__"
+
+interface RawInterruptEntry {
+  readonly value?: unknown
+  readonly id?: string
+  readonly when?: string
+  readonly resumable?: boolean
+}
+
+function extractInterrupts(output: unknown): readonly RawInterruptEntry[] | undefined {
+  if (!output || typeof output !== "object") return undefined
+  const maybe = (output as Record<string, unknown>)[INTERRUPT_KEY]
+  if (!Array.isArray(maybe)) return undefined
+  return maybe as readonly RawInterruptEntry[]
+}
+
+/**
+ * Detects a thrown `GraphInterrupt` surfaced via `on_tool_error`.
+ *
+ * LangGraph's `interrupt()` throws a `GraphInterrupt` whose `.message` is
+ * `JSON.stringify(interrupts)` and whose `.interrupts` array carries the
+ * `{ id, value }` entries. By the time the error reaches `streamEvents`'
+ * `data.error` it has already been stringified — typically into
+ * `[...]\n\nGraphInterrupt: [...]\n    at ...stack` (`[...]` = that JSON).
+ *
+ * We handle three shapes defensively:
+ *   - object with `.name === "GraphInterrupt"` and a non-empty `.interrupts`
+ *     array (in case a future LangGraph version surfaces the live error)
+ *   - object/Error whose string `.message` starts with a JSON array
+ *   - bare string that starts with a JSON array (the stringified form)
+ */
+function extractInterruptsFromError(error: unknown): readonly RawInterruptEntry[] | undefined {
+  if (!error) return undefined
+
+  if (typeof error === "object") {
+    const e = error as { name?: unknown; interrupts?: unknown; message?: unknown }
+    if (e.name === "GraphInterrupt" && Array.isArray(e.interrupts) && e.interrupts.length > 0) {
+      return e.interrupts as readonly RawInterruptEntry[]
+    }
+    if (typeof e.message === "string") {
+      const parsed = parseInterruptStringMessage(e.message)
+      if (parsed) return parsed
+    }
+  }
+
+  if (typeof error === "string") {
+    const parsed = parseInterruptStringMessage(error)
+    if (parsed) return parsed
+  }
+
+  return undefined
+}
+
+/**
+ * Parses the stringified form of a GraphInterrupt's message. The string
+ * begins with `JSON.stringify(interrupts, null, 2)` and is followed by
+ * `\n\nGraphInterrupt: ...\n    at ...` stack metadata. We slice the leading
+ * JSON array at the `]` that closes its opening `[` and parse that slice;
+ * returns undefined for non-array, empty-array, or unparseable input.
+ */
+function parseInterruptStringMessage(text: string): readonly RawInterruptEntry[] | undefined {
+  const trimmed = text.trimStart()
+  if (!trimmed.startsWith("[")) return undefined
+  // Find the closing bracket by tracking `[`/`]` depth outside JSON string
+  // literals — robust against nested arrays and brackets inside strings.
+  let depth = 0
+  let inString = false
+  let escaped = false
+  let end = -1
+  for (let i = 0; i < trimmed.length; i++) {
+    const ch = trimmed[i]
+    if (escaped) {
+      escaped = false
+      continue
+    }
+    if (inString) {
+      if (ch === "\\") escaped = true
+      else if (ch === '"') inString = false
+      continue
+    }
+    if (ch === '"') inString = true
+    else if (ch === "[") depth++
+    else if (ch === "]") {
+      depth--
+      if (depth === 0) {
+        end = i
+        break
+      }
+    }
+  }
+  if (end === -1) return undefined
+  const json = trimmed.slice(0, end + 1)
+  try {
+    const parsed = JSON.parse(json)
+    if (!Array.isArray(parsed) || parsed.length === 0) return undefined
+    return parsed as readonly RawInterruptEntry[]
+  } catch {
+    return undefined
+  }
+}
+
 export interface AgentOptions {
   readonly entry: unknown
   readonly input: unknown
@@ -158,6 +296,14 @@ export interface AgentOptions {
    * drained alongside normal stream chunks (no module-level mutable state).
    */
   readonly subagentResolver?: SubagentResolver
+  /**
+   * Stable per-conversation identifier used as LangGraph's `thread_id`. When
+   * set, the agent-adapter wires it into `config.configurable.thread_id` so
+   * the checkpointer can park interrupted state. Required for resume to work
+   * — without a thread_id, an interrupt ends the stream with no way to
+   * replay.
+   */
+  readonly threadId?: string
 }
 
 export async function executeAgent(options: AgentOptions): Promise {
@@ -235,6 +381,7 @@ export async function* streamAgent(options: AgentOptions): AsyncGenerator 0) {
-    config.configurable = params
+  const configurable: Record<string, unknown> = { ...params }
+  if (options.threadId !== undefined && options.threadId.length > 0) {
+    configurable.thread_id = options.threadId
+  }
+  if (Object.keys(configurable).length > 0) {
+    config.configurable = configurable
   }
 
   return { agentInput, config }
@@ -295,6 +447,7 @@ async function* streamFromRunnable(
   streamTransformers?: readonly StreamTransformer[],
   subagentEvents?: AgentStreamChunk[],
   streamContext?: SubagentStreamContext,
+  threadId?: string,
 ): AsyncGenerator {
   // Drains any pending subagent events queued by the bridge. Called before
   // each normal yield to keep ordering predictable on the single event loop.
@@ -311,7 +464,7 @@ async function* streamFromRunnable(
       options: Record,
     ) => AsyncIterable<{
       event: string
-      data: { chunk?: unknown; output?: unknown }
+      data: { chunk?: unknown; output?: unknown; error?: unknown }
       name: string
     }>
   }
@@ -332,101 +485,193 @@ async function* streamFromRunnable(
     return
   }
 
-  let finalOutput: unknown
-  let hasYielded = false
-  let lastStreamError: Error | undefined
-
-  // Retry the entire stream if it fails before producing any output
-  const maxStreamAttempts = retryConfig?.maxAttempts ?? 3
-  for (let attempt = 0; attempt < maxStreamAttempts; attempt++) {
-    hasYielded = false
-    lastStreamError = undefined
-    finalOutput = undefined
-
-    try {
-      for await (const event of streamable.streamEvents(input, {
-        ...config,
-        version: "v2",
-      })) {
-        // Drain any subagent.* events queued by the bridge's writer before
-        // emitting the next normal stream chunk, so ordering is predictable.
-        yield* drainSubagentEvents()
-        switch (event.event) {
-          case "on_chat_model_stream": {
-            // Suppress while a child subagent run is active — child token
-            // events leak onto the parent's streamEvents listener via
-            // LangChain v2 async-local-storage tracing. The dispatcher
-            // already emits a `subagent.message` envelope for each child
-            // token, so emitting the raw token here would duplicate.
-            if (streamContext && streamContext.activeChildRuns > 0) break
-            const content = (event.data.chunk as { content?: unknown })?.content
-            if (content && typeof content === "string" && content.length > 0) {
-              hasYielded = true
-              yield { type: "token" as const, data: content }
+  // Capture into a typed const so TS narrowing survives across the nested
+  // async-generator closure below. Bind to `streamable` — LangGraph's
+  // Pregel.streamEvents reads `this.config?.recursionLimit`, so calling it
+  // unbound throws "Cannot read properties of undefined (reading 'config')".
+  const streamEventsFn = streamable.streamEvents.bind(streamable)
+
+  // Tracks the most recent invocation's outcome. The outer resume loop
+  // inspects this to decide whether to park + replay or finish.
+  interface PassResult {
+    readonly finalOutput: unknown
+    readonly interrupts: readonly RawInterruptEntry[]
+  }
+
+  // Process a single streamEvents iterator: yield AgentStreamChunks and
+  // return the final output plus any interrupt entries captured from
+  // on_tool_error or the on_chain_end fallback. Shared by the initial and
+  // resume invocations so the chunk-shaping logic stays in one place.
+  async function* processEventStream(
+    invocationInput: unknown,
+    invocationConfig: Record<string, unknown>,
+    allowRetryOnError: boolean,
+  ): AsyncGenerator<AgentStreamChunk, PassResult> {
+    let finalOutput: unknown
+    let capturedInterrupts: readonly RawInterruptEntry[] = []
+    let hasYielded = false
+
+    const maxStreamAttempts = allowRetryOnError ? (retryConfig?.maxAttempts ?? 3) : 1
+
+    for (let attempt = 0; attempt < maxStreamAttempts; attempt++) {
+      hasYielded = false
+      finalOutput = undefined
+      capturedInterrupts = []
+
+      try {
+        for await (const event of streamEventsFn(invocationInput, {
+          ...invocationConfig,
+          version: "v2",
+        })) {
+          yield* drainSubagentEvents()
+          switch (event.event) {
+            case "on_chat_model_stream": {
+              if (streamContext && streamContext.activeChildRuns > 0) break
+              const content = (event.data.chunk as { content?: unknown })?.content
+              if (content && typeof content === "string" && content.length > 0) {
+                hasYielded = true
+                yield { type: "token" as const, data: content }
+              }
+              break
             }
-            break
-          }
-          case "on_tool_start": {
-            hasYielded = true
-            yield {
-              type: "tool_call" as const,
-              data: {
-                name: event.name,
-                input: event.data.chunk ?? event.data.output,
-              },
+            case "on_tool_start": {
+              hasYielded = true
+              yield {
+                type: "tool_call" as const,
+                data: {
+                  name: event.name,
+                  input: event.data.chunk ?? event.data.output,
+                },
+              }
+              break
             }
-            break
-          }
-          case "on_tool_end": {
-            hasYielded = true
-            yield {
-              type: "tool_result" as const,
-              data: { name: event.name, output: event.data.output },
+            case "on_tool_end": {
+              hasYielded = true
+              yield {
+                type: "tool_result" as const,
+                data: { name: event.name, output: event.data.output },
+              }
+              for (const transformer of streamTransformers ?? []) {
+                if (transformer.observes !== "tool_result") continue
+                for await (const out of transformer.transform({
+                  toolName: event.name,
+                  toolOutput: event.data.output,
+                })) {
+                  yield {
+                    type: out.event as AgentStreamChunk["type"],
+                    data: out.data,
+                  }
+                }
+              }
+              break
             }
-            for (const transformer of streamTransformers ?? []) {
-              if (transformer.observes !== "tool_result") continue
-              for await (const out of transformer.transform({
-                toolName: event.name,
-                toolOutput: event.data.output,
-              })) {
-                yield {
-                  type: out.event as AgentStreamChunk["type"],
-                  data: out.data,
+            case "on_tool_error": {
+              // LangGraph's interrupt() throws a GraphInterrupt from inside
+              // the tool node. The error bubbles through streamEvents as
+              // on_tool_error with the (usually stringified) error on data.error.
+              // LangGraph itself catches it to park the checkpointer state,
+              // so the outer iterator continues normally afterwards.
+              const interrupts = extractInterruptsFromError(event.data.error)
+              if (interrupts && interrupts.length > 0) {
+                capturedInterrupts = interrupts
+                for (const entry of interrupts) {
+                  hasYielded = true
+                  yield {
+                    type: "interrupt" as const,
+                    // The capability's interrupt() payload is wrapped in
+                    // entry.value by LangGraph — surface it verbatim so the
+                    // SSE consumer sees the original {interruptId, kind, ...}
+                    // envelope the workspace capability emitted.
+                    data: entry.value,
+                  }
                 }
               }
+              break
             }
-            break
-          }
-          case "on_chain_end": {
-            if (event.name === "LangGraph") {
-              finalOutput = event.data.output
+            case "on_chain_end": {
+              if (event.name === "LangGraph") {
+                finalOutput = event.data.output
+                const interrupts = extractInterrupts(event.data.output)
+                if (interrupts && interrupts.length > 0) {
+                  capturedInterrupts = interrupts
+                  for (const entry of interrupts) {
+                    hasYielded = true
+                    yield {
+                      type: "interrupt" as const,
+                      // The capability's interrupt() payload is wrapped in
+                      // entry.value by LangGraph — surface it verbatim so the
+                      // SSE consumer sees the original {interruptId, kind, ...}
+                      // envelope the workspace capability emitted.
+                      data: entry.value,
+                    }
+                  }
+                }
+              }
+              break
             }
-            break
           }
         }
+        // Stream completed successfully
+        return { finalOutput, interrupts: capturedInterrupts }
+      } catch (error) {
+        const err = error instanceof Error ? error : new Error(String(error))
+        if (hasYielded || !isRetryableError(error) || attempt === maxStreamAttempts - 1) {
+          throw err
+        }
+        const delay = Math.min(1000 * 2 ** attempt + Math.random() * 500, 10_000)
+        await new Promise((resolve) => setTimeout(resolve, delay))
       }
+    }
+    // Unreachable: the loop either returns or throws.
+    return { finalOutput, interrupts: capturedInterrupts }
+  }
 
-      // Stream completed successfully
+  // Initial invocation. Retries on transient errors before any chunk yields.
+  let pass = yield* processEventStream(input, config, /* allowRetryOnError */ true)
+
+  // Resume loop. Each interrupt → park → await decision → re-invoke with
+  // Command({resume}). The resume invocation may itself interrupt (e.g. a
+  // capability gates another tool call mid-run) — loop until either no
+  // interrupt remains or we cannot resume (no threadId / no resolved
+  // decision).
+  while (pass.interrupts.length > 0) {
+    if (!threadId) {
+      // Without a thread_id there is no checkpointer key to replay from;
+      // the parked state will be discarded. End the stream cleanly so the
+      // SSE consumer can surface the interrupt to the user, but they have
+      // no way to resume this run.
       break
-    } catch (error) {
-      lastStreamError = error instanceof Error ? error : new Error(String(error))
-
-      // If we already yielded chunks, we can't retry (client has partial data)
-      // Or if the error isn't retryable, rethrow immediately
-      if (hasYielded || !isRetryableError(error) || attempt === maxStreamAttempts - 1) {
-        throw lastStreamError
-      }
-
-      // Backoff before retry
-      const delay = Math.min(1000 * 2 ** attempt + Math.random() * 500, 10_000)
-      await new Promise((resolve) => setTimeout(resolve, delay))
     }
+
+    // We only resume the first interrupt — if a capability ever fans out
+    // multiple parallel interrupts in a single step, this becomes lossy
+    // and we'd need to await N decisions. None of today's capabilities do
+    // that; revisit when one does.
+    const entry = pass.interrupts[0]
+    const interruptId =
+      (typeof entry?.id === "string" ? entry.id : undefined) ?? `generated-${Date.now()}`
+
+    const decision = await new Promise<ResumeDecision>((resolve) => {
+      const pending: PendingInterrupt = { interruptId, resolve }
+      setPending(threadId, pending)
+    })
+    clearPending(threadId)
+
+    // Resume invocations reuse the same config (same thread_id, signal,
+    // configurable). Retry-on-error is disabled because we have already
+    // yielded the interrupt chunk; if the resume call fails we surface
+    // the error rather than silently restarting.
+    pass = yield* processEventStream(
+      new Command({ resume: decision }),
+      config,
+      /* allowRetryOnError */ false,
+    )
   }
 
   // Final drain in case the last tool call was the bridged task tool —
   // its events would otherwise be stranded after the stream ends.
   yield* drainSubagentEvents()
-  yield { type: "done", data: finalOutput }
+  yield { type: "done", data: pass.finalOutput }
 }
 
 interface InputMessage {
diff --git a/packages/langchain/src/index.ts b/packages/langchain/src/index.ts
index 76105b15..d0f4d6c7 100644
--- a/packages/langchain/src/index.ts
+++ b/packages/langchain/src/index.ts
@@ -12,6 +12,13 @@ export {
 export { chainAdapter } from "./chain-adapter.js"
 export { createChatModel } from "./chat-model-factory.js"
 export { inferProvider, resolveProvider } from "./model-provider-resolver.js"
+export type { PendingInterrupt, ResumeDecision } from "./pending-interrupts.js"
+export {
+  __resetPendingForTests,
+  clearPending,
+  getPending,
+  setPending,
+} from "./pending-interrupts.js"
 export type { RetryOptions } from "./retry.js"
 export { isRetryableError, withRetry } from "./retry.js"
 export { materializeStateSchema } from "./state-adapter.js"
diff --git a/packages/langchain/src/pending-interrupts.ts b/packages/langchain/src/pending-interrupts.ts
new file mode 100644
index 00000000..09dd61bb
--- /dev/null
+++ b/packages/langchain/src/pending-interrupts.ts
@@ -0,0 +1,43 @@
+/**
+ * Module-level registry of parked LangGraph interrupts, keyed by thread_id.
+ *
+ * Lives in `@dawn-ai/langchain` so that the agent-adapter (which detects
+ * the interrupt and parks the stream) and the CLI's resume endpoint (which
+ * dispatches the user's decision) both reference the same map. Putting it
+ * here avoids a circular dep cli <-> langchain.
+ *
+ * The decision string ("once" | "always" | "deny") is the value passed to
+ * `new Command({resume})` when the agent-adapter re-invokes the graph.
+ * The langchain package intentionally does not depend on
+ * `@dawn-ai/permissions`; the resume endpoint validates the decision shape
+ * before calling `resolve()`.
+ */
+
+export type ResumeDecision = "once" | "always" | "deny"
+
+export interface PendingInterrupt {
+  readonly interruptId: string
+  /** Settles the Promise awaited by the parked agent-adapter generator. */
+  resolve(decision: ResumeDecision): void
+}
+
+const pendingByThread = new Map<string, PendingInterrupt>()
+
+export function getPending(threadId: string): PendingInterrupt | undefined {
+  return pendingByThread.get(threadId)
+}
+
+export function setPending(threadId: string, entry: PendingInterrupt): void {
+  pendingByThread.set(threadId, entry)
+}
+
+export function clearPending(threadId: string): void {
+  pendingByThread.delete(threadId)
+}
+
+/**
+ * Test-only: reset all entries.
+ */
+export function __resetPendingForTests(): void {
+  pendingByThread.clear()
+}
diff --git a/packages/langchain/test/agent-adapter-interrupt.test.ts b/packages/langchain/test/agent-adapter-interrupt.test.ts
new file mode 100644
index 00000000..5b1c0c4d
--- /dev/null
+++ b/packages/langchain/test/agent-adapter-interrupt.test.ts
@@ -0,0 +1,335 @@
+import { Command } from "@langchain/langgraph"
+import { afterEach, describe, expect, test } from "vitest"
+import { streamAgent } from "../src/agent-adapter.js"
+import { __resetPendingForTests, getPending } from "../src/pending-interrupts.js"
+
+/**
+ * These tests mimic the real LangGraph 1.x streamEvents v2 shape:
+ *
+ *   When a tool calls `interrupt(payload)` inside a node, LangGraph throws a
+ *   `GraphInterrupt`. The tool error surfaces via streamEvents as an
+ *   `on_tool_error` event whose `data.error` is a *stringified* form of the
+ *   error — `JSON.stringify(interrupts, null, 2) + "\n\nGraphInterrupt: ..."`.
+ *   The `on_chain_end` for the top-level `LangGraph` chain that follows does
+ *   NOT include `__interrupt__` in this code path (that key only appears on
+ *   the invoke/stream return value, not in streamEvents).
+ *
+ * The adapter must detect the interrupt from the `on_tool_error` event by
+ * parsing the leading JSON array out of the error string. The legacy
+ * `__interrupt__`-on-chain-end path is still supported as a defensive
+ * fallback in case a future LangGraph version surfaces interrupts that way.
+ */
+
+/**
+ * Builds the error string these tests feed to the adapter: the pretty-printed
+ * interrupt array, a blank line, then a `GraphInterrupt:` line repeating the
+ * same JSON and followed by two fixed stack frames.
+ */
+function makeInterruptErrorString(entries: ReadonlyArray<{ id?: string; value: unknown }>): string {
+  const serialized = JSON.stringify(entries, null, 2)
+  const stackFrames = [
+    "    at interrupt (file:///.../interrupt.js:70:8)",
+    "    at processTicksAndRejections (node:internal/process/task_queues:105:5)",
+  ]
+  return [`${serialized}\n\nGraphInterrupt: ${serialized}`, ...stackFrames].join("\n")
+}
+
+describe("streamAgent — interrupt propagation", () => {
+  afterEach(() => {
+    __resetPendingForTests()
+  })
+
+  test("yields {type: 'interrupt', data} when on_tool_error surfaces a stringified GraphInterrupt", async () => {
+    const interruptPayload = {
+      interruptId: "perm-test-1",
+      type: "permission-request",
+      kind: "command",
+      detail: { command: "ls", suggestedPattern: "ls" },
+    }
+
+    const mockRunnable = {
+      invoke: async () => ({}),
+      // `Record` needs both type arguments; a bare `Record` does not type-check.
+      streamEvents: async function* (_input: unknown, _options: Record<string, unknown>) {
+        yield {
+          event: "on_tool_start",
+          name: "runBash",
+          data: { input: { command: "ls" } },
+        }
+        yield {
+          event: "on_tool_error",
+          name: "runBash",
+          data: {
+            error: makeInterruptErrorString([{ id: "abc", value: interruptPayload }]),
+          },
+        }
+        // LangGraph keeps the iterator alive after parking — the final
+        // on_chain_end fires with the regular output (no __interrupt__).
+        yield {
+          event: "on_chain_end",
+          name: "LangGraph",
+          data: { output: { messages: [] } },
+        }
+      },
+    }
+
+    const chunks: Array<{ type: string; data: unknown }> = []
+    for await (const chunk of streamAgent({
+      entry: mockRunnable,
+      input: { messages: [{ role: "user", content: "test" }] },
+      routeParamNames: [],
+      signal: new AbortController().signal,
+      tools: [],
+    })) {
+      chunks.push({ type: chunk.type, data: chunk.data })
+    }
+
+    const interruptChunks = chunks.filter((c) => c.type === "interrupt")
+    expect(interruptChunks).toHaveLength(1)
+    expect(interruptChunks[0]?.data).toEqual(interruptPayload)
+
+    // The final `done` chunk should still fire (no threadId → no resume).
+    const doneChunks = chunks.filter((c) => c.type === "done")
+    expect(doneChunks).toHaveLength(1)
+  })
+
+  test("yields interrupt when GraphInterrupt is surfaced as a live error object", async () => {
+    // Defensive: if a future LangGraph version stops stringifying the error
+    // and passes the live GraphInterrupt instance through, we must still
+    // detect it via .name + .interrupts.
+    const interruptPayload = { interruptId: "live-1", type: "permission-request" }
+    const liveError = Object.assign(new Error("GraphInterrupt"), {
+      name: "GraphInterrupt",
+      interrupts: [{ id: "live-a", value: interruptPayload }],
+    })
+
+    const mockRunnable = {
+      invoke: async () => ({}),
+      // `Record` needs both type arguments; a bare `Record` does not type-check.
+      streamEvents: async function* (_input: unknown, _options: Record<string, unknown>) {
+        yield {
+          event: "on_tool_error",
+          name: "runBash",
+          data: { error: liveError },
+        }
+      },
+    }
+
+    const chunks: Array<{ type: string; data: unknown }> = []
+    for await (const chunk of streamAgent({
+      entry: mockRunnable,
+      input: { messages: [{ role: "user", content: "test" }] },
+      routeParamNames: [],
+      signal: new AbortController().signal,
+      tools: [],
+    })) {
+      chunks.push({ type: chunk.type, data: chunk.data })
+    }
+
+    expect(chunks.filter((c) => c.type === "interrupt")).toHaveLength(1)
+    expect(chunks.find((c) => c.type === "interrupt")?.data).toEqual(interruptPayload)
+  })
+
+  test("yields interrupt when __interrupt__ appears on on_chain_end output (legacy fallback)", async () => {
+    const interruptPayload = { interruptId: "legacy-1", type: "permission-request" }
+    const mockRunnable = {
+      invoke: async () => ({}),
+      // `Record` needs both type arguments; a bare `Record` does not type-check.
+      streamEvents: async function* (_input: unknown, _options: Record<string, unknown>) {
+        // Only event emitted: the interrupt rides on the chain-end output.
+        yield {
+          event: "on_chain_end",
+          name: "LangGraph",
+          data: {
+            output: { __interrupt__: [{ value: interruptPayload, id: "legacy-a" }] },
+          },
+        }
+      },
+    }
+
+    const chunks: Array<{ type: string; data: unknown }> = []
+    for await (const chunk of streamAgent({
+      entry: mockRunnable,
+      input: { messages: [{ role: "user", content: "test" }] },
+      routeParamNames: [],
+      signal: new AbortController().signal,
+      tools: [],
+    })) {
+      chunks.push({ type: chunk.type, data: chunk.data })
+    }
+
+    expect(chunks.filter((c) => c.type === "interrupt")).toHaveLength(1)
+    expect(chunks.find((c) => c.type === "interrupt")?.data).toEqual(interruptPayload)
+  })
+
+  test("does not yield an interrupt chunk when no interrupt is surfaced", async () => {
+    const mockRunnable = {
+      invoke: async () => ({ messages: [] }),
+      // `Record` needs both type arguments; a bare `Record` does not type-check.
+      streamEvents: async function* (_input: unknown, _options: Record<string, unknown>) {
+        // A plain chain end: regular output, no `__interrupt__` key.
+        yield {
+          event: "on_chain_end",
+          name: "LangGraph",
+          data: { output: { messages: [{ content: "hi" }] } },
+        }
+      },
+    }
+
+    const chunks: Array<{ type: string }> = []
+    for await (const chunk of streamAgent({
+      entry: mockRunnable,
+      input: { messages: [{ role: "user", content: "test" }] },
+      routeParamNames: [],
+      signal: new AbortController().signal,
+      tools: [],
+    })) {
+      chunks.push({ type: chunk.type })
+    }
+
+    expect(chunks.filter((c) => c.type === "interrupt")).toHaveLength(0)
+  })
+
+  test("does not treat ordinary tool errors (non-GraphInterrupt) as interrupts", async () => {
+    const mockRunnable = {
+      invoke: async () => ({}),
+      // `Record` needs both type arguments; a bare `Record` does not type-check.
+      streamEvents: async function* (_input: unknown, _options: Record<string, unknown>) {
+        // The error string has no leading JSON array and no GraphInterrupt marker.
+        yield {
+          event: "on_tool_error",
+          name: "runBash",
+          data: { error: "Error: boom\n    at foo (bar.js:1:1)" },
+        }
+        yield {
+          event: "on_chain_end",
+          name: "LangGraph",
+          data: { output: { messages: [] } },
+        }
+      },
+    }
+
+    const chunks: Array<{ type: string }> = []
+    for await (const chunk of streamAgent({
+      entry: mockRunnable,
+      input: { messages: [{ role: "user", content: "test" }] },
+      routeParamNames: [],
+      signal: new AbortController().signal,
+      tools: [],
+    })) {
+      chunks.push({ type: chunk.type })
+    }
+
+    expect(chunks.filter((c) => c.type === "interrupt")).toHaveLength(0)
+  })
+
+  test("resume: parks on interrupt, re-invokes with Command({resume}) when pending.resolve fires", async () => {
+    const interruptPayload = {
+      interruptId: "perm-resume-1",
+      type: "permission-request",
+      kind: "command",
+      detail: { command: "ls", suggestedPattern: "ls" },
+    }
+
+    // Mock graph: first streamEvents call emits the stringified GraphInterrupt
+    // via on_tool_error; the resume call emits a normal token + done.
+    let callCount = 0
+    let observedResumeInput: unknown
+    const mockRunnable = {
+      invoke: async () => ({ messages: [] }),
+      // `Record` needs both type arguments; a bare `Record` does not type-check.
+      streamEvents: async function* (input: unknown, _options: Record<string, unknown>) {
+        callCount++
+        if (callCount === 1) {
+          yield {
+            event: "on_tool_error",
+            name: "runBash",
+            data: {
+              error: makeInterruptErrorString([{ id: "abc", value: interruptPayload }]),
+            },
+          }
+          yield {
+            event: "on_chain_end",
+            name: "LangGraph",
+            data: { output: { messages: [] } },
+          }
+          return
+        }
+        observedResumeInput = input
+        yield {
+          event: "on_chat_model_stream",
+          name: "model",
+          data: { chunk: { content: "ok" } },
+        }
+        yield {
+          event: "on_chain_end",
+          name: "LangGraph",
+          data: { output: { messages: [{ content: "done" }] } },
+        }
+      },
+    }
+
+    const threadId = "thread-resume-test"
+
+    const chunks: Array<{ type: string; data?: unknown }> = []
+    // Consume in the background: the generator parks until the entry resolves.
+    const consumer = (async () => {
+      for await (const chunk of streamAgent({
+        entry: mockRunnable,
+        input: { messages: [{ role: "user", content: "test" }] },
+        routeParamNames: [],
+        signal: new AbortController().signal,
+        threadId,
+        tools: [],
+      })) {
+        chunks.push({ type: chunk.type, data: chunk.data })
+      }
+    })()
+
+    // Poll for the pending entry to appear after the interrupt yields.
+    for (let i = 0; i < 50 && !getPending(threadId); i++) {
+      await new Promise((r) => setTimeout(r, 0))
+    }
+
+    const pending = getPending(threadId)
+    expect(pending).toBeDefined()
+    expect(pending?.interruptId).toBe("abc")
+
+    pending?.resolve("once")
+    await consumer
+
+    expect(callCount).toBe(2)
+    expect(observedResumeInput).toBeInstanceOf(Command)
+    expect((observedResumeInput as Command).resume).toBe("once")
+
+    expect(getPending(threadId)).toBeUndefined()
+
+    const types = chunks.map((c) => c.type)
+    expect(types).toContain("interrupt")
+    expect(types).toContain("token")
+    expect(types[types.length - 1]).toBe("done")
+  })
+
+  test("resume without threadId ends the stream after interrupt (no replay)", async () => {
+    const interruptPayload = { interruptId: "p-noresume", type: "x" }
+    let callCount = 0
+    const mockRunnable = {
+      invoke: async () => ({}),
+      // `Record` needs both type arguments; a bare `Record` does not type-check.
+      streamEvents: async function* (_input: unknown, _options: Record<string, unknown>) {
+        callCount++
+        yield {
+          event: "on_tool_error",
+          name: "runBash",
+          data: {
+            error: makeInterruptErrorString([{ id: "x", value: interruptPayload }]),
+          },
+        }
+        yield {
+          event: "on_chain_end",
+          name: "LangGraph",
+          data: { output: { messages: [] } },
+        }
+      },
+    }
+
+    const chunks: Array<{ type: string }> = []
+    for await (const chunk of streamAgent({
+      entry: mockRunnable,
+      input: { messages: [{ role: "user", content: "test" }] },
+      routeParamNames: [],
+      signal: new AbortController().signal,
+      tools: [],
+      // intentionally no threadId
+    })) {
+      chunks.push({ type: chunk.type })
+    }
+
+    // A single streamEvents call proves the graph was not re-invoked.
+    expect(callCount).toBe(1)
+    expect(chunks.filter((c) => c.type === "interrupt")).toHaveLength(1)
+    expect(chunks[chunks.length - 1]?.type).toBe("done")
+  })
+})
diff --git a/packages/permissions/package.json b/packages/permissions/package.json
new file mode 100644
index 00000000..43bd2baa
--- /dev/null
+++ b/packages/permissions/package.json
@@ -0,0 +1,42 @@
+{
+  "name": "@dawn-ai/permissions",
+  "version": "0.1.8",
+  "private": false,
+  "type": "module",
+  "license": "MIT",
+  "homepage": "https://github.com/cacheplane/dawnai/tree/main/packages/permissions#readme",
+  "repository": {
+    "type": "git",
+    "url": "git+https://github.com/cacheplane/dawnai.git",
+    "directory": "packages/permissions"
+  },
+  "bugs": {
+    "url": "https://github.com/cacheplane/dawnai/issues"
+  },
+  "engines": {
+    "node": ">=22.12.0"
+  },
+  "files": [
+    "dist"
+  ],
+  "types": "./dist/index.d.ts",
+  "exports": {
+    ".": {
+      "types": "./dist/index.d.ts",
+      "default": "./dist/index.js"
+    }
+  },
+  "publishConfig": {
+    "access": "public"
+  },
+  "scripts": {
+    "build": "tsc -b tsconfig.json",
+    "lint": "biome check --config-path ../config-biome/biome.json package.json src tsconfig.json vitest.config.ts",
+    "test": "vitest --run --config vitest.config.ts --passWithNoTests",
+    "typecheck": "tsc --noEmit"
+  },
+  "devDependencies": {
+    "@dawn-ai/config-typescript": "workspace:*",
+    "@types/node": "25.6.0"
+  }
+}
diff --git a/packages/permissions/src/index.ts b/packages/permissions/src/index.ts
new file mode 100644
index 00000000..dfbc1802
--- /dev/null
+++ b/packages/permissions/src/index.ts
@@ -0,0 +1,12 @@
+export { matchPermission } from "./pattern-matching.js"
+export { createPermissionsStore } from "./permissions-store.js"
+export { suggestedCommandPattern, suggestedPathPattern } from "./suggested-pattern.js"
+export type {
+  CommandDetail,
+  PathDetail,
+  PermissionDecision,
+  PermissionMode,
+  PermissionRequest,
+  PermissionsFile,
+  PermissionsStore,
+} from "./types.js"
diff --git a/packages/permissions/src/pattern-matching.ts b/packages/permissions/src/pattern-matching.ts
new file mode 100644
index 00000000..9d2ca5d9
--- /dev/null
+++ b/packages/permissions/src/pattern-matching.ts
@@ -0,0 +1,26 @@
+type PatternMap = Readonly<Record<string, readonly string[]>>
+
+/**
+ * Match a tool+candidate against allow + deny pattern maps.
+ *
+ * Semantics:
+ *   - deny wins over allow
+ *   - prefix matching: `candidate.startsWith(pattern)`. This is a raw string
+ *     prefix, so `ls` also matches `lsof`; a pattern that needs a boundary
+ *     must spell it out (trailing space or slash).
+ *   - empty allow patterns are ignored: `""` is a prefix of every candidate
+ *     and would otherwise grant blanket access. Empty deny patterns still
+ *     match, because denying everything fails closed.
+ *   - no entries for tool → "unknown"
+ *
+ * @param tool - Key looked up in both maps.
+ * @param candidate - Command line or path being checked.
+ * @param allow - Allow patterns per tool.
+ * @param deny - Deny patterns per tool.
+ * @returns "deny" if any deny pattern matches, else "allow" if any non-empty
+ *   allow pattern matches, else "unknown".
+ */
+export function matchPermission(
+  tool: string,
+  candidate: string,
+  allow: PatternMap,
+  deny: PatternMap,
+): "allow" | "deny" | "unknown" {
+  const denyList = deny[tool] ?? []
+  if (denyList.some((pattern) => candidate.startsWith(pattern))) return "deny"
+
+  const allowList = allow[tool] ?? []
+  const allowed = allowList.some(
+    (pattern) => pattern.length > 0 && candidate.startsWith(pattern),
+  )
+  return allowed ? "allow" : "unknown"
+}
diff --git a/packages/permissions/src/permissions-store.ts b/packages/permissions/src/permissions-store.ts
new file mode 100644
index 00000000..e79feb93
--- /dev/null
+++ b/packages/permissions/src/permissions-store.ts
@@ -0,0 +1,146 @@
+import { existsSync } from "node:fs"
+import { mkdir, readFile, writeFile } from "node:fs/promises"
+import { join } from "node:path"
+
+import { matchPermission } from "./pattern-matching.js"
+import type { PermissionMode, PermissionsFile, PermissionsStore } from "./types.js"
+
+const PERMISSIONS_DIR = ".dawn"
+const PERMISSIONS_FILE = "permissions.json"
+
+/** Inputs to `createPermissionsStore`. */
+interface CreateOptions {
+  /** Directory that holds `.dawn/permissions.json` and `.gitignore`. */
+  readonly appRoot: string
+  /** Config-seeded allow/deny lists; `undefined` seeds nothing. */
+  readonly config: PermissionsFile | undefined
+  /** Mode exposed on the store; decides which lists `match` consults. */
+  readonly mode: PermissionMode
+}
+
+/** Tool name → patterns, in the mutable form the store keeps internally. */
+type MutableMap = Record<string, string[]>
+
+/** In-memory pattern lists, kept separately per source (config vs. runtime file). */
+interface State {
+  configAllow: MutableMap
+  configDeny: MutableMap
+  runtimeAllow: MutableMap
+  runtimeDeny: MutableMap
+}
+
+/** Builds a state whose four pattern maps are all empty, fresh objects. */
+function emptyState(): State {
+  return { configAllow: {}, configDeny: {}, runtimeAllow: {}, runtimeDeny: {} }
+}
+
+/**
+ * Copies a pattern map one level deep: the result is a new object whose
+ * arrays are new arrays, so mutating it never touches `src`.
+ */
+function cloneMap(src: Readonly<Record<string, readonly string[]>>): MutableMap {
+  const out: MutableMap = {}
+  for (const [tool, patterns] of Object.entries(src)) {
+    out[tool] = [...patterns]
+  }
+  return out
+}
+
+/**
+ * Builds the pattern map consulted for one side (allow or deny) under `mode`.
+ *
+ *   - "bypass": nothing is consulted, so the result is empty.
+ *   - "interactive": config patterns followed by runtime patterns, per tool.
+ *   - any other mode: config patterns only.
+ *
+ * Every array in the result is a fresh copy, so callers cannot mutate state.
+ */
+function effectivePatterns(
+  configMap: Readonly<Record<string, readonly string[]>>,
+  runtimeMap: Readonly<Record<string, readonly string[]>>,
+  mode: PermissionMode,
+): Record<string, string[]> {
+  if (mode === "bypass") return {}
+  const out: Record<string, string[]> = {}
+  for (const [tool, patterns] of Object.entries(configMap)) out[tool] = [...patterns]
+  if (mode === "interactive") {
+    for (const [tool, patterns] of Object.entries(runtimeMap)) {
+      out[tool] = [...(out[tool] ?? []), ...patterns]
+    }
+  }
+  return out
+}
+
+/** Allow patterns in effect for `mode`, built from the state's allow maps. */
+function effectiveAllow(state: State, mode: PermissionMode): Record<string, string[]> {
+  return effectivePatterns(state.configAllow, state.runtimeAllow, mode)
+}
+
+/** Deny patterns in effect for `mode`, built from the state's deny maps. */
+function effectiveDeny(state: State, mode: PermissionMode): Record<string, string[]> {
+  return effectivePatterns(state.configDeny, state.runtimeDeny, mode)
+}
+
+/**
+ * Creates the permissions store for an app.
+ *
+ * Config-seeded lists are copied in immediately. The runtime file at
+ * `<appRoot>/.dawn/permissions.json` is read only by `load()`, and only in
+ * "interactive" mode.
+ *
+ * @param opts - App root, optional config-seeded lists, and the active mode.
+ * @returns A store exposing `mode`, `match`, `load`, and `addAllow`.
+ */
+export function createPermissionsStore(opts: CreateOptions): PermissionsStore {
+  const { appRoot, config, mode } = opts
+  const state = emptyState()
+  if (config) {
+    state.configAllow = cloneMap(config.allow)
+    state.configDeny = cloneMap(config.deny)
+  }
+
+  // Tail of the chain of addAllow jobs; each job starts once the previous settles.
+  let writeQueue: Promise<void> = Promise.resolve()
+
+  /**
+   * Validates one pattern map read from the runtime file.
+   *
+   * @returns A fresh copy of the map, or `undefined` when the field is absent.
+   * @throws Error when the field is not an object of string arrays. Without
+   *   this check a string value would be spread into single characters and
+   *   a non-iterable value would surface as a bare TypeError.
+   */
+  function readPatternMap(value: unknown, field: "allow" | "deny"): MutableMap | undefined {
+    if (value === undefined || value === null) return undefined
+    if (typeof value !== "object" || Array.isArray(value)) {
+      throw new Error(`Malformed permissions.json: "${field}" must be an object`)
+    }
+    const out: MutableMap = {}
+    for (const [tool, patterns] of Object.entries(value)) {
+      if (!Array.isArray(patterns) || patterns.some((p) => typeof p !== "string")) {
+        throw new Error(
+          `Malformed permissions.json: "${field}.${tool}" must be an array of strings`,
+        )
+      }
+      out[tool] = [...patterns]
+    }
+    return out
+  }
+
+  /**
+   * Reads `.dawn/permissions.json` into the runtime allow/deny maps.
+   * A missing file is not an error; an unreadable or malformed one throws.
+   */
+  async function loadRuntimeFile(): Promise<void> {
+    const filePath = join(appRoot, PERMISSIONS_DIR, PERMISSIONS_FILE)
+    if (!existsSync(filePath)) return
+    let raw: string
+    try {
+      raw = await readFile(filePath, "utf8")
+    } catch (err) {
+      throw new Error(`Failed to read permissions.json: ${(err as Error).message}`)
+    }
+    let parsed: unknown
+    try {
+      parsed = JSON.parse(raw)
+    } catch (err) {
+      throw new Error(`Malformed permissions.json: ${(err as Error).message}`)
+    }
+    // `null`, arrays and primitives are valid JSON but not a permissions file.
+    if (typeof parsed !== "object" || parsed === null || Array.isArray(parsed)) {
+      throw new Error("Malformed permissions.json: top-level value must be an object")
+    }
+    const file = parsed as { allow?: unknown; deny?: unknown }
+    // Validate both maps before assigning so a bad file leaves state untouched.
+    const allow = readPatternMap(file.allow, "allow")
+    const deny = readPatternMap(file.deny, "deny")
+    if (allow) state.runtimeAllow = allow
+    if (deny) state.runtimeDeny = deny
+  }
+
+  /** Writes the runtime allow/deny maps to `.dawn/permissions.json`, creating the directory. */
+  async function persistRuntimeFile(): Promise<void> {
+    const dir = join(appRoot, PERMISSIONS_DIR)
+    await mkdir(dir, { recursive: true })
+    const file: PermissionsFile = {
+      version: 1,
+      allow: state.runtimeAllow,
+      deny: state.runtimeDeny,
+    }
+    await writeFile(join(dir, PERMISSIONS_FILE), `${JSON.stringify(file, null, 2)}\n`, "utf8")
+  }
+
+  /**
+   * Makes sure `<appRoot>/.gitignore` has a `.dawn/` line, creating the file
+   * when it does not exist and leaving it untouched when the line is present.
+   */
+  async function ensureGitignoreEntry(): Promise<void> {
+    const gitignorePath = join(appRoot, ".gitignore")
+    let content = ""
+    if (existsSync(gitignorePath)) {
+      content = await readFile(gitignorePath, "utf8")
+      if (content.split("\n").some((line) => line.trim() === ".dawn/")) return
+      // Keep the new entry on its own line when the file lacks a final newline.
+      if (!content.endsWith("\n") && content.length > 0) content += "\n"
+      content += ".dawn/\n"
+    } else {
+      content = ".dawn/\n"
+    }
+    await writeFile(gitignorePath, content, "utf8")
+  }
+
+  return {
+    mode,
+    match(tool: string, candidate: string) {
+      return matchPermission(
+        tool,
+        candidate,
+        effectiveAllow(state, mode),
+        effectiveDeny(state, mode),
+      )
+    },
+    async load() {
+      if (mode === "interactive") {
+        await loadRuntimeFile()
+      }
+    },
+    async addAllow(tool: string, pattern: string) {
+      const job = async () => {
+        const list = state.runtimeAllow[tool] ?? []
+        if (!list.includes(pattern)) list.push(pattern)
+        state.runtimeAllow[tool] = list
+        await persistRuntimeFile()
+        await ensureGitignoreEntry()
+      }
+      // Run after the previous job whether it succeeded or failed, so one
+      // failed write does not block later ones; awaiting the new tail
+      // surfaces this job's own failure to this caller.
+      writeQueue = writeQueue.then(job, job)
+      await writeQueue
+    },
+  }
+}
diff --git a/packages/permissions/src/suggested-pattern.ts b/packages/permissions/src/suggested-pattern.ts
new file mode 100644
index 00000000..cacab8ec
--- /dev/null
+++ b/packages/permissions/src/suggested-pattern.ts
@@ -0,0 +1,22 @@
+import { dirname } from "node:path"
+
+/**
+ * Suggests an allow-pattern for a shell command: its first two
+ * whitespace-separated tokens joined by a single space, or the only token
+ * when there is just one. Blank input yields the empty string.
+ */
+export function suggestedCommandPattern(command: string): string {
+  // Splitting an empty (post-trim) string yields [""], so `first` is "" there.
+  const [first = "", second] = command.trim().split(/\s+/)
+  return second === undefined ? first : `${first} ${second}`
+}
+
+/**
+ * Suggests an allow-pattern for a filesystem path: the path itself when it
+ * already ends with a slash, otherwise its parent directory (per
+ * `path.dirname`) with a trailing slash.
+ */
+export function suggestedPathPattern(path: string): string {
+  if (path.endsWith("/")) {
+    return path
+  }
+  const parent = dirname(path)
+  // The root is returned as-is rather than gaining a second slash.
+  if (parent === "/") {
+    return "/"
+  }
+  return `${parent}/`
+}
diff --git a/packages/permissions/src/types.ts b/packages/permissions/src/types.ts
new file mode 100644
index 00000000..dcfff901
--- /dev/null
+++ b/packages/permissions/src/types.ts
@@ -0,0 +1,50 @@
+/**
+ * Public types for the Dawn HITL permissions system.
+ *
+ * The workspace capability calls into a `PermissionsStore` before
+ * invoking its filesystem/exec backends. The store consults the
+ * runtime file at .dawn/permissions.json plus the config-seeded
+ * allow/deny lists and returns one of three decisions: "allow",
+ * "deny", or "unknown". On "unknown" in interactive mode the
+ * capability emits LangGraph's `interrupt()` with a `PermissionRequest`
+ * payload; the resume mechanism returns a `PermissionDecision`.
+ */
+
+/**
+ * How the store resolves permissions:
+ *   - "interactive": config-seeded lists plus the runtime file are consulted.
+ *   - "non-interactive": only config-seeded lists are consulted.
+ *   - "bypass": no lists are consulted, so every match is "unknown".
+ */
+export type PermissionMode = "interactive" | "non-interactive" | "bypass"
+
+/** Decision returned by the resume mechanism for a `PermissionRequest`. */
+export type PermissionDecision = "once" | "always" | "deny"
+
+/** Allow/deny lists as stored in `.dawn/permissions.json` and seeded from config. */
+export interface PermissionsFile {
+  readonly version: 1
+  /** Allowed prefix patterns, keyed by tool name. */
+  readonly allow: Readonly<Record<string, readonly string[]>>
+  /** Denied prefix patterns, keyed by tool name. */
+  readonly deny: Readonly<Record<string, readonly string[]>>
+}
+
+/** Request detail for a shell command. */
+export interface CommandDetail {
+  readonly command: string
+  /** Pattern offered to the user as the default allow entry. */
+  readonly suggestedPattern: string
+}
+
+/** Request detail for a filesystem operation. */
+export interface PathDetail {
+  readonly path: string
+  readonly operation: "readFile" | "writeFile" | "listDir"
+  /** Pattern offered to the user as the default allow entry. */
+  readonly suggestedPattern: string
+}
+
+/** Payload the workspace capability passes to LangGraph's `interrupt()`. */
+export interface PermissionRequest {
+  readonly interruptId: string
+  /** Selects the shape of `detail`: "command" → CommandDetail, "path" → PathDetail. */
+  readonly kind: "command" | "path"
+  readonly detail: CommandDetail | PathDetail
+  readonly threadId: string
+  readonly callId?: string
+}
+
+export interface PermissionsStore {
+  /**
+   * Loads persisted runtime entries into memory. Intended to be awaited once,
+   * right after the store is created; reloading is not exposed in v1.
+   */
+  load(): Promise<void>
+  /** Checks `candidate` against the lists in effect for `tool`. */
+  match(tool: string, candidate: string): "allow" | "deny" | "unknown"
+  /** Persists an allow entry to disk and updates the in-memory cache. */
+  addAllow(tool: string, pattern: string): Promise<void>
+  /** Active mode (resolved from config + env at construction). */
+  readonly mode: PermissionMode
+}
diff --git a/packages/permissions/test/pattern-matching.test.ts b/packages/permissions/test/pattern-matching.test.ts
new file mode 100644
index 00000000..0d4482e1
--- /dev/null
+++ b/packages/permissions/test/pattern-matching.test.ts
@@ -0,0 +1,29 @@
+import { describe, expect, it } from "vitest"
+import { matchPermission } from "../src/pattern-matching.js"
+
+describe("matchPermission", () => {
+  type Patterns = Record<string, string[]>
+
+  // One row per test case: [title, tool, candidate, allow, deny, expected].
+  const rows: ReadonlyArray<readonly [string, string, string, Patterns, Patterns, string]> = [
+    ["returns unknown when no entries match", "bash", "npm install", {}, {}, "unknown"],
+    [
+      "returns allow when candidate matches an allow prefix",
+      "bash",
+      "npm install react",
+      { bash: ["npm install"] },
+      {},
+      "allow",
+    ],
+    [
+      "returns deny when candidate matches a deny prefix",
+      "bash",
+      "rm -rf /tmp",
+      {},
+      { bash: ["rm -rf"] },
+      "deny",
+    ],
+    [
+      "deny wins over allow when both match",
+      "bash",
+      "rm -rf /tmp",
+      { bash: ["rm -rf"] },
+      { bash: ["rm -rf"] },
+      "deny",
+    ],
+    [
+      "does NOT match an allow entry that is not a prefix",
+      "bash",
+      "npm test",
+      { bash: ["npm install"] },
+      {},
+      "unknown",
+    ],
+    [
+      "treats path candidates with absolute prefixes",
+      "readFile",
+      "/Users/blove/.zshrc",
+      { readFile: ["/Users/blove/"] },
+      {},
+      "allow",
+    ],
+    [
+      "does not cross directory boundary when pattern ends with slash",
+      "readFile",
+      "/var/logger/app.log",
+      { readFile: ["/var/log/"] },
+      {},
+      "unknown",
+    ],
+    [
+      "returns unknown for a tool with no entries in either list",
+      "runUnknownTool",
+      "anything",
+      { bash: ["ls"] },
+      { writeFile: ["/tmp/"] },
+      "unknown",
+    ],
+  ]
+
+  for (const [title, tool, candidate, allow, deny, expected] of rows) {
+    it(title, () => {
+      expect(matchPermission(tool, candidate, allow, deny)).toBe(expected)
+    })
+  }
+})
diff --git a/packages/permissions/test/permissions-store.test.ts b/packages/permissions/test/permissions-store.test.ts
new file mode 100644
index 00000000..25709e97
--- /dev/null
+++ b/packages/permissions/test/permissions-store.test.ts
@@ -0,0 +1,169 @@
+import { afterEach, beforeEach, describe, expect, it } from "vitest"
+import { mkdirSync, mkdtempSync, readFileSync, rmSync, writeFileSync } from "node:fs"
+import { tmpdir } from "node:os"
+import { join } from "node:path"
+
+import { createPermissionsStore } from "../src/permissions-store.js"
+
+describe("createPermissionsStore — load + match", () => {
+  let appRoot: string
+  beforeEach(() => {
+    appRoot = mkdtempSync(join(tmpdir(), "dawn-perms-"))
+  })
+  afterEach(() => {
+    rmSync(appRoot, { recursive: true, force: true })
+  })
+
+  // Writes `contents` verbatim to <appRoot>/.dawn/permissions.json.
+  const seedRuntimeFile = (contents: string): void => {
+    mkdirSync(join(appRoot, ".dawn"), { recursive: true })
+    writeFileSync(join(appRoot, ".dawn", "permissions.json"), contents)
+  }
+
+  it("returns unknown when no file and no config", async () => {
+    const store = createPermissionsStore({ appRoot, config: undefined, mode: "interactive" })
+    await store.load()
+    expect(store.match("bash", "npm install")).toBe("unknown")
+  })
+
+  it("matches entries from .dawn/permissions.json", async () => {
+    seedRuntimeFile(JSON.stringify({ version: 1, allow: { bash: ["npm install"] }, deny: {} }))
+    const store = createPermissionsStore({ appRoot, config: undefined, mode: "interactive" })
+    await store.load()
+    expect(store.match("bash", "npm install react")).toBe("allow")
+    expect(store.match("bash", "rm -rf /")).toBe("unknown")
+  })
+
+  it("merges config + runtime file (both contribute allows)", async () => {
+    seedRuntimeFile(JSON.stringify({ version: 1, allow: { bash: ["ls"] }, deny: {} }))
+    const store = createPermissionsStore({
+      appRoot,
+      config: { version: 1, allow: { bash: ["npm install"] }, deny: {} },
+      mode: "interactive",
+    })
+    await store.load()
+    expect(store.match("bash", "ls -la")).toBe("allow")
+    expect(store.match("bash", "npm install react")).toBe("allow")
+  })
+
+  it("deny from config wins over allow from runtime file", async () => {
+    seedRuntimeFile(JSON.stringify({ version: 1, allow: { bash: ["rm"] }, deny: {} }))
+    const store = createPermissionsStore({
+      appRoot,
+      config: { version: 1, allow: {}, deny: { bash: ["rm -rf"] } },
+      mode: "interactive",
+    })
+    await store.load()
+    expect(store.match("bash", "rm -rf /tmp")).toBe("deny")
+  })
+
+  it("ignores the runtime file in non-interactive mode", async () => {
+    seedRuntimeFile(JSON.stringify({ version: 1, allow: { bash: ["npm install"] }, deny: {} }))
+    const store = createPermissionsStore({
+      appRoot,
+      config: { version: 1, allow: { bash: ["ls"] }, deny: {} },
+      mode: "non-interactive",
+    })
+    await store.load()
+    expect(store.match("bash", "npm install react")).toBe("unknown")
+    expect(store.match("bash", "ls -la")).toBe("allow")
+  })
+
+  it("ignores everything in bypass mode", async () => {
+    seedRuntimeFile(JSON.stringify({ version: 1, allow: {}, deny: { bash: ["rm"] } }))
+    const store = createPermissionsStore({
+      appRoot,
+      config: { version: 1, allow: {}, deny: { bash: ["rm"] } },
+      mode: "bypass",
+    })
+    await store.load()
+    expect(store.match("bash", "rm -rf /")).toBe("unknown")
+  })
+
+  it("throws on malformed JSON in the runtime file", async () => {
+    seedRuntimeFile("{ not valid json")
+    const store = createPermissionsStore({ appRoot, config: undefined, mode: "interactive" })
+    await expect(store.load()).rejects.toThrow(/permissions\.json/i)
+  })
+})
+
+describe("createPermissionsStore — addAllow", () => {
+  let appRoot: string
+  beforeEach(() => {
+    appRoot = mkdtempSync(join(tmpdir(), "dawn-perms-"))
+  })
+  afterEach(() => {
+    rmSync(appRoot, { recursive: true, force: true })
+  })
+
+  it("persists an allow entry and updates the in-memory cache atomically", async () => {
+    const store = createPermissionsStore({ appRoot, config: undefined, mode: "interactive" })
+    await store.load()
+    expect(store.match("bash", "npm install")).toBe("unknown")
+    await store.addAllow("bash", "npm install")
+    expect(store.match("bash", "npm install react")).toBe("allow")
+    const raw = readFileSync(join(appRoot, ".dawn", "permissions.json"), "utf8")
+    const parsed = JSON.parse(raw)
+    expect(parsed.allow.bash).toContain("npm install")
+  })
+
+  it("appends .dawn/ to .gitignore on first write (idempotent)", async () => {
+    writeFileSync(join(appRoot, ".gitignore"), "node_modules/\n.next/\n")
+    const store = createPermissionsStore({ appRoot, config: undefined, mode: "interactive" })
+    await store.load()
+    await store.addAllow("bash", "ls")
+    // A second write exercises the "(idempotent)" claim in the title: the
+    // entry must be appended exactly once and existing lines kept intact.
+    await store.addAllow("bash", "pwd")
+    const gi = readFileSync(join(appRoot, ".gitignore"), "utf8")
+    expect(gi).toBe("node_modules/\n.next/\n.dawn/\n")
+  })
+
+  it("creates .gitignore with .dawn/ when none exists", async () => {
+    const store = createPermissionsStore({ appRoot, config: undefined, mode: "interactive" })
+    await store.load()
+    await store.addAllow("bash", "ls")
+    const gi = readFileSync(join(appRoot, ".gitignore"), "utf8")
+    expect(gi).toBe(".dawn/\n")
+  })
+
+  it("does not duplicate .dawn/ if already in .gitignore", async () => {
+    writeFileSync(join(appRoot, ".gitignore"), "node_modules/\n.dawn/\n")
+    const store = createPermissionsStore({ appRoot, config: undefined, mode: "interactive" })
+    await store.load()
+    await store.addAllow("bash", "ls")
+    const gi = readFileSync(join(appRoot, ".gitignore"), "utf8")
+    expect(gi.match(/\.dawn\//g)?.length).toBe(1)
+  })
+
+  it("serializes concurrent addAllow calls", async () => {
+    const store = createPermissionsStore({ appRoot, config: undefined, mode: "interactive" })
+    await store.load()
+    await Promise.all([
+      store.addAllow("bash", "ls"),
+      store.addAllow("bash", "pwd"),
+      store.addAllow("bash", "cat"),
+    ])
+    const raw = readFileSync(join(appRoot, ".dawn", "permissions.json"), "utf8")
+    const parsed = JSON.parse(raw)
+    expect([...parsed.allow.bash].sort()).toEqual(["cat", "ls", "pwd"])
+  })
+
+  it("exposes the resolved mode", () => {
+    const store = createPermissionsStore({ appRoot, config: undefined, mode: "non-interactive" })
+    expect(store.mode).toBe("non-interactive")
+  })
+})
diff --git a/packages/permissions/test/suggested-pattern.test.ts b/packages/permissions/test/suggested-pattern.test.ts
new file mode 100644
index 00000000..0a55f120
--- /dev/null
+++ b/packages/permissions/test/suggested-pattern.test.ts
@@ -0,0 +1,58 @@
+import { describe, expect, it } from "vitest"
+import {
+  suggestedCommandPattern,
+  suggestedPathPattern,
+} from "../src/suggested-pattern.js"
+
+// Each case below pins one input -> suggestion pair of the helpers under test.
+describe("suggestedCommandPattern", () => {
+  it("returns the first two tokens for a multi-word command", () => {
+    const suggestion = suggestedCommandPattern("npm install react")
+    expect(suggestion).toBe("npm install")
+  })
+  it("returns the single token for a one-word command", () => {
+    const suggestion = suggestedCommandPattern("ls")
+    expect(suggestion).toBe("ls")
+  })
+  it("returns first two tokens even when the second is short", () => {
+    const expectations = [
+      ["git status", "git status"],
+      ["git push origin main", "git push"],
+    ] as const
+    for (const [command, wanted] of expectations) {
+      expect(suggestedCommandPattern(command)).toBe(wanted)
+    }
+  })
+  it("strips leading/trailing whitespace before tokenizing", () => {
+    const padded = "  npm  install  react  "
+    expect(suggestedCommandPattern(padded)).toBe("npm install")
+  })
+  it("handles empty input as empty pattern", () => {
+    for (const blank of ["", "   "]) {
+      expect(suggestedCommandPattern(blank)).toBe("")
+    }
+  })
+})
+
+describe("suggestedPathPattern", () => {
+  it("returns the parent directory with trailing slash", () => {
+    const expectations = [
+      ["/Users/blove/.zshrc", "/Users/blove/"],
+      ["/var/log/app.log", "/var/log/"],
+    ] as const
+    for (const [path, wanted] of expectations) {
+      expect(suggestedPathPattern(path)).toBe(wanted)
+    }
+  })
+  it("returns the dir itself with trailing slash when input ends with slash", () => {
+    const dir = "/Users/blove/Documents/"
+    expect(suggestedPathPattern(dir)).toBe(dir)
+  })
+  it("returns root when input is a top-level file", () => {
+    expect(suggestedPathPattern("/etc")).toBe("/")
+  })
+  it("handles relative paths", () => {
+    const relative = "notes/agenda.md"
+    expect(suggestedPathPattern(relative)).toBe("notes/")
+  })
+})
diff --git a/packages/permissions/tsconfig.json b/packages/permissions/tsconfig.json
new file mode 100644
index 00000000..0681480b
--- /dev/null
+++ b/packages/permissions/tsconfig.json
@@ -0,0 +1,9 @@
+{
+  "$schema": "https://json.schemastore.org/tsconfig",
+  "extends": "../config-typescript/node.json",
+  "compilerOptions": {
+    "outDir": "dist",
+    "rootDir": "src"
+  },
+  "include": ["src/**/*.ts"]
+}
diff --git a/packages/permissions/vitest.config.ts b/packages/permissions/vitest.config.ts
new file mode 100644
index 00000000..c19dea2b
--- /dev/null
+++ b/packages/permissions/vitest.config.ts
@@ -0,0 +1,9 @@
+import { defineConfig } from "vitest/config"
+
+export default defineConfig({
+  test: {
+    environment: "node", // run the suite in Vitest's Node environment
+    include: ["test/**/*.test.ts"], // only *.test.ts files under test/
+    passWithNoTests: true, // an empty match is not a failure
+  },
+})
diff --git a/packages/workspace/package.json b/packages/workspace/package.json
new file mode 100644
index 00000000..50a9aad8
--- /dev/null
+++ b/packages/workspace/package.json
@@ -0,0 +1,42 @@
+{
+  "name": "@dawn-ai/workspace",
+  "version": "0.1.8",
+  "private": false,
+  "type": "module",
+  "license": "MIT",
+  "homepage": "https://github.com/cacheplane/dawnai/tree/main/packages/workspace#readme",
+  "repository": {
+    "type": "git",
+    "url": "git+https://github.com/cacheplane/dawnai.git",
+    "directory": "packages/workspace"
+  },
+  "bugs": {
+    "url": "https://github.com/cacheplane/dawnai/issues"
+  },
+  "engines": {
+    "node": ">=22.12.0"
+  },
+  "files": [
+    "dist"
+  ],
+  "types": "./dist/index.d.ts",
+  "exports": {
+    ".": {
+      "types": "./dist/index.d.ts",
+      "default": "./dist/index.js"
+    }
+  },
+  "publishConfig": {
+    "access": "public"
+  },
+  "scripts": {
+    "build": "tsc -b tsconfig.json",
+    "lint": "biome check --config-path ../config-biome/biome.json package.json src tsconfig.json vitest.config.ts",
+    "test": "vitest --run --config vitest.config.ts --passWithNoTests",
+    "typecheck": "tsc --noEmit"
+  },
+  "devDependencies": {
+    "@dawn-ai/config-typescript": "workspace:*",
+    "@types/node": "25.6.0"
+  }
+}
diff --git a/packages/workspace/src/compose.ts b/packages/workspace/src/compose.ts
new file mode 100644
index 00000000..11c5cf30
--- /dev/null
+++ b/packages/workspace/src/compose.ts
@@ -0,0 +1,20 @@
+/**
+ * Compose middleware functions into a single wrapper.
+ *
+ * Order: the LEFTMOST middleware is the OUTERMOST. Given
+ * `compose(a, b, c)(base)`, the call order is `a -> b -> c -> base`,
+ * mirroring how function call stacks read top-down.
+ *
+ * With zero middlewares, returns the base unchanged (no wrapper object).
+ *
+ * @typeParam T - The value being wrapped (for example a backend); each
+ *   middleware receives a `T` and returns a `T`.
+ * @param middlewares - Wrappers to apply, outermost first.
+ * @returns A function that wraps `base` in every middleware.
+ */
+export function compose<T>(...middlewares: ReadonlyArray<(next: T) => T>): (base: T) => T {
+  if (middlewares.length === 0) return (base) => base
+  // reduceRight starts from the rightmost middleware, so it wraps `base`
+  // first and the leftmost middleware ends up as the outermost layer.
+  return (base) => middlewares.reduceRight((acc, mw) => mw(acc), base)
+}
diff --git a/packages/workspace/src/index.ts b/packages/workspace/src/index.ts
new file mode 100644
index 00000000..a9f62329
--- /dev/null
+++ b/packages/workspace/src/index.ts
@@ -0,0 +1,11 @@
+export { compose } from "./compose.js"
+export { type LocalExecOptions, localExec } from "./local-exec.js"
+export { type LocalFilesystemOptions, localFilesystem } from "./local-filesystem.js"
+export type {
+  BackendContext,
+  ExecBackend,
+  ExecMiddleware,
+  FilesystemBackend,
+  FilesystemMiddleware,
+} from "./types.js"
+export { type LoggingOptions, withExecLogging, withFilesystemLogging } from "./with-logging.js"
diff --git a/packages/workspace/src/local-exec.ts b/packages/workspace/src/local-exec.ts
new file mode 100644
index 00000000..7857b969
--- /dev/null
+++ b/packages/workspace/src/local-exec.ts
@@ -0,0 +1,92 @@
+import { exec as cpExec } from "node:child_process"
+import { promisify } from "node:util"
+import type { BackendContext, ExecBackend } from "./types.js"
+
+const execAsync = promisify(cpExec)
+const DEFAULT_TIMEOUT_MS = 30_000
+
+export interface LocalExecOptions {
+  /** Kill the command if it runs longer than this. Default 30 seconds. */
+  readonly timeout?: number
+  /**
+   * Optional allowlist of command-line patterns. When non-empty, every
+   * command must match at least one regex or `runCommand` throws before
+   * spawning anything. Use to deny dangerous commands in production.
+   *
+   * Patterns are tested against the raw command string, which is then
+   * handed to a shell: a prefix pattern such as `/^echo\b/` also matches
+   * `echo hi; rm -rf /`. Anchor both ends when a strict policy is needed.
+   */
+  readonly allowedCommands?: readonly RegExp[]
+}
+
+/**
+ * Report whether `command` matches at least one allowlist pattern.
+ *
+ * `lastIndex` is reset before every test because `RegExp.prototype.test`
+ * is stateful for patterns with the `g` or `y` flag; without the reset,
+ * repeated calls with the same command alternate between allow and deny.
+ */
+function matchesAllowlist(patterns: readonly RegExp[], command: string): boolean {
+  return patterns.some((re) => {
+    re.lastIndex = 0
+    return re.test(command)
+  })
+}
+
+/**
+ * Create an `ExecBackend` that runs commands on the local machine with
+ * `child_process.exec`.
+ *
+ * `runCommand` behaves as follows:
+ * - rejects before spawning when `allowedCommands` is non-empty and the
+ *   command matches none of its patterns;
+ * - resolves with `exitCode: 0` when the command succeeds;
+ * - resolves with the captured output and the numeric exit code (or `1`
+ *   when the error carries none) when the command fails;
+ * - rejects with `Command timeout after ...` when the child was killed
+ *   and the error carries no numeric exit code;
+ * - rejects with the original error when `ctx.signal` is aborted, so a
+ *   cancelled run is not reported as an ordinary non-zero exit.
+ *
+ * @param opts - Timeout and allowlist settings; both optional.
+ */
+export function localExec(opts: LocalExecOptions = {}): ExecBackend {
+  const timeout = opts.timeout ?? DEFAULT_TIMEOUT_MS
+  const allowed = opts.allowedCommands
+  return {
+    async runCommand(args, ctx: BackendContext) {
+      if (allowed && allowed.length > 0 && !matchesAllowlist(allowed, args.command)) {
+        throw new Error(`Command not allowed by allowedCommands policy: ${args.command}`)
+      }
+      try {
+        const result = await execAsync(args.command, {
+          // Default to the workspace root / inherited environment.
+          cwd: args.cwd ?? ctx.workspaceRoot,
+          env: args.env ?? process.env,
+          timeout,
+          signal: ctx.signal,
+        })
+        return { stdout: result.stdout, stderr: result.stderr, exitCode: 0 }
+      } catch (err) {
+        const e = err as NodeJS.ErrnoException & {
+          code?: number | string
+          stdout?: string
+          stderr?: string
+          killed?: boolean
+        }
+        if (e.killed && typeof e.code !== "number") {
+          throw new Error(`Command timeout after ${timeout}ms: ${args.command}`)
+        }
+        // Cancellation is not a command result: surface it to the caller
+        // instead of folding it into `exitCode: 1`.
+        if (ctx.signal.aborted) throw err
+        return {
+          stdout: e.stdout ?? "",
+          stderr: e.stderr ?? "",
+          exitCode: typeof e.code === "number" ? e.code : 1,
+        }
+      }
+    },
+  }
+}
diff --git a/packages/workspace/src/local-filesystem.ts b/packages/workspace/src/local-filesystem.ts
new file mode 100644
index 00000000..6e9d9c3a
--- /dev/null
+++ b/packages/workspace/src/local-filesystem.ts
@@ -0,0 +1,48 @@
+import { readdir, readFile, stat, writeFile } from "node:fs/promises"
+import type { BackendContext, FilesystemBackend } from "./types.js"
+
+const DEFAULT_MAX_FILE_BYTES = 256 * 1024
+
+export interface LocalFilesystemOptions {
+  /**
+   * Reject `readFile` when the target file exceeds this size.
+   * Default: 256 KiB.
+   */
+  readonly maxFileBytes?: number
+}
+
+/**
+ * Create a `FilesystemBackend` backed by `node:fs/promises`.
+ *
+ * Paths are handed to the filesystem exactly as received; this backend
+ * does no path validation of its own and does not read the context.
+ *
+ * @param opts - Optional read-size cap; see `LocalFilesystemOptions`.
+ */
+export function localFilesystem(opts: LocalFilesystemOptions = {}): FilesystemBackend {
+  const maxBytes = opts.maxFileBytes ?? DEFAULT_MAX_FILE_BYTES
+  return {
+    /** Read `path` as UTF-8; throws `File too large` above the size cap. */
+    async readFile(path: string, _ctx: BackendContext): Promise<string> {
+      // stat() first so an oversized file is rejected without loading it.
+      const s = await stat(path)
+      if (s.size > maxBytes) {
+        throw new Error(`File too large: ${s.size} bytes (max ${maxBytes}) at ${path}`)
+      }
+      return await readFile(path, "utf8")
+    },
+    /** Write `content` as UTF-8 and report its encoded byte length. */
+    async writeFile(
+      path: string,
+      content: string,
+      _ctx: BackendContext,
+    ): Promise<{ readonly bytesWritten: number }> {
+      await writeFile(path, content, "utf8")
+      return { bytesWritten: Buffer.byteLength(content, "utf8") }
+    },
+    /** List the entry names of the directory at `path`. */
+    async listDir(path: string, _ctx: BackendContext): Promise<readonly string[]> {
+      return await readdir(path)
+    },
+  }
+}
diff --git a/packages/workspace/src/types.ts b/packages/workspace/src/types.ts
new file mode 100644
index 00000000..2ef4f4c9
--- /dev/null
+++ b/packages/workspace/src/types.ts
@@ -0,0 +1,63 @@
+/**
+ * Workspace backend type interfaces.
+ *
+ * Backends are plain objects implementing these interfaces. The
+ * workspace capability calls into them to perform filesystem reads,
+ * writes, listings, and shell command execution. Defaults
+ * (`localFilesystem`, `localExec`) ship in this package; users can
+ * provide their own implementations via dawn.config.ts.
+ */
+
+export interface BackendContext {
+  /** Aborts when the parent agent run is cancelled. */
+  readonly signal: AbortSignal
+  /** Absolute filesystem path of the route's workspace directory. */
+  readonly workspaceRoot: string
+}
+
+export interface FilesystemBackend {
+  /**
+   * Read a UTF-8 file. `path` is an already-resolved absolute path
+   * inside `ctx.workspaceRoot` — the capability has done the path-jail.
+   */
+  readFile(path: string, ctx: BackendContext): Promise<string>
+
+  /** Write a UTF-8 file. Returns the byte count of `content`. */
+  writeFile(
+    path: string,
+    content: string,
+    ctx: BackendContext,
+  ): Promise<{ readonly bytesWritten: number }>
+
+  /** List entries in a directory. Returns leaf names (not full paths). */
+  listDir(path: string, ctx: BackendContext): Promise<readonly string[]>
+}
+
+export interface ExecBackend {
+  /**
+   * Run a shell command. `args.cwd`, if provided, is already-resolved
+   * to an absolute path inside `ctx.workspaceRoot`.
+   */
+  runCommand(
+    args: {
+      readonly command: string
+      readonly cwd?: string
+      readonly env?: Readonly<Record<string, string>>
+    },
+    ctx: BackendContext,
+  ): Promise<{
+    readonly stdout: string
+    readonly stderr: string
+    readonly exitCode: number
+  }>
+}
+
+/**
+ * A filesystem middleware is a function that wraps a backend to add
+ * cross-cutting behavior (logging, caching, etc.). Compose multiple
+ * middlewares via `compose()`.
+ */
+export type FilesystemMiddleware = (next: FilesystemBackend) => FilesystemBackend
+
+/** See FilesystemMiddleware. */
+export type ExecMiddleware = (next: ExecBackend) => ExecBackend
diff --git a/packages/workspace/src/with-logging.ts b/packages/workspace/src/with-logging.ts
new file mode 100644
index 00000000..080b9c20
--- /dev/null
+++ b/packages/workspace/src/with-logging.ts
@@ -0,0 +1,45 @@
+import type { ExecMiddleware, FilesystemBackend, FilesystemMiddleware } from "./types.js"
+
+export interface LoggingOptions {
+  /**
+   * Where to send log lines. Default: `console.error`.
+   *
+   * Pass a function for structured logging. The argument is
+   * `{ method, args }` so the function can format however it wants.
+   */
+  readonly destination?: (entry: { method: string; args: unknown[] }) => void
+}
+
+function emit(opts: LoggingOptions, method: string, args: unknown[]): void {
+  if (opts.destination) {
+    opts.destination({ method, args })
+    return
+  }
+  console.error(`[dawn:workspace] ${method}(${args.map((a) => JSON.stringify(a)).join(", ")})`)
+}
+
+export function withFilesystemLogging(opts: LoggingOptions = {}): FilesystemMiddleware {
+  return (next: FilesystemBackend) => ({
+    readFile: async (path, ctx) => {
+      emit(opts, "readFile", [path])
+      return next.readFile(path, ctx)
+    },
+    writeFile: async (path, content, ctx) => {
+      emit(opts, "writeFile", [path, content])
+      return next.writeFile(path, content, ctx)
+    },
+    listDir: async (path, ctx) => {
+      emit(opts, "listDir", [path])
+      return next.listDir(path, ctx)
+    },
+  })
+}
+
+export function withExecLogging(opts: LoggingOptions = {}): ExecMiddleware {
+  return (next) => ({
+    runCommand: async (args, ctx) => {
+      emit(opts, "runCommand", [args.command, args.cwd])
+      return next.runCommand(args, ctx)
+    },
+  })
+}
diff --git a/packages/workspace/test/compose.test.ts b/packages/workspace/test/compose.test.ts
new file mode 100644
index 00000000..2d1eb28f
--- /dev/null
+++ b/packages/workspace/test/compose.test.ts
@@ -0,0 +1,54 @@
+import { describe, expect, it } from "vitest"
+import { compose } from "../src/compose.js"
+import type { FilesystemBackend, FilesystemMiddleware } from "../src/types.js"
+
+/** Stub backend whose `readFile` always resolves to "BASE". */
+const base: FilesystemBackend = {
+  readFile: async () => "BASE",
+  writeFile: async () => ({ bytesWritten: 0 }),
+  listDir: async () => [],
+}
+
+/** Build a throwaway context; the stub backend never looks at it. */
+function makeCtx() {
+  return { signal: new AbortController().signal, workspaceRoot: "/" }
+}
+
+/** Middleware that records `<label>:before` / `<label>:after` around `readFile`. */
+function tracing(label: string, trace: string[]): FilesystemMiddleware {
+  return (next) => ({
+    ...next,
+    readFile: async (p, c) => {
+      trace.push(`${label}:before`)
+      const result = await next.readFile(p, c)
+      trace.push(`${label}:after`)
+      return result
+    },
+  })
+}
+
+describe("compose", () => {
+  it("with zero middlewares returns the base unchanged", () => {
+    const composed = compose()(base)
+    expect(composed).toBe(base)
+  })
+
+  it("with one middleware wraps the base", async () => {
+    const lower: FilesystemMiddleware = (next) => ({
+      ...next,
+      readFile: async (p, c) => {
+        const text = await next.readFile(p, c)
+        return text.toLowerCase()
+      },
+    })
+    const wrapped = compose(lower)(base)
+    expect(await wrapped.readFile("x", makeCtx())).toBe("base")
+  })
+
+  it("applies middlewares right-to-left (outermost first)", async () => {
+    const trace: string[] = []
+    const stacked = compose(tracing("a", trace), tracing("b", trace))(base)
+    await stacked.readFile("x", makeCtx())
+    expect(trace).toEqual(["a:before", "b:before", "b:after", "a:after"])
+  })
+})
diff --git a/packages/workspace/test/local-exec.test.ts b/packages/workspace/test/local-exec.test.ts
new file mode 100644
index 00000000..9911c82b
--- /dev/null
+++ b/packages/workspace/test/local-exec.test.ts
@@ -0,0 +1,52 @@
+import { describe, expect, it } from "vitest"
+import { mkdtempSync, rmSync } from "node:fs"
+import { tmpdir } from "node:os"
+import { join } from "node:path"
+import { localExec } from "../src/local-exec.js"
+
+function ctx(workspaceRoot: string) {
+  return { signal: new AbortController().signal, workspaceRoot }
+}
+
+/** Run `body` against a fresh temp workspace that is removed afterwards. */
+async function withTempRoot(body: (root: string) => Promise<void>): Promise<void> {
+  const root = mkdtempSync(join(tmpdir(), "dawn-localexec-"))
+  try {
+    await body(root)
+  } finally {
+    rmSync(root, { recursive: true, force: true })
+  }
+}
+
+describe("localExec", () => {
+  it("runCommand captures stdout, stderr, exitCode", () =>
+    withTempRoot(async (root) => {
+      const { stdout, exitCode } = await localExec().runCommand(
+        { command: "echo hello" },
+        ctx(root),
+      )
+      expect(stdout.trim()).toBe("hello")
+      expect(exitCode).toBe(0)
+    }))
+
+  it("runCommand returns non-zero exitCode on failure", () =>
+    withTempRoot(async (root) => {
+      const { exitCode } = await localExec().runCommand({ command: "exit 7" }, ctx(root))
+      expect(exitCode).toBe(7)
+    }))
+
+  it("runCommand enforces timeout", () =>
+    withTempRoot(async (root) => {
+      const slow = localExec({ timeout: 100 }).runCommand({ command: "sleep 1" }, ctx(root))
+      await expect(slow).rejects.toThrow(/timeout/i)
+    }))
+
+  it("runCommand respects allowedCommands regex allowlist", () =>
+    withTempRoot(async (root) => {
+      const guarded = localExec({ allowedCommands: [/^echo\b/, /^ls\b/] })
+      const ok = await guarded.runCommand({ command: "echo allowed" }, ctx(root))
+      expect(ok.stdout.trim()).toBe("allowed")
+      const denied = guarded.runCommand({ command: "rm -rf /" }, ctx(root))
+      await expect(denied).rejects.toThrow(/not allowed/i)
+    }))
+})
diff --git a/packages/workspace/test/local-filesystem.test.ts b/packages/workspace/test/local-filesystem.test.ts
new file mode 100644
index 00000000..7bc399e3
--- /dev/null
+++ b/packages/workspace/test/local-filesystem.test.ts
@@ -0,0 +1,53 @@
+import { describe, expect, it, beforeEach, afterEach } from "vitest"
+import { mkdtempSync, rmSync, writeFileSync, mkdirSync } from "node:fs"
+import { tmpdir } from "node:os"
+import { join } from "node:path"
+import { localFilesystem } from "../src/local-filesystem.js"
+
+function ctx(workspaceRoot: string) {
+  return { signal: new AbortController().signal, workspaceRoot }
+}
+
+describe("localFilesystem", () => {
+  // Every test gets its own temp directory, deleted again afterwards.
+  let root: string
+  const inRoot = (name: string) => join(root, name)
+  beforeEach(() => {
+    root = mkdtempSync(join(tmpdir(), "dawn-localfs-"))
+  })
+  afterEach(() => {
+    rmSync(root, { recursive: true, force: true })
+  })
+
+  it("readFile returns UTF-8 contents", async () => {
+    const target = inRoot("hello.txt")
+    writeFileSync(target, "hi", "utf8")
+    const text = await localFilesystem().readFile(target, ctx(root))
+    expect(text).toBe("hi")
+  })
+
+  it("readFile rejects files larger than maxFileBytes", async () => {
+    const target = inRoot("big.txt")
+    writeFileSync(target, "x".repeat(2048), "utf8")
+    const capped = localFilesystem({ maxFileBytes: 1024 })
+    await expect(capped.readFile(target, ctx(root))).rejects.toThrow(/too large/i)
+  })
+
+  it("writeFile returns the byte count", async () => {
+    const written = await localFilesystem().writeFile(inRoot("out.txt"), "abc", ctx(root))
+    expect(written.bytesWritten).toBe(3)
+  })
+
+  it("listDir returns directory entries (leaf names only)", async () => {
+    writeFileSync(inRoot("a.txt"), "", "utf8")
+    mkdirSync(inRoot("sub"))
+    const names = await localFilesystem().listDir(root, ctx(root))
+    expect([...names].sort()).toEqual(["a.txt", "sub"])
+  })
+
+  it("readFile on missing file raises ENOENT", async () => {
+    const missing = localFilesystem().readFile(inRoot("ghost.txt"), ctx(root))
+    await expect(missing).rejects.toThrow(/ENOENT/)
+  })
+})
diff --git a/packages/workspace/test/with-logging.test.ts b/packages/workspace/test/with-logging.test.ts
new file mode 100644
index 00000000..8c1ddeea
--- /dev/null
+++ b/packages/workspace/test/with-logging.test.ts
@@ -0,0 +1,53 @@
+import { describe, expect, it } from "vitest"
+import { withFilesystemLogging } from "../src/with-logging.js"
+import type { FilesystemBackend } from "../src/types.js"
+
+/** Stub backend with fixed return values, so forwarding can be checked. */
+const base: FilesystemBackend = {
+  readFile: async () => "ok",
+  writeFile: async () => ({ bytesWritten: 5 }),
+  listDir: async () => ["a"],
+}
+
+const ctx = { signal: new AbortController().signal, workspaceRoot: "/r" }
+
+describe("withFilesystemLogging", () => {
+  it("invokes the destination callback for each method", async () => {
+    const entries: Array<{ method: string; args: unknown[] }> = []
+    const logged = withFilesystemLogging({
+      destination: (entry) => {
+        entries.push(entry)
+      },
+    })(base)
+    await logged.readFile("a.md", ctx)
+    await logged.writeFile("b.md", "hi", ctx)
+    await logged.listDir("/r", ctx)
+    const methods = entries.map(({ method }) => method)
+    expect(methods).toEqual(["readFile", "writeFile", "listDir"])
+    expect(entries[0]?.args).toEqual(["a.md"])
+    expect(entries[1]?.args).toEqual(["b.md", "hi"])
+  })
+
+  it("forwards return values unchanged", async () => {
+    const silent = withFilesystemLogging({ destination: () => undefined })(base)
+    const text = await silent.readFile("a.md", ctx)
+    const written = await silent.writeFile("b.md", "hi", ctx)
+    const names = await silent.listDir("/r", ctx)
+    expect(text).toBe("ok")
+    expect(written).toEqual({ bytesWritten: 5 })
+    expect([...names]).toEqual(["a"])
+  })
+
+  it("defaults destination to console.error when not provided", async () => {
+    const captured: string[] = []
+    const realConsoleError = console.error
+    console.error = ((msg: string) => captured.push(msg)) as typeof console.error
+    try {
+      await withFilesystemLogging()(base).readFile("a.md", ctx)
+    } finally {
+      console.error = realConsoleError
+    }
+    expect(captured.length).toBe(1)
+    expect(captured[0]).toContain("readFile")
+  })
+})
diff --git a/packages/workspace/tsconfig.json b/packages/workspace/tsconfig.json
new file mode 100644
index 00000000..0681480b
--- /dev/null
+++ b/packages/workspace/tsconfig.json
@@ -0,0 +1,9 @@
+{
+  "$schema": "https://json.schemastore.org/tsconfig",
+  "extends": "../config-typescript/node.json",
+  "compilerOptions": {
+    "outDir": "dist",
+    "rootDir": "src"
+  },
+  "include": ["src/**/*.ts"]
+}
diff --git a/packages/workspace/vitest.config.ts b/packages/workspace/vitest.config.ts
new file mode 100644
index 00000000..44373404
--- /dev/null
+++ b/packages/workspace/vitest.config.ts
@@ -0,0 +1,8 @@
+import { defineConfig } from "vitest/config"
+
+export default defineConfig({
+  test: {
+    environment: "node", // run the suite in Vitest's Node environment
+    include: ["test/**/*.test.ts"], // only *.test.ts files under test/
+  },
+})
diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml
index 0b6d0030..539c2115 100644
--- a/pnpm-lock.yaml
+++ b/pnpm-lock.yaml
@@ -177,6 +177,9 @@ importers:
       '@dawn-ai/langgraph':
         specifier: workspace:*
         version: link:../langgraph
+      '@dawn-ai/permissions':
+        specifier: workspace:*
+        version: link:../permissions
       commander:
         specifier: 14.0.3
         version: 14.0.3
@@ -190,6 +193,9 @@ importers:
       '@dawn-ai/sdk':
         specifier: workspace:*
         version: link:../sdk
+      '@dawn-ai/workspace':
+        specifier: workspace:*
+        version: link:../workspace
       '@langchain/core':
         specifier: 1.1.46
         version: 1.1.46(openai@6.37.0(ws@8.20.1)(zod@4.4.3))(ws@8.20.1)
@@ -213,9 +219,18 @@ importers:
 
   packages/core:
     dependencies:
+      '@dawn-ai/permissions':
+        specifier: workspace:*
+        version: link:../permissions
       '@dawn-ai/sdk':
         specifier: workspace:*
         version: link:../sdk
+      '@dawn-ai/workspace':
+        specifier: workspace:*
+        version: link:../workspace
+      '@langchain/langgraph':
+        specifier: ^1.3.0
+        version: 1.3.0(@langchain/core@1.1.47(openai@6.37.0(ws@8.20.1)(zod@4.4.3))(ws@8.20.1))(openai@6.37.0(ws@8.20.1)(zod@4.4.3))(react-dom@19.2.0(react@19.2.0))(react@19.2.0)(ws@8.20.1)(zod-to-json-schema@3.25.2(zod@4.4.3))(zod@4.4.3)
       tsx:
         specifier: ^4.8.1
         version: 4.21.0
@@ -317,6 +332,15 @@ importers:
         specifier: 25.6.0
         version: 25.6.0
 
+  packages/permissions:
+    devDependencies:
+      '@dawn-ai/config-typescript':
+        specifier: workspace:*
+        version: link:../config-typescript
+      '@types/node':
+        specifier: 25.6.0
+        version: 25.6.0
+
   packages/sdk:
     devDependencies:
       '@dawn-ai/config-typescript':
@@ -345,6 +369,15 @@ importers:
         specifier: 4.4.3
         version: 4.4.3
 
+  packages/workspace:
+    devDependencies:
+      '@dawn-ai/config-typescript':
+        specifier: workspace:*
+        version: link:../config-typescript
+      '@types/node':
+        specifier: 25.6.0
+        version: 25.6.0
+
 packages:
 
   '@alloc/quick-lru@5.2.0':
diff --git a/test/generated/cli-testing-export.test.ts b/test/generated/cli-testing-export.test.ts
index 254792dc..f677d3b7 100644
--- a/test/generated/cli-testing-export.test.ts
+++ b/test/generated/cli-testing-export.test.ts
@@ -26,7 +26,15 @@ describe.each([
   }, async () => {
     const tempRoot = await createTrackedTempDir(`dawn-${label}-testing-pack-`, tempDirs)
     const { installerDir, tarballs } = await createPackagedInstaller({
-      packageNames: ["@dawn-ai/core", "@dawn-ai/langchain", "@dawn-ai/langgraph", "@dawn-ai/sdk", "@dawn-ai/cli"],
+      packageNames: [
+        "@dawn-ai/core",
+        "@dawn-ai/langchain",
+        "@dawn-ai/langgraph",
+        "@dawn-ai/permissions",
+        "@dawn-ai/sdk",
+        "@dawn-ai/workspace",
+        "@dawn-ai/cli",
+      ],
       tempRoot,
     })
 
@@ -38,7 +46,9 @@ describe.each([
         requiredTarball(tarballs, "@dawn-ai/core"),
         requiredTarball(tarballs, "@dawn-ai/langchain"),
         requiredTarball(tarballs, "@dawn-ai/langgraph"),
+        requiredTarball(tarballs, "@dawn-ai/permissions"),
         requiredTarball(tarballs, "@dawn-ai/sdk"),
+        requiredTarball(tarballs, "@dawn-ai/workspace"),
         requiredTarball(tarballs, "@dawn-ai/cli"),
       ],
       installerDir,
@@ -136,7 +146,9 @@ async function writeInstallerOverrides(
     "@dawn-ai/core": requiredTarball(tarballs, "@dawn-ai/core"),
     "@dawn-ai/langchain": requiredTarball(tarballs, "@dawn-ai/langchain"),
     "@dawn-ai/langgraph": requiredTarball(tarballs, "@dawn-ai/langgraph"),
+    "@dawn-ai/permissions": requiredTarball(tarballs, "@dawn-ai/permissions"),
     "@dawn-ai/sdk": requiredTarball(tarballs, "@dawn-ai/sdk"),
+    "@dawn-ai/workspace": requiredTarball(tarballs, "@dawn-ai/workspace"),
   }
 
   await writeFile(
diff --git a/test/generated/fixtures/basic.expected.json b/test/generated/fixtures/basic.expected.json
index 410460fa..fd9b94ba 100644
--- a/test/generated/fixtures/basic.expected.json
+++ b/test/generated/fixtures/basic.expected.json
@@ -27,7 +27,9 @@
         "@dawn-ai/core": "",
         "@dawn-ai/langchain": "",
         "@dawn-ai/langgraph": "",
-        "@dawn-ai/sdk": ""
+        "@dawn-ai/permissions": "",
+        "@dawn-ai/sdk": "",
+        "@dawn-ai/workspace": ""
       }
     }
   },
diff --git a/test/generated/fixtures/custom-app-dir.expected.json b/test/generated/fixtures/custom-app-dir.expected.json
index fa1f6f10..5263e013 100644
--- a/test/generated/fixtures/custom-app-dir.expected.json
+++ b/test/generated/fixtures/custom-app-dir.expected.json
@@ -27,7 +27,9 @@
         "@dawn-ai/core": "",
         "@dawn-ai/langchain": "",
         "@dawn-ai/langgraph": "",
-        "@dawn-ai/sdk": ""
+        "@dawn-ai/permissions": "",
+        "@dawn-ai/sdk": "",
+        "@dawn-ai/workspace": ""
       }
     }
   },
diff --git a/test/generated/harness.ts b/test/generated/harness.ts
index deb31c87..e1f99b4c 100644
--- a/test/generated/harness.ts
+++ b/test/generated/harness.ts
@@ -39,7 +39,9 @@ interface PackedTarballs {
   readonly devkit: string
   readonly langchain: string
   readonly langgraph: string
+  readonly permissions: string
   readonly sdk: string
+  readonly workspace: string
 }
 
 interface RuntimeFixtureSpec {
@@ -173,7 +175,9 @@ export async function prepareGeneratedRuntimeApp(options: {
           "@dawn-ai/core",
           "@dawn-ai/langchain",
           "@dawn-ai/langgraph",
+          "@dawn-ai/permissions",
           "@dawn-ai/sdk",
+          "@dawn-ai/workspace",
         ],
         tempRoot: options.tempRoot,
         transcriptPath,
@@ -469,7 +473,9 @@ async function rewriteDependenciesToTarballs(options: {
       "@dawn-ai/core": options.tarballs.core,
       "@dawn-ai/langchain": options.tarballs.langchain,
       "@dawn-ai/langgraph": options.tarballs.langgraph,
+      "@dawn-ai/permissions": options.tarballs.permissions,
       "@dawn-ai/sdk": options.tarballs.sdk,
+      "@dawn-ai/workspace": options.tarballs.workspace,
     },
   }
 
@@ -663,7 +669,9 @@ function toPackedTarballs(tarballs: Readonly>): PackedTar
     devkit: tarballs["@dawn-ai/devkit"],
     langchain: tarballs["@dawn-ai/langchain"],
     langgraph: tarballs["@dawn-ai/langgraph"],
+    permissions: tarballs["@dawn-ai/permissions"]!,
     sdk: tarballs["@dawn-ai/sdk"],
+    workspace: tarballs["@dawn-ai/workspace"]!,
   }
 }
 
diff --git a/test/generated/run-generated-app.test.ts b/test/generated/run-generated-app.test.ts
index d6275e44..ca10e403 100644
--- a/test/generated/run-generated-app.test.ts
+++ b/test/generated/run-generated-app.test.ts
@@ -31,7 +31,9 @@ interface PackedTarballs {
   readonly devkit: string
   readonly langchain: string
   readonly langgraph: string
+  readonly permissions: string
   readonly sdk: string
+  readonly workspace: string
 }
 
 interface GeneratedAppScenarioResult {
@@ -170,7 +172,9 @@ async function runGeneratedAppScenario(
           "@dawn-ai/core",
           "@dawn-ai/langchain",
           "@dawn-ai/langgraph",
+          "@dawn-ai/permissions",
           "@dawn-ai/sdk",
+          "@dawn-ai/workspace",
         ],
         tempRoot,
         transcriptPath,
@@ -310,7 +314,9 @@ async function rewriteDependenciesToTarballs(options: {
       "@dawn-ai/core": options.tarballs.core,
       "@dawn-ai/langchain": options.tarballs.langchain,
       "@dawn-ai/langgraph": options.tarballs.langgraph,
+      "@dawn-ai/permissions": options.tarballs.permissions,
       "@dawn-ai/sdk": options.tarballs.sdk,
+      "@dawn-ai/workspace": options.tarballs.workspace,
     },
   }
 
@@ -522,7 +528,9 @@ async function createExpectedInternalFixture(
           "@dawn-ai/core": "",
           "@dawn-ai/langchain": "",
           "@dawn-ai/langgraph": "",
+          "@dawn-ai/permissions": "",
           "@dawn-ai/sdk": "",
+          "@dawn-ai/workspace": "",
         },
       },
     },
@@ -538,7 +546,9 @@ function toPackedTarballs(tarballs: Readonly>): PackedTar
     devkit: tarballs["@dawn-ai/devkit"],
     langchain: tarballs["@dawn-ai/langchain"],
     langgraph: tarballs["@dawn-ai/langgraph"],
+    permissions: tarballs["@dawn-ai/permissions"]!,
     sdk: tarballs["@dawn-ai/sdk"],
+    workspace: tarballs["@dawn-ai/workspace"]!,
   }
 }
 
@@ -556,7 +566,9 @@ function normalizeForFixture(
     [context.tarballs.devkit, ""],
     [context.tarballs.langchain, ""],
     [context.tarballs.langgraph, ""],
+    [context.tarballs.permissions, ""],
     [context.tarballs.sdk, ""],
+    [context.tarballs.workspace, ""],
     [`/private${dirname(context.tarballs.cli)}`, ""],
     [dirname(context.tarballs.cli), ""],
     ["25.6.0", ""],
@@ -576,7 +588,9 @@ function normalizeForInternalFixture(
     [pathToRepoPackageFileSpecifier("@dawn-ai/core"), ""],
     [pathToRepoPackageFileSpecifier("@dawn-ai/langchain"), ""],
     [pathToRepoPackageFileSpecifier("@dawn-ai/langgraph"), ""],
+    [pathToRepoPackageFileSpecifier("@dawn-ai/permissions"), ""],
     [pathToRepoPackageFileSpecifier("@dawn-ai/sdk"), ""],
+    [pathToRepoPackageFileSpecifier("@dawn-ai/workspace"), ""],
     ["25.6.0", ""],
     ["6.0.2", ""],
   ]) as GeneratedAppScenarioResult
@@ -589,7 +603,9 @@ function pathToRepoPackageFileSpecifier(
     | "@dawn-ai/core"
     | "@dawn-ai/langchain"
     | "@dawn-ai/langgraph"
-    | "@dawn-ai/sdk",
+    | "@dawn-ai/permissions"
+    | "@dawn-ai/sdk"
+    | "@dawn-ai/workspace",
 ): string {
   const packageDirByName = {
     "@dawn-ai/cli": "packages/cli",
@@ -597,7 +613,9 @@ function pathToRepoPackageFileSpecifier(
     "@dawn-ai/core": "packages/core",
     "@dawn-ai/langchain": "packages/langchain",
     "@dawn-ai/langgraph": "packages/langgraph",
+    "@dawn-ai/permissions": "packages/permissions",
     "@dawn-ai/sdk": "packages/sdk",
+    "@dawn-ai/workspace": "packages/workspace",
   } as const
 
   return pathToFileURL(resolve(REPO_ROOT, packageDirByName[packageName])).toString()
diff --git a/test/runtime/run-runtime-contract.test.ts b/test/runtime/run-runtime-contract.test.ts
index 4b6951d4..cb17c796 100644
--- a/test/runtime/run-runtime-contract.test.ts
+++ b/test/runtime/run-runtime-contract.test.ts
@@ -482,7 +482,9 @@ async function withRuntimeScenario(
           "@dawn-ai/core",
           "@dawn-ai/langchain",
           "@dawn-ai/langgraph",
+          "@dawn-ai/permissions",
           "@dawn-ai/sdk",
+          "@dawn-ai/workspace",
         ],
         tempRoot,
         transcriptPath,
@@ -712,7 +714,9 @@ async function rewriteDependenciesToTarballs(options: {
     "@dawn-ai/cli": options.tarballs["@dawn-ai/cli"],
     "@dawn-ai/core": options.tarballs["@dawn-ai/core"],
     "@dawn-ai/langchain": options.tarballs["@dawn-ai/langchain"],
+    "@dawn-ai/permissions": options.tarballs["@dawn-ai/permissions"],
     "@dawn-ai/sdk": options.tarballs["@dawn-ai/sdk"],
+    "@dawn-ai/workspace": options.tarballs["@dawn-ai/workspace"],
   }
   packageJson.devDependencies = {
     ...packageJson.devDependencies,
@@ -727,7 +731,9 @@ async function rewriteDependenciesToTarballs(options: {
       "@dawn-ai/core": options.tarballs["@dawn-ai/core"],
       "@dawn-ai/langchain": options.tarballs["@dawn-ai/langchain"],
       "@dawn-ai/langgraph": options.tarballs["@dawn-ai/langgraph"],
+      "@dawn-ai/permissions": options.tarballs["@dawn-ai/permissions"],
       "@dawn-ai/sdk": options.tarballs["@dawn-ai/sdk"],
+      "@dawn-ai/workspace": options.tarballs["@dawn-ai/workspace"],
     },
   }
 
diff --git a/test/smoke/run-smoke.test.ts b/test/smoke/run-smoke.test.ts
index 210ef250..a6f78d2a 100644
--- a/test/smoke/run-smoke.test.ts
+++ b/test/smoke/run-smoke.test.ts
@@ -159,7 +159,9 @@ async function runSmokeScenario(fixtureName: SmokeFixtureName): Promise