From f172d22c3311ffe5844b5574bb3b2253be177767 Mon Sep 17 00:00:00 2001 From: Brian Love Date: Wed, 20 May 2026 15:03:24 -0700 Subject: [PATCH 01/23] docs: phase 3 workspace capability + pluggable backends design spec Design for sub-project 4 of the Dawn opinionated agent harness. The workspace tools (readFile, writeFile, listDir, runBash) become a built- in capability auto-wired by the convention of having a workspace/ directory under a route. Filesystem and exec implementations become pluggable via a new @dawn-ai/workspace package shipping the type interfaces, localFilesystem/localExec defaults, a compose() helper, and one demonstration middleware (withLogging). dawn.config.ts switches from the existing hand-rolled string-only parser to tsx-evaluated import so callable backend values can be expressed naturally. Default behavior is unchanged: apps that don't touch dawn.config.ts keep working. Path-jail enforcement lives in the capability; backends receive already-resolved absolute paths. Human-in-the-loop permission gating (interrupt to ask the user about jail escapes) is deferred to a separate sub-project (4.5) with its own brainstorm + spec + plan. Co-Authored-By: Claude Opus 4.7 --- ...-05-20-phase3-workspace-backends-design.md | 357 ++++++++++++++++++ 1 file changed, 357 insertions(+) create mode 100644 docs/superpowers/specs/2026-05-20-phase3-workspace-backends-design.md diff --git a/docs/superpowers/specs/2026-05-20-phase3-workspace-backends-design.md b/docs/superpowers/specs/2026-05-20-phase3-workspace-backends-design.md new file mode 100644 index 0000000..4343a7d --- /dev/null +++ b/docs/superpowers/specs/2026-05-20-phase3-workspace-backends-design.md @@ -0,0 +1,357 @@ +# Phase 3 — Workspace Capability + Pluggable Backends Design + +**Sub-project:** 4 of 7 in the Dawn opinionated agent harness. +**Status:** Spec +**Date:** 2026-05-20 + +## Goal + +Refactor the workspace tools (`readFile`, `writeFile`, `listDir`, `runBash`) from per-route hand-rolled files into a single built-in capability, and introduce a pluggable backend interface so the underlying filesystem and exec implementations can be swapped at the app level. Default behavior is unchanged: existing apps using local-fs + local-exec keep working without touching configuration. Pluggability unlocks in-memory storage for tests, remote sandboxes for production, and middleware composition for cross-cutting concerns like logging. + +## Architecture + +A new built-in capability marker `createWorkspaceMarker()` joins the existing five (planning, agents-md, skills, subagents, this). It auto-discovers the `workspace/` directory under a route (same convention as AGENTS.md uses) and contributes four tools wired to a configurable filesystem + exec backend pair. + +A new pnpm workspace package `@dawn-ai/workspace` ships the backend type interfaces (`FilesystemBackend`, `ExecBackend`, `BackendContext`), the two default implementations (`localFilesystem`, `localExec`), and a small set of functional composition primitives (`compose`, one demonstration middleware `withLogging`). Apps configure backends via `dawn.config.ts`, which switches from the existing hand-rolled string-only parser to a `tsx`-evaluated import so callable values can be expressed naturally. + +The capability owns path-jail enforcement. Backends receive already-resolved absolute paths and trust them. Authors can override the entire workspace tool set at the filesystem-convention layer (a user-authored `tools/readFile.ts` replaces the capability's contribution) or replace specific backend methods via plain spread-and-closure JS / middleware composition. + +Human-in-the-loop permission gating (interrupt the run to ask the user about paths outside the jail) is deliberately deferred to a future sub-project. The capability hard-refuses jail escapes for now; the future permission system will replace that with an interrupt-and-resume flow without changing the backend contract. + +## Design Decisions + +### Sub-project boundary + +This sub-project ships pluggable backends and the workspace capability only. Concretely: + +- Refactor: workspace tools move from per-route user-authored files into a capability that calls into a backend. +- New package: `@dawn-ai/workspace` exports backend types + defaults + composition helpers. +- Config-loader switch: `dawn.config.ts` parsed via `tsx` import instead of the existing restricted parser. + +Deferred to sub-project 4.5 (separate brainstorm + spec + plan cycle): + +- LangGraph `interrupt()` plumbed through Dawn's SSE stream as `event: interrupt` envelopes. +- HTTP resume endpoint + client-side resume UI. +- Permission persistence model (`.dawn/permissions.json` vs. AGENTS.md vs. thread state — to be decided in 4.5). +- "Always allow this path" / "always deny this command" decision flow. + +OS-level isolation (running Dawn under a restricted user, containerization, macOS sandbox profiles) is documented as deployment guidance and never claimed as a security boundary the framework provides. + +### Package name: `@dawn-ai/workspace` + +Chosen over `backends`, `harness`, `system`, `host`, `io`. The capability is named `workspace`; the trigger is the `workspace/` directory; the tools are workspace tools. The package's purpose is self-evident from its name. Future pluggable-defaults packages get domain-specific names (e.g., `@dawn-ai/tracing` if a tracing capability ever lands), matching the Next.js `next/cache` / `next/server` split rather than the LangChain integration-name convention. + +### Path-jail in the capability, not the backend + +The workspace capability resolves the user-supplied relative path against the route's `workspace/` directory and validates that the resolution stays inside before calling the backend. Backends receive an already-resolved absolute path they can trust. Backends do not re-validate. + +Rejected alternative: defense in depth (backend re-checks the jail). Real defense against hostile agents is OS-level isolation (restricted user, container). The capability check is sufficient for correctness against well-behaved agents and avoids duplicating the resolver in every backend. + +When a future HITL permission system lands (sub-project 4.5), the capability's hard-refuse on jail escape becomes a hard-refuse-unless-allowed branch. The backend contract is unchanged by that addition. + +### Workspace capability opt-in: convention only + +A route opts in by having a `workspace/` subdirectory. No descriptor flag. Same trigger AGENTS.md already uses; the AGENTS.md capability and the workspace capability share the same filesystem signal. + +When no `workspace/` exists, the capability contributes nothing — no tools, no prompt fragment, no overhead. + +### Default backends when `dawn.config.ts` omits `backends` + +When the route has a `workspace/` directory but `dawn.config.ts` declares no `backends` field (or `dawn.config.ts` doesn't exist), the capability defaults to `localFilesystem()` + `localExec()`. This preserves existing chat-example behavior: apps that don't touch their config keep working unchanged. + +Explicit config in `dawn.config.ts` always wins: + +```ts +// dawn.config.ts +import { localFilesystem, localExec } from "@dawn-ai/workspace" +export default { + appDir: "src/app", + backends: { + filesystem: localFilesystem({ maxFileBytes: 256 * 1024 }), + exec: localExec({ timeout: 30_000 }), + }, +} +``` + +### Tool set: fixed four, extensible by convention + +The capability contributes exactly four tools: `readFile`, `writeFile`, `listDir`, `runBash`. This matches the deepagents/Claude Code workspace tool set authors already expect. + +Authors who want additional tools (e.g., `runPython`, `httpGet`) author them in `tools/` as today — orthogonal to the workspace capability. Authors who want to override one of the standard four write a `tools/readFile.ts` file (etc.) that replaces the capability's contribution. This requires inverting the existing capability-vs-user-tool collision check introduced in PR #155: user tools win. + +### Config loader: switch from hand-rolled parser to `tsx` import + +The existing `packages/core/src/config.ts` defines a hand-rolled tokenizer + parser that supports only `{ appDir }` and `const FOO = "string"` bindings. It explicitly refuses imports, function values, and nested objects. This was originally a security-conscious choice (don't execute user TS at config-load time). + +The choice now blocks `dawn.config.ts` from expressing callable backends. Switch to a `tsx`-evaluated dynamic import using the same loader Dawn already uses for route discovery. Dawn already executes user TS during route discovery, tool execution, and capability application — there is no new attack surface introduced by also executing the config file. + +Existing `dawn.config.ts` files in the wild (just `{ appDir }`) remain valid TS modules and continue to work without modification. The new loader is ~30 lines net (the parser deletes; the loader is small). + +### Backends are plain objects; composition is functional + +Backends are plain objects implementing the typed interfaces. No classes, no inheritance, no DI container. + +Three layers of extensibility, each progressively more powerful: + +1. **Spread + closure** — vanilla JS for overriding a single method: + ```ts + const base = localFilesystem() + const fs: FilesystemBackend = { + ...base, + readFile: async (path, ctx) => { + if (path.endsWith(".secret")) throw new Error("nope") + return base.readFile(path, ctx) + }, + } + ``` + No new API. Authors who know JS know how to do this. + +2. **Middleware composition** — `compose(...)` helper for stacking concerns: + ```ts + import { compose, localFilesystem, withLogging } from "@dawn-ai/workspace" + const fs = compose(withLogging({ destination: "stderr" }))(localFilesystem()) + ``` + A middleware is a function `(next: FilesystemBackend) => FilesystemBackend`. Same pattern as Vercel AI SDK `wrapLanguageModel`, Express middleware, LangChain callback wrapping. + +3. **Filesystem-convention tool override** — author a `tools/readFile.ts` to replace the capability's contribution entirely. Useful when the override is so different that wrapping the standard backend would be awkward. + +### What ships in `@dawn-ai/workspace` v1 + +```ts +// type interfaces (workspace-specific — not in @dawn-ai/core to keep core free of node:child_process etc) +export interface FilesystemBackend { + readFile(path: string, ctx: BackendContext): Promise + writeFile(path: string, content: string, ctx: BackendContext): Promise<{ bytesWritten: number }> + listDir(path: string, ctx: BackendContext): Promise +} + +export interface ExecBackend { + runCommand( + args: { command: string; cwd?: string; env?: Record }, + ctx: BackendContext, + ): Promise<{ stdout: string; stderr: string; exitCode: number }> +} + +export interface BackendContext { + readonly signal: AbortSignal + readonly workspaceRoot: string +} + +// default impls +export function localFilesystem(opts?: { maxFileBytes?: number }): FilesystemBackend +export function localExec(opts?: { + timeout?: number + allowedCommands?: readonly RegExp[] +}): ExecBackend + +// composition primitives +export type FilesystemMiddleware = (next: FilesystemBackend) => FilesystemBackend +export type ExecMiddleware = (next: ExecBackend) => ExecBackend +export function compose(...middlewares: ReadonlyArray<(next: T) => T>): (base: T) => T + +// one demonstration middleware that ships in v1 +export function withLogging(opts?: { + destination?: "stderr" | ((entry: { method: string; args: unknown[] }) => void) +}): T extends FilesystemBackend ? FilesystemMiddleware : ExecMiddleware +``` + +Resist shipping `withMaxFileSize` / `withPathRestriction` as standalone middlewares — those fit better as options on `localFilesystem()` itself. One demonstration middleware (logging) proves the pattern; community middlewares grow organically. + +## Component Contracts + +### `createWorkspaceMarker` + +```ts +// packages/core/src/capabilities/built-in/workspace.ts +export function createWorkspaceMarker(): CapabilityMarker { + return { + name: "workspace", + detect: async (routeDir) => existsSync(join(routeDir, "workspace")), + load: async (routeDir, context) => { + const workspaceRoot = join(routeDir, "workspace") + const fs = context.backends?.filesystem ?? defaultLocalFilesystem() + const exec = context.backends?.exec ?? defaultLocalExec() + return { tools: buildWorkspaceTools(workspaceRoot, fs, exec) } + }, + } +} +``` + +The four tools share a single path-jail helper: + +```ts +function pathJail(userPath: string, workspaceRoot: string): string { + const resolved = resolve(workspaceRoot, userPath) + if (!resolved.startsWith(workspaceRoot + sep) && resolved !== workspaceRoot) { + throw new Error(`Path is outside workspace: ${userPath}`) + } + return resolved +} +``` + +Each tool's `run` resolves the path, calls the backend, returns the result: + +```ts +const readFileTool: DawnToolDefinition = { + name: "readFile", + description: "Read a UTF-8 file from the workspace.", + schema: z.object({ path: z.string() }), + run: async (input, ctx) => { + const { path } = z.object({ path: z.string() }).parse(input) + const safe = pathJail(path, workspaceRoot) + return await fs.readFile(safe, { signal: ctx.signal, workspaceRoot }) + }, +} +// writeFile, listDir, runBash same shape +``` + +### `CapabilityMarkerContext` extension + +```ts +// packages/core/src/capabilities/types.ts (modify) +export interface CapabilityMarkerContext { + readonly routeManifest: RouteManifest + readonly descriptor: DawnAgent | undefined + readonly descriptorRouteMap?: ReadonlyMap + readonly backends?: { // NEW + readonly filesystem?: FilesystemBackend + readonly exec?: ExecBackend + } +} +``` + +The CLI's `execute-route.ts` loads `dawn.config.ts`, extracts `config.backends`, and threads it into the marker context. + +### `DawnConfig` extension + +```ts +// packages/core/src/types.ts (modify) +export interface DawnConfig { + readonly appDir?: string + readonly backends?: { // NEW + readonly filesystem?: FilesystemBackend + readonly exec?: ExecBackend + } +} +``` + +Importing `FilesystemBackend` / `ExecBackend` into `@dawn-ai/core` creates a new edge: `core` depends on `@dawn-ai/workspace`'s type exports. This is acceptable because the workspace package's type-only entry has no runtime weight (no `node:child_process` etc.) — only the concrete `localFilesystem` / `localExec` factories pull in those deps. The interfaces live in `@dawn-ai/workspace/src/types.ts` (the package that owns the domain); `@dawn-ai/core` imports them via `import type`. + +### Tool-vs-capability collision check inversion + +Current behavior (PR #155, `packages/cli/src/lib/runtime/check-tool-name-uniqueness.ts`): a user-authored tool in `tools/` whose name matches a capability-contributed tool is a build error. + +New behavior: for **the workspace capability only**, a user-authored tool with a matching name **silently replaces** the capability's contribution. Other capabilities (planning's `writeTodos`, skills' `readSkill`, subagents' `task`) retain the collision error — those aren't meant to be replaceable. + +Implementation: the capability declares which of its contributed tools are "overridable." The uniqueness check skips overridable tools when both are present and removes the capability's version, keeping the user's. + +## Out of scope (deferred) + +- **HITL permission system** — `interrupt()` for jail-escape attempts. Sub-project 4.5. +- **Per-route backend override** — currently global only. Add via descriptor field non-breakingly later if a real use case surfaces. +- **OS-level sandboxing** — operator responsibility; Dawn documents deployment guidance. +- **Backend method extensibility** — adding methods beyond the standard four (e.g., custom `runPython` on a backend) does NOT auto-contribute extra tools. Authors who want additional tools write them in `tools/` as today. +- **Non-workspace backends** (tracing, secret resolution, etc.) — separate packages, separate sub-projects. + +## File Structure + +### New package + +``` +packages/workspace/ +├── package.json # @dawn-ai/workspace +├── tsconfig.json +├── vitest.config.ts +├── src/ +│ ├── index.ts # re-exports +│ ├── types.ts # FilesystemBackend, ExecBackend, BackendContext, middleware types +│ ├── local-filesystem.ts # localFilesystem() factory +│ ├── local-exec.ts # localExec() factory +│ ├── compose.ts # compose() helper +│ └── with-logging.ts # withLogging() middleware +└── test/ + ├── local-filesystem.test.ts + ├── local-exec.test.ts + ├── compose.test.ts + └── with-logging.test.ts +``` + +### New files in existing packages + +``` +packages/core/src/capabilities/built-in/workspace.ts # createWorkspaceMarker +packages/core/test/capabilities/workspace.test.ts # marker unit tests +``` + +### Modified files + +``` +packages/core/src/config.ts # rewrite loader to use tsx import +packages/core/test/config.test.ts # rewrite tests for new loader +packages/core/src/types.ts # extend DawnConfig with backends? +packages/core/src/capabilities/types.ts # extend CapabilityMarkerContext with backends? +packages/core/src/index.ts # export createWorkspaceMarker +packages/cli/src/lib/runtime/execute-route.ts # register createWorkspaceMarker, thread backends from config +packages/cli/src/lib/runtime/check-tool-name-uniqueness.ts # support overridable tool names +packages/cli/src/lib/typegen/run-typegen.ts # extra-tool entries for readFile/writeFile/listDir/runBash gated on hasWorkspace +memory/project_phase_status.md # mark sub-project 4 in progress +``` + +### Deleted files (chat example) + +``` +examples/chat/server/src/app/chat/tools/readFile.ts +examples/chat/server/src/app/chat/tools/writeFile.ts +examples/chat/server/src/app/chat/tools/listDir.ts +examples/chat/server/src/app/chat/tools/runBash.ts +examples/chat/server/src/app/chat/workspace-path.ts # if no longer referenced +examples/chat/server/src/app/coordinator/subagents/research/tools/readFile.ts +examples/chat/server/src/app/coordinator/subagents/research/tools/listDir.ts +examples/chat/server/src/app/coordinator/subagents/research/workspace-path.ts # if no longer referenced +``` + +### Notable: pnpm workspace config + +``` +pnpm-workspace.yaml # add "packages/workspace" +turbo.json # verify pipeline picks up the new package +``` + +## Testing strategy + +### Unit (no LLM) + +- `local-filesystem.test.ts` — backend impl reads/writes/lists against a `mkdtempSync` directory; respects `maxFileBytes`; rejects nothing (capability's job). +- `local-exec.test.ts` — `runCommand` executes `echo` and `ls`, captures stdout/stderr/exit; respects `timeout`; respects `allowedCommands` regex allowlist when configured. +- `compose.test.ts` — composes 0, 1, 2 middlewares correctly. Each middleware sees the next one in line. +- `with-logging.test.ts` — captures each method invocation with args; supports stderr and custom destination. +- `workspace.test.ts` (capability) — contributes 4 tools when `workspace/` exists; contributes nothing when absent; tool `run`s call the right backend method with the right args; path-jail rejects `../` escapes with the documented error; reads the default `localFilesystem` + `localExec` when no `backends` in context; uses configured backends when provided. +- `config.test.ts` rewrite — import-evaluated loader handles `{ appDir }`, `{ backends: { filesystem, exec } }`, omitted file (returns empty config), syntax errors surface as TS errors not custom messages. +- `check-tool-name-uniqueness.test.ts` extension — overridable workspace tool names are NOT collision errors when a user tool shadows them. + +### Integration / chat example + +- The chat example's hand-rolled `tools/` files delete. After the migration, `pnpm dev` and a Chrome MCP smoke against both `/chat` and `/coordinator` must produce identical behavior to current main: + - `/chat`: planning + skills + AGENTS.md + workspace tools all work. Same SSE event shape. + - `/coordinator`: research subagent's `listDir` + `readFile` work via the capability. Subagent envelopes still fire correctly. + +No new LLM-driven CI tests; manual smoke is the same policy as existing capabilities. + +### Override pathway + +- A test fixture under `packages/cli/test/fixtures/workspace-tool-override/` defines a custom `tools/readFile.ts` alongside a `workspace/` directory. Verify the build picks the user tool and drops the capability's contribution. + +## Known Risks + +- **Config-loader switch is observable.** Apps with intentionally-restricted `dawn.config.ts` syntax assumptions will discover they can now write arbitrary TS. Mitigation: this is mostly upside; the restriction was already pierceable by any other route file in the app. Document the change in the PR description and CHANGELOG. +- **Tool-override inversion is a behavior change.** Currently a user `tools/readFile.ts` next to a workspace capability would be a build error. After this PR, the user tool silently wins. Mitigation: capability marks specific tools as overridable; the error stays for non-overridable capability tools (planning, skills, subagents). +- **The path-jail still surfaces as an error to the agent** when it tries paths outside the workspace. With no HITL permission system, the agent has to learn from the error message and adjust. Mitigation: the error message is informative ("Path is outside workspace: ../etc/passwd"). When 4.5 lands, this becomes an interactive flow. +- **`@dawn-ai/core` gaining a type-only edge to `@dawn-ai/workspace`** introduces a package-graph consideration. Mitigation: workspace's types are zero-runtime (no `node:` imports in `types.ts`); only the concrete factory functions pull in platform deps. +- **gpt-5 has learned the standard tool shapes by name.** Renaming `runBash` to `runCommand` would normalize but cost familiarity. Keep `runBash` to preserve trained behavior; revisit if a behavior delta shows up in smoke. + +## What we're explicitly NOT changing + +- `agent({ description, subagents, ... })` descriptor stays the same. +- Capability marker contract (`detect`, `load`) stays the same except for the new `backends?` field on `CapabilityMarkerContext`. +- SSE event shape stays the same; no new event types. +- Subagents work continues to work (the `coordinator/subagents/research` route's tools are deleted because they're now provided by the workspace capability — that's the only subagents-related change). From 4261af54f22d5be5762ccfe3193b8b992d95c56f Mon Sep 17 00:00:00 2001 From: Brian Love Date: Wed, 20 May 2026 16:55:02 -0700 Subject: [PATCH 02/23] docs: implementation plan for phase 3 workspace + backends Bite-sized, TDD-structured plan covering: @dawn-ai/workspace package (types, localFilesystem, localExec, compose, withLogging), the createWorkspaceMarker capability, dawn.config.ts loader switch from hand-rolled parser to tsx import, tool-name uniqueness check inversion for overridable tools, runtime wiring, typegen, chat example migration, and the smoke + PR steps. 15 tasks; each commits independently. Co-Authored-By: Claude Opus 4.7 --- .../2026-05-20-phase3-workspace-backends.md | 2005 +++++++++++++++++ 1 file changed, 2005 insertions(+) create mode 100644 docs/superpowers/plans/2026-05-20-phase3-workspace-backends.md diff --git a/docs/superpowers/plans/2026-05-20-phase3-workspace-backends.md b/docs/superpowers/plans/2026-05-20-phase3-workspace-backends.md new file mode 100644 index 0000000..8324234 --- /dev/null +++ b/docs/superpowers/plans/2026-05-20-phase3-workspace-backends.md @@ -0,0 +1,2005 @@ +# Phase 3 — Workspace Capability + Pluggable Backends Implementation Plan + +> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. + +**Goal:** Refactor workspace tools (`readFile`, `writeFile`, `listDir`, `runBash`) from per-route hand-rolled files into a built-in capability auto-wired by the `workspace/` directory convention, with a pluggable filesystem/exec backend pair configurable in `dawn.config.ts` and shipping defaults plus functional composition primitives in a new `@dawn-ai/workspace` package. + +**Architecture:** New `@dawn-ai/workspace` package (types + `localFilesystem`/`localExec` defaults + `compose`/`withLogging` helpers). New `createWorkspaceMarker()` capability in `@dawn-ai/core` that contributes the four tools wired to the configured (or default-local) backends. `dawn.config.ts` loader switches from a hand-rolled string-only parser to a `tsx`-evaluated import so callable backends can be expressed naturally. Path-jail enforcement lives in the capability; backends receive already-resolved absolute paths. Chat example's hand-rolled tool files delete. + +**Tech Stack:** TypeScript, pnpm workspaces, vitest, zod, `node:fs/promises`, `node:child_process`, `tsx/esm/api` (already a dep, used for route discovery). + +**Spec:** `docs/superpowers/specs/2026-05-20-phase3-workspace-backends-design.md` + +--- + +## File Structure (locked in here, used by all tasks below) + +### New package: `packages/workspace/` + +| Path | Responsibility | +|---|---| +| `packages/workspace/package.json` | `@dawn-ai/workspace` manifest | +| `packages/workspace/tsconfig.json` | TS config (extends `@dawn-ai/config-typescript`) | +| `packages/workspace/vitest.config.ts` | Vitest config (mirror `@dawn-ai/core` shape) | +| `packages/workspace/src/index.ts` | Barrel re-exports | +| `packages/workspace/src/types.ts` | `FilesystemBackend`, `ExecBackend`, `BackendContext`, middleware types | +| `packages/workspace/src/local-filesystem.ts` | `localFilesystem()` factory | +| `packages/workspace/src/local-exec.ts` | `localExec()` factory | +| `packages/workspace/src/compose.ts` | `compose()` helper | +| `packages/workspace/src/with-logging.ts` | `withLogging()` middleware | +| `packages/workspace/test/local-filesystem.test.ts` | Unit tests | +| `packages/workspace/test/local-exec.test.ts` | Unit tests | +| `packages/workspace/test/compose.test.ts` | Unit tests | +| `packages/workspace/test/with-logging.test.ts` | Unit tests | + +### New files in existing packages + +| Path | Responsibility | +|---|---| +| `packages/core/src/capabilities/built-in/workspace.ts` | `createWorkspaceMarker()` | +| `packages/core/test/capabilities/workspace.test.ts` | Marker unit tests | + +### Modified files + +| Path | Change | +|---|---| +| `packages/core/src/config.ts` | Replace hand-rolled parser with `tsx`-evaluated import | +| `packages/core/test/config.test.ts` | Rewrite tests for the new loader | +| `packages/core/src/types.ts` | Add `backends?` to `DawnConfig` | +| `packages/core/src/capabilities/types.ts` | Add `backends?` to `CapabilityMarkerContext` | +| `packages/core/src/index.ts` | Export `createWorkspaceMarker` | +| `packages/cli/src/lib/runtime/execute-route.ts` | Register marker; thread backends from loaded `dawn.config` | +| `packages/cli/src/lib/runtime/check-tool-name-uniqueness.ts` | Accept overridable tool names | +| `packages/cli/test/tool-name-uniqueness.test.ts` | Add overridable case | +| `packages/cli/src/lib/typegen/run-typegen.ts` | Add `WORKSPACE_EXTRA_TOOLS` gated on `hasWorkspace(routeDir)` | +| `pnpm-workspace.yaml` | Add `packages/workspace` (verify already covers `packages/*`) | +| `memory/project_phase_status.md` | Mark sub-project 4 in progress, then complete | + +### Deleted files (chat example) + +| Path | Why | +|---|---| +| `examples/chat/server/src/app/chat/tools/readFile.ts` | Capability provides this | +| `examples/chat/server/src/app/chat/tools/writeFile.ts` | Capability provides this | +| `examples/chat/server/src/app/chat/tools/listDir.ts` | Capability provides this | +| `examples/chat/server/src/app/chat/tools/runBash.ts` | Capability provides this | +| `examples/chat/server/src/app/chat/workspace-path.ts` | No longer referenced | +| `examples/chat/server/src/app/coordinator/subagents/research/tools/readFile.ts` | Capability provides this | +| `examples/chat/server/src/app/coordinator/subagents/research/tools/listDir.ts` | Capability provides this | +| `examples/chat/server/src/app/coordinator/subagents/research/workspace-path.ts` | No longer referenced | + +--- + +# Phase A — `@dawn-ai/workspace` package + +### Task 1: Scaffold the workspace package + +**Files:** +- Create: `packages/workspace/package.json` +- Create: `packages/workspace/tsconfig.json` +- Create: `packages/workspace/tsconfig.build.json` +- Create: `packages/workspace/vitest.config.ts` +- Create: `packages/workspace/src/index.ts` (empty barrel for now) +- Verify: `pnpm-workspace.yaml` already covers `packages/*` (should — confirm with `grep packages pnpm-workspace.yaml`) + +- [ ] **Step 1: Inspect a sibling package's manifest pattern** + +Run: `cd /Users/blove/repos/dawn && cat packages/sdk/package.json | head -40` +Expected: see the conventional `name`, `version`, `type: "module"`, `exports`, `scripts`, `devDependencies` pattern. Note the version (likely `0.1.x`). + +- [ ] **Step 2: Write `packages/workspace/package.json`** + +```json +{ + "name": "@dawn-ai/workspace", + "version": "0.1.8", + "type": "module", + "main": "./dist/index.js", + "types": "./dist/index.d.ts", + "exports": { + ".": { + "types": "./dist/index.d.ts", + "import": "./dist/index.js" + } + }, + "files": ["dist"], + "scripts": { + "build": "tsc -p tsconfig.build.json", + "test": "vitest run", + "typecheck": "tsc -p . --noEmit", + "lint": "biome check --config-path ../config-biome/biome.json package.json src test tsconfig.json tsconfig.build.json vitest.config.ts" + }, + "devDependencies": { + "@dawn-ai/config-typescript": "workspace:*", + "@dawn-ai/config-biome": "workspace:*", + "@biomejs/biome": "catalog:", + "typescript": "catalog:", + "vitest": "catalog:" + } +} +``` + +(Verify the `version`, catalog references, and `@dawn-ai/config-*` package names against an existing sibling — adjust if Dawn uses different names like `@dawn-ai/tsconfig`.) + +- [ ] **Step 3: Write `packages/workspace/tsconfig.json`** + +```json +{ + "extends": "@dawn-ai/config-typescript/base.json", + "include": ["src", "test"] +} +``` + +(Match exactly what `packages/core/tsconfig.json` or `packages/sdk/tsconfig.json` does — adjust the extends path if those use a different shape.) + +- [ ] **Step 4: Write `packages/workspace/tsconfig.build.json`** + +```json +{ + "extends": "./tsconfig.json", + "compilerOptions": { + "outDir": "./dist", + "rootDir": "./src", + "declaration": true, + "declarationMap": true, + "sourceMap": true, + "noEmit": false + }, + "include": ["src"] +} +``` + +(Compare against `packages/sdk/tsconfig.build.json` — match exactly.) + +- [ ] **Step 5: Write `packages/workspace/vitest.config.ts`** + +```ts +import { defineConfig } from "vitest/config" + +export default defineConfig({ + test: { + include: ["test/**/*.test.ts"], + }, +}) +``` + +- [ ] **Step 6: Write `packages/workspace/src/index.ts` (empty barrel for now)** + +```ts +// Re-exports will be added as types and impls land in subsequent tasks. +export {} +``` + +- [ ] **Step 7: Install + verify scaffolding** + +Run from repo root: +```bash +cd /Users/blove/repos/dawn && pnpm install 2>&1 | tail -5 +``` +Expected: `Done in Ns`. The new `@dawn-ai/workspace` package is symlinked. + +Run: `cd /Users/blove/repos/dawn && pnpm --filter @dawn-ai/workspace build 2>&1 | tail -5` +Expected: build succeeds (empty package builds fine). + +- [ ] **Step 8: Commit** + +```bash +cd /Users/blove/repos/dawn +git add packages/workspace/ +git commit -m "$(cat <<'EOF' +scaffold(workspace): empty @dawn-ai/workspace package + +Adds the package skeleton (manifest, tsconfig, vitest config) for the +upcoming pluggable workspace backends. No exports yet — types, defaults, +and helpers land in subsequent commits. + +Co-Authored-By: Claude Opus 4.7 +EOF +)" +``` + +--- + +### Task 2: Type interfaces + +**Files:** +- Create: `packages/workspace/src/types.ts` +- Modify: `packages/workspace/src/index.ts` + +- [ ] **Step 1: Write the type file** + +Create `packages/workspace/src/types.ts`: + +```ts +/** + * Workspace backend type interfaces. + * + * Backends are plain objects implementing these interfaces. The + * workspace capability calls into them to perform filesystem reads, + * writes, listings, and shell command execution. Defaults + * (`localFilesystem`, `localExec`) ship in this package; users can + * provide their own implementations via dawn.config.ts. + */ + +export interface BackendContext { + /** Aborts when the parent agent run is cancelled. */ + readonly signal: AbortSignal + /** Absolute filesystem path of the route's workspace directory. */ + readonly workspaceRoot: string +} + +export interface FilesystemBackend { + /** + * Read a UTF-8 file. `path` is an already-resolved absolute path + * inside `ctx.workspaceRoot` — the capability has done the path-jail. + */ + readFile(path: string, ctx: BackendContext): Promise + + /** Write a UTF-8 file. Returns the byte count of `content`. */ + writeFile( + path: string, + content: string, + ctx: BackendContext, + ): Promise<{ readonly bytesWritten: number }> + + /** List entries in a directory. Returns leaf names (not full paths). */ + listDir(path: string, ctx: BackendContext): Promise +} + +export interface ExecBackend { + /** + * Run a shell command. `args.cwd`, if provided, is already-resolved + * to an absolute path inside `ctx.workspaceRoot`. + */ + runCommand( + args: { + readonly command: string + readonly cwd?: string + readonly env?: Readonly> + }, + ctx: BackendContext, + ): Promise<{ + readonly stdout: string + readonly stderr: string + readonly exitCode: number + }> +} + +/** + * A filesystem middleware is a function that wraps a backend to add + * cross-cutting behavior (logging, caching, etc.). Compose multiple + * middlewares via `compose()`. + */ +export type FilesystemMiddleware = (next: FilesystemBackend) => FilesystemBackend + +/** See FilesystemMiddleware. */ +export type ExecMiddleware = (next: ExecBackend) => ExecBackend +``` + +- [ ] **Step 2: Re-export from the barrel** + +Edit `packages/workspace/src/index.ts`: + +```ts +export type { + BackendContext, + ExecBackend, + ExecMiddleware, + FilesystemBackend, + FilesystemMiddleware, +} from "./types.js" +``` + +- [ ] **Step 3: Build + typecheck** + +Run: `cd /Users/blove/repos/dawn && pnpm --filter @dawn-ai/workspace build 2>&1 | tail -5` +Expected: success. + +Run: `cd /Users/blove/repos/dawn && pnpm --filter @dawn-ai/workspace typecheck 2>&1 | tail -5` +Expected: success. + +- [ ] **Step 4: Commit** + +```bash +cd /Users/blove/repos/dawn +git add packages/workspace/src/types.ts packages/workspace/src/index.ts +git commit -m "feat(workspace): type interfaces for filesystem + exec backends + +Co-Authored-By: Claude Opus 4.7 " +``` + +--- + +### Task 3: `localFilesystem()` factory + +**Files:** +- Create: `packages/workspace/src/local-filesystem.ts` +- Create: `packages/workspace/test/local-filesystem.test.ts` +- Modify: `packages/workspace/src/index.ts` + +- [ ] **Step 1: Write the failing test** + +Create `packages/workspace/test/local-filesystem.test.ts`: + +```ts +import { describe, expect, it, beforeEach, afterEach } from "vitest" +import { mkdtempSync, rmSync, writeFileSync, mkdirSync } from "node:fs" +import { tmpdir } from "node:os" +import { join } from "node:path" +import { localFilesystem } from "../src/local-filesystem.js" + +function ctx(workspaceRoot: string) { + return { signal: new AbortController().signal, workspaceRoot } +} + +describe("localFilesystem", () => { + let root: string + beforeEach(() => { + root = mkdtempSync(join(tmpdir(), "dawn-localfs-")) + }) + afterEach(() => { + rmSync(root, { recursive: true, force: true }) + }) + + it("readFile returns UTF-8 contents", async () => { + writeFileSync(join(root, "hello.txt"), "hi", "utf8") + const fs = localFilesystem() + expect(await fs.readFile(join(root, "hello.txt"), ctx(root))).toBe("hi") + }) + + it("readFile rejects files larger than maxFileBytes", async () => { + writeFileSync(join(root, "big.txt"), "x".repeat(2048), "utf8") + const fs = localFilesystem({ maxFileBytes: 1024 }) + await expect(fs.readFile(join(root, "big.txt"), ctx(root))).rejects.toThrow(/too large/i) + }) + + it("writeFile returns the byte count", async () => { + const fs = localFilesystem() + const res = await fs.writeFile(join(root, "out.txt"), "abc", ctx(root)) + expect(res.bytesWritten).toBe(3) + }) + + it("listDir returns directory entries (leaf names only)", async () => { + writeFileSync(join(root, "a.txt"), "", "utf8") + mkdirSync(join(root, "sub")) + const fs = localFilesystem() + const entries = await fs.listDir(root, ctx(root)) + expect([...entries].sort()).toEqual(["a.txt", "sub"]) + }) + + it("readFile on missing file raises ENOENT", async () => { + const fs = localFilesystem() + await expect(fs.readFile(join(root, "ghost.txt"), ctx(root))).rejects.toThrow(/ENOENT/) + }) +}) +``` + +- [ ] **Step 2: Run test to verify it fails** + +Run: `cd /Users/blove/repos/dawn && pnpm --filter @dawn-ai/workspace test 2>&1 | tail -10` +Expected: FAIL with `Cannot find module '../src/local-filesystem.js'`. + +- [ ] **Step 3: Implement** + +Create `packages/workspace/src/local-filesystem.ts`: + +```ts +import { readFile, readdir, stat, writeFile } from "node:fs/promises" +import type { BackendContext, FilesystemBackend } from "./types.js" + +const DEFAULT_MAX_FILE_BYTES = 256 * 1024 + +export interface LocalFilesystemOptions { + /** + * Reject `readFile` when the target file exceeds this size. + * Default: 256 KiB. + */ + readonly maxFileBytes?: number +} + +export function localFilesystem(opts: LocalFilesystemOptions = {}): FilesystemBackend { + const maxBytes = opts.maxFileBytes ?? DEFAULT_MAX_FILE_BYTES + return { + async readFile(path: string, _ctx: BackendContext): Promise { + const s = await stat(path) + if (s.size > maxBytes) { + throw new Error(`File too large: ${s.size} bytes (max ${maxBytes}) at ${path}`) + } + return await readFile(path, "utf8") + }, + async writeFile( + path: string, + content: string, + _ctx: BackendContext, + ): Promise<{ readonly bytesWritten: number }> { + await writeFile(path, content, "utf8") + return { bytesWritten: Buffer.byteLength(content, "utf8") } + }, + async listDir(path: string, _ctx: BackendContext): Promise { + return await readdir(path) + }, + } +} +``` + +- [ ] **Step 4: Re-export from barrel** + +Edit `packages/workspace/src/index.ts`, append: + +```ts +export { localFilesystem, type LocalFilesystemOptions } from "./local-filesystem.js" +``` + +- [ ] **Step 5: Run tests to verify pass** + +Run: `cd /Users/blove/repos/dawn && pnpm --filter @dawn-ai/workspace test 2>&1 | tail -10` +Expected: PASS (5 tests). + +- [ ] **Step 6: Commit** + +```bash +cd /Users/blove/repos/dawn +git add packages/workspace/src/local-filesystem.ts \ + packages/workspace/test/local-filesystem.test.ts \ + packages/workspace/src/index.ts +git commit -m "feat(workspace): localFilesystem default backend + +Co-Authored-By: Claude Opus 4.7 " +``` + +--- + +### Task 4: `localExec()` factory + +**Files:** +- Create: `packages/workspace/src/local-exec.ts` +- Create: `packages/workspace/test/local-exec.test.ts` +- Modify: `packages/workspace/src/index.ts` + +- [ ] **Step 1: Write the failing test** + +Create `packages/workspace/test/local-exec.test.ts`: + +```ts +import { describe, expect, it } from "vitest" +import { mkdtempSync, rmSync } from "node:fs" +import { tmpdir } from "node:os" +import { join } from "node:path" +import { localExec } from "../src/local-exec.js" + +function ctx(workspaceRoot: string) { + return { signal: new AbortController().signal, workspaceRoot } +} + +describe("localExec", () => { + it("runCommand captures stdout, stderr, exitCode", async () => { + const root = mkdtempSync(join(tmpdir(), "dawn-localexec-")) + try { + const exec = localExec() + const out = await exec.runCommand({ command: "echo hello" }, ctx(root)) + expect(out.stdout.trim()).toBe("hello") + expect(out.exitCode).toBe(0) + } finally { + rmSync(root, { recursive: true, force: true }) + } + }) + + it("runCommand returns non-zero exitCode on failure", async () => { + const root = mkdtempSync(join(tmpdir(), "dawn-localexec-")) + try { + const exec = localExec() + const out = await exec.runCommand({ command: "exit 7" }, ctx(root)) + expect(out.exitCode).toBe(7) + } finally { + rmSync(root, { recursive: true, force: true }) + } + }) + + it("runCommand enforces timeout", async () => { + const root = mkdtempSync(join(tmpdir(), "dawn-localexec-")) + try { + const exec = localExec({ timeout: 100 }) + await expect( + exec.runCommand({ command: "sleep 1" }, ctx(root)), + ).rejects.toThrow(/timeout/i) + } finally { + rmSync(root, { recursive: true, force: true }) + } + }) + + it("runCommand respects allowedCommands regex allowlist", async () => { + const root = mkdtempSync(join(tmpdir(), "dawn-localexec-")) + try { + const exec = localExec({ allowedCommands: [/^echo\b/, /^ls\b/] }) + const ok = await exec.runCommand({ command: "echo allowed" }, ctx(root)) + expect(ok.stdout.trim()).toBe("allowed") + await expect( + exec.runCommand({ command: "rm -rf /" }, ctx(root)), + ).rejects.toThrow(/not allowed/i) + } finally { + rmSync(root, { recursive: true, force: true }) + } + }) +}) +``` + +- [ ] **Step 2: Run to verify failure** + +Run: `cd /Users/blove/repos/dawn && pnpm --filter @dawn-ai/workspace test -- local-exec 2>&1 | tail -10` +Expected: FAIL (module not found). + +- [ ] **Step 3: Implement** + +Create `packages/workspace/src/local-exec.ts`: + +```ts +import { exec as cpExec } from "node:child_process" +import { promisify } from "node:util" +import type { BackendContext, ExecBackend } from "./types.js" + +const execAsync = promisify(cpExec) +const DEFAULT_TIMEOUT_MS = 30_000 + +export interface LocalExecOptions { + /** Kill the command if it runs longer than this. Default 30 seconds. */ + readonly timeout?: number + /** + * Optional allowlist of command-line patterns. When non-empty, every + * command must match at least one regex or `runCommand` throws before + * spawning anything. Use to deny dangerous commands in production. + */ + readonly allowedCommands?: readonly RegExp[] +} + +export function localExec(opts: LocalExecOptions = {}): ExecBackend { + const timeout = opts.timeout ?? DEFAULT_TIMEOUT_MS + const allowed = opts.allowedCommands + return { + async runCommand(args, ctx: BackendContext) { + if (allowed && allowed.length > 0 && !allowed.some((re) => re.test(args.command))) { + throw new Error(`Command not allowed by allowedCommands policy: ${args.command}`) + } + try { + const result = await execAsync(args.command, { + cwd: args.cwd ?? ctx.workspaceRoot, + env: args.env ?? process.env, + timeout, + signal: ctx.signal, + }) + return { stdout: result.stdout, stderr: result.stderr, exitCode: 0 } + } catch (err) { + const e = err as NodeJS.ErrnoException & { + code?: number | string + stdout?: string + stderr?: string + killed?: boolean + } + if (e.killed && typeof e.code !== "number") { + throw new Error(`Command timeout after ${timeout}ms: ${args.command}`) + } + return { + stdout: e.stdout ?? "", + stderr: e.stderr ?? "", + exitCode: typeof e.code === "number" ? e.code : 1, + } + } + }, + } +} +``` + +- [ ] **Step 4: Re-export from barrel** + +Edit `packages/workspace/src/index.ts`, append: + +```ts +export { localExec, type LocalExecOptions } from "./local-exec.js" +``` + +- [ ] **Step 5: Run tests** + +Run: `cd /Users/blove/repos/dawn && pnpm --filter @dawn-ai/workspace test 2>&1 | tail -10` +Expected: PASS (9 tests: 5 fs + 4 exec). + +- [ ] **Step 6: Commit** + +```bash +cd /Users/blove/repos/dawn +git add packages/workspace/src/local-exec.ts \ + packages/workspace/test/local-exec.test.ts \ + packages/workspace/src/index.ts +git commit -m "feat(workspace): localExec default backend + +Co-Authored-By: Claude Opus 4.7 " +``` + +--- + +### Task 5: `compose()` helper + +**Files:** +- Create: `packages/workspace/src/compose.ts` +- Create: `packages/workspace/test/compose.test.ts` +- Modify: `packages/workspace/src/index.ts` + +- [ ] **Step 1: Write the failing test** + +Create `packages/workspace/test/compose.test.ts`: + +```ts +import { describe, expect, it } from "vitest" +import { compose } from "../src/compose.js" +import type { FilesystemBackend, FilesystemMiddleware } from "../src/types.js" + +const base: FilesystemBackend = { + async readFile() { return "BASE" }, + async writeFile() { return { bytesWritten: 0 } }, + async listDir() { return [] }, +} + +describe("compose", () => { + it("with zero middlewares returns the base unchanged", () => { + expect(compose()(base)).toBe(base) + }) + + it("with one middleware wraps the base", async () => { + const upper: FilesystemMiddleware = (next) => ({ + ...next, + readFile: async (p, c) => (await next.readFile(p, c)).toLowerCase(), + }) + const wrapped = compose(upper)(base) + expect(await wrapped.readFile("x", { signal: new AbortController().signal, workspaceRoot: "/" })).toBe("base") + }) + + it("applies middlewares right-to-left (outermost first)", async () => { + const trace: string[] = [] + const a: FilesystemMiddleware = (next) => ({ + ...next, + readFile: async (p, c) => { trace.push("a:before"); const r = await next.readFile(p, c); trace.push("a:after"); return r }, + }) + const b: FilesystemMiddleware = (next) => ({ + ...next, + readFile: async (p, c) => { trace.push("b:before"); const r = await next.readFile(p, c); trace.push("b:after"); return r }, + }) + await compose(a, b)(base).readFile("x", { signal: new AbortController().signal, workspaceRoot: "/" }) + // `compose(a, b)` reads "a wraps b wraps base", so order is a:before, b:before, b:after, a:after + expect(trace).toEqual(["a:before", "b:before", "b:after", "a:after"]) + }) +}) +``` + +- [ ] **Step 2: Run to verify failure** + +Run: `cd /Users/blove/repos/dawn && pnpm --filter @dawn-ai/workspace test -- compose 2>&1 | tail -10` +Expected: FAIL (module not found). + +- [ ] **Step 3: Implement** + +Create `packages/workspace/src/compose.ts`: + +```ts +/** + * Compose middleware functions into a single wrapper. + * + * Order: the LEFTMOST middleware is the OUTERMOST. Given + * `compose(a, b, c)(base)`, the call order is `a -> b -> c -> base`, + * mirroring how function call stacks read top-down. + * + * With zero middlewares, returns the base unchanged (no wrapper object). + */ +export function compose(...middlewares: ReadonlyArray<(next: T) => T>): (base: T) => T { + if (middlewares.length === 0) return (base) => base + return (base) => middlewares.reduceRight((acc, mw) => mw(acc), base) +} +``` + +- [ ] **Step 4: Re-export** + +Edit `packages/workspace/src/index.ts`, append: + +```ts +export { compose } from "./compose.js" +``` + +- [ ] **Step 5: Run tests** + +Run: `cd /Users/blove/repos/dawn && pnpm --filter @dawn-ai/workspace test 2>&1 | tail -10` +Expected: PASS (12 tests). + +- [ ] **Step 6: Commit** + +```bash +cd /Users/blove/repos/dawn +git add packages/workspace/src/compose.ts \ + packages/workspace/test/compose.test.ts \ + packages/workspace/src/index.ts +git commit -m "feat(workspace): compose() middleware helper + +Co-Authored-By: Claude Opus 4.7 " +``` + +--- + +### Task 6: `withLogging()` middleware + +**Files:** +- Create: `packages/workspace/src/with-logging.ts` +- Create: `packages/workspace/test/with-logging.test.ts` +- Modify: `packages/workspace/src/index.ts` + +- [ ] **Step 1: Write the failing test** + +Create `packages/workspace/test/with-logging.test.ts`: + +```ts +import { describe, expect, it } from "vitest" +import { withFilesystemLogging } from "../src/with-logging.js" +import type { FilesystemBackend } from "../src/types.js" + +const base: FilesystemBackend = { + async readFile() { return "ok" }, + async writeFile() { return { bytesWritten: 5 } }, + async listDir() { return ["a"] }, +} + +const ctx = { signal: new AbortController().signal, workspaceRoot: "/r" } + +describe("withFilesystemLogging", () => { + it("invokes the destination callback for each method", async () => { + const log: Array<{ method: string; args: unknown[] }> = [] + const wrapped = withFilesystemLogging({ destination: (e) => log.push(e) })(base) + await wrapped.readFile("a.md", ctx) + await wrapped.writeFile("b.md", "hi", ctx) + await wrapped.listDir("/r", ctx) + expect(log.map((e) => e.method)).toEqual(["readFile", "writeFile", "listDir"]) + expect(log[0]!.args).toEqual(["a.md"]) + expect(log[1]!.args).toEqual(["b.md", "hi"]) + }) + + it("forwards return values unchanged", async () => { + const wrapped = withFilesystemLogging({ destination: () => undefined })(base) + expect(await wrapped.readFile("a.md", ctx)).toBe("ok") + expect(await wrapped.writeFile("b.md", "hi", ctx)).toEqual({ bytesWritten: 5 }) + expect([...(await wrapped.listDir("/r", ctx))]).toEqual(["a"]) + }) + + it("defaults destination to console.error when not provided", async () => { + const original = console.error + const logged: string[] = [] + console.error = ((msg: string) => logged.push(msg)) as typeof console.error + try { + const wrapped = withFilesystemLogging()(base) + await wrapped.readFile("a.md", ctx) + } finally { + console.error = original + } + expect(logged.length).toBe(1) + expect(logged[0]).toContain("readFile") + }) +}) +``` + +- [ ] **Step 2: Run to verify failure** + +Run: `cd /Users/blove/repos/dawn && pnpm --filter @dawn-ai/workspace test -- with-logging 2>&1 | tail -10` +Expected: FAIL. + +- [ ] **Step 3: Implement** + +Create `packages/workspace/src/with-logging.ts`: + +```ts +import type { ExecMiddleware, FilesystemBackend, FilesystemMiddleware } from "./types.js" + +export interface LoggingOptions { + /** + * Where to send log lines. Default: `console.error`. + * + * Pass a function for structured logging. The argument is + * `{ method, args }` so the function can format however it wants. + */ + readonly destination?: ((entry: { method: string; args: unknown[] }) => void) +} + +function emit(opts: LoggingOptions, method: string, args: unknown[]): void { + if (opts.destination) { + opts.destination({ method, args }) + return + } + console.error(`[dawn:workspace] ${method}(${args.map((a) => JSON.stringify(a)).join(", ")})`) +} + +export function withFilesystemLogging(opts: LoggingOptions = {}): FilesystemMiddleware { + return (next: FilesystemBackend) => ({ + readFile: async (path, ctx) => { + emit(opts, "readFile", [path]) + return next.readFile(path, ctx) + }, + writeFile: async (path, content, ctx) => { + emit(opts, "writeFile", [path, content]) + return next.writeFile(path, content, ctx) + }, + listDir: async (path, ctx) => { + emit(opts, "listDir", [path]) + return next.listDir(path, ctx) + }, + }) +} + +export function withExecLogging(opts: LoggingOptions = {}): ExecMiddleware { + return (next) => ({ + runCommand: async (args, ctx) => { + emit(opts, "runCommand", [args.command, args.cwd]) + return next.runCommand(args, ctx) + }, + }) +} +``` + +(Two named functions, one per interface. Cleaner than the conditional-type approach floated in the spec — explicit type signatures, no inference magic.) + +- [ ] **Step 4: Re-export** + +Edit `packages/workspace/src/index.ts`, append: + +```ts +export { withExecLogging, withFilesystemLogging, type LoggingOptions } from "./with-logging.js" +``` + +- [ ] **Step 5: Run tests** + +Run: `cd /Users/blove/repos/dawn && pnpm --filter @dawn-ai/workspace test 2>&1 | tail -10` +Expected: PASS (15 tests). + +- [ ] **Step 6: Verify full repo still builds** + +Run: `cd /Users/blove/repos/dawn && pnpm build 2>&1 | tail -8` +Expected: success across all packages. + +- [ ] **Step 7: Commit** + +```bash +cd /Users/blove/repos/dawn +git add packages/workspace/src/with-logging.ts \ + packages/workspace/test/with-logging.test.ts \ + packages/workspace/src/index.ts +git commit -m "feat(workspace): withFilesystemLogging + withExecLogging middlewares + +Co-Authored-By: Claude Opus 4.7 " +``` + +--- + +# Phase B — Config loader switch + +### Task 7: Replace hand-rolled config parser with `tsx`-evaluated import + +**Files:** +- Modify: `packages/core/src/config.ts` +- Modify: `packages/core/test/config.test.ts` + +- [ ] **Step 1: Read the current loader + tests** + +Run: `cd /Users/blove/repos/dawn && wc -l packages/core/src/config.ts packages/core/test/config.test.ts` +Read both to understand: +- The current parser supports `const FOO = "x"` + `export default { appDir }` + `export default { appDir: "..." }`. Nothing else. +- Existing tests verify successful parses + rejection of unsupported syntax. + +- [ ] **Step 2: Rewrite `packages/core/src/config.ts`** + +Replace the entire file with: + +```ts +import { constants } from "node:fs" +import { access } from "node:fs/promises" +import { join } from "node:path" +import { pathToFileURL } from "node:url" + +import type { DawnConfig, LoadDawnConfigOptions, LoadedDawnConfig } from "./types.js" + +export const DAWN_CONFIG_FILE = "dawn.config.ts" + +let loaderPromise: Promise | undefined + +async function registerTsxLoader(): Promise { + loaderPromise ??= (async () => { + const { register } = (await import("tsx/esm/api")) as { + readonly register: () => unknown + } + register() + })() + await loaderPromise +} + +export async function loadDawnConfig(options: LoadDawnConfigOptions): Promise { + const configPath = join(options.appRoot, DAWN_CONFIG_FILE) + await access(configPath, constants.F_OK) + await registerTsxLoader() + + const mod = (await import(pathToFileURL(configPath).href)) as { + readonly default?: unknown + } + + if (!mod.default || typeof mod.default !== "object") { + throw new Error( + `${DAWN_CONFIG_FILE} must export default an object. Got: ${typeof mod.default}`, + ) + } + + return { + appRoot: options.appRoot, + config: mod.default as DawnConfig, + configPath, + } +} +``` + +- [ ] **Step 3: Rewrite `packages/core/test/config.test.ts`** + +Replace with: + +```ts +import { afterEach, beforeEach, describe, expect, it } from "vitest" +import { mkdtempSync, rmSync } from "node:fs" +import { writeFile } from "node:fs/promises" +import { tmpdir } from "node:os" +import { join } from "node:path" + +import { DAWN_CONFIG_FILE, loadDawnConfig } from "../src/config.js" + +describe("loadDawnConfig", () => { + let appRoot: string + + beforeEach(() => { + appRoot = mkdtempSync(join(tmpdir(), "dawn-config-")) + }) + + afterEach(() => { + rmSync(appRoot, { recursive: true, force: true }) + }) + + async function writeConfig(source: string): Promise { + await writeFile(join(appRoot, DAWN_CONFIG_FILE), source, "utf8") + } + + it("loads a config with just appDir", async () => { + await writeConfig(`export default { appDir: "src/app" }\n`) + const loaded = await loadDawnConfig({ appRoot }) + expect(loaded.config).toMatchObject({ appDir: "src/app" }) + expect(loaded.configPath).toBe(join(appRoot, DAWN_CONFIG_FILE)) + }) + + it("loads a config with no fields (empty object)", async () => { + await writeConfig(`export default {}\n`) + const loaded = await loadDawnConfig({ appRoot }) + expect(loaded.config).toEqual({}) + }) + + it("loads a config that imports from another module", async () => { + // Note: this test mostly verifies the tsx loader is registered — the + // existence of an importable file is enough; the import doesn't have + // to be a real package. + await writeConfig(` + const APP_DIR = "src/app" + export default { appDir: APP_DIR } + `) + const loaded = await loadDawnConfig({ appRoot }) + expect(loaded.config).toMatchObject({ appDir: "src/app" }) + }) + + it("rejects missing default export", async () => { + await writeConfig(`export const named = { appDir: "x" }\n`) + await expect(loadDawnConfig({ appRoot })).rejects.toThrow(/must export default/i) + }) + + it("rejects non-object default export", async () => { + await writeConfig(`export default "hello"\n`) + await expect(loadDawnConfig({ appRoot })).rejects.toThrow(/must export default an object/i) + }) + + it("propagates TS syntax errors from the imported module", async () => { + await writeConfig(`export default { appDir:\n`) // syntactically invalid + await expect(loadDawnConfig({ appRoot })).rejects.toThrow() + }) +}) +``` + +- [ ] **Step 4: Run config tests** + +Run: `cd /Users/blove/repos/dawn && pnpm --filter @dawn-ai/core test -- config.test 2>&1 | tail -10` +Expected: PASS (6 tests). + +- [ ] **Step 5: Run the full repo tests to catch unrelated regressions** + +Run: `cd /Users/blove/repos/dawn && pnpm test 2>&1 | tail -10` +Expected: all tests pass. (One file in `packages/core/test/discover-routes.test.ts` writes a `dawn.config.ts` with `export default { appDir: "src/app" }` — that's a valid TS module under the new loader too, so should still work.) + +- [ ] **Step 6: Commit** + +```bash +cd /Users/blove/repos/dawn +git add packages/core/src/config.ts packages/core/test/config.test.ts +git commit -m "$(cat <<'EOF' +feat(core): switch dawn.config.ts loader from hand-rolled parser to tsx import + +The hand-rolled parser supported only string-literal property values +and const string bindings. The upcoming workspace capability needs to +express callable backend values in dawn.config.ts, which strings can't +express. Switch to a tsx-evaluated dynamic import (same loader Dawn +already uses for route discovery and tool execution). + +Existing dawn.config.ts files (just { appDir }) remain valid TS +modules and continue to load without modification. + +Co-Authored-By: Claude Opus 4.7 +EOF +)" +``` + +--- + +# Phase C — Capability marker + +### Task 8: Extend `DawnConfig` + `CapabilityMarkerContext` with `backends?` + +**Files:** +- Modify: `packages/core/src/types.ts` +- Modify: `packages/core/src/capabilities/types.ts` +- Modify: `packages/core/package.json` (add `@dawn-ai/workspace` peer/dep — type-only) + +- [ ] **Step 1: Add the workspace package as a type-only dependency on @dawn-ai/core** + +Edit `packages/core/package.json`. Add to `devDependencies` (type-only — no runtime dep): + +```json +"@dawn-ai/workspace": "workspace:*" +``` + +- [ ] **Step 2: Extend `DawnConfig`** + +Edit `packages/core/src/types.ts`. Find the `DawnConfig` interface (around line 5) and update: + +```ts +import type { ExecBackend, FilesystemBackend } from "@dawn-ai/workspace" +import type { RouteKind } from "@dawn-ai/sdk" + +export type { RouteKind } + +export interface DawnConfig { + readonly appDir?: string + readonly backends?: { + readonly filesystem?: FilesystemBackend + readonly exec?: ExecBackend + } +} +``` + +- [ ] **Step 3: Extend `CapabilityMarkerContext`** + +Edit `packages/core/src/capabilities/types.ts`. Add to the imports at the top: + +```ts +import type { ExecBackend, FilesystemBackend } from "@dawn-ai/workspace" +``` + +Update the `CapabilityMarkerContext` interface: + +```ts +export interface CapabilityMarkerContext { + readonly routeManifest: RouteManifest + readonly descriptor: DawnAgent | undefined + readonly descriptorRouteMap?: ReadonlyMap + readonly backends?: { + readonly filesystem?: FilesystemBackend + readonly exec?: ExecBackend + } +} +``` + +- [ ] **Step 4: Install + verify** + +Run: `cd /Users/blove/repos/dawn && pnpm install --silent 2>&1 | tail -3` +Expected: workspace package is symlinked into core. + +Run: `cd /Users/blove/repos/dawn && pnpm --filter @dawn-ai/core typecheck 2>&1 | tail -5` +Expected: 0 errors. + +- [ ] **Step 5: Run full repo tests** + +Run: `cd /Users/blove/repos/dawn && pnpm test 2>&1 | tail -10` +Expected: green. + +- [ ] **Step 6: Commit** + +```bash +cd /Users/blove/repos/dawn +git add packages/core/package.json \ + packages/core/src/types.ts \ + packages/core/src/capabilities/types.ts +git commit -m "feat(core): add backends field to DawnConfig + CapabilityMarkerContext + +Type-only edge: @dawn-ai/core now imports FilesystemBackend/ExecBackend +types from @dawn-ai/workspace via 'import type'. No runtime weight. + +Co-Authored-By: Claude Opus 4.7 " +``` + +--- + +### Task 9: Implement `createWorkspaceMarker` + +**Files:** +- Create: `packages/core/src/capabilities/built-in/workspace.ts` +- Create: `packages/core/test/capabilities/workspace.test.ts` +- Modify: `packages/core/src/index.ts` +- Modify: `packages/core/package.json` (`@dawn-ai/workspace` is now a runtime dep too — for the default backends) + +- [ ] **Step 1: Promote workspace from devDep to dep in `@dawn-ai/core`** + +Edit `packages/core/package.json`. Move `@dawn-ai/workspace` from `devDependencies` to `dependencies`. The marker needs `localFilesystem()` and `localExec()` at runtime as defaults. + +- [ ] **Step 2: Write the failing test** + +Create `packages/core/test/capabilities/workspace.test.ts`: + +```ts +import { afterEach, beforeEach, describe, expect, it, vi } from "vitest" +import { mkdtempSync, mkdirSync, rmSync, writeFileSync } from "node:fs" +import { tmpdir } from "node:os" +import { join } from "node:path" + +import { createWorkspaceMarker } from "../../src/capabilities/built-in/workspace.js" +import type { CapabilityMarkerContext } from "../../src/capabilities/types.js" + +function emptyManifest() { + return { appRoot: "/app", routes: [] } +} + +function ctx(extras: Partial = {}): CapabilityMarkerContext { + return { + routeManifest: emptyManifest(), + descriptor: undefined, + ...extras, + } +} + +describe("createWorkspaceMarker — detect", () => { + let routeDir: string + beforeEach(() => { routeDir = mkdtempSync(join(tmpdir(), "dawn-workspace-cap-")) }) + afterEach(() => { rmSync(routeDir, { recursive: true, force: true }) }) + + it("returns false when no workspace/ directory exists", async () => { + const detected = await createWorkspaceMarker().detect(routeDir, ctx()) + expect(detected).toBe(false) + }) + + it("returns true when workspace/ exists", async () => { + mkdirSync(join(routeDir, "workspace")) + const detected = await createWorkspaceMarker().detect(routeDir, ctx()) + expect(detected).toBe(true) + }) +}) + +describe("createWorkspaceMarker — load", () => { + let routeDir: string + beforeEach(() => { + routeDir = mkdtempSync(join(tmpdir(), "dawn-workspace-cap-")) + mkdirSync(join(routeDir, "workspace")) + }) + afterEach(() => { rmSync(routeDir, { recursive: true, force: true }) }) + + it("contributes exactly four tools when workspace/ exists", async () => { + const contribution = await createWorkspaceMarker().load(routeDir, ctx()) + const names = (contribution.tools ?? []).map((t) => t.name).sort() + expect(names).toEqual(["listDir", "readFile", "runBash", "writeFile"]) + }) + + it("contributes no tools when workspace/ is absent", async () => { + rmSync(join(routeDir, "workspace"), { recursive: true }) + const contribution = await createWorkspaceMarker().load(routeDir, ctx()) + expect(contribution.tools).toBeUndefined() + }) + + it("readFile tool calls the configured backend with an absolute path inside the jail", async () => { + writeFileSync(join(routeDir, "workspace", "hello.txt"), "hi", "utf8") + const fakeBackend = { + readFile: vi.fn().mockResolvedValue("hi"), + writeFile: vi.fn(), + listDir: vi.fn(), + } + const contribution = await createWorkspaceMarker().load( + routeDir, + ctx({ backends: { filesystem: fakeBackend } }), + ) + const readTool = contribution.tools!.find((t) => t.name === "readFile")! + const result = await readTool.run({ path: "hello.txt" }, { signal: new AbortController().signal }) + expect(result).toBe("hi") + expect(fakeBackend.readFile).toHaveBeenCalledOnce() + const [absPath] = fakeBackend.readFile.mock.calls[0]! + expect(absPath).toBe(join(routeDir, "workspace", "hello.txt")) + }) + + it("rejects path-jail escapes with a clear error", async () => { + const contribution = await createWorkspaceMarker().load(routeDir, ctx()) + const readTool = contribution.tools!.find((t) => t.name === "readFile")! + await expect( + readTool.run({ path: "../../etc/passwd" }, { signal: new AbortController().signal }), + ).rejects.toThrow(/outside workspace/i) + }) + + it("uses the default local backends when none configured", async () => { + writeFileSync(join(routeDir, "workspace", "ok.txt"), "ok", "utf8") + const contribution = await createWorkspaceMarker().load(routeDir, ctx()) + const readTool = contribution.tools!.find((t) => t.name === "readFile")! + const result = await readTool.run({ path: "ok.txt" }, { signal: new AbortController().signal }) + expect(result).toBe("ok") + }) + + it("runBash tool calls the configured exec backend", async () => { + const fakeExec = { + runCommand: vi.fn().mockResolvedValue({ stdout: "world", stderr: "", exitCode: 0 }), + } + const contribution = await createWorkspaceMarker().load( + routeDir, + ctx({ backends: { exec: fakeExec } }), + ) + const runBash = contribution.tools!.find((t) => t.name === "runBash")! + const result = await runBash.run( + { command: "echo world" }, + { signal: new AbortController().signal }, + ) + expect(result).toMatchObject({ stdout: "world", exitCode: 0 }) + expect(fakeExec.runCommand).toHaveBeenCalledWith( + expect.objectContaining({ command: "echo world" }), + expect.any(Object), + ) + }) + + it("marks all four tools as overridable", async () => { + const contribution = await createWorkspaceMarker().load(routeDir, ctx()) + for (const t of contribution.tools ?? []) { + // Overridable tools carry a flag the uniqueness check reads; see Task 10. + expect((t as unknown as { overridable?: boolean }).overridable).toBe(true) + } + }) +}) +``` + +- [ ] **Step 3: Run tests to verify failure** + +Run: `cd /Users/blove/repos/dawn && pnpm --filter @dawn-ai/core test -- workspace.test 2>&1 | tail -15` +Expected: FAIL — `Cannot find module '../../src/capabilities/built-in/workspace.js'`. + +- [ ] **Step 4: Implement the marker** + +Create `packages/core/src/capabilities/built-in/workspace.ts`: + +```ts +import { existsSync } from "node:fs" +import { join, resolve, sep } from "node:path" +import { z } from "zod" + +import { localExec, localFilesystem } from "@dawn-ai/workspace" +import type { BackendContext, ExecBackend, FilesystemBackend } from "@dawn-ai/workspace" + +import type { CapabilityMarker, DawnToolDefinition } from "../types.js" + +const WORKSPACE_DIRNAME = "workspace" + +const READ_FILE_INPUT = z.object({ path: z.string().min(1) }) +const WRITE_FILE_INPUT = z.object({ path: z.string().min(1), content: z.string() }) +const LIST_DIR_INPUT = z.object({ path: z.string().default(".") }) +const RUN_BASH_INPUT = z.object({ command: z.string().min(1) }) + +function pathJail(userPath: string, workspaceRoot: string): string { + const resolved = resolve(workspaceRoot, userPath) + if (resolved !== workspaceRoot && !resolved.startsWith(workspaceRoot + sep)) { + throw new Error(`Path is outside workspace: ${userPath}`) + } + return resolved +} + +function backendContext(workspaceRoot: string, signal: AbortSignal): BackendContext { + return { signal, workspaceRoot } +} + +interface OverridableTool extends DawnToolDefinition { + readonly overridable: true +} + +function buildWorkspaceTools( + workspaceRoot: string, + fs: FilesystemBackend, + exec: ExecBackend, +): readonly OverridableTool[] { + const readFile: OverridableTool = { + name: "readFile", + description: "Read a UTF-8 file from the workspace.", + schema: READ_FILE_INPUT, + overridable: true, + run: async (input, ctx) => { + const { path } = READ_FILE_INPUT.parse(input) + const safe = pathJail(path, workspaceRoot) + return fs.readFile(safe, backendContext(workspaceRoot, ctx.signal)) + }, + } + const writeFile: OverridableTool = { + name: "writeFile", + description: "Write a UTF-8 file inside the workspace.", + schema: WRITE_FILE_INPUT, + overridable: true, + run: async (input, ctx) => { + const { path, content } = WRITE_FILE_INPUT.parse(input) + const safe = pathJail(path, workspaceRoot) + const result = await fs.writeFile(safe, content, backendContext(workspaceRoot, ctx.signal)) + return `wrote ${result.bytesWritten} bytes to ${path}` + }, + } + const listDir: OverridableTool = { + name: "listDir", + description: "List entries in a workspace directory.", + schema: LIST_DIR_INPUT, + overridable: true, + run: async (input, ctx) => { + const { path } = LIST_DIR_INPUT.parse(input) + const safe = pathJail(path, workspaceRoot) + const entries = await fs.listDir(safe, backendContext(workspaceRoot, ctx.signal)) + return [...entries] + }, + } + const runBash: OverridableTool = { + name: "runBash", + description: "Run a shell command inside the workspace.", + schema: RUN_BASH_INPUT, + overridable: true, + run: async (input, ctx) => { + const { command } = RUN_BASH_INPUT.parse(input) + return exec.runCommand({ command }, backendContext(workspaceRoot, ctx.signal)) + }, + } + return [readFile, writeFile, listDir, runBash] +} + +export function createWorkspaceMarker(): CapabilityMarker { + return { + name: "workspace", + detect: async (routeDir, _context) => existsSync(join(routeDir, WORKSPACE_DIRNAME)), + load: async (routeDir, context) => { + const workspaceRoot = join(routeDir, WORKSPACE_DIRNAME) + if (!existsSync(workspaceRoot)) return {} + const fs = context.backends?.filesystem ?? localFilesystem() + const exec = context.backends?.exec ?? localExec() + return { tools: buildWorkspaceTools(workspaceRoot, fs, exec) } + }, + } +} +``` + +- [ ] **Step 5: Export from the core barrel** + +Edit `packages/core/src/index.ts`, add (next to the other `createXxxMarker` exports): + +```ts +export { createWorkspaceMarker } from "./capabilities/built-in/workspace.js" +``` + +- [ ] **Step 6: Run tests** + +Run: `cd /Users/blove/repos/dawn && pnpm --filter @dawn-ai/core test 2>&1 | tail -10` +Expected: PASS (existing tests + 9 new workspace tests). + +- [ ] **Step 7: Commit** + +```bash +cd /Users/blove/repos/dawn +git add packages/core/package.json \ + packages/core/src/capabilities/built-in/workspace.ts \ + packages/core/test/capabilities/workspace.test.ts \ + packages/core/src/index.ts +git commit -m "feat(core): createWorkspaceMarker capability + +Auto-detects a route's workspace/ directory and contributes four tools +(readFile/writeFile/listDir/runBash) routed through configurable +backends. Defaults to localFilesystem + localExec when no backends are +configured in dawn.config.ts. Path-jail enforced in the capability; +backends receive resolved absolute paths. + +Tools carry an `overridable: true` flag so the uniqueness-check +inversion in the next commit can let user-authored tools/.ts +files supersede them. + +Co-Authored-By: Claude Opus 4.7 " +``` + +--- + +### Task 10: Invert tool-name uniqueness check for overridable tools + +**Files:** +- Modify: `packages/cli/src/lib/runtime/check-tool-name-uniqueness.ts` +- Modify: `packages/cli/test/tool-name-uniqueness.test.ts` +- Modify: `packages/cli/src/lib/runtime/execute-route.ts` (the call site uses the new behavior) + +- [ ] **Step 1: Add the failing test** + +Append to `packages/cli/test/tool-name-uniqueness.test.ts`: + +```ts +describe("checkToolNameUniqueness — overridable", () => { + it("when a capability tool is overridable, a user tool with the same name does NOT error and replaces it", () => { + const result = checkToolNameUniqueness({ + userTools: [{ name: "readFile" }], + capabilityTools: [{ name: "readFile", overridable: true }], + reservedNames: new Set(), + }) + expect(result.ok).toBe(true) + if (!result.ok) return + // The returned `effectiveCapabilityTools` drops the overridden tool. + expect(result.effectiveCapabilityTools).toEqual([]) + }) + + it("when a capability tool is NOT overridable, a user tool with the same name still errors", () => { + const result = checkToolNameUniqueness({ + userTools: [{ name: "writeTodos" }], + capabilityTools: [{ name: "writeTodos" }], // no overridable flag = false + reservedNames: new Set(), + }) + expect(result.ok).toBe(false) + }) +}) +``` + +- [ ] **Step 2: Run to verify failure** + +Run: `cd /Users/blove/repos/dawn && pnpm --filter @dawn-ai/cli test -- tool-name-uniqueness 2>&1 | tail -10` +Expected: FAIL — `result.effectiveCapabilityTools` doesn't exist yet; and the overridable case errors. + +- [ ] **Step 3: Update the check** + +Edit `packages/cli/src/lib/runtime/check-tool-name-uniqueness.ts`: + +```ts +export interface ToolNameCheckInput { + readonly userTools: ReadonlyArray<{ readonly name: string }> + readonly capabilityTools: ReadonlyArray<{ readonly name: string; readonly overridable?: boolean }> + readonly reservedNames: ReadonlySet +} + +export type ToolNameCheckResult = + | { + readonly ok: true + /** + * Capability tools with the overridable ones removed when shadowed by + * a user tool. The runtime should use THIS list when composing the + * final tool set, not the input `capabilityTools`. + */ + readonly effectiveCapabilityTools: ReadonlyArray<{ readonly name: string; readonly overridable?: boolean }> + } + | { readonly ok: false; readonly message: string } + +export function checkToolNameUniqueness(input: ToolNameCheckInput): ToolNameCheckResult { + const userNames = new Set(input.userTools.map((t) => t.name)) + const effective: typeof input.capabilityTools = [] + + for (const cap of input.capabilityTools) { + if (userNames.has(cap.name)) { + if (cap.overridable) { + // Drop from the effective list; user tool wins. + continue + } + return { + ok: false, + message: `Capability conflict: tool name "${cap.name}" is contributed by a capability and also defined in tools/. Remove the user tool or remove the capability marker file.`, + } + } + effective.push(cap) + } + + for (const t of input.userTools) { + if (input.reservedNames.has(t.name)) { + return { + ok: false, + message: `Reserved tool name: "${t.name}" is reserved by the Dawn harness and cannot be used as a user tool name.`, + } + } + } + + return { ok: true, effectiveCapabilityTools: effective } +} +``` + +- [ ] **Step 4: Update the callsite in `execute-route.ts`** + +In `packages/cli/src/lib/runtime/execute-route.ts`, find the existing block that calls `checkToolNameUniqueness` (around line 305 — the area introduced in PR #155). The current code throws on collision and otherwise concatenates `tools = [...tools, ...capTools]`. Adjust to use the new `effectiveCapabilityTools`: + +```ts +const RESERVED_TOOL_NAMES = new Set(["task"]) +const check = checkToolNameUniqueness({ + userTools: tools.map((t) => ({ name: t.name })), + capabilityTools: capTools.map((t) => ({ + name: t.name, + ...((t as unknown as { overridable?: boolean }).overridable ? { overridable: true } : {}), + })), + reservedNames: RESERVED_TOOL_NAMES, +}) +if (!check.ok) { + return { message: check.message, ok: false } +} + +// Use the effective set so overridden tools are dropped before merging. +const effectiveCapNames = new Set(check.effectiveCapabilityTools.map((t) => t.name)) +const filteredCapTools = capTools.filter((t) => effectiveCapNames.has(t.name)) +tools = [...tools, ...filteredCapTools] +``` + +(The existing state-field collision check below stays unchanged.) + +- [ ] **Step 5: Run all the relevant tests** + +Run: `cd /Users/blove/repos/dawn && pnpm --filter @dawn-ai/cli test 2>&1 | tail -10` +Expected: PASS (existing tests + 2 new uniqueness tests). + +Run: `cd /Users/blove/repos/dawn && pnpm test 2>&1 | tail -10` +Expected: full repo green. + +- [ ] **Step 6: Commit** + +```bash +cd /Users/blove/repos/dawn +git add packages/cli/src/lib/runtime/check-tool-name-uniqueness.ts \ + packages/cli/test/tool-name-uniqueness.test.ts \ + packages/cli/src/lib/runtime/execute-route.ts +git commit -m "feat(cli): support overridable capability tools + +Tools marked overridable on a capability contribution can be shadowed +by a user-authored tool with the same name. Used by the workspace +capability so authors can override readFile/writeFile/listDir/runBash +by dropping a file in tools/. Non-overridable capability tools +(writeTodos, readSkill, task) retain the collision error. + +Co-Authored-By: Claude Opus 4.7 " +``` + +--- + +# Phase D — Runtime wiring + +### Task 11: Register `createWorkspaceMarker` + thread backends from config + +**Files:** +- Modify: `packages/cli/src/lib/runtime/execute-route.ts` + +- [ ] **Step 1: Add imports + the marker to the registry** + +Edit `packages/cli/src/lib/runtime/execute-route.ts`. Add to the existing imports from `@dawn-ai/core`: + +```ts +import { + // ...existing + createWorkspaceMarker, + loadDawnConfig, +} from "@dawn-ai/core" +``` + +Find the `createCapabilityRegistry([...])` block and add the marker: + +```ts +const registry = createCapabilityRegistry([ + createPlanningMarker(), + createAgentsMdMarker(), + createSkillsMarker(), + createSubagentsMarker(), + createWorkspaceMarker(), +]) +``` + +- [ ] **Step 2: Load `dawn.config.ts` once + thread backends into `applyCapabilities` context** + +Before the `applyCapabilities` call (around the block that builds `descriptorRouteMap`), load the config: + +```ts +let configBackends: { filesystem?: FilesystemBackend; exec?: ExecBackend } | undefined +try { + const loaded = await loadDawnConfig({ appRoot }) + configBackends = loaded.config.backends +} catch { + // No dawn.config.ts (or unreadable) — the workspace capability falls + // back to its defaults (localFilesystem + localExec). +} + +const applied = await applyCapabilities(registry, routeDir, { + routeManifest, + descriptor, + descriptorRouteMap, + backends: configBackends, +}) +``` + +Add the type imports at the top: + +```ts +import type { ExecBackend, FilesystemBackend } from "@dawn-ai/workspace" +``` + +And add `@dawn-ai/workspace` to `packages/cli/package.json` dependencies (`pnpm add @dawn-ai/workspace --filter @dawn-ai/cli --workspace`). + +- [ ] **Step 3: Run all tests** + +Run: `cd /Users/blove/repos/dawn && pnpm test 2>&1 | tail -10` +Expected: all green. + +Run: `cd /Users/blove/repos/dawn && pnpm build 2>&1 | tail -10` +Expected: all packages build. + +- [ ] **Step 4: Commit** + +```bash +cd /Users/blove/repos/dawn +git add packages/cli/src/lib/runtime/execute-route.ts packages/cli/package.json +git commit -m "feat(cli): register workspace capability + thread backends from dawn.config + +Co-Authored-By: Claude Opus 4.7 " +``` + +--- + +### Task 12: Typegen — surface workspace tools + +**Files:** +- Modify: `packages/cli/src/lib/typegen/run-typegen.ts` +- Modify or create: `packages/cli/test/run-typegen.test.ts` (the existing test from sub-project 3) + +- [ ] **Step 1: Read the existing pattern** + +Read `packages/cli/src/lib/typegen/run-typegen.ts`. Note the `PLANNING_EXTRA_TOOL`, `SKILLS_EXTRA_TOOL`, `SUBAGENTS_EXTRA_TOOL` declarations and their `hasX(routeDir)` gates around line 21-100. + +- [ ] **Step 2: Add a failing test** + +Append to `packages/cli/test/run-typegen.test.ts`: + +```ts +describe("typegen — workspace capability", () => { + // Use the existing temp-dir + manifest helpers in this test file. + it("includes readFile/writeFile/listDir/runBash for routes with a workspace/ directory", async () => { + // Set up a tmp app with src/app/foo/{index.ts, workspace/}. + // Run runTypegen and read .dawn/dawn.generated.d.ts. + // Assert all four tool names appear in foo's tool union. + // (Mirror the existing readSkill/task assertions in this file.) + }) + + it("does NOT include the four tools when workspace/ is absent", async () => { + // Same setup minus workspace/. + // Assert none of readFile/writeFile/listDir/runBash appear. + }) +}) +``` + +Read the existing `task` typegen test (added in PR #156 Task 12) and mirror its structure exactly. Same helpers, same temp-dir pattern. + +- [ ] **Step 3: Run to verify failure** + +Run: `cd /Users/blove/repos/dawn && pnpm --filter @dawn-ai/cli test -- run-typegen 2>&1 | tail -10` +Expected: FAIL. + +- [ ] **Step 4: Add the workspace typegen entries** + +Edit `packages/cli/src/lib/typegen/run-typegen.ts`. Add after `SUBAGENTS_EXTRA_TOOL`: + +```ts +const WORKSPACE_EXTRA_TOOLS: readonly ExtractedToolType[] = [ + { + name: "readFile", + description: "Read a UTF-8 file from the workspace.", + inputType: `{ path: string }`, + outputType: `string`, + }, + { + name: "writeFile", + description: "Write a UTF-8 file inside the workspace.", + inputType: `{ path: string; content: string }`, + outputType: `string`, + }, + { + name: "listDir", + description: "List entries in a workspace directory.", + inputType: `{ path?: string }`, + outputType: `string[]`, + }, + { + name: "runBash", + description: "Run a shell command inside the workspace.", + inputType: `{ command: string }`, + outputType: `{ stdout: string; stderr: string; exitCode: number }`, + }, +] + +function hasWorkspace(routeDir: string): boolean { + return existsSync(join(routeDir, "workspace")) +} +``` + +In the `extraTools` build block (the one with the existing `hasSubagents` gate), add: + +```ts +if (hasWorkspace(route.routeDir)) { + extraTools.push(...WORKSPACE_EXTRA_TOOLS) +} +``` + +- [ ] **Step 5: Run tests + verify** + +Run: `cd /Users/blove/repos/dawn && pnpm --filter @dawn-ai/cli test 2>&1 | tail -10` +Expected: PASS. + +- [ ] **Step 6: Commit** + +```bash +cd /Users/blove/repos/dawn +git add packages/cli/src/lib/typegen/run-typegen.ts packages/cli/test/run-typegen.test.ts +git commit -m "feat(cli): typegen surfaces workspace tools for routes with workspace/ + +Co-Authored-By: Claude Opus 4.7 " +``` + +--- + +# Phase E — Chat example migration + smoke + +### Task 13: Delete hand-rolled workspace tools from the chat example + +**Files:** +- Delete: 4 files under `examples/chat/server/src/app/chat/tools/` +- Delete: 2 files under `examples/chat/server/src/app/coordinator/subagents/research/tools/` +- Delete: 2 `workspace-path.ts` helpers if unreferenced after the above + +- [ ] **Step 1: Delete chat route's workspace tool files** + +```bash +cd /Users/blove/repos/dawn +git rm examples/chat/server/src/app/chat/tools/readFile.ts +git rm examples/chat/server/src/app/chat/tools/writeFile.ts +git rm examples/chat/server/src/app/chat/tools/listDir.ts +git rm examples/chat/server/src/app/chat/tools/runBash.ts +``` + +- [ ] **Step 2: Check if `chat/workspace-path.ts` is still referenced** + +Run: `cd /Users/blove/repos/dawn && grep -rn "workspace-path\b" examples/chat/server/src/app/chat/ --include="*.ts" 2>/dev/null` +Expected: no matches (only the deleted tool files referenced it). If any remain, leave the helper in place. + +If no remaining references, delete: +```bash +git rm examples/chat/server/src/app/chat/workspace-path.ts +``` + +- [ ] **Step 3: Delete research subagent's workspace tools + helper** + +```bash +git rm examples/chat/server/src/app/coordinator/subagents/research/tools/readFile.ts +git rm examples/chat/server/src/app/coordinator/subagents/research/tools/listDir.ts +``` + +Run: `cd /Users/blove/repos/dawn && grep -rn "workspace-path\b" examples/chat/server/src/app/coordinator/ --include="*.ts" 2>/dev/null` +Expected: no matches. Then: +```bash +git rm examples/chat/server/src/app/coordinator/subagents/research/workspace-path.ts +``` + +- [ ] **Step 4: Build the chat example** + +Run: `cd /Users/blove/repos/dawn/examples/chat/server && pnpm build 2>&1 | tail -10` +Expected: `4 route(s) compiled` (chat, coordinator, coordinator/subagents/research, coordinator/subagents/summarizer). Build succeeds. + +- [ ] **Step 5: Verify typegen surfaces the workspace tools on the routes that have a workspace/ dir** + +Run: +```bash +cd /Users/blove/repos/dawn/examples/chat/server +grep -A 6 'route "/chat"' .dawn/dawn.generated.d.ts | head -20 +grep -A 6 'route "/coordinator/subagents/research"' .dawn/dawn.generated.d.ts | head -20 +``` +Expected: +- `/chat` route's tool union contains `readFile`, `writeFile`, `listDir`, `runBash`, plus existing `writeTodos` and `readSkill`. +- `/coordinator/subagents/research` route's tool union contains the 4 workspace tools (the subagent has `workspace/` via its own dir, OR inherits the workspace convention — verify behavior matches the spec's intent. If the research subagent route doesn't have its own `workspace/` directory, ADD one before this test or accept that those tools no longer show — the spec migration assumes the workspace dir is set up correctly for each route). + +If the research subagent didn't have its own workspace dir originally and was working through path manipulation, create one: + +```bash +mkdir -p examples/chat/server/src/app/coordinator/subagents/research/workspace +touch examples/chat/server/src/app/coordinator/subagents/research/workspace/.gitkeep +``` + +(The spec convention is: each route that wants workspace tools has its own `workspace/` directory. If the existing arrangement pointed all routes at a single shared workspace, that needs review — but most likely the migration just needs the per-route dir, even if it's empty or symlinked.) + +- [ ] **Step 6: Commit** + +```bash +cd /Users/blove/repos/dawn +git add -A +git commit -m "$(cat <<'EOF' +refactor(examples/chat): migrate to workspace capability + +Delete the hand-rolled readFile/writeFile/listDir/runBash tool files +(and their workspace-path helpers) from both the /chat route and the +research subagent. The workspace capability auto-contributes these +tools when the route has a workspace/ directory. + +Co-Authored-By: Claude Opus 4.7 +EOF +)" +``` + +--- + +### Task 14: Smoke test via Chrome MCP web client + +**Files:** none modified; this is verification only. + +- [ ] **Step 1: Start dev servers** + +```bash +cd /Users/blove/repos/dawn/examples/chat/server && OPENAI_API_KEY="$(grep OPENAI_API_KEY /Users/blove/repos/dawn/.env | cut -d= -f2-)" pnpm dev & +cd /Users/blove/repos/dawn/examples/chat/web && pnpm dev & +# Wait for both: "Dawn dev ready at http://127.0.0.1:3001" and "Ready in Nms" +``` + +- [ ] **Step 2: Drive `/chat` through the web picker** + +Navigate Chrome MCP to `http://localhost:3000`. Click the `/chat` radio (it's the default). Type "Briefly list the files in the workspace." Click Send. Wait ~30 seconds. + +Verify via DOM inspection (JavaScript in the page) that the SSE log contains: +- `event: tool_call data: {"name":"listDir"}` — the workspace capability's listDir tool fired +- `event: tool_result` with the listing +- `event: chunk` events streaming the agent's natural-language response +- `event: done` +- 0 errors (`subagent_failed`, recursion, etc.) + +- [ ] **Step 3: Drive `/coordinator` through the picker** + +Click the `/coordinator` radio. Type "Use research to read AGENTS.md and list its camelCase tool names. Then ask summarizer for a 2-bullet TL;DR." Click Send. Wait ~60 seconds. + +Verify via DOM inspection: +- 2 `subagent.start` events, 2 `subagent.end` events (one each for research, summarizer) +- `subagent.tool_call` events for `readFile` (research's workspace capability) +- `subagent.message` events streaming the children's tokens +- 0 paired duplicates (raw `chunk` whose data matches a `subagent.message` chunk) — sub-project 3's bubbling fix should still hold +- `event: done` with non-empty final assistant text + +- [ ] **Step 4: Kill dev servers** + +```bash +pkill -f "dawn.*dev" +pkill -f "next dev -p 3000" +``` + +- [ ] **Step 5: If anything failed** + +Debug per the failure. Likely candidates: +- A route is missing its `workspace/` dir → the capability didn't activate → no tools were contributed → the agent has nothing to call. +- The capability's path-jail rejected a path the old tool used to accept → may indicate a behavior delta from the old hand-rolled tool. + +Iterate until both probes pass. No move to Task 15 until smoke is clean. + +--- + +### Task 15: Update phase status memory + open PR + +**Files:** +- Modify: `/Users/blove/.claude/projects/-Users-blove-repos-dawn/memory/project_phase_status.md` + +- [ ] **Step 1: Update the memory note** + +Edit `project_phase_status.md`. Find: + +``` +4. Pluggable filesystem / exec backends (`dawn.config.ts`). +``` + +Replace with: + +``` +4. ✅ **Workspace capability + pluggable backends** — shipped in [PR #TBD](https://github.com/cacheplane/dawnai/pull/TBD). Workspace tools (readFile/writeFile/listDir/runBash) auto-contributed by a capability triggered by `/workspace/`. New `@dawn-ai/workspace` package ships `FilesystemBackend`/`ExecBackend` interfaces + `localFilesystem`/`localExec` defaults + `compose`/`withFilesystemLogging`/`withExecLogging` helpers. `dawn.config.ts` switched from hand-rolled string-only parser to `tsx`-evaluated import so callable backends can be expressed. Path-jail enforced in the capability; backends receive resolved absolute paths. Tool override pathway: write `tools/.ts` to shadow a capability-contributed tool. Chat example's hand-rolled workspace tools deleted. HITL permission gating for jail escapes deferred to sub-project 4.5. +``` + +Update the top summary to show 6/7 sub-projects shipped (still in Phase 3). + +- [ ] **Step 2: Push the branch + open the PR** + +```bash +cd /Users/blove/repos/dawn +git push -u origin claude/phase3-workspace +``` + +```bash +gh pr create --title "feat(core,cli,workspace): phase 3 — workspace capability + pluggable backends (sub-project 4)" --body "$(cat <<'EOF' +## Summary + +Sub-project 4 of the Dawn opinionated agent harness. The workspace +tools (readFile/writeFile/listDir/runBash) move from hand-rolled +per-route files into a built-in capability auto-wired by the +`workspace/` directory convention. Filesystem and exec implementations +become pluggable via a new `@dawn-ai/workspace` package; defaults +preserve existing behavior so apps that don't touch `dawn.config.ts` +keep working unchanged. + +`dawn.config.ts` loader switches from a hand-rolled string-only +parser to a `tsx`-evaluated import so callable backend values can be +expressed naturally. + +## Changes + +- New `@dawn-ai/workspace` package: `FilesystemBackend` / `ExecBackend` + type interfaces, `localFilesystem()` and `localExec()` defaults, + `compose()` helper for middleware composition, demonstration + `withFilesystemLogging` / `withExecLogging` middlewares. +- New `createWorkspaceMarker()` capability in `@dawn-ai/core`. Detects + the `workspace/` directory under a route; contributes four tools + routed through the configured backends; enforces path-jail before + calling the backend so backends receive trusted absolute paths. +- `DawnConfig` and `CapabilityMarkerContext` gain an optional + `backends: { filesystem?, exec? }` field. When omitted, the + capability falls back to `localFilesystem()` + `localExec()`. +- Tool-name uniqueness check supports overridable capability tools: + user-authored `tools/readFile.ts` (etc.) replaces the workspace + capability's contribution; non-overridable capability tools + (writeTodos, readSkill, task) retain the collision error. +- Typegen surfaces the four workspace tools on routes with a + `workspace/` directory. +- Chat example's hand-rolled tool files delete from `/chat` and from + `/coordinator/subagents/research`. + +## Test plan + +- [x] `@dawn-ai/workspace` unit tests: types + localFilesystem (5) + + localExec (4) + compose (3) + with-logging (3) = 15 cases +- [x] `createWorkspaceMarker` unit tests: detect, load, tool wiring, + path-jail, default backends, override flag (8 cases) +- [x] `checkToolNameUniqueness` overridable cases (2 new cases) +- [x] Config loader rewrite: 6 cases including syntax-error + propagation +- [x] Typegen: workspace tools appear when `workspace/` exists, absent + otherwise (2 cases) +- [x] Full repo green; build + typecheck + lint clean +- [x] Manual Chrome MCP smoke: `/chat` and `/coordinator` both produce + clean SSE streams; 0 duplicates; 0 errors; `done` event fires + +## Deferred / known limitations + +- **HITL permission system (sub-project 4.5)** — the capability hard- + refuses jail escapes today. A future PR introduces an `interrupt()` + flow so the user can grant per-path permissions, with persistence + to a yet-to-be-decided location (likely `.dawn/permissions.json`). +- **Per-route backend override** — currently global only. Add via + descriptor field if a real use case surfaces. +- **OS-level isolation** — out of scope; documented as deployment + guidance. The path-jail in the capability is a correctness boundary, + not a security boundary against hostile agents. +- **Backend method extensibility** — adding methods beyond the four + standard ones does NOT auto-contribute tools. Authors write + additional tools in `tools/` as today. + +🤖 Generated with [Claude Code](https://claude.com/claude-code) +EOF +)" +``` + +- [ ] **Step 3: Update the memory note with the real PR number** + +Once the PR is created, replace `[PR #TBD]` with the actual URL. + +- [ ] **Step 4: Auto-merge on green** + +```bash +gh pr merge --squash --delete-branch --auto +``` + +Wait for validate-green. Once merged, the sub-project is complete. + +--- + +## Self-review notes + +- **Spec coverage:** every section of the spec maps to a task. New package (T1-6). Config loader switch (T7). DawnConfig + CapabilityMarkerContext extension (T8). Marker implementation (T9). Tool override inversion (T10). Runtime wiring (T11). Typegen (T12). Chat example migration (T13). Smoke (T14). Memory update + PR (T15). +- **Placeholders:** none. Every step has actual code or actual commands. The `hasWorkspace` typegen test's body is intentionally outlined rather than fully written because it mirrors the existing `task` typegen test in the same file — the implementer should copy that test's structure (which I've called out explicitly in Step 1). +- **Type consistency:** `FilesystemBackend` / `ExecBackend` / `BackendContext` signatures stable from T2 through T9. `OverridableTool` shape locked in T9. `effectiveCapabilityTools` from T10 used in T11 (implicitly via the existing call site). Path-jail signature stable. +- **One known sharp edge:** T11's `effectiveCapabilityTools` usage requires `execute-route.ts`'s tool-merge to filter `capTools` by the names in the effective set rather than iterating `capTools` directly. The plan calls this out explicitly. If T10's API doesn't end up returning `effectiveCapabilityTools`, T11's implementation needs adjustment. From 4600d801b0324b28caecaa8a67e5de44a3ac60f3 Mon Sep 17 00:00:00 2001 From: Brian Love Date: Wed, 20 May 2026 17:18:42 -0700 Subject: [PATCH 03/23] scaffold(workspace): empty @dawn-ai/workspace package MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds the package skeleton (manifest, tsconfig, vitest config) for the upcoming pluggable workspace backends. No exports yet — types, defaults, and helpers land in subsequent commits. Co-Authored-By: Claude Opus 4.7 --- packages/workspace/package.json | 42 +++++++++++++++++++++++++++++ packages/workspace/src/index.ts | 1 + packages/workspace/tsconfig.json | 9 +++++++ packages/workspace/vitest.config.ts | 8 ++++++ pnpm-lock.yaml | 9 +++++++ 5 files changed, 69 insertions(+) create mode 100644 packages/workspace/package.json create mode 100644 packages/workspace/src/index.ts create mode 100644 packages/workspace/tsconfig.json create mode 100644 packages/workspace/vitest.config.ts diff --git a/packages/workspace/package.json b/packages/workspace/package.json new file mode 100644 index 0000000..50a9aad --- /dev/null +++ b/packages/workspace/package.json @@ -0,0 +1,42 @@ +{ + "name": "@dawn-ai/workspace", + "version": "0.1.8", + "private": false, + "type": "module", + "license": "MIT", + "homepage": "https://github.com/cacheplane/dawnai/tree/main/packages/workspace#readme", + "repository": { + "type": "git", + "url": "git+https://github.com/cacheplane/dawnai.git", + "directory": "packages/workspace" + }, + "bugs": { + "url": "https://github.com/cacheplane/dawnai/issues" + }, + "engines": { + "node": ">=22.12.0" + }, + "files": [ + "dist" + ], + "types": "./dist/index.d.ts", + "exports": { + ".": { + "types": "./dist/index.d.ts", + "default": "./dist/index.js" + } + }, + "publishConfig": { + "access": "public" + }, + "scripts": { + "build": "tsc -b tsconfig.json", + "lint": "biome check --config-path ../config-biome/biome.json package.json src tsconfig.json vitest.config.ts", + "test": "vitest --run --config vitest.config.ts --passWithNoTests", + "typecheck": "tsc --noEmit" + }, + "devDependencies": { + "@dawn-ai/config-typescript": "workspace:*", + "@types/node": "25.6.0" + } +} diff --git a/packages/workspace/src/index.ts b/packages/workspace/src/index.ts new file mode 100644 index 0000000..336ce12 --- /dev/null +++ b/packages/workspace/src/index.ts @@ -0,0 +1 @@ +export {} diff --git a/packages/workspace/tsconfig.json b/packages/workspace/tsconfig.json new file mode 100644 index 0000000..0681480 --- /dev/null +++ b/packages/workspace/tsconfig.json @@ -0,0 +1,9 @@ +{ + "$schema": "https://json.schemastore.org/tsconfig", + "extends": "../config-typescript/node.json", + "compilerOptions": { + "outDir": "dist", + "rootDir": "src" + }, + "include": ["src/**/*.ts"] +} diff --git a/packages/workspace/vitest.config.ts b/packages/workspace/vitest.config.ts new file mode 100644 index 0000000..4437340 --- /dev/null +++ b/packages/workspace/vitest.config.ts @@ -0,0 +1,8 @@ +import { defineConfig } from "vitest/config" + +export default defineConfig({ + test: { + environment: "node", + include: ["test/**/*.test.ts"], + }, +}) diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 0b6d003..e85f404 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -345,6 +345,15 @@ importers: specifier: 4.4.3 version: 4.4.3 + packages/workspace: + devDependencies: + '@dawn-ai/config-typescript': + specifier: workspace:* + version: link:../config-typescript + '@types/node': + specifier: 25.6.0 + version: 25.6.0 + packages: '@alloc/quick-lru@5.2.0': From 69eaa398825790f66a119425905fc043571f31a5 Mon Sep 17 00:00:00 2001 From: Brian Love Date: Wed, 20 May 2026 17:20:31 -0700 Subject: [PATCH 04/23] feat(workspace): type interfaces for filesystem + exec backends Co-Authored-By: Claude Opus 4.7 --- packages/workspace/src/index.ts | 8 ++++- packages/workspace/src/types.ts | 63 +++++++++++++++++++++++++++++++++ 2 files changed, 70 insertions(+), 1 deletion(-) create mode 100644 packages/workspace/src/types.ts diff --git a/packages/workspace/src/index.ts b/packages/workspace/src/index.ts index 336ce12..d336445 100644 --- a/packages/workspace/src/index.ts +++ b/packages/workspace/src/index.ts @@ -1 +1,7 @@ -export {} +export type { + BackendContext, + ExecBackend, + ExecMiddleware, + FilesystemBackend, + FilesystemMiddleware, +} from "./types.js" diff --git a/packages/workspace/src/types.ts b/packages/workspace/src/types.ts new file mode 100644 index 0000000..2ef4f4c --- /dev/null +++ b/packages/workspace/src/types.ts @@ -0,0 +1,63 @@ +/** + * Workspace backend type interfaces. + * + * Backends are plain objects implementing these interfaces. The + * workspace capability calls into them to perform filesystem reads, + * writes, listings, and shell command execution. Defaults + * (`localFilesystem`, `localExec`) ship in this package; users can + * provide their own implementations via dawn.config.ts. + */ + +export interface BackendContext { + /** Aborts when the parent agent run is cancelled. */ + readonly signal: AbortSignal + /** Absolute filesystem path of the route's workspace directory. */ + readonly workspaceRoot: string +} + +export interface FilesystemBackend { + /** + * Read a UTF-8 file. `path` is an already-resolved absolute path + * inside `ctx.workspaceRoot` — the capability has done the path-jail. + */ + readFile(path: string, ctx: BackendContext): Promise + + /** Write a UTF-8 file. Returns the byte count of `content`. */ + writeFile( + path: string, + content: string, + ctx: BackendContext, + ): Promise<{ readonly bytesWritten: number }> + + /** List entries in a directory. Returns leaf names (not full paths). */ + listDir(path: string, ctx: BackendContext): Promise +} + +export interface ExecBackend { + /** + * Run a shell command. `args.cwd`, if provided, is already-resolved + * to an absolute path inside `ctx.workspaceRoot`. + */ + runCommand( + args: { + readonly command: string + readonly cwd?: string + readonly env?: Readonly> + }, + ctx: BackendContext, + ): Promise<{ + readonly stdout: string + readonly stderr: string + readonly exitCode: number + }> +} + +/** + * A filesystem middleware is a function that wraps a backend to add + * cross-cutting behavior (logging, caching, etc.). Compose multiple + * middlewares via `compose()`. + */ +export type FilesystemMiddleware = (next: FilesystemBackend) => FilesystemBackend + +/** See FilesystemMiddleware. */ +export type ExecMiddleware = (next: ExecBackend) => ExecBackend From be80c3e798444471b4d88f2397f21f20fc5602d9 Mon Sep 17 00:00:00 2001 From: Brian Love Date: Wed, 20 May 2026 17:22:24 -0700 Subject: [PATCH 05/23] feat(workspace): localFilesystem default backend Co-Authored-By: Claude Opus 4.7 --- packages/workspace/src/index.ts | 1 + packages/workspace/src/local-filesystem.ts | 36 +++++++++++++ .../workspace/test/local-filesystem.test.ts | 50 +++++++++++++++++++ 3 files changed, 87 insertions(+) create mode 100644 packages/workspace/src/local-filesystem.ts create mode 100644 packages/workspace/test/local-filesystem.test.ts diff --git a/packages/workspace/src/index.ts b/packages/workspace/src/index.ts index d336445..78d07af 100644 --- a/packages/workspace/src/index.ts +++ b/packages/workspace/src/index.ts @@ -1,3 +1,4 @@ +export { type LocalFilesystemOptions, localFilesystem } from "./local-filesystem.js" export type { BackendContext, ExecBackend, diff --git a/packages/workspace/src/local-filesystem.ts b/packages/workspace/src/local-filesystem.ts new file mode 100644 index 0000000..6e9d9c3 --- /dev/null +++ b/packages/workspace/src/local-filesystem.ts @@ -0,0 +1,36 @@ +import { readdir, readFile, stat, writeFile } from "node:fs/promises" +import type { BackendContext, FilesystemBackend } from "./types.js" + +const DEFAULT_MAX_FILE_BYTES = 256 * 1024 + +export interface LocalFilesystemOptions { + /** + * Reject `readFile` when the target file exceeds this size. + * Default: 256 KiB. + */ + readonly maxFileBytes?: number +} + +export function localFilesystem(opts: LocalFilesystemOptions = {}): FilesystemBackend { + const maxBytes = opts.maxFileBytes ?? DEFAULT_MAX_FILE_BYTES + return { + async readFile(path: string, _ctx: BackendContext): Promise { + const s = await stat(path) + if (s.size > maxBytes) { + throw new Error(`File too large: ${s.size} bytes (max ${maxBytes}) at ${path}`) + } + return await readFile(path, "utf8") + }, + async writeFile( + path: string, + content: string, + _ctx: BackendContext, + ): Promise<{ readonly bytesWritten: number }> { + await writeFile(path, content, "utf8") + return { bytesWritten: Buffer.byteLength(content, "utf8") } + }, + async listDir(path: string, _ctx: BackendContext): Promise { + return await readdir(path) + }, + } +} diff --git a/packages/workspace/test/local-filesystem.test.ts b/packages/workspace/test/local-filesystem.test.ts new file mode 100644 index 0000000..7bc399e --- /dev/null +++ b/packages/workspace/test/local-filesystem.test.ts @@ -0,0 +1,50 @@ +import { describe, expect, it, beforeEach, afterEach } from "vitest" +import { mkdtempSync, rmSync, writeFileSync, mkdirSync } from "node:fs" +import { tmpdir } from "node:os" +import { join } from "node:path" +import { localFilesystem } from "../src/local-filesystem.js" + +function ctx(workspaceRoot: string) { + return { signal: new AbortController().signal, workspaceRoot } +} + +describe("localFilesystem", () => { + let root: string + beforeEach(() => { + root = mkdtempSync(join(tmpdir(), "dawn-localfs-")) + }) + afterEach(() => { + rmSync(root, { recursive: true, force: true }) + }) + + it("readFile returns UTF-8 contents", async () => { + writeFileSync(join(root, "hello.txt"), "hi", "utf8") + const fs = localFilesystem() + expect(await fs.readFile(join(root, "hello.txt"), ctx(root))).toBe("hi") + }) + + it("readFile rejects files larger than maxFileBytes", async () => { + writeFileSync(join(root, "big.txt"), "x".repeat(2048), "utf8") + const fs = localFilesystem({ maxFileBytes: 1024 }) + await expect(fs.readFile(join(root, "big.txt"), ctx(root))).rejects.toThrow(/too large/i) + }) + + it("writeFile returns the byte count", async () => { + const fs = localFilesystem() + const res = await fs.writeFile(join(root, "out.txt"), "abc", ctx(root)) + expect(res.bytesWritten).toBe(3) + }) + + it("listDir returns directory entries (leaf names only)", async () => { + writeFileSync(join(root, "a.txt"), "", "utf8") + mkdirSync(join(root, "sub")) + const fs = localFilesystem() + const entries = await fs.listDir(root, ctx(root)) + expect([...entries].sort()).toEqual(["a.txt", "sub"]) + }) + + it("readFile on missing file raises ENOENT", async () => { + const fs = localFilesystem() + await expect(fs.readFile(join(root, "ghost.txt"), ctx(root))).rejects.toThrow(/ENOENT/) + }) +}) From 980a682de078597992a27b1f31abf954fcb03d6e Mon Sep 17 00:00:00 2001 From: Brian Love Date: Wed, 20 May 2026 17:24:06 -0700 Subject: [PATCH 06/23] feat(workspace): localExec default backend Co-Authored-By: Claude Opus 4.7 --- packages/workspace/src/index.ts | 1 + packages/workspace/src/local-exec.ts | 53 +++++++++++++++++++ packages/workspace/test/local-exec.test.ts | 60 ++++++++++++++++++++++ 3 files changed, 114 insertions(+) create mode 100644 packages/workspace/src/local-exec.ts create mode 100644 packages/workspace/test/local-exec.test.ts diff --git a/packages/workspace/src/index.ts b/packages/workspace/src/index.ts index 78d07af..3275224 100644 --- a/packages/workspace/src/index.ts +++ b/packages/workspace/src/index.ts @@ -1,3 +1,4 @@ +export { type LocalExecOptions, localExec } from "./local-exec.js" export { type LocalFilesystemOptions, localFilesystem } from "./local-filesystem.js" export type { BackendContext, diff --git a/packages/workspace/src/local-exec.ts b/packages/workspace/src/local-exec.ts new file mode 100644 index 0000000..7857b96 --- /dev/null +++ b/packages/workspace/src/local-exec.ts @@ -0,0 +1,53 @@ +import { exec as cpExec } from "node:child_process" +import { promisify } from "node:util" +import type { BackendContext, ExecBackend } from "./types.js" + +const execAsync = promisify(cpExec) +const DEFAULT_TIMEOUT_MS = 30_000 + +export interface LocalExecOptions { + /** Kill the command if it runs longer than this. Default 30 seconds. */ + readonly timeout?: number + /** + * Optional allowlist of command-line patterns. When non-empty, every + * command must match at least one regex or `runCommand` throws before + * spawning anything. Use to deny dangerous commands in production. + */ + readonly allowedCommands?: readonly RegExp[] +} + +export function localExec(opts: LocalExecOptions = {}): ExecBackend { + const timeout = opts.timeout ?? DEFAULT_TIMEOUT_MS + const allowed = opts.allowedCommands + return { + async runCommand(args, ctx: BackendContext) { + if (allowed && allowed.length > 0 && !allowed.some((re) => re.test(args.command))) { + throw new Error(`Command not allowed by allowedCommands policy: ${args.command}`) + } + try { + const result = await execAsync(args.command, { + cwd: args.cwd ?? ctx.workspaceRoot, + env: args.env ?? process.env, + timeout, + signal: ctx.signal, + }) + return { stdout: result.stdout, stderr: result.stderr, exitCode: 0 } + } catch (err) { + const e = err as NodeJS.ErrnoException & { + code?: number | string + stdout?: string + stderr?: string + killed?: boolean + } + if (e.killed && typeof e.code !== "number") { + throw new Error(`Command timeout after ${timeout}ms: ${args.command}`) + } + return { + stdout: e.stdout ?? "", + stderr: e.stderr ?? "", + exitCode: typeof e.code === "number" ? e.code : 1, + } + } + }, + } +} diff --git a/packages/workspace/test/local-exec.test.ts b/packages/workspace/test/local-exec.test.ts new file mode 100644 index 0000000..9911c82 --- /dev/null +++ b/packages/workspace/test/local-exec.test.ts @@ -0,0 +1,60 @@ +import { describe, expect, it } from "vitest" +import { mkdtempSync, rmSync } from "node:fs" +import { tmpdir } from "node:os" +import { join } from "node:path" +import { localExec } from "../src/local-exec.js" + +function ctx(workspaceRoot: string) { + return { signal: new AbortController().signal, workspaceRoot } +} + +describe("localExec", () => { + it("runCommand captures stdout, stderr, exitCode", async () => { + const root = mkdtempSync(join(tmpdir(), "dawn-localexec-")) + try { + const exec = localExec() + const out = await exec.runCommand({ command: "echo hello" }, ctx(root)) + expect(out.stdout.trim()).toBe("hello") + expect(out.exitCode).toBe(0) + } finally { + rmSync(root, { recursive: true, force: true }) + } + }) + + it("runCommand returns non-zero exitCode on failure", async () => { + const root = mkdtempSync(join(tmpdir(), "dawn-localexec-")) + try { + const exec = localExec() + const out = await exec.runCommand({ command: "exit 7" }, ctx(root)) + expect(out.exitCode).toBe(7) + } finally { + rmSync(root, { recursive: true, force: true }) + } + }) + + it("runCommand enforces timeout", async () => { + const root = mkdtempSync(join(tmpdir(), "dawn-localexec-")) + try { + const exec = localExec({ timeout: 100 }) + await expect( + exec.runCommand({ command: "sleep 1" }, ctx(root)), + ).rejects.toThrow(/timeout/i) + } finally { + rmSync(root, { recursive: true, force: true }) + } + }) + + it("runCommand respects allowedCommands regex allowlist", async () => { + const root = mkdtempSync(join(tmpdir(), "dawn-localexec-")) + try { + const exec = localExec({ allowedCommands: [/^echo\b/, /^ls\b/] }) + const ok = await exec.runCommand({ command: "echo allowed" }, ctx(root)) + expect(ok.stdout.trim()).toBe("allowed") + await expect( + exec.runCommand({ command: "rm -rf /" }, ctx(root)), + ).rejects.toThrow(/not allowed/i) + } finally { + rmSync(root, { recursive: true, force: true }) + } + }) +}) From c6d053cb42dd3e67f100dc1a19b08360c16d4851 Mon Sep 17 00:00:00 2001 From: Brian Love Date: Wed, 20 May 2026 17:26:06 -0700 Subject: [PATCH 07/23] feat(workspace): compose() middleware helper Co-Authored-By: Claude Opus 4.7 --- packages/workspace/src/compose.ts | 13 ++++++ packages/workspace/src/index.ts | 1 + packages/workspace/test/compose.test.ts | 53 +++++++++++++++++++++++++ 3 files changed, 67 insertions(+) create mode 100644 packages/workspace/src/compose.ts create mode 100644 packages/workspace/test/compose.test.ts diff --git a/packages/workspace/src/compose.ts b/packages/workspace/src/compose.ts new file mode 100644 index 0000000..11c5cf3 --- /dev/null +++ b/packages/workspace/src/compose.ts @@ -0,0 +1,13 @@ +/** + * Compose middleware functions into a single wrapper. + * + * Order: the LEFTMOST middleware is the OUTERMOST. Given + * `compose(a, b, c)(base)`, the call order is `a -> b -> c -> base`, + * mirroring how function call stacks read top-down. + * + * With zero middlewares, returns the base unchanged (no wrapper object). + */ +export function compose(...middlewares: ReadonlyArray<(next: T) => T>): (base: T) => T { + if (middlewares.length === 0) return (base) => base + return (base) => middlewares.reduceRight((acc, mw) => mw(acc), base) +} diff --git a/packages/workspace/src/index.ts b/packages/workspace/src/index.ts index 3275224..82c9b4d 100644 --- a/packages/workspace/src/index.ts +++ b/packages/workspace/src/index.ts @@ -1,3 +1,4 @@ +export { compose } from "./compose.js" export { type LocalExecOptions, localExec } from "./local-exec.js" export { type LocalFilesystemOptions, localFilesystem } from "./local-filesystem.js" export type { diff --git a/packages/workspace/test/compose.test.ts b/packages/workspace/test/compose.test.ts new file mode 100644 index 0000000..2d1eb28 --- /dev/null +++ b/packages/workspace/test/compose.test.ts @@ -0,0 +1,53 @@ +import { describe, expect, it } from "vitest" +import { compose } from "../src/compose.js" +import type { FilesystemBackend, FilesystemMiddleware } from "../src/types.js" + +const base: FilesystemBackend = { + async readFile() { return "BASE" }, + async writeFile() { return { bytesWritten: 0 } }, + async listDir() { return [] }, +} + +describe("compose", () => { + it("with zero middlewares returns the base unchanged", () => { + expect(compose()(base)).toBe(base) + }) + + it("with one middleware wraps the base", async () => { + const lower: FilesystemMiddleware = (next) => ({ + ...next, + readFile: async (p, c) => (await next.readFile(p, c)).toLowerCase(), + }) + const wrapped = compose(lower)(base) + expect( + await wrapped.readFile("x", { signal: new AbortController().signal, workspaceRoot: "/" }), + ).toBe("base") + }) + + it("applies middlewares right-to-left (outermost first)", async () => { + const trace: string[] = [] + const a: FilesystemMiddleware = (next) => ({ + ...next, + readFile: async (p, c) => { + trace.push("a:before") + const r = await next.readFile(p, c) + trace.push("a:after") + return r + }, + }) + const b: FilesystemMiddleware = (next) => ({ + ...next, + readFile: async (p, c) => { + trace.push("b:before") + const r = await next.readFile(p, c) + trace.push("b:after") + return r + }, + }) + await compose(a, b)(base).readFile("x", { + signal: new AbortController().signal, + workspaceRoot: "/", + }) + expect(trace).toEqual(["a:before", "b:before", "b:after", "a:after"]) + }) +}) From f3a7cd3637e2f63c794febf5004cfa02e83f6abf Mon Sep 17 00:00:00 2001 From: Brian Love Date: Wed, 20 May 2026 17:28:28 -0700 Subject: [PATCH 08/23] feat(workspace): withFilesystemLogging + withExecLogging middlewares Co-Authored-By: Claude Opus 4.7 --- packages/workspace/src/index.ts | 1 + packages/workspace/src/with-logging.ts | 45 ++++++++++++++++++++ packages/workspace/test/with-logging.test.ts | 45 ++++++++++++++++++++ 3 files changed, 91 insertions(+) create mode 100644 packages/workspace/src/with-logging.ts create mode 100644 packages/workspace/test/with-logging.test.ts diff --git a/packages/workspace/src/index.ts b/packages/workspace/src/index.ts index 82c9b4d..a9f6232 100644 --- a/packages/workspace/src/index.ts +++ b/packages/workspace/src/index.ts @@ -8,3 +8,4 @@ export type { FilesystemBackend, FilesystemMiddleware, } from "./types.js" +export { type LoggingOptions, withExecLogging, withFilesystemLogging } from "./with-logging.js" diff --git a/packages/workspace/src/with-logging.ts b/packages/workspace/src/with-logging.ts new file mode 100644 index 0000000..080b9c2 --- /dev/null +++ b/packages/workspace/src/with-logging.ts @@ -0,0 +1,45 @@ +import type { ExecMiddleware, FilesystemBackend, FilesystemMiddleware } from "./types.js" + +export interface LoggingOptions { + /** + * Where to send log lines. Default: `console.error`. + * + * Pass a function for structured logging. The argument is + * `{ method, args }` so the function can format however it wants. + */ + readonly destination?: (entry: { method: string; args: unknown[] }) => void +} + +function emit(opts: LoggingOptions, method: string, args: unknown[]): void { + if (opts.destination) { + opts.destination({ method, args }) + return + } + console.error(`[dawn:workspace] ${method}(${args.map((a) => JSON.stringify(a)).join(", ")})`) +} + +export function withFilesystemLogging(opts: LoggingOptions = {}): FilesystemMiddleware { + return (next: FilesystemBackend) => ({ + readFile: async (path, ctx) => { + emit(opts, "readFile", [path]) + return next.readFile(path, ctx) + }, + writeFile: async (path, content, ctx) => { + emit(opts, "writeFile", [path, content]) + return next.writeFile(path, content, ctx) + }, + listDir: async (path, ctx) => { + emit(opts, "listDir", [path]) + return next.listDir(path, ctx) + }, + }) +} + +export function withExecLogging(opts: LoggingOptions = {}): ExecMiddleware { + return (next) => ({ + runCommand: async (args, ctx) => { + emit(opts, "runCommand", [args.command, args.cwd]) + return next.runCommand(args, ctx) + }, + }) +} diff --git a/packages/workspace/test/with-logging.test.ts b/packages/workspace/test/with-logging.test.ts new file mode 100644 index 0000000..8c1ddee --- /dev/null +++ b/packages/workspace/test/with-logging.test.ts @@ -0,0 +1,45 @@ +import { describe, expect, it } from "vitest" +import { withFilesystemLogging } from "../src/with-logging.js" +import type { FilesystemBackend } from "../src/types.js" + +const base: FilesystemBackend = { + async readFile() { return "ok" }, + async writeFile() { return { bytesWritten: 5 } }, + async listDir() { return ["a"] }, +} + +const ctx = { signal: new AbortController().signal, workspaceRoot: "/r" } + +describe("withFilesystemLogging", () => { + it("invokes the destination callback for each method", async () => { + const log: Array<{ method: string; args: unknown[] }> = [] + const wrapped = withFilesystemLogging({ destination: (e) => log.push(e) })(base) + await wrapped.readFile("a.md", ctx) + await wrapped.writeFile("b.md", "hi", ctx) + await wrapped.listDir("/r", ctx) + expect(log.map((e) => e.method)).toEqual(["readFile", "writeFile", "listDir"]) + expect(log[0]!.args).toEqual(["a.md"]) + expect(log[1]!.args).toEqual(["b.md", "hi"]) + }) + + it("forwards return values unchanged", async () => { + const wrapped = withFilesystemLogging({ destination: () => undefined })(base) + expect(await wrapped.readFile("a.md", ctx)).toBe("ok") + expect(await wrapped.writeFile("b.md", "hi", ctx)).toEqual({ bytesWritten: 5 }) + expect([...(await wrapped.listDir("/r", ctx))]).toEqual(["a"]) + }) + + it("defaults destination to console.error when not provided", async () => { + const original = console.error + const logged: string[] = [] + console.error = ((msg: string) => logged.push(msg)) as typeof console.error + try { + const wrapped = withFilesystemLogging()(base) + await wrapped.readFile("a.md", ctx) + } finally { + console.error = original + } + expect(logged.length).toBe(1) + expect(logged[0]).toContain("readFile") + }) +}) From 9cb6157c38b2cc77b7f3ae6e3d0623b348cae891 Mon Sep 17 00:00:00 2001 From: Brian Love Date: Wed, 20 May 2026 17:45:55 -0700 Subject: [PATCH 09/23] feat(core): switch dawn.config.ts loader from hand-rolled parser to tsx import MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The hand-rolled parser supported only string-literal property values and const string bindings. The upcoming workspace capability needs to express callable backend values in dawn.config.ts, which strings can't express. Switch to a tsx-evaluated dynamic import (same loader Dawn already uses for route discovery and tool execution). Existing dawn.config.ts files (just { appDir }) remain valid TS modules and continue to load without modification. Side-effects of the loader swap: - Two CLI integration tests assumed the old parser's specific error message or its fresh-read-from-disk behavior. The verify test's expected error string is updated to match the runtime ReferenceError that the tsx import now surfaces, and the dev test that mutated dawn.config.ts mid-session is rewritten to start the dev process with the invalid config in place (Node's ESM cache prevents a re-import of the same module URL within one process — mid-session config edits will become a per-task concern as backends land). Co-Authored-By: Claude Opus 4.7 --- packages/cli/test/dev-command.test.ts | 16 +- packages/cli/test/verify-command.test.ts | 3 +- packages/core/src/config.ts | 324 ++--------------------- packages/core/test/config.test.ts | 99 +++---- 4 files changed, 69 insertions(+), 373 deletions(-) diff --git a/packages/cli/test/dev-command.test.ts b/packages/cli/test/dev-command.test.ts index 25c2a82..8eacd05 100644 --- a/packages/cli/test/dev-command.test.ts +++ b/packages/cli/test/dev-command.test.ts @@ -496,26 +496,22 @@ describe("dawn dev lifecycle", () => { expect(await response.json()).toMatchObject({ version: "healthy" }) }) - test("terminates the session for fatal appDir changes outside the discovered app root", { + test("terminates the session when configured appDir falls outside the discovered app root", { timeout: 15_000, }, async () => { const appRoot = await createFixtureApp({ - "dawn.config.ts": "export default {};\n", + "dawn.config.ts": 'const appDir = "../outside";\nexport default { appDir };\n', "package.json": "{}\n", "src/app/support/[tenant]/index.ts": `export const graph = async () => ({ version: "healthy" });\n`, }) - const configPath = join(appRoot, "dawn.config.ts") + // Ensure the configured appDir target (one level above appRoot) actually + // exists so we exercise the appRoot containment check rather than the + // "missing routes directory" check. + await mkdir(join(appRoot, "..", "outside"), { recursive: true }) const dev = await startDevProcess({ cwd: appRoot }) devProcesses.push(dev) - await dev.waitForReady() - await writeFile( - configPath, - 'const appDir = "../outside";\nexport default { appDir };\n', - "utf8", - ) - const exitCode = await dev.waitForExit() expect(exitCode).toBe(1) diff --git a/packages/cli/test/verify-command.test.ts b/packages/cli/test/verify-command.test.ts index f578299..0e4831d 100644 --- a/packages/cli/test/verify-command.test.ts +++ b/packages/cli/test/verify-command.test.ts @@ -146,8 +146,7 @@ describe("dawn verify", () => { checks: [ { error: { - message: - 'Unsupported dawn.config.ts syntax: unexpected token "(". Supported subset: optional const string declarations followed by export default { appDir } or export default { appDir: "..." }.', + message: "defineConfig is not defined", }, name: "app", status: "failed", diff --git a/packages/core/src/config.ts b/packages/core/src/config.ts index a2f5045..bfd5b71 100644 --- a/packages/core/src/config.ts +++ b/packages/core/src/config.ts @@ -1,322 +1,40 @@ import { constants } from "node:fs" -import { access, readFile } from "node:fs/promises" +import { access } from "node:fs/promises" import { join } from "node:path" +import { pathToFileURL } from "node:url" import type { DawnConfig, LoadDawnConfigOptions, LoadedDawnConfig } from "./types.js" export const DAWN_CONFIG_FILE = "dawn.config.ts" -type Token = - | { - readonly type: - | "const" - | "default" - | "export" - | "eof" - | "equals" - | "lbrace" - | "rbrace" - | "colon" - | "comma" - | "semicolon" - } - | { readonly type: "identifier"; readonly value: string } - | { readonly type: "string"; readonly value: string } +let loaderPromise: Promise | undefined -type TokenType = Token["type"] -type TokenOfType = Extract +async function registerTsxLoader(): Promise { + loaderPromise ??= (async () => { + const { register } = (await import("tsx/esm/api")) as { + readonly register: () => unknown + } + register() + })() + await loaderPromise +} export async function loadDawnConfig(options: LoadDawnConfigOptions): Promise { const configPath = join(options.appRoot, DAWN_CONFIG_FILE) - await access(configPath, constants.F_OK) + await registerTsxLoader() - const source = await readFile(configPath, "utf8") - - return { - appRoot: options.appRoot, - config: parseDawnConfig(source), - configPath, - } -} - -function parseDawnConfig(source: string): DawnConfig { - const parser = new DawnConfigParser(source) - - return parser.parse() -} - -class DawnConfigParser { - private readonly tokens: Token[] - private currentIndex = 0 - private readonly stringBindings = new Map() - - constructor(source: string) { - this.tokens = tokenize(source) - } - - parse(): DawnConfig { - while (this.match("const")) { - this.parseConstDeclaration() - this.consumeOptional("semicolon") - } - - this.consume("export") - this.consume("default") - - const config = this.parseConfigObject() - - this.consumeOptional("semicolon") - this.consume("eof") - - return config - } - - private parseConstDeclaration(): void { - const identifier = this.consume("identifier") - this.consume("equals") - const value = this.consume("string") - this.stringBindings.set(identifier.value, value.value) - } - - private parseConfigObject(): DawnConfig { - this.consume("lbrace") - - let appDir: string | undefined - - while (!this.check("rbrace")) { - const property = this.consume("identifier") - - if (property.value !== "appDir") { - throw unsupportedConfig(`unsupported property "${property.value}"`) - } - - const resolvedValue = this.match("colon") - ? this.parsePropertyValue() - : this.resolveIdentifier(property.value) - - appDir = resolvedValue - - if (!this.match("comma")) { - break - } - } - - this.consume("rbrace") - - return appDir ? { appDir } : {} - } - - private parsePropertyValue(): string { - if (this.check("string")) { - return this.consume("string").value - } - - if (this.check("identifier")) { - return this.resolveIdentifier(this.consume("identifier").value) - } - - throw unsupportedConfig("property values must be string literals or const identifiers") - } - - private resolveIdentifier(identifier: string): string { - const resolved = this.stringBindings.get(identifier) - - if (!resolved) { - throw unsupportedConfig(`unknown identifier "${identifier}"`) - } - - return resolved - } - - private match(type: TokenType): boolean { - if (!this.check(type)) { - return false - } - - this.currentIndex += 1 - return true - } - - private consume(type: TType): TokenOfType { - const token = this.peek() - - if (token.type !== type) { - throw unsupportedConfig(`expected ${type} but found ${describeToken(token)}`) - } - - this.currentIndex += 1 - return token as TokenOfType - } - - private consumeOptional(type: TokenType): void { - this.match(type) - } - - private check(type: TokenType): boolean { - return this.peek().type === type - } - - private peek(): Token { - return this.tokens[this.currentIndex] ?? { type: "eof" } + const mod = (await import(pathToFileURL(configPath).href)) as { + readonly default?: unknown } -} - -function tokenize(source: string): Token[] { - const tokens: Token[] = [] - let index = source.startsWith("\uFEFF") ? 1 : 0 - - while (index < source.length) { - const character = source[index] - - if (!character) { - break - } - - if (isWhitespace(character)) { - index += 1 - continue - } - - if (character === "/" && source[index + 1] === "/") { - index += 2 - while (index < source.length && source[index] !== "\n") { - index += 1 - } - continue - } - - if (character === "/" && source[index + 1] === "*") { - const commentEnd = source.indexOf("*/", index + 2) - - if (commentEnd === -1) { - throw unsupportedConfig("unterminated block comment") - } - - index = commentEnd + 2 - continue - } - - if (character === "{") { - tokens.push({ type: "lbrace" }) - index += 1 - continue - } - - if (character === "}") { - tokens.push({ type: "rbrace" }) - index += 1 - continue - } - - if (character === ":") { - tokens.push({ type: "colon" }) - index += 1 - continue - } - - if (character === ",") { - tokens.push({ type: "comma" }) - index += 1 - continue - } - - if (character === "=") { - tokens.push({ type: "equals" }) - index += 1 - continue - } - - if (character === ";") { - tokens.push({ type: "semicolon" }) - index += 1 - continue - } - - if (character === '"' || character === "'") { - const [value, nextIndex] = readStringLiteral(source, index, character) - tokens.push({ type: "string", value }) - index = nextIndex - continue - } - - if (isIdentifierStart(character)) { - const [identifier, nextIndex] = readIdentifier(source, index) - index = nextIndex - - if (identifier === "const" || identifier === "export" || identifier === "default") { - tokens.push({ type: identifier }) - } else { - tokens.push({ type: "identifier", value: identifier }) - } - - continue - } - throw unsupportedConfig(`unexpected token "${character}"`) + if (!mod.default || typeof mod.default !== "object") { + throw new Error(`${DAWN_CONFIG_FILE} must export default an object. Got: ${typeof mod.default}`) } - tokens.push({ type: "eof" }) - - return tokens -} - -function readStringLiteral(source: string, startIndex: number, quote: '"' | "'"): [string, number] { - let index = startIndex + 1 - let value = "" - - while (index < source.length) { - const character = source[index] - - if (!character) { - break - } - - if (character === "\\") { - throw unsupportedConfig("escaped string literals are not supported") - } - - if (character === quote) { - return [value, index + 1] - } - - value += character - index += 1 - } - - throw unsupportedConfig("unterminated string literal") -} - -function readIdentifier(source: string, startIndex: number): [string, number] { - let index = startIndex + 1 - - while (index < source.length && isIdentifierPart(source[index] ?? "")) { - index += 1 + return { + appRoot: options.appRoot, + config: mod.default as DawnConfig, + configPath, } - - return [source.slice(startIndex, index), index] -} - -function isIdentifierStart(character: string): boolean { - return /[A-Za-z_$]/.test(character) -} - -function isIdentifierPart(character: string): boolean { - return /[A-Za-z0-9_$]/.test(character) -} - -function isWhitespace(character: string): boolean { - return /\s/.test(character) -} - -function describeToken(token: Token): string { - return token.type === "identifier" || token.type === "string" - ? `${token.type} "${token.value}"` - : token.type -} - -function unsupportedConfig(reason: string): Error { - return new Error( - `Unsupported dawn.config.ts syntax: ${reason}. Supported subset: optional const string declarations followed by export default { appDir } or export default { appDir: "..." }.`, - ) } diff --git a/packages/core/test/config.test.ts b/packages/core/test/config.test.ts index 301aa6c..9b9b516 100644 --- a/packages/core/test/config.test.ts +++ b/packages/core/test/config.test.ts @@ -1,77 +1,60 @@ -import { mkdir, mkdtemp, rm, writeFile } from "node:fs/promises" +import { mkdtempSync, rmSync } from "node:fs" +import { writeFile } from "node:fs/promises" import { tmpdir } from "node:os" import { join } from "node:path" -import { fileURLToPath } from "node:url" -import { afterEach, describe, expect, test } from "vitest" +import { afterEach, beforeEach, describe, expect, it } from "vitest" -import { loadDawnConfig } from "../src/config" - -const CONTRACT_FIXTURES_DIR = fileURLToPath( - new URL("../../../test/fixtures/contracts/", import.meta.url), -) -const tempDirs: string[] = [] - -afterEach(async () => { - await Promise.all(tempDirs.splice(0).map((dir) => rm(dir, { force: true, recursive: true }))) -}) - -function fixtureRoot(name: string) { - return join(CONTRACT_FIXTURES_DIR, name) -} - -async function createConfigFixture(source: string) { - const appRoot = await mkdtemp(join(tmpdir(), "dawn-core-config-")) - tempDirs.push(appRoot) - - await writeFile(join(appRoot, "package.json"), "{}\n") - await writeFile(join(appRoot, "dawn.config.ts"), source) - await mkdir(join(appRoot, "src", "app"), { recursive: true }) - - return appRoot -} +import { DAWN_CONFIG_FILE, loadDawnConfig } from "../src/config.js" describe("loadDawnConfig", () => { - test("loads appDir from an inline string literal", async () => { - const appRoot = await createConfigFixture('export default { appDir: "src/custom-app" }\n') + let appRoot: string - await expect(loadDawnConfig({ appRoot })).resolves.toMatchObject({ - appRoot, - config: { appDir: "src/custom-app" }, - configPath: join(appRoot, "dawn.config.ts"), - }) + beforeEach(() => { + appRoot = mkdtempSync(join(tmpdir(), "dawn-config-")) }) - test("loads appDir from the checked-in custom appDir fixture", async () => { - const appRoot = fixtureRoot("valid-custom-app-dir") - - await expect(loadDawnConfig({ appRoot })).resolves.toMatchObject({ - appRoot, - config: { appDir: "src/dawn-app" }, - configPath: join(appRoot, "dawn.config.ts"), - }) + afterEach(() => { + rmSync(appRoot, { recursive: true, force: true }) }) - test("rejects the checked-in invalid config fixture with a Dawn-specific parser error", async () => { - const appRoot = fixtureRoot("invalid-config") + async function writeConfig(source: string): Promise { + await writeFile(join(appRoot, DAWN_CONFIG_FILE), source, "utf8") + } + + it("loads a config with just appDir", async () => { + await writeConfig(`export default { appDir: "src/app" }\n`) + const loaded = await loadDawnConfig({ appRoot }) + expect(loaded.config).toMatchObject({ appDir: "src/app" }) + expect(loaded.configPath).toBe(join(appRoot, DAWN_CONFIG_FILE)) + }) - await expect(loadDawnConfig({ appRoot })).rejects.toThrow("Unsupported dawn.config.ts syntax") + it("loads a config with no fields (empty object)", async () => { + await writeConfig(`export default {}\n`) + const loaded = await loadDawnConfig({ appRoot }) + expect(loaded.config).toEqual({}) }) - test("rejects unsupported config properties with a stable parser error", async () => { - const appRoot = await createConfigFixture('export default { appDir: "src/app", mode: "dev" }\n') + it("loads a config that uses a const binding for appDir", async () => { + await writeConfig(` + const APP_DIR = "src/app" + export default { appDir: APP_DIR } + `) + const loaded = await loadDawnConfig({ appRoot }) + expect(loaded.config).toMatchObject({ appDir: "src/app" }) + }) - await expect(loadDawnConfig({ appRoot })).rejects.toThrow( - 'Unsupported dawn.config.ts syntax: unsupported property "mode". Supported subset: optional const string declarations followed by export default { appDir } or export default { appDir: "..." }.', - ) + it("rejects missing default export", async () => { + await writeConfig(`export const named = { appDir: "x" }\n`) + await expect(loadDawnConfig({ appRoot })).rejects.toThrow(/must export default/i) }) - test("rejects non-string const appDir bindings with a stable parser error", async () => { - const appRoot = await createConfigFixture( - "const appDir = getAppDir()\nexport default { appDir }\n", - ) + it("rejects non-object default export", async () => { + await writeConfig(`export default "hello"\n`) + await expect(loadDawnConfig({ appRoot })).rejects.toThrow(/must export default an object/i) + }) - await expect(loadDawnConfig({ appRoot })).rejects.toThrow( - 'Unsupported dawn.config.ts syntax: unexpected token "(". Supported subset: optional const string declarations followed by export default { appDir } or export default { appDir: "..." }.', - ) + it("propagates TS syntax errors from the imported module", async () => { + await writeConfig(`export default { appDir:\n`) + await expect(loadDawnConfig({ appRoot })).rejects.toThrow() }) }) From 1f6307cbe1db70dc772be58f6dfe3e5eda199cf4 Mon Sep 17 00:00:00 2001 From: Brian Love Date: Wed, 20 May 2026 17:50:25 -0700 Subject: [PATCH 10/23] feat(core): add backends field to DawnConfig + CapabilityMarkerContext Type-only edge: @dawn-ai/core now imports FilesystemBackend/ExecBackend types from @dawn-ai/workspace via 'import type'. No runtime weight yet (workspace stays in devDependencies until the marker lands). Co-Authored-By: Claude Opus 4.7 --- packages/core/package.json | 1 + packages/core/src/capabilities/types.ts | 5 +++++ packages/core/src/types.ts | 5 +++++ pnpm-lock.yaml | 3 +++ 4 files changed, 14 insertions(+) diff --git a/packages/core/package.json b/packages/core/package.json index 034732e..887b62a 100644 --- a/packages/core/package.json +++ b/packages/core/package.json @@ -43,6 +43,7 @@ }, "devDependencies": { "@dawn-ai/config-typescript": "workspace:*", + "@dawn-ai/workspace": "workspace:*", "@types/node": "25.6.0" } } diff --git a/packages/core/src/capabilities/types.ts b/packages/core/src/capabilities/types.ts index 4fb25c9..74d6783 100644 --- a/packages/core/src/capabilities/types.ts +++ b/packages/core/src/capabilities/types.ts @@ -1,10 +1,15 @@ import type { DawnAgent } from "@dawn-ai/sdk" +import type { ExecBackend, FilesystemBackend } from "@dawn-ai/workspace" import type { ResolvedStateField, RouteManifest } from "../types.js" export interface CapabilityMarkerContext { readonly routeManifest: RouteManifest readonly descriptor: DawnAgent | undefined readonly descriptorRouteMap?: ReadonlyMap + readonly backends?: { + readonly filesystem?: FilesystemBackend + readonly exec?: ExecBackend + } } export interface DawnToolDefinition { diff --git a/packages/core/src/types.ts b/packages/core/src/types.ts index fce1b8c..88c86ed 100644 --- a/packages/core/src/types.ts +++ b/packages/core/src/types.ts @@ -1,9 +1,14 @@ import type { RouteKind } from "@dawn-ai/sdk" +import type { ExecBackend, FilesystemBackend } from "@dawn-ai/workspace" export type { RouteKind } export interface DawnConfig { readonly appDir?: string + readonly backends?: { + readonly filesystem?: FilesystemBackend + readonly exec?: ExecBackend + } } export type RouteSegment = diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index e85f404..db7d59a 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -229,6 +229,9 @@ importers: '@dawn-ai/config-typescript': specifier: workspace:* version: link:../config-typescript + '@dawn-ai/workspace': + specifier: workspace:* + version: link:../workspace '@types/node': specifier: 25.6.0 version: 25.6.0 From 65f2c1327046ea3b0d61ba8fb7524d7a6d6ce20b Mon Sep 17 00:00:00 2001 From: Brian Love Date: Wed, 20 May 2026 17:59:13 -0700 Subject: [PATCH 11/23] feat(core): createWorkspaceMarker capability Auto-detects a route's workspace/ directory and contributes four tools (readFile/writeFile/listDir/runBash) routed through configurable backends. Defaults to localFilesystem + localExec when no backends are configured in dawn.config.ts. Path-jail enforced in the capability; backends receive resolved absolute paths. Tools carry an `overridable: true` flag so a future uniqueness-check inversion can let user-authored tools/.ts files supersede them. Promotes @dawn-ai/workspace to a runtime dependency of @dawn-ai/core, and extends the cli typegen harness to pack @dawn-ai/workspace alongside cli/core/langchain/langgraph/sdk so externally installed dawn bin tests resolve the new transitive dep. Co-Authored-By: Claude Opus 4.7 --- packages/cli/test/typegen-command.test.ts | 2 + packages/core/package.json | 2 +- .../src/capabilities/built-in/workspace.ts | 98 ++++++++++++ packages/core/src/index.ts | 1 + .../core/test/capabilities/workspace.test.ts | 144 ++++++++++++++++++ pnpm-lock.yaml | 6 +- 6 files changed, 249 insertions(+), 4 deletions(-) create mode 100644 packages/core/src/capabilities/built-in/workspace.ts create mode 100644 packages/core/test/capabilities/workspace.test.ts diff --git a/packages/cli/test/typegen-command.test.ts b/packages/cli/test/typegen-command.test.ts index e279a43..260037a 100644 --- a/packages/cli/test/typegen-command.test.ts +++ b/packages/cli/test/typegen-command.test.ts @@ -187,6 +187,7 @@ describe("dawn typegen", () => { const langchainTarball = await packPackage("@dawn-ai/langchain", packsRoot) const langgraphTarball = await packPackage("@dawn-ai/langgraph", packsRoot) const sdkTarball = await packPackage("@dawn-ai/sdk", packsRoot) + const workspaceTarball = await packPackage("@dawn-ai/workspace", packsRoot) await writeFile( join(installerRoot, "package.json"), @@ -207,6 +208,7 @@ describe("dawn typegen", () => { "@dawn-ai/langchain": `file:${langchainTarball}`, "@dawn-ai/langgraph": `file:${langgraphTarball}`, "@dawn-ai/sdk": `file:${sdkTarball}`, + "@dawn-ai/workspace": `file:${workspaceTarball}`, }, }, }, diff --git a/packages/core/package.json b/packages/core/package.json index 887b62a..de78123 100644 --- a/packages/core/package.json +++ b/packages/core/package.json @@ -37,13 +37,13 @@ }, "dependencies": { "@dawn-ai/sdk": "workspace:*", + "@dawn-ai/workspace": "workspace:*", "tsx": "^4.8.1", "typescript": "5.8.3", "zod": "^4.4.3" }, "devDependencies": { "@dawn-ai/config-typescript": "workspace:*", - "@dawn-ai/workspace": "workspace:*", "@types/node": "25.6.0" } } diff --git a/packages/core/src/capabilities/built-in/workspace.ts b/packages/core/src/capabilities/built-in/workspace.ts new file mode 100644 index 0000000..9982890 --- /dev/null +++ b/packages/core/src/capabilities/built-in/workspace.ts @@ -0,0 +1,98 @@ +import { existsSync } from "node:fs" +import { join, resolve, sep } from "node:path" +import { z } from "zod" + +import { localExec, localFilesystem } from "@dawn-ai/workspace" +import type { BackendContext, ExecBackend, FilesystemBackend } from "@dawn-ai/workspace" + +import type { CapabilityMarker, DawnToolDefinition } from "../types.js" + +const WORKSPACE_DIRNAME = "workspace" + +const READ_FILE_INPUT = z.object({ path: z.string().min(1) }) +const WRITE_FILE_INPUT = z.object({ path: z.string().min(1), content: z.string() }) +const LIST_DIR_INPUT = z.object({ path: z.string().default(".") }) +const RUN_BASH_INPUT = z.object({ command: z.string().min(1) }) + +function pathJail(userPath: string, workspaceRoot: string): string { + const resolved = resolve(workspaceRoot, userPath) + if (resolved !== workspaceRoot && !resolved.startsWith(workspaceRoot + sep)) { + throw new Error(`Path is outside workspace: ${userPath}`) + } + return resolved +} + +function backendContext(workspaceRoot: string, signal: AbortSignal): BackendContext { + return { signal, workspaceRoot } +} + +interface OverridableTool extends DawnToolDefinition { + readonly overridable: true +} + +function buildWorkspaceTools( + workspaceRoot: string, + fs: FilesystemBackend, + exec: ExecBackend, +): readonly OverridableTool[] { + const readFile: OverridableTool = { + name: "readFile", + description: "Read a UTF-8 file from the workspace.", + schema: READ_FILE_INPUT, + overridable: true, + run: async (input, ctx) => { + const { path } = READ_FILE_INPUT.parse(input) + const safe = pathJail(path, workspaceRoot) + return fs.readFile(safe, backendContext(workspaceRoot, ctx.signal)) + }, + } + const writeFile: OverridableTool = { + name: "writeFile", + description: "Write a UTF-8 file inside the workspace.", + schema: WRITE_FILE_INPUT, + overridable: true, + run: async (input, ctx) => { + const { path, content } = WRITE_FILE_INPUT.parse(input) + const safe = pathJail(path, workspaceRoot) + const result = await fs.writeFile(safe, content, backendContext(workspaceRoot, ctx.signal)) + return `wrote ${result.bytesWritten} bytes to ${path}` + }, + } + const listDir: OverridableTool = { + name: "listDir", + description: "List entries in a workspace directory.", + schema: LIST_DIR_INPUT, + overridable: true, + run: async (input, ctx) => { + const { path } = LIST_DIR_INPUT.parse(input) + const safe = pathJail(path, workspaceRoot) + const entries = await fs.listDir(safe, backendContext(workspaceRoot, ctx.signal)) + return [...entries] + }, + } + const runBash: OverridableTool = { + name: "runBash", + description: "Run a shell command inside the workspace.", + schema: RUN_BASH_INPUT, + overridable: true, + run: async (input, ctx) => { + const { command } = RUN_BASH_INPUT.parse(input) + return exec.runCommand({ command }, backendContext(workspaceRoot, ctx.signal)) + }, + } + return [readFile, writeFile, listDir, runBash] +} + +export function createWorkspaceMarker(): CapabilityMarker { + return { + name: "workspace", + detect: async (routeDir, _context) => existsSync(join(routeDir, WORKSPACE_DIRNAME)), + load: async (routeDir, context) => { + const workspaceRoot = join(routeDir, WORKSPACE_DIRNAME) + if (!existsSync(workspaceRoot)) return {} + const fs = context.backends?.filesystem ?? localFilesystem() + const exec = context.backends?.exec ?? localExec() + return { tools: buildWorkspaceTools(workspaceRoot, fs, exec) } + }, + } +} diff --git a/packages/core/src/index.ts b/packages/core/src/index.ts index 311e71e..9b22653 100644 --- a/packages/core/src/index.ts +++ b/packages/core/src/index.ts @@ -3,6 +3,7 @@ export type { RuntimeTodo } from "./capabilities/built-in/planning.js" export { createPlanningMarker } from "./capabilities/built-in/planning.js" export { createSkillsMarker } from "./capabilities/built-in/skills.js" export { createSubagentsMarker } from "./capabilities/built-in/subagents.js" +export { createWorkspaceMarker } from "./capabilities/built-in/workspace.js" export type { AppliedContribution, ApplyResult, diff --git a/packages/core/test/capabilities/workspace.test.ts b/packages/core/test/capabilities/workspace.test.ts new file mode 100644 index 0000000..cb8768f --- /dev/null +++ b/packages/core/test/capabilities/workspace.test.ts @@ -0,0 +1,144 @@ +import { mkdirSync, mkdtempSync, rmSync, writeFileSync } from "node:fs" +import { tmpdir } from "node:os" +import { join } from "node:path" +import { afterEach, beforeEach, describe, expect, it, vi } from "vitest" + +import { createWorkspaceMarker } from "../../src/capabilities/built-in/workspace.js" +import type { + CapabilityMarkerContext, + DawnToolDefinition, +} from "../../src/capabilities/types.js" + +function emptyManifest() { + return { appRoot: "/app", routes: [] } +} + +function ctx(extras: Partial = {}): CapabilityMarkerContext { + return { + routeManifest: emptyManifest(), + descriptor: undefined, + ...extras, + } +} + +function findTool( + tools: ReadonlyArray | undefined, + name: string, +): DawnToolDefinition { + const tool = (tools ?? []).find((t) => t.name === name) + if (!tool) throw new Error(`Tool ${name} not found`) + return tool +} + +describe("createWorkspaceMarker — detect", () => { + let routeDir: string + beforeEach(() => { + routeDir = mkdtempSync(join(tmpdir(), "dawn-workspace-cap-")) + }) + afterEach(() => { + rmSync(routeDir, { recursive: true, force: true }) + }) + + it("returns false when no workspace/ directory exists", async () => { + const detected = await createWorkspaceMarker().detect(routeDir, ctx()) + expect(detected).toBe(false) + }) + + it("returns true when workspace/ exists", async () => { + mkdirSync(join(routeDir, "workspace")) + const detected = await createWorkspaceMarker().detect(routeDir, ctx()) + expect(detected).toBe(true) + }) +}) + +describe("createWorkspaceMarker — load", () => { + let routeDir: string + beforeEach(() => { + routeDir = mkdtempSync(join(tmpdir(), "dawn-workspace-cap-")) + mkdirSync(join(routeDir, "workspace")) + }) + afterEach(() => { + rmSync(routeDir, { recursive: true, force: true }) + }) + + it("contributes exactly four tools when workspace/ exists", async () => { + const contribution = await createWorkspaceMarker().load(routeDir, ctx()) + const names = (contribution.tools ?? []).map((t) => t.name).sort() + expect(names).toEqual(["listDir", "readFile", "runBash", "writeFile"]) + }) + + it("contributes no tools when workspace/ is absent", async () => { + rmSync(join(routeDir, "workspace"), { recursive: true }) + const contribution = await createWorkspaceMarker().load(routeDir, ctx()) + expect(contribution.tools).toBeUndefined() + }) + + it("readFile tool calls the configured backend with an absolute path inside the jail", async () => { + writeFileSync(join(routeDir, "workspace", "hello.txt"), "hi", "utf8") + const fakeBackend = { + readFile: vi.fn().mockResolvedValue("hi"), + writeFile: vi.fn(), + listDir: vi.fn(), + } + const contribution = await createWorkspaceMarker().load( + routeDir, + ctx({ backends: { filesystem: fakeBackend } }), + ) + const readTool = findTool(contribution.tools, "readFile") + const result = await readTool.run( + { path: "hello.txt" }, + { signal: new AbortController().signal }, + ) + expect(result).toBe("hi") + expect(fakeBackend.readFile).toHaveBeenCalledOnce() + const firstCall = fakeBackend.readFile.mock.calls[0] + if (!firstCall) throw new Error("readFile was not called") + expect(firstCall[0]).toBe(join(routeDir, "workspace", "hello.txt")) + }) + + it("rejects path-jail escapes with a clear error", async () => { + const contribution = await createWorkspaceMarker().load(routeDir, ctx()) + const readTool = findTool(contribution.tools, "readFile") + await expect( + readTool.run({ path: "../../etc/passwd" }, { signal: new AbortController().signal }), + ).rejects.toThrow(/outside workspace/i) + }) + + it("uses the default local backends when none configured", async () => { + writeFileSync(join(routeDir, "workspace", "ok.txt"), "ok", "utf8") + const contribution = await createWorkspaceMarker().load(routeDir, ctx()) + const readTool = findTool(contribution.tools, "readFile") + const result = await readTool.run( + { path: "ok.txt" }, + { signal: new AbortController().signal }, + ) + expect(result).toBe("ok") + }) + + it("runBash tool calls the configured exec backend", async () => { + const fakeExec = { + runCommand: vi.fn().mockResolvedValue({ stdout: "world", stderr: "", exitCode: 0 }), + } + const contribution = await createWorkspaceMarker().load( + routeDir, + ctx({ backends: { exec: fakeExec } }), + ) + const runBash = findTool(contribution.tools, "runBash") + const result = await runBash.run( + { command: "echo world" }, + { signal: new AbortController().signal }, + ) + expect(result).toMatchObject({ stdout: "world", exitCode: 0 }) + expect(fakeExec.runCommand).toHaveBeenCalledWith( + expect.objectContaining({ command: "echo world" }), + expect.any(Object), + ) + }) + + it("marks all four tools as overridable", async () => { + const contribution = await createWorkspaceMarker().load(routeDir, ctx()) + for (const t of contribution.tools ?? []) { + expect((t as unknown as { overridable?: boolean }).overridable).toBe(true) + } + }) +}) diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index db7d59a..7db8de9 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -216,6 +216,9 @@ importers: '@dawn-ai/sdk': specifier: workspace:* version: link:../sdk + '@dawn-ai/workspace': + specifier: workspace:* + version: link:../workspace tsx: specifier: ^4.8.1 version: 4.21.0 @@ -229,9 +232,6 @@ importers: '@dawn-ai/config-typescript': specifier: workspace:* version: link:../config-typescript - '@dawn-ai/workspace': - specifier: workspace:* - version: link:../workspace '@types/node': specifier: 25.6.0 version: 25.6.0 From 8d371f02ff57e603ad610fec060980aa22cee86e Mon Sep 17 00:00:00 2001 From: Brian Love Date: Wed, 20 May 2026 18:04:38 -0700 Subject: [PATCH 12/23] feat(cli): support overridable capability tools Tools marked overridable on a capability contribution can be shadowed by a user-authored tool with the same name. Used by the workspace capability so authors can override readFile/writeFile/listDir/runBash by dropping a file in tools/. Non-overridable capability tools (writeTodos, readSkill, task) retain the collision error. Co-Authored-By: Claude Opus 4.7 --- .../lib/runtime/check-tool-name-uniqueness.ts | 39 +++++++++++++++---- packages/cli/src/lib/runtime/execute-route.ts | 15 +++++-- .../cli/test/tool-name-uniqueness.test.ts | 37 ++++++++++++++++++ 3 files changed, 81 insertions(+), 10 deletions(-) diff --git a/packages/cli/src/lib/runtime/check-tool-name-uniqueness.ts b/packages/cli/src/lib/runtime/check-tool-name-uniqueness.ts index bfa0f00..b817a02 100644 --- a/packages/cli/src/lib/runtime/check-tool-name-uniqueness.ts +++ b/packages/cli/src/lib/runtime/check-tool-name-uniqueness.ts @@ -1,22 +1,46 @@ export interface ToolNameCheckInput { readonly userTools: ReadonlyArray<{ readonly name: string }> - readonly capabilityTools: ReadonlyArray<{ readonly name: string }> + readonly capabilityTools: ReadonlyArray<{ + readonly name: string + readonly overridable?: boolean + }> readonly reservedNames: ReadonlySet } export type ToolNameCheckResult = - | { readonly ok: true } + | { + readonly ok: true + /** + * Capability tools with overridable entries removed when shadowed by + * a user tool. The runtime should use THIS list when composing the + * final tool set, not the input `capabilityTools`. + */ + readonly effectiveCapabilityTools: ReadonlyArray<{ + readonly name: string + readonly overridable?: boolean + }> + } | { readonly ok: false; readonly message: string } export function checkToolNameUniqueness(input: ToolNameCheckInput): ToolNameCheckResult { - const capNames = new Set(input.capabilityTools.map((t) => t.name)) - for (const t of input.userTools) { - if (capNames.has(t.name)) { + const userNames = new Set(input.userTools.map((t) => t.name)) + const effective: Array<{ readonly name: string; readonly overridable?: boolean }> = [] + + for (const cap of input.capabilityTools) { + if (userNames.has(cap.name)) { + if (cap.overridable) { + // User tool wins; drop the capability tool from the effective set. + continue + } return { ok: false, - message: `Capability conflict: tool name "${t.name}" is contributed by a capability and also defined in tools/. Remove the user tool or remove the capability marker file.`, + message: `Capability conflict: tool name "${cap.name}" is contributed by a capability and also defined in tools/. Remove the user tool or remove the capability marker file.`, } } + effective.push(cap) + } + + for (const t of input.userTools) { if (input.reservedNames.has(t.name)) { return { ok: false, @@ -24,5 +48,6 @@ export function checkToolNameUniqueness(input: ToolNameCheckInput): ToolNameChec } } } - return { ok: true } + + return { ok: true, effectiveCapabilityTools: effective } } diff --git a/packages/cli/src/lib/runtime/execute-route.ts b/packages/cli/src/lib/runtime/execute-route.ts index e643ea8..62247df 100644 --- a/packages/cli/src/lib/runtime/execute-route.ts +++ b/packages/cli/src/lib/runtime/execute-route.ts @@ -311,14 +311,16 @@ async function prepareRouteExecution(options: { for (const t of contribution.tools) { // Adapt capability-contributed tools (which lack filePath/scope) // into the DiscoveredToolDefinition shape used by the runtime. + const overridable = (t as unknown as { overridable?: boolean }).overridable capTools.push({ ...(t.description !== undefined ? { description: t.description } : {}), filePath: ``, name: t.name, + ...(overridable ? { overridable: true } : {}), run: t.run, ...(t.schema !== undefined ? { schema: t.schema } : {}), scope: "route-local", - }) + } as DiscoveredToolDefinition) } } if (contribution.stateFields) capStateFields.push(...contribution.stateFields) @@ -331,13 +333,20 @@ async function prepareRouteExecution(options: { const RESERVED_TOOL_NAMES = new Set(["task"]) // names auto-generated by capabilities const check = checkToolNameUniqueness({ userTools: tools.map((t) => ({ name: t.name })), - capabilityTools: capTools.map((t) => ({ name: t.name })), + capabilityTools: capTools.map((t) => ({ + name: t.name, + ...((t as unknown as { overridable?: boolean }).overridable ? { overridable: true } : {}), + })), reservedNames: RESERVED_TOOL_NAMES, }) if (!check.ok) { return { message: check.message, ok: false } } + // Use the effective set so overridden tools are dropped before merging. + const effectiveCapNames = new Set(check.effectiveCapabilityTools.map((t) => t.name)) + const filteredCapTools = capTools.filter((t) => effectiveCapNames.has(t.name)) + const userStateNames = new Set((stateFields ?? []).map((f) => f.name)) for (const f of capStateFields) { if (userStateNames.has(f.name)) { @@ -348,7 +357,7 @@ async function prepareRouteExecution(options: { } } - tools = [...tools, ...capTools] + tools = [...tools, ...filteredCapTools] stateFields = stateFields ? [...stateFields, ...capStateFields] : capStateFields promptFragments = capPromptFragments streamTransformers = capStreamTransformers diff --git a/packages/cli/test/tool-name-uniqueness.test.ts b/packages/cli/test/tool-name-uniqueness.test.ts index af25e07..4ffcfc5 100644 --- a/packages/cli/test/tool-name-uniqueness.test.ts +++ b/packages/cli/test/tool-name-uniqueness.test.ts @@ -35,3 +35,40 @@ describe("checkToolNameUniqueness", () => { expect(result.message).toContain("task") }) }) + +describe("checkToolNameUniqueness — overridable", () => { + it("when a capability tool is overridable, a user tool with the same name does NOT error and replaces it", () => { + const result = checkToolNameUniqueness({ + userTools: [{ name: "readFile" }], + capabilityTools: [{ name: "readFile", overridable: true }], + reservedNames: new Set(), + }) + expect(result.ok).toBe(true) + if (!result.ok) return + // The returned effectiveCapabilityTools drops the overridden tool. + expect(result.effectiveCapabilityTools).toEqual([]) + }) + + it("when a capability tool is NOT overridable, a user tool with the same name still errors", () => { + const result = checkToolNameUniqueness({ + userTools: [{ name: "writeTodos" }], + capabilityTools: [{ name: "writeTodos" }], // no overridable flag = not overridable + reservedNames: new Set(), + }) + expect(result.ok).toBe(false) + }) + + it("returns the un-shadowed capability tools in effectiveCapabilityTools", () => { + const result = checkToolNameUniqueness({ + userTools: [{ name: "readFile" }], + capabilityTools: [ + { name: "readFile", overridable: true }, + { name: "writeFile", overridable: true }, + ], + reservedNames: new Set(), + }) + expect(result.ok).toBe(true) + if (!result.ok) return + expect(result.effectiveCapabilityTools.map((t) => t.name)).toEqual(["writeFile"]) + }) +}) From 281e70d3e9815f5d8da1d1afa87b5a1abbc84c3a Mon Sep 17 00:00:00 2001 From: Brian Love Date: Wed, 20 May 2026 18:05:29 -0700 Subject: [PATCH 13/23] style: biome auto-fixes (import order) on workspace marker + tests Co-Authored-By: Claude Opus 4.7 --- packages/core/src/capabilities/built-in/workspace.ts | 4 ++-- packages/core/test/capabilities/workspace.test.ts | 10 ++-------- 2 files changed, 4 insertions(+), 10 deletions(-) diff --git a/packages/core/src/capabilities/built-in/workspace.ts b/packages/core/src/capabilities/built-in/workspace.ts index 9982890..888c08a 100644 --- a/packages/core/src/capabilities/built-in/workspace.ts +++ b/packages/core/src/capabilities/built-in/workspace.ts @@ -1,9 +1,9 @@ import { existsSync } from "node:fs" import { join, resolve, sep } from "node:path" -import { z } from "zod" +import type { BackendContext, ExecBackend, FilesystemBackend } from "@dawn-ai/workspace" import { localExec, localFilesystem } from "@dawn-ai/workspace" -import type { BackendContext, ExecBackend, FilesystemBackend } from "@dawn-ai/workspace" +import { z } from "zod" import type { CapabilityMarker, DawnToolDefinition } from "../types.js" diff --git a/packages/core/test/capabilities/workspace.test.ts b/packages/core/test/capabilities/workspace.test.ts index cb8768f..84a3415 100644 --- a/packages/core/test/capabilities/workspace.test.ts +++ b/packages/core/test/capabilities/workspace.test.ts @@ -4,10 +4,7 @@ import { join } from "node:path" import { afterEach, beforeEach, describe, expect, it, vi } from "vitest" import { createWorkspaceMarker } from "../../src/capabilities/built-in/workspace.js" -import type { - CapabilityMarkerContext, - DawnToolDefinition, -} from "../../src/capabilities/types.js" +import type { CapabilityMarkerContext, DawnToolDefinition } from "../../src/capabilities/types.js" function emptyManifest() { return { appRoot: "/app", routes: [] } @@ -108,10 +105,7 @@ describe("createWorkspaceMarker — load", () => { writeFileSync(join(routeDir, "workspace", "ok.txt"), "ok", "utf8") const contribution = await createWorkspaceMarker().load(routeDir, ctx()) const readTool = findTool(contribution.tools, "readFile") - const result = await readTool.run( - { path: "ok.txt" }, - { signal: new AbortController().signal }, - ) + const result = await readTool.run({ path: "ok.txt" }, { signal: new AbortController().signal }) expect(result).toBe("ok") }) From 563952bc28fc0a677eea83cafe6b8d40bc016aee Mon Sep 17 00:00:00 2001 From: Brian Love Date: Wed, 20 May 2026 18:09:10 -0700 Subject: [PATCH 14/23] feat(cli): register workspace capability + thread backends from dawn.config Registers createWorkspaceMarker in the capability registry. Loads dawn.config.ts at the start of prepareRouteExecution and threads config.backends into the CapabilityMarkerContext so the workspace marker uses the configured backends (defaulting to localFilesystem + localExec when none are configured). Co-Authored-By: Claude Opus 4.7 --- packages/cli/package.json | 1 + packages/cli/src/lib/runtime/execute-route.ts | 16 ++++++++++++++++ pnpm-lock.yaml | 3 +++ 3 files changed, 20 insertions(+) diff --git a/packages/cli/package.json b/packages/cli/package.json index 3c4abbb..9cc6f50 100644 --- a/packages/cli/package.json +++ b/packages/cli/package.json @@ -52,6 +52,7 @@ "devDependencies": { "@dawn-ai/config-typescript": "workspace:*", "@dawn-ai/sdk": "workspace:*", + "@dawn-ai/workspace": "workspace:*", "@langchain/core": "1.1.46", "@types/node": "25.6.0" } diff --git a/packages/cli/src/lib/runtime/execute-route.ts b/packages/cli/src/lib/runtime/execute-route.ts index 62247df..0a6df83 100644 --- a/packages/cli/src/lib/runtime/execute-route.ts +++ b/packages/cli/src/lib/runtime/execute-route.ts @@ -10,8 +10,10 @@ import { createPlanningMarker, createSkillsMarker, createSubagentsMarker, + createWorkspaceMarker, discoverRoutes, findDawnApp, + loadDawnConfig, type ResolvedStateField, type RouteDefinition, type RouteManifest, @@ -19,6 +21,7 @@ import { } from "@dawn-ai/core" import { executeAgent, type SubagentResolver, streamAgent } from "@dawn-ai/langchain" import { type DawnAgent, isDawnAgent } from "@dawn-ai/sdk" +import type { ExecBackend, FilesystemBackend } from "@dawn-ai/workspace" import { checkToolNameUniqueness } from "./check-tool-name-uniqueness.js" import { createDawnContext } from "./dawn-context.js" import { normalizeRouteModule } from "./load-route-kind.js" @@ -274,6 +277,7 @@ async function prepareRouteExecution(options: { createAgentsMdMarker(), createSkillsMarker(), createSubagentsMarker(), + createWorkspaceMarker(), ]) const routeManifest = await discoverRoutes({ appRoot: options.appRoot }) const descriptor = @@ -286,10 +290,22 @@ async function prepareRouteExecution(options: { // invalidated in dev when the runtime rebuilds the manifest. const descriptorRouteMap = await getCachedDescriptorRouteMap(routeManifest) + let configBackends: + | { readonly filesystem?: FilesystemBackend; readonly exec?: ExecBackend } + | undefined + try { + const loaded = await loadDawnConfig({ appRoot: options.appRoot }) + configBackends = loaded.config.backends + } catch { + // No dawn.config.ts (or unreadable). The workspace capability falls + // back to its defaults (localFilesystem + localExec). + } + const applied = await applyCapabilities(registry, routeDir, { routeManifest, descriptor, descriptorRouteMap, + ...(configBackends ? { backends: configBackends } : {}), }) if (applied.errors.length > 0) { diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 7db8de9..f91d6eb 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -190,6 +190,9 @@ importers: '@dawn-ai/sdk': specifier: workspace:* version: link:../sdk + '@dawn-ai/workspace': + specifier: workspace:* + version: link:../workspace '@langchain/core': specifier: 1.1.46 version: 1.1.46(openai@6.37.0(ws@8.20.1)(zod@4.4.3))(ws@8.20.1) From 3b89c0cd1054e7ab00870969f4431253cba17a25 Mon Sep 17 00:00:00 2001 From: Brian Love Date: Wed, 20 May 2026 18:13:57 -0700 Subject: [PATCH 15/23] feat(cli): typegen surfaces workspace tools for routes with workspace/ Co-Authored-By: Claude Opus 4.7 --- packages/cli/src/lib/typegen/run-typegen.ts | 34 +++++++++++++++++++++ packages/cli/test/run-typegen.test.ts | 31 +++++++++++++++++++ 2 files changed, 65 insertions(+) diff --git a/packages/cli/src/lib/typegen/run-typegen.ts b/packages/cli/src/lib/typegen/run-typegen.ts index baf6944..90a0652 100644 --- a/packages/cli/src/lib/typegen/run-typegen.ts +++ b/packages/cli/src/lib/typegen/run-typegen.ts @@ -39,6 +39,37 @@ const SUBAGENTS_EXTRA_TOOL: ExtractedToolType = { outputType: `string`, } +const WORKSPACE_EXTRA_TOOLS: readonly ExtractedToolType[] = [ + { + name: "readFile", + description: "Read a UTF-8 file from the workspace.", + inputType: `{ path: string }`, + outputType: `string`, + }, + { + name: "writeFile", + description: "Write a UTF-8 file inside the workspace.", + inputType: `{ path: string; content: string }`, + outputType: `string`, + }, + { + name: "listDir", + description: "List entries in a workspace directory.", + inputType: `{ path?: string }`, + outputType: `string[]`, + }, + { + name: "runBash", + description: "Run a shell command inside the workspace.", + inputType: `{ command: string }`, + outputType: `{ stdout: string; stderr: string; exitCode: number }`, + }, +] + +function hasWorkspace(routeDir: string): boolean { + return existsSync(join(routeDir, "workspace")) +} + const SKILL_DIR_NAME_RE = /^[A-Za-z0-9][A-Za-z0-9_-]*$/ function hasSkills(routeDir: string): boolean { @@ -121,6 +152,9 @@ export async function runTypegen(options: { if (hasSubagents(route.routeDir)) { extraTools.push(SUBAGENTS_EXTRA_TOOL) } + if (hasWorkspace(route.routeDir)) { + extraTools.push(...WORKSPACE_EXTRA_TOOLS) + } routeToolTypes.push({ pathname: route.pathname, diff --git a/packages/cli/test/run-typegen.test.ts b/packages/cli/test/run-typegen.test.ts index c044dc3..c71c719 100644 --- a/packages/cli/test/run-typegen.test.ts +++ b/packages/cli/test/run-typegen.test.ts @@ -151,6 +151,37 @@ describe("runTypegen", () => { expect(content).not.toContain("Dispatch a sub-task") }) + test("includes workspace tools in generated types when workspace/ directory exists", async () => { + const { appRoot, routeDir } = await setupApp() + await mkdir(join(routeDir, "workspace"), { recursive: true }) + + const manifest = await discoverRoutes({ appRoot }) + await runTypegen({ appRoot, manifest }) + + const dtsPath = join(appRoot, ".dawn", "dawn.generated.d.ts") + const content = await readFile(dtsPath, "utf8") + + expect(content).toContain("readFile") + expect(content).toContain("writeFile") + expect(content).toContain("listDir") + expect(content).toContain("runBash") + expect(content).toContain("greet") + }) + + test("omits workspace tools when workspace/ directory is absent", async () => { + const { appRoot } = await setupApp() + const manifest = await discoverRoutes({ appRoot }) + await runTypegen({ appRoot, manifest }) + + const dtsPath = join(appRoot, ".dawn", "dawn.generated.d.ts") + const content = await readFile(dtsPath, "utf8") + + expect(content).not.toContain("Read a UTF-8 file from the workspace") + expect(content).not.toContain("Write a UTF-8 file inside the workspace") + expect(content).not.toContain("List entries in a workspace directory") + expect(content).not.toContain("Run a shell command inside the workspace") + }) + test("writes state.json when state.ts exists", async () => { const { appRoot } = await setupApp({ withState: true }) const manifest = await discoverRoutes({ appRoot }) From 7eff501f7d27425e678e897994cb97d0bf2acb54 Mon Sep 17 00:00:00 2001 From: Brian Love Date: Wed, 20 May 2026 18:16:44 -0700 Subject: [PATCH 16/23] refactor(examples/chat): migrate to workspace capability Delete the hand-rolled readFile/writeFile/listDir/runBash tool files (and their workspace-path helpers) from both the /chat route and the research subagent. The workspace capability auto-contributes these tools when the route has a workspace/ directory, so add empty workspace/ dirs (with .gitkeep) under both routes to opt in. Co-Authored-By: Claude Opus 4.7 --- .../chat/server/src/app/chat/tools/listDir.ts | 16 ------ .../server/src/app/chat/tools/readFile.ts | 16 ------ .../chat/server/src/app/chat/tools/runBash.ts | 37 ------------- .../server/src/app/chat/tools/writeFile.ts | 17 ------ .../src/app/chat/workspace-path.test.ts | 54 ------------------- .../server/src/app/chat/workspace-path.ts | 49 ----------------- .../server/src/app/chat/workspace/.gitkeep | 0 .../subagents/research/tools/listDir.ts | 16 ------ .../subagents/research/tools/readFile.ts | 16 ------ .../subagents/research/workspace-path.ts | 49 ----------------- .../subagents/research/workspace/.gitkeep | 0 11 files changed, 270 deletions(-) delete mode 100644 examples/chat/server/src/app/chat/tools/listDir.ts delete mode 100644 examples/chat/server/src/app/chat/tools/readFile.ts delete mode 100644 examples/chat/server/src/app/chat/tools/runBash.ts delete mode 100644 examples/chat/server/src/app/chat/tools/writeFile.ts delete mode 100644 examples/chat/server/src/app/chat/workspace-path.test.ts delete mode 100644 examples/chat/server/src/app/chat/workspace-path.ts create mode 100644 examples/chat/server/src/app/chat/workspace/.gitkeep delete mode 100644 examples/chat/server/src/app/coordinator/subagents/research/tools/listDir.ts delete mode 100644 examples/chat/server/src/app/coordinator/subagents/research/tools/readFile.ts delete mode 100644 examples/chat/server/src/app/coordinator/subagents/research/workspace-path.ts create mode 100644 examples/chat/server/src/app/coordinator/subagents/research/workspace/.gitkeep diff --git a/examples/chat/server/src/app/chat/tools/listDir.ts b/examples/chat/server/src/app/chat/tools/listDir.ts deleted file mode 100644 index 02e2ea7..0000000 --- a/examples/chat/server/src/app/chat/tools/listDir.ts +++ /dev/null @@ -1,16 +0,0 @@ -import { readdirSync, statSync } from "node:fs" -import { resolveWorkspacePath, workspaceRoot } from "../workspace-path.js" - -/** - * List the entries in a directory inside the workspace. - * Pass "." to list the workspace root. Subdirectories are suffixed with "/". - */ -export default async (input: { readonly path: string }): Promise => { - const dir = resolveWorkspacePath(workspaceRoot(), input.path) - const entries = readdirSync(dir) - entries.sort() - return entries.map((name) => { - const isDir = statSync(`${dir}/${name}`).isDirectory() - return isDir ? `${name}/` : name - }) -} diff --git a/examples/chat/server/src/app/chat/tools/readFile.ts b/examples/chat/server/src/app/chat/tools/readFile.ts deleted file mode 100644 index effb587..0000000 --- a/examples/chat/server/src/app/chat/tools/readFile.ts +++ /dev/null @@ -1,16 +0,0 @@ -import { readFileSync, statSync } from "node:fs" -import { resolveWorkspacePath, workspaceRoot } from "../workspace-path.js" - -const MAX_BYTES = 256 * 1024 - -/** - * Read a UTF-8 text file from the workspace. Rejects files larger than 256 KiB. - */ -export default async (input: { readonly path: string }): Promise => { - const file = resolveWorkspacePath(workspaceRoot(), input.path) - const size = statSync(file).size - if (size > MAX_BYTES) { - throw new Error(`File too large: ${size} bytes (limit ${MAX_BYTES})`) - } - return readFileSync(file, "utf8") -} diff --git a/examples/chat/server/src/app/chat/tools/runBash.ts b/examples/chat/server/src/app/chat/tools/runBash.ts deleted file mode 100644 index 09a3c5c..0000000 --- a/examples/chat/server/src/app/chat/tools/runBash.ts +++ /dev/null @@ -1,37 +0,0 @@ -import { spawn } from "node:child_process" -import { workspaceRoot } from "../workspace-path.js" - -const MAX_TIMEOUT_SECONDS = 120 - -/** - * Run a bash command in the workspace directory. Captures stdout and stderr, - * enforces a hard timeout, and returns the combined output with an exit-code - * footer. NOT a sandbox — do not run untrusted commands. - */ -export default async ( - input: { readonly command: string; readonly timeoutSeconds: number }, -): Promise => { - const timeout = Math.min(Math.max(1, input.timeoutSeconds), MAX_TIMEOUT_SECONDS) - const cwd = workspaceRoot() - - return new Promise((resolveResult) => { - const child = spawn("bash", ["-c", input.command], { cwd }) - let output = "" - child.stdout.on("data", (chunk) => { - output += chunk.toString() - }) - child.stderr.on("data", (chunk) => { - output += chunk.toString() - }) - - const timer = setTimeout(() => { - child.kill("SIGKILL") - output += `\n[killed: exceeded ${timeout}s timeout]` - }, timeout * 1000) - - child.on("close", (code) => { - clearTimeout(timer) - resolveResult(`${output}\n[exit ${code ?? "?"}]`) - }) - }) -} diff --git a/examples/chat/server/src/app/chat/tools/writeFile.ts b/examples/chat/server/src/app/chat/tools/writeFile.ts deleted file mode 100644 index dd180ca..0000000 --- a/examples/chat/server/src/app/chat/tools/writeFile.ts +++ /dev/null @@ -1,17 +0,0 @@ -import { mkdirSync, writeFileSync } from "node:fs" -import { dirname } from "node:path" -import { resolveWorkspacePath, workspaceRoot } from "../workspace-path.js" - -/** - * Write a UTF-8 text file to the workspace. Overwrites existing files. - * Creates parent directories as needed. Returns a one-line summary. - */ -export default async ( - input: { readonly path: string; readonly content: string }, -): Promise => { - const file = resolveWorkspacePath(workspaceRoot(), input.path) - mkdirSync(dirname(file), { recursive: true }) - writeFileSync(file, input.content, "utf8") - const bytes = Buffer.byteLength(input.content, "utf8") - return `wrote ${bytes} bytes to ${input.path}` -} diff --git a/examples/chat/server/src/app/chat/workspace-path.test.ts b/examples/chat/server/src/app/chat/workspace-path.test.ts deleted file mode 100644 index 6d4926e..0000000 --- a/examples/chat/server/src/app/chat/workspace-path.test.ts +++ /dev/null @@ -1,54 +0,0 @@ -import { mkdtempSync, mkdirSync, rmSync, symlinkSync, writeFileSync } from "node:fs" -import { tmpdir } from "node:os" -import { join } from "node:path" -import { afterEach, beforeEach, describe, expect, it } from "vitest" -import { resolveWorkspacePath } from "./workspace-path.js" - -describe("resolveWorkspacePath", () => { - let root: string - let workspace: string - - beforeEach(() => { - root = mkdtempSync(join(tmpdir(), "dawn-chat-")) - workspace = join(root, "workspace") - mkdirSync(workspace, { recursive: true }) - }) - - afterEach(() => { - rmSync(root, { recursive: true, force: true }) - }) - - it("resolves a simple relative path inside the workspace", () => { - const resolved = resolveWorkspacePath(workspace, "notes.md") - expect(resolved).toBe(join(workspace, "notes.md")) - }) - - it("resolves nested paths", () => { - const resolved = resolveWorkspacePath(workspace, "a/b/c.txt") - expect(resolved).toBe(join(workspace, "a/b/c.txt")) - }) - - it("treats '.' as the workspace root", () => { - expect(resolveWorkspacePath(workspace, ".")).toBe(workspace) - }) - - it("rejects absolute paths", () => { - expect(() => resolveWorkspacePath(workspace, "/etc/passwd")).toThrow(/absolute/i) - }) - - it("rejects paths that escape via ..", () => { - expect(() => resolveWorkspacePath(workspace, "../escape.txt")).toThrow(/outside workspace/i) - }) - - it("rejects paths that escape after normalization", () => { - expect(() => resolveWorkspacePath(workspace, "a/../../escape.txt")).toThrow(/outside workspace/i) - }) - - it("rejects symlinks that point outside the workspace", () => { - const outside = join(root, "outside.txt") - writeFileSync(outside, "secret") - const link = join(workspace, "link.txt") - symlinkSync(outside, link) - expect(() => resolveWorkspacePath(workspace, "link.txt")).toThrow(/outside workspace/i) - }) -}) diff --git a/examples/chat/server/src/app/chat/workspace-path.ts b/examples/chat/server/src/app/chat/workspace-path.ts deleted file mode 100644 index d775a41..0000000 --- a/examples/chat/server/src/app/chat/workspace-path.ts +++ /dev/null @@ -1,49 +0,0 @@ -import { existsSync, mkdirSync, realpathSync } from "node:fs" -import { isAbsolute, normalize, relative, resolve } from "node:path" - -/** - * Resolve a user-supplied path against a workspace root, rejecting anything - * that would escape the workspace. - * - * Rules: - * - Absolute paths are rejected outright. - * - The path is normalized; any `..` segment that escapes the workspace is rejected. - * - If the resolved path (or any ancestor) is a symlink, its real path must - * also be inside the workspace. - * - * The workspace directory is created if it does not exist. - */ -export function resolveWorkspacePath(workspaceRoot: string, userPath: string): string { - if (!existsSync(workspaceRoot)) { - mkdirSync(workspaceRoot, { recursive: true }) - } - - if (isAbsolute(userPath)) { - throw new Error(`Path is absolute: ${userPath}`) - } - - const normalized = normalize(userPath) - const resolved = resolve(workspaceRoot, normalized) - const rel = relative(workspaceRoot, resolved) - if (rel.startsWith("..")) { - throw new Error(`Path is outside workspace: ${userPath}`) - } - - // Symlink check: if the path exists and resolves outside, reject. - if (existsSync(resolved)) { - const real = realpathSync(resolved) - const realRel = relative(realpathSync(workspaceRoot), real) - if (realRel.startsWith("..")) { - throw new Error(`Path resolves outside workspace via symlink: ${userPath}`) - } - } - - return resolved -} - -/** - * Resolve the workspace root for the example. Lives at `/workspace`. - */ -export function workspaceRoot(): string { - return resolve(process.cwd(), "workspace") -} diff --git a/examples/chat/server/src/app/chat/workspace/.gitkeep b/examples/chat/server/src/app/chat/workspace/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/examples/chat/server/src/app/coordinator/subagents/research/tools/listDir.ts b/examples/chat/server/src/app/coordinator/subagents/research/tools/listDir.ts deleted file mode 100644 index 02e2ea7..0000000 --- a/examples/chat/server/src/app/coordinator/subagents/research/tools/listDir.ts +++ /dev/null @@ -1,16 +0,0 @@ -import { readdirSync, statSync } from "node:fs" -import { resolveWorkspacePath, workspaceRoot } from "../workspace-path.js" - -/** - * List the entries in a directory inside the workspace. - * Pass "." to list the workspace root. Subdirectories are suffixed with "/". - */ -export default async (input: { readonly path: string }): Promise => { - const dir = resolveWorkspacePath(workspaceRoot(), input.path) - const entries = readdirSync(dir) - entries.sort() - return entries.map((name) => { - const isDir = statSync(`${dir}/${name}`).isDirectory() - return isDir ? `${name}/` : name - }) -} diff --git a/examples/chat/server/src/app/coordinator/subagents/research/tools/readFile.ts b/examples/chat/server/src/app/coordinator/subagents/research/tools/readFile.ts deleted file mode 100644 index effb587..0000000 --- a/examples/chat/server/src/app/coordinator/subagents/research/tools/readFile.ts +++ /dev/null @@ -1,16 +0,0 @@ -import { readFileSync, statSync } from "node:fs" -import { resolveWorkspacePath, workspaceRoot } from "../workspace-path.js" - -const MAX_BYTES = 256 * 1024 - -/** - * Read a UTF-8 text file from the workspace. Rejects files larger than 256 KiB. - */ -export default async (input: { readonly path: string }): Promise => { - const file = resolveWorkspacePath(workspaceRoot(), input.path) - const size = statSync(file).size - if (size > MAX_BYTES) { - throw new Error(`File too large: ${size} bytes (limit ${MAX_BYTES})`) - } - return readFileSync(file, "utf8") -} diff --git a/examples/chat/server/src/app/coordinator/subagents/research/workspace-path.ts b/examples/chat/server/src/app/coordinator/subagents/research/workspace-path.ts deleted file mode 100644 index d775a41..0000000 --- a/examples/chat/server/src/app/coordinator/subagents/research/workspace-path.ts +++ /dev/null @@ -1,49 +0,0 @@ -import { existsSync, mkdirSync, realpathSync } from "node:fs" -import { isAbsolute, normalize, relative, resolve } from "node:path" - -/** - * Resolve a user-supplied path against a workspace root, rejecting anything - * that would escape the workspace. - * - * Rules: - * - Absolute paths are rejected outright. - * - The path is normalized; any `..` segment that escapes the workspace is rejected. - * - If the resolved path (or any ancestor) is a symlink, its real path must - * also be inside the workspace. - * - * The workspace directory is created if it does not exist. - */ -export function resolveWorkspacePath(workspaceRoot: string, userPath: string): string { - if (!existsSync(workspaceRoot)) { - mkdirSync(workspaceRoot, { recursive: true }) - } - - if (isAbsolute(userPath)) { - throw new Error(`Path is absolute: ${userPath}`) - } - - const normalized = normalize(userPath) - const resolved = resolve(workspaceRoot, normalized) - const rel = relative(workspaceRoot, resolved) - if (rel.startsWith("..")) { - throw new Error(`Path is outside workspace: ${userPath}`) - } - - // Symlink check: if the path exists and resolves outside, reject. - if (existsSync(resolved)) { - const real = realpathSync(resolved) - const realRel = relative(realpathSync(workspaceRoot), real) - if (realRel.startsWith("..")) { - throw new Error(`Path resolves outside workspace via symlink: ${userPath}`) - } - } - - return resolved -} - -/** - * Resolve the workspace root for the example. Lives at `/workspace`. - */ -export function workspaceRoot(): string { - return resolve(process.cwd(), "workspace") -} diff --git a/examples/chat/server/src/app/coordinator/subagents/research/workspace/.gitkeep b/examples/chat/server/src/app/coordinator/subagents/research/workspace/.gitkeep new file mode 100644 index 0000000..e69de29 From 6b54c619483c2c1a967ca2acf0374702e5d19881 Mon Sep 17 00:00:00 2001 From: Brian Love Date: Wed, 20 May 2026 18:21:59 -0700 Subject: [PATCH 17/23] fix(core,cli): workspace capability uses cwd-relative root, matching agents-md T13's migration of the chat example surfaced a mismatch: the workspace capability was resolving to /workspace/ while the agents-md capability (and the prior hand-rolled tools) used /workspace/. Result: post-migration, the chat agent's memory file and its workspace tools pointed at completely different directories. Align the workspace capability with the existing convention: process.cwd()/workspace/. Same trigger as agents-md; same root as the deleted hand-rolled tools. The chat example's pre-existing examples/chat/server/workspace/ directory (with AGENTS.md) now serves as the workspace for both /chat and the research subagent. Removes the empty per-route workspace/ stubs T13 created. Co-Authored-By: Claude Opus 4.7 --- .../server/src/app/chat/workspace/.gitkeep | 0 .../subagents/research/workspace/.gitkeep | 0 packages/cli/src/lib/typegen/run-typegen.ts | 4 +- packages/cli/test/run-typegen.test.ts | 8 +++- .../src/capabilities/built-in/workspace.ts | 19 +++++++--- .../core/test/capabilities/workspace.test.ts | 38 +++++++++++++------ 6 files changed, 48 insertions(+), 21 deletions(-) delete mode 100644 examples/chat/server/src/app/chat/workspace/.gitkeep delete mode 100644 examples/chat/server/src/app/coordinator/subagents/research/workspace/.gitkeep diff --git a/examples/chat/server/src/app/chat/workspace/.gitkeep b/examples/chat/server/src/app/chat/workspace/.gitkeep deleted file mode 100644 index e69de29..0000000 diff --git a/examples/chat/server/src/app/coordinator/subagents/research/workspace/.gitkeep b/examples/chat/server/src/app/coordinator/subagents/research/workspace/.gitkeep deleted file mode 100644 index e69de29..0000000 diff --git a/packages/cli/src/lib/typegen/run-typegen.ts b/packages/cli/src/lib/typegen/run-typegen.ts index 90a0652..116d43b 100644 --- a/packages/cli/src/lib/typegen/run-typegen.ts +++ b/packages/cli/src/lib/typegen/run-typegen.ts @@ -66,8 +66,8 @@ const WORKSPACE_EXTRA_TOOLS: readonly ExtractedToolType[] = [ }, ] -function hasWorkspace(routeDir: string): boolean { - return existsSync(join(routeDir, "workspace")) +function hasWorkspace(_routeDir: string): boolean { + return existsSync(join(process.cwd(), "workspace")) } const SKILL_DIR_NAME_RE = /^[A-Za-z0-9][A-Za-z0-9_-]*$/ diff --git a/packages/cli/test/run-typegen.test.ts b/packages/cli/test/run-typegen.test.ts index c71c719..68b0d1f 100644 --- a/packages/cli/test/run-typegen.test.ts +++ b/packages/cli/test/run-typegen.test.ts @@ -8,8 +8,10 @@ import { afterEach, describe, expect, test } from "vitest" import { runTypegen } from "../src/lib/typegen/run-typegen.js" const tempDirs: string[] = [] +const originalCwd = process.cwd() afterEach(async () => { + process.chdir(originalCwd) await Promise.all(tempDirs.splice(0).map((dir) => rm(dir, { force: true, recursive: true }))) }) @@ -152,8 +154,9 @@ describe("runTypegen", () => { }) test("includes workspace tools in generated types when workspace/ directory exists", async () => { - const { appRoot, routeDir } = await setupApp() - await mkdir(join(routeDir, "workspace"), { recursive: true }) + const { appRoot } = await setupApp() + await mkdir(join(appRoot, "workspace"), { recursive: true }) + process.chdir(appRoot) const manifest = await discoverRoutes({ appRoot }) await runTypegen({ appRoot, manifest }) @@ -170,6 +173,7 @@ describe("runTypegen", () => { test("omits workspace tools when workspace/ directory is absent", async () => { const { appRoot } = await setupApp() + process.chdir(appRoot) const manifest = await discoverRoutes({ appRoot }) await runTypegen({ appRoot, manifest }) diff --git a/packages/core/src/capabilities/built-in/workspace.ts b/packages/core/src/capabilities/built-in/workspace.ts index 888c08a..73a7d0e 100644 --- a/packages/core/src/capabilities/built-in/workspace.ts +++ b/packages/core/src/capabilities/built-in/workspace.ts @@ -9,6 +9,15 @@ import type { CapabilityMarker, DawnToolDefinition } from "../types.js" const WORKSPACE_DIRNAME = "workspace" +/** + * Resolve the workspace root to a cwd-relative path. This matches the + * AGENTS.md capability's resolution (process.cwd() + "workspace") so + * the agent's memory and workspace tools point at the same directory. + */ +function workspaceRoot(): string { + return join(process.cwd(), WORKSPACE_DIRNAME) +} + const READ_FILE_INPUT = z.object({ path: z.string().min(1) }) const WRITE_FILE_INPUT = z.object({ path: z.string().min(1), content: z.string() }) const LIST_DIR_INPUT = z.object({ path: z.string().default(".") }) @@ -86,13 +95,13 @@ function buildWorkspaceTools( export function createWorkspaceMarker(): CapabilityMarker { return { name: "workspace", - detect: async (routeDir, _context) => existsSync(join(routeDir, WORKSPACE_DIRNAME)), - load: async (routeDir, context) => { - const workspaceRoot = join(routeDir, WORKSPACE_DIRNAME) - if (!existsSync(workspaceRoot)) return {} + detect: async (_routeDir, _context) => existsSync(workspaceRoot()), + load: async (_routeDir, context) => { + const root = workspaceRoot() + if (!existsSync(root)) return {} const fs = context.backends?.filesystem ?? localFilesystem() const exec = context.backends?.exec ?? localExec() - return { tools: buildWorkspaceTools(workspaceRoot, fs, exec) } + return { tools: buildWorkspaceTools(root, fs, exec) } }, } } diff --git a/packages/core/test/capabilities/workspace.test.ts b/packages/core/test/capabilities/workspace.test.ts index 84a3415..041aa3a 100644 --- a/packages/core/test/capabilities/workspace.test.ts +++ b/packages/core/test/capabilities/workspace.test.ts @@ -6,6 +6,8 @@ import { afterEach, beforeEach, describe, expect, it, vi } from "vitest" import { createWorkspaceMarker } from "../../src/capabilities/built-in/workspace.js" import type { CapabilityMarkerContext, DawnToolDefinition } from "../../src/capabilities/types.js" +const originalCwd = process.cwd() + function emptyManifest() { return { appRoot: "/app", routes: [] } } @@ -28,34 +30,46 @@ function findTool( } describe("createWorkspaceMarker — detect", () => { + let appRoot: string let routeDir: string beforeEach(() => { - routeDir = mkdtempSync(join(tmpdir(), "dawn-workspace-cap-")) + appRoot = mkdtempSync(join(tmpdir(), "dawn-workspace-cap-")) + routeDir = join(appRoot, "route") + mkdirSync(routeDir) + process.chdir(appRoot) }) afterEach(() => { - rmSync(routeDir, { recursive: true, force: true }) + process.chdir(originalCwd) + rmSync(appRoot, { recursive: true, force: true }) }) - it("returns false when no workspace/ directory exists", async () => { + it("returns false when no workspace/ directory exists at cwd", async () => { const detected = await createWorkspaceMarker().detect(routeDir, ctx()) expect(detected).toBe(false) }) - it("returns true when workspace/ exists", async () => { - mkdirSync(join(routeDir, "workspace")) + it("returns true when workspace/ exists at cwd", async () => { + mkdirSync(join(appRoot, "workspace")) const detected = await createWorkspaceMarker().detect(routeDir, ctx()) expect(detected).toBe(true) }) }) describe("createWorkspaceMarker — load", () => { + let appRoot: string let routeDir: string + let workspaceDir: string beforeEach(() => { - routeDir = mkdtempSync(join(tmpdir(), "dawn-workspace-cap-")) - mkdirSync(join(routeDir, "workspace")) + appRoot = mkdtempSync(join(tmpdir(), "dawn-workspace-cap-")) + routeDir = join(appRoot, "route") + mkdirSync(routeDir) + workspaceDir = join(appRoot, "workspace") + mkdirSync(workspaceDir) + process.chdir(appRoot) }) afterEach(() => { - rmSync(routeDir, { recursive: true, force: true }) + process.chdir(originalCwd) + rmSync(appRoot, { recursive: true, force: true }) }) it("contributes exactly four tools when workspace/ exists", async () => { @@ -65,13 +79,13 @@ describe("createWorkspaceMarker — load", () => { }) it("contributes no tools when workspace/ is absent", async () => { - rmSync(join(routeDir, "workspace"), { recursive: true }) + rmSync(workspaceDir, { recursive: true }) const contribution = await createWorkspaceMarker().load(routeDir, ctx()) expect(contribution.tools).toBeUndefined() }) it("readFile tool calls the configured backend with an absolute path inside the jail", async () => { - writeFileSync(join(routeDir, "workspace", "hello.txt"), "hi", "utf8") + writeFileSync(join(workspaceDir, "hello.txt"), "hi", "utf8") const fakeBackend = { readFile: vi.fn().mockResolvedValue("hi"), writeFile: vi.fn(), @@ -90,7 +104,7 @@ describe("createWorkspaceMarker — load", () => { expect(fakeBackend.readFile).toHaveBeenCalledOnce() const firstCall = fakeBackend.readFile.mock.calls[0] if (!firstCall) throw new Error("readFile was not called") - expect(firstCall[0]).toBe(join(routeDir, "workspace", "hello.txt")) + expect(firstCall[0]).toBe(join(process.cwd(), "workspace", "hello.txt")) }) it("rejects path-jail escapes with a clear error", async () => { @@ -102,7 +116,7 @@ describe("createWorkspaceMarker — load", () => { }) it("uses the default local backends when none configured", async () => { - writeFileSync(join(routeDir, "workspace", "ok.txt"), "ok", "utf8") + writeFileSync(join(workspaceDir, "ok.txt"), "ok", "utf8") const contribution = await createWorkspaceMarker().load(routeDir, ctx()) const readTool = findTool(contribution.tools, "readFile") const result = await readTool.run({ path: "ok.txt" }, { signal: new AbortController().signal }) From cf3273f06d53e6c81554a2253b71195eb9224012 Mon Sep 17 00:00:00 2001 From: Brian Love Date: Wed, 20 May 2026 20:55:26 -0700 Subject: [PATCH 18/23] docs(examples/chat): update prompt + README for workspace capability - system-prompt: runBash signature is { command } now (no timeoutSeconds); returns { stdout, stderr, exitCode } instead of a formatted string - README: status reflects shipped subagents + workspace capabilities; layout shows current file structure (no tools/, no workspace-path.ts); deferred list updated to flag HITL permission gating (sub-project 4.5) Co-Authored-By: Claude Opus 4.7 --- examples/chat/README.md | 45 ++++++++++++------- .../chat/server/src/app/chat/system-prompt.ts | 2 +- 2 files changed, 31 insertions(+), 16 deletions(-) diff --git a/examples/chat/README.md b/examples/chat/README.md index 4ab77e1..7772c33 100644 --- a/examples/chat/README.md +++ b/examples/chat/README.md @@ -1,13 +1,17 @@ # Chat — canonical Dawn harness example -> **Status:** foundational harness primitives (filesystem + bash) + the **planning** and -> **skills** capabilities. Subagents, sandbox isolation, and auto-summarization are still -> deferred — see "Deferred" below. +> **Status:** foundational harness primitives (filesystem + bash) plus the **planning**, +> **skills**, **subagents**, and **workspace** capabilities. Pluggable backend +> implementations (in-memory, remote sandbox) are available — see `dawn.config.ts`. HITL +> permission gating and auto-summarization are still deferred — see "Deferred" below. ## What this shows - Dawn route discovery and the `tools/` convention -- Filesystem tools (read/write/list) + bash, path-jailed to `./workspace` +- **Workspace capability** — when a route's working directory contains `workspace/`, Dawn + auto-contributes `readFile`/`writeFile`/`listDir`/`runBash` tools wired through pluggable + backends. The filesystem and exec backends default to local node:fs / child_process; swap + them in `dawn.config.ts` for in-memory storage, remote sandboxes, etc. - `AGENTS.md` memory autoload — Dawn auto-injects `workspace/AGENTS.md` into the system prompt on every turn; the agent updates it via `writeFile` - **Planning** — `plan.md` in the route directory opts the agent into the built-in `writeTodos` tool, a `todos` state channel, and a `plan_update` SSE event. Open the @@ -17,6 +21,10 @@ the agent's system prompt (name + description). The agent calls `readSkill({ name })` to load a skill's full body on demand. Two example skills ship with the demo: `workspace-conventions` and `recover-from-failure`. +- **Subagents** — `/coordinator` dispatches to specialist subagents (`research`, + `summarizer`) via an auto-generated `task({ subagent, input })` tool. Subagent runs + bubble `subagent.*` SSE events with `call_id` correlation. Pick the `/coordinator` route + in the smoke client to drive it. - End-to-end streaming from a Next.js client over SSE ## Model choice @@ -39,17 +47,24 @@ pnpm dev ``` examples/chat/ -├── server/ # @dawn-example/chat-server (Dawn route + tools) -│ └── src/app/chat/ -│ ├── index.ts # agent({ model, systemPrompt }) -│ ├── state.ts -│ ├── system-prompt.ts -│ ├── workspace-path.ts -│ ├── plan.md # presence enables planning; seeds initial todos -│ └── tools/ # listDir, readFile, writeFile, runBash +├── server/ # @dawn-example/chat-server (Dawn routes) +│ ├── dawn.config.ts # appDir + optional backends config +│ ├── workspace/ # shared workspace (AGENTS.md lives here) +│ └── src/app/ +│ ├── chat/ # /chat route +│ │ ├── index.ts # agent({ model, systemPrompt }) +│ │ ├── state.ts +│ │ ├── system-prompt.ts +│ │ ├── plan.md # presence enables planning +│ │ └── skills/ # SKILL.md files per skill +│ └── coordinator/ # /coordinator route + subagents +│ ├── index.ts +│ └── subagents/ +│ ├── research/index.ts +│ └── summarizer/index.ts └── web/ # @dawn-example/chat-web (Next.js smoke client) └── app/ - ├── page.tsx # textarea + Send + raw event log + ├── page.tsx # route picker + textarea + Send + raw event log └── api/chat/route.ts # SSE proxy ``` @@ -63,8 +78,8 @@ shell expansion — all possible. Do not point untrusted users at this example. These v1 deferrals are the explicit forcing function for Dawn's opinionated harness work: -- Subagent delegation (`task`-style tool) — needs first-class subagent declarations -- Real sandbox isolation for `runBash` — needs pluggable execution backends +- HITL permission gating — interrupt the run when a path is outside the workspace or a + command is high-risk, ask the user, persist the decision - Tool-output offloading and context summarization — needs lifecycle hooks - Nested-object tool inputs (e.g., `edit_file({ edits: [{ old, new }] })`) — typegen extension - Polished web UI — wait for harness primitives to stabilize diff --git a/examples/chat/server/src/app/chat/system-prompt.ts b/examples/chat/server/src/app/chat/system-prompt.ts index 50aef8a..54fe483 100644 --- a/examples/chat/server/src/app/chat/system-prompt.ts +++ b/examples/chat/server/src/app/chat/system-prompt.ts @@ -5,7 +5,7 @@ You operate in a sandboxed \`workspace/\` directory. You have four tools: - \`listDir({ path })\` — list directory contents. Pass "." for the workspace root. - \`readFile({ path })\` — read a UTF-8 text file (max 256 KiB). - \`writeFile({ path, content })\` — create or overwrite a text file. -- \`runBash({ command, timeoutSeconds })\` — run a shell command in the workspace. Use \`timeoutSeconds: 30\` unless the task clearly needs longer (max 120). +- \`runBash({ command })\` — run a shell command in the workspace. Returns \`{ stdout, stderr, exitCode }\`. Commands time out after 30 seconds by default. Memory convention: when you complete meaningful work, update \`AGENTS.md\` (via \`writeFile\`) so future-you remembers what mattered. Dawn auto-injects the current contents of \`workspace/AGENTS.md\` into your system prompt on every turn under the "# Memory" heading — you don't need to read or list it manually. From e66860f2aa964b2c051dcbd7d2c7aef1be19bbc7 Mon Sep 17 00:00:00 2001 From: Brian Love Date: Thu, 21 May 2026 13:25:21 -0700 Subject: [PATCH 19/23] =?UTF-8?q?feat:=20phase=203=20=E2=80=94=20HITL=20pe?= =?UTF-8?q?rmissions=20(sub-project=204.5)=20(#171)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * docs: phase 3 HITL permissions design spec (sub-project 4.5) Designs the human-in-the-loop permission system that builds on sub- project 4's workspace capability. Path-jail escapes and every first- occurrence bash command trigger an interrupt prompt with three approval scopes (Once / Always-for-pattern / Deny). Persisted "always" decisions live in .dawn/permissions.json (project-local, gitignored) using a tool-keyed flat-string format that's forward-compatible with future tool categories. Three operating modes: interactive (dev default), non-interactive (production / CI), bypass (explicit "operator knows what they're doing"). dawn.config.ts gains a permissions field with mode, allow, deny — same shape as the runtime file. DAWN_PERMISSIONS_MODE env var overrides config for the session. SSE envelope shape is Agent-Protocol-compatible so sub-project 7 can implement the spec on top of this without refactoring. New @dawn-ai/permissions package ships types + pattern-matching engine + store. Workspace capability gains a permission check between the path-jail / bash invocation and the actual backend call. Co-Authored-By: Claude Opus 4.7 * docs: implementation plan for phase 3 HITL permissions Bite-sized, TDD-structured plan for sub-project 4.5. 13 tasks across five phases: @dawn-ai/permissions package (T1-T5), config + capability changes (T6-T7), runtime interrupt + resume (T8-T9), chat demo updates (T10-T11), smoke + PR (T12-T13). Co-Authored-By: Claude Opus 4.7 * scaffold(permissions): empty @dawn-ai/permissions package Adds the package skeleton for the upcoming HITL permissions system. No exports yet — types, pattern matching, and store land in subsequent commits. Co-Authored-By: Claude Opus 4.7 * feat(permissions): public types Co-Authored-By: Claude Opus 4.7 * feat(permissions): suggested-pattern helpers Co-Authored-By: Claude Opus 4.7 * feat(permissions): pattern-matching engine Co-Authored-By: Claude Opus 4.7 * feat(permissions): PermissionsStore with file I/O, write queue, gitignore handling Co-Authored-By: Claude Opus 4.7 * feat(core): extend DawnConfig + CapabilityMarkerContext with permissions Type-only edge to @dawn-ai/permissions. Workspace capability will read context.permissions in a subsequent commit. Co-Authored-By: Claude Opus 4.7 * feat(core): workspace capability gates through PermissionsStore Each tool's run() consults the optional PermissionsStore in the capability context: - Path tools (readFile/writeFile/listDir): silent for paths inside the workspace; consult the store for paths outside. - runBash: gate every command regardless of path. Three modes: - interactive: unknown ops emit LangGraph interrupt() and pause the run - non-interactive: unknown ops hard-refuse (fail-closed) - bypass: all ops proceed (path-jail disabled), warn on capability load The old pathJail() helper is removed — the gate now handles out-of-workspace cases via the permission flow. Also packs @dawn-ai/permissions in the CLI typegen install test so the external install can resolve core's new runtime dep. Co-Authored-By: Claude Opus 4.7 * feat(langchain): propagate LangGraph interrupt events to the SSE stream LangGraph 1.x's `interrupt()` does not emit a dedicated streamEvents v2 event; the parked state surfaces as `__interrupt__: [{value, ...}]` in the graph's final `on_chain_end` output. Detect this and yield a {type: "interrupt", data: payload} chunk so the SSE consumer (and the soon-to-arrive resume endpoint) sees the workspace capability's PermissionRequest envelope verbatim. Co-Authored-By: Claude Opus 4.7 * feat(cli): resume endpoint + PermissionsStore wiring Adds POST /threads/:thread_id/resume to the dev runtime server, backed by a module-level pending-interrupts map keyed by thread_id. The endpoint validates interrupt_id (409 on stale, 400 on missing/invalid decision) and invokes the registered resolve() callback before clearing the entry. execute-route.ts now loads the permissions config from dawn.config.ts, honors the DAWN_PERMISSIONS_MODE env override, constructs a PermissionsStore via createPermissionsStore, and threads it into applyCapabilities so the workspace capability's gates have a store to consult. streamResolvedRoute also bridges {type: "interrupt"} chunks from the agent-adapter into the pending map when called with a threadId. Approach: two-call via checkpointer was the only viable option (the in-process Deferred pattern would require the parked tool call to yield back into Node's microtask queue, which LangGraph's GraphInterrupt unwinds). However, Dawn does not yet wire a LangGraph MemorySaver or propagate thread_id into createReactAgent — that plumbing arrives with the Agent Protocol work in sub-project 7. Until then resolve() is a no-op stub: the endpoint accepts decisions and clears the entry, but cannot actually replay the parked graph. The smoke test (T12) will exercise the loop end-to-end once checkpointer support lands. Co-Authored-By: Claude Opus 4.7 * fix(langchain,cli): wire checkpointer + thread_id; complete resume mechanism The original T8/T9 implementation propagated interrupts to the SSE stream and registered them in pendingByThread, but the resume callback was a no-op stub because LangGraph 1.x requires a MemorySaver checkpointer + a stable thread_id to actually replay from the parked state. This commit: - Wires a process-level MemorySaver into createReactAgent. - Propagates thread_id from the request body through streamResolvedRoute to streamAgent to config.configurable.thread_id. - When agent-adapter detects an interrupt, it registers a resolve callback in pendingByThread and awaits the user's decision. - On resume, the adapter re-invokes the graph with new Command({resume}) and yields the resulting events into the same SSE stream. - Moves pending-interrupts.ts to @dawn-ai/langchain so the adapter imports the same map the resume endpoint writes to. Co-Authored-By: Claude Opus 4.7 * feat(examples/chat): seed permissions allow/deny in dawn.config.ts Co-Authored-By: Claude Opus 4.7 * feat(examples/chat-web): inline permission panel + resume proxy Detects 'event: interrupt' frames in the SSE stream; renders an inline panel with Once / Always-for-pattern / Deny buttons; POSTs the decision through /api/permission-resume to the Dawn server's resume endpoint. Co-Authored-By: Claude Opus 4.7 * fix(langchain): bind streamEvents to its Pregel instance to restore /runs/stream Co-Authored-By: Claude Opus 4.7 * fix(langchain): detect LangGraph interrupts at correct event/data path LangGraph 1.x surfaces a tool-invoked interrupt() via streamEvents v2 as an on_tool_error whose data.error is a stringified GraphInterrupt — the leading JSON array is the interrupts list. The top-level LangGraph on_chain_end does not carry __interrupt__ on this path. Parse the error string in on_tool_error to surface the interrupt SSE event; keep __interrupt__-on-chain-end and live GraphInterrupt object detection as defensive fallbacks. Co-Authored-By: Claude Opus 4.7 --------- Co-authored-by: Claude Opus 4.7 --- .../plans/2026-05-21-phase3-permissions.md | 1953 +++++++++++++++++ .../2026-05-21-phase3-permissions-design.md | 347 +++ examples/chat/server/dawn.config.ts | 15 +- .../web/app/api/permission-resume/route.ts | 31 + examples/chat/web/app/page.tsx | 96 +- packages/cli/package.json | 1 + packages/cli/src/lib/dev/runtime-server.ts | 71 + packages/cli/src/lib/runtime/execute-route.ts | 53 +- .../cli/src/lib/runtime/pending-interrupts.ts | 16 + packages/cli/test/resume-endpoint.test.ts | 132 ++ packages/cli/test/typegen-command.test.ts | 2 + packages/cli/tsconfig.build.json | 6 +- packages/core/package.json | 2 + .../src/capabilities/built-in/workspace.ts | 151 +- packages/core/src/capabilities/types.ts | 2 + packages/core/src/types.ts | 6 + .../core/test/capabilities/workspace.test.ts | 58 +- packages/langchain/src/agent-adapter.ts | 413 +++- packages/langchain/src/index.ts | 7 + packages/langchain/src/pending-interrupts.ts | 43 + .../test/agent-adapter-interrupt.test.ts | 337 +++ packages/permissions/package.json | 42 + packages/permissions/src/index.ts | 12 + packages/permissions/src/pattern-matching.ts | 26 + packages/permissions/src/permissions-store.ts | 146 ++ packages/permissions/src/suggested-pattern.ts | 22 + packages/permissions/src/types.ts | 50 + .../permissions/test/pattern-matching.test.ts | 29 + .../test/permissions-store.test.ts | 169 ++ .../test/suggested-pattern.test.ts | 41 + packages/permissions/tsconfig.json | 9 + packages/permissions/vitest.config.ts | 9 + pnpm-lock.yaml | 18 + 33 files changed, 4212 insertions(+), 103 deletions(-) create mode 100644 docs/superpowers/plans/2026-05-21-phase3-permissions.md create mode 100644 docs/superpowers/specs/2026-05-21-phase3-permissions-design.md create mode 100644 examples/chat/web/app/api/permission-resume/route.ts create mode 100644 packages/cli/src/lib/runtime/pending-interrupts.ts create mode 100644 packages/cli/test/resume-endpoint.test.ts create mode 100644 packages/langchain/src/pending-interrupts.ts create mode 100644 packages/langchain/test/agent-adapter-interrupt.test.ts create mode 100644 packages/permissions/package.json create mode 100644 packages/permissions/src/index.ts create mode 100644 packages/permissions/src/pattern-matching.ts create mode 100644 packages/permissions/src/permissions-store.ts create mode 100644 packages/permissions/src/suggested-pattern.ts create mode 100644 packages/permissions/src/types.ts create mode 100644 packages/permissions/test/pattern-matching.test.ts create mode 100644 packages/permissions/test/permissions-store.test.ts create mode 100644 packages/permissions/test/suggested-pattern.test.ts create mode 100644 packages/permissions/tsconfig.json create mode 100644 packages/permissions/vitest.config.ts diff --git a/docs/superpowers/plans/2026-05-21-phase3-permissions.md b/docs/superpowers/plans/2026-05-21-phase3-permissions.md new file mode 100644 index 0000000..b2d803b --- /dev/null +++ b/docs/superpowers/plans/2026-05-21-phase3-permissions.md @@ -0,0 +1,1953 @@ +# Phase 3 — HITL Permissions Implementation Plan (sub-project 4.5) + +> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. + +**Goal:** Replace the workspace capability's hard-refuse-on-path-jail-escape behavior with a human-in-the-loop interrupt flow, and add the same prompt-for-approval gating to `runBash` and all path-touching operations outside the workspace. Three operating modes (interactive / non-interactive / bypass) configurable in `dawn.config.ts` and overridable via `DAWN_PERMISSIONS_MODE` env var. Persisted decisions live in `.dawn/permissions.json` (project-local, gitignored). + +**Architecture:** New `@dawn-ai/permissions` package ships `PermissionsStore` (file I/O + pattern matching + write queue), public types, smart-default pattern inference. Workspace capability adds a permission check between path-jail / bash invocation and the backend call. Dawn's HTTP dev server (`packages/cli/src/lib/dev/runtime-server.ts`) gains a `POST /threads/:thread_id/resume` endpoint. Agent adapter propagates LangGraph `interrupt()` events as `event: interrupt` SSE envelopes. Chat-web client renders an inline permission panel and proxies resume calls. + +**Tech Stack:** TypeScript, pnpm workspaces, vitest, zod, LangGraph 1.x `interrupt()` + `Command({resume})`, native `node:http`, Next.js 16. + +**Spec:** `docs/superpowers/specs/2026-05-21-phase3-permissions-design.md` + +--- + +## File Structure (locked here, used by all tasks) + +### New package + +| Path | Purpose | +|---|---| +| `packages/permissions/package.json` | `@dawn-ai/permissions` manifest | +| `packages/permissions/tsconfig.json` | TS config (mirror sibling packages) | +| `packages/permissions/vitest.config.ts` | Vitest config | +| `packages/permissions/src/index.ts` | Barrel re-exports | +| `packages/permissions/src/types.ts` | `PermissionsFile`, `PermissionMode`, `PermissionRequest`, `PermissionDecision`, `PermissionsStore` interface | +| `packages/permissions/src/pattern-matching.ts` | `match(tool, candidate, allowMap, denyMap)` → `"allow" | "deny" | "unknown"` | +| `packages/permissions/src/suggested-pattern.ts` | `suggestedCommandPattern(cmd)` + `suggestedPathPattern(path)` | +| `packages/permissions/src/permissions-store.ts` | `createPermissionsStore({appRoot, config, mode})` — load, match, addAllow, gitignore handling, write queue | +| `packages/permissions/test/*.test.ts` | Unit tests per file | + +### New + modified in existing packages + +| Path | Change | +|---|---| +| `packages/core/package.json` | Add `@dawn-ai/permissions` to dependencies | +| `packages/core/src/types.ts` | Extend `DawnConfig` with `permissions?: { mode, allow, deny }` | +| `packages/core/src/capabilities/types.ts` | Extend `CapabilityMarkerContext` with `permissions?: PermissionsStore` | +| `packages/core/src/capabilities/built-in/workspace.ts` | Gate every tool's `run()` through the permissions store; mode-aware path-jail (bypass disables) | +| `packages/core/test/capabilities/workspace.test.ts` | Add interrupt-flow tests | +| `packages/cli/src/lib/runtime/execute-route.ts` | Construct `PermissionsStore` from loaded config + env-var override; thread into `CapabilityMarkerContext` | +| `packages/cli/src/lib/dev/runtime-server.ts` | Register `POST /threads/:thread_id/resume` route + thread-state map | +| `packages/cli/test/resume-endpoint.test.ts` | New — endpoint tests | +| `packages/langchain/src/agent-adapter.ts` | Detect LangGraph `interrupt` events in `streamEvents` v2 output → yield `{type: "interrupt", data: ...}` chunks; handle `Command({resume})` re-invocation path | +| `packages/langchain/test/agent-adapter-interrupt.test.ts` | New — interrupt propagation test | +| `examples/chat/server/dawn.config.ts` | Seeded `permissions.allow` for demo (`bash: ["ls"]`) and `permissions.deny` (`bash: ["rm -rf", "sudo"]`) | +| `examples/chat/web/app/api/permission-resume/route.ts` | New — proxy to Dawn's resume endpoint | +| `examples/chat/web/app/page.tsx` | Inline permission panel + button handlers + resume POST | +| `memory/project_phase_status.md` | Mark sub-project 4.5 | + +--- + +## Phase A — `@dawn-ai/permissions` package + +### Task 1: Scaffold the permissions package + +**Files:** +- Create: `packages/permissions/package.json` +- Create: `packages/permissions/tsconfig.json` +- Create: `packages/permissions/vitest.config.ts` +- Create: `packages/permissions/src/index.ts` + +- [ ] **Step 1: Inspect sibling pattern** + +Run: `cd /Users/blove/repos/dawn && cat packages/workspace/package.json` +Note the exact catalog references and devDeps (workspace is the most-recent sibling and the closest template). + +- [ ] **Step 2: Write `packages/permissions/package.json`** + +Mirror `packages/workspace/package.json` exactly, substituting: +- `name`: `"@dawn-ai/permissions"` +- `version`: match siblings (likely `0.1.8`) + +No runtime deps yet. Same scripts (`build`, `test`, `typecheck`, `lint`). + +- [ ] **Step 3: Write `packages/permissions/tsconfig.json`** + +```json +{ + "extends": "@dawn-ai/config-typescript/node.json", + "include": ["src", "test"], + "compilerOptions": { "outDir": "dist", "rootDir": "." } +} +``` + +(Match `packages/workspace/tsconfig.json` exactly — copy-paste then adjust paths.) + +- [ ] **Step 4: Write `packages/permissions/vitest.config.ts`** + +```ts +import { defineConfig } from "vitest/config" +export default defineConfig({ + test: { + include: ["test/**/*.test.ts"], + passWithNoTests: true, + }, +}) +``` + +- [ ] **Step 5: Write `packages/permissions/src/index.ts`** + +```ts +// Re-exports will be added as types and impls land in subsequent tasks. +export {} +``` + +- [ ] **Step 6: Verify** + +```bash +cd /Users/blove/repos/dawn && pnpm install 2>&1 | tail -3 +cd /Users/blove/repos/dawn && pnpm --filter @dawn-ai/permissions build 2>&1 | tail -5 +cd /Users/blove/repos/dawn && pnpm --filter @dawn-ai/permissions test 2>&1 | tail -5 +cd /Users/blove/repos/dawn && pnpm --filter @dawn-ai/permissions lint 2>&1 | tail -5 +``` + +All should succeed (test passes with no test files). + +- [ ] **Step 7: Commit** + +```bash +cd /Users/blove/repos/dawn +git add packages/permissions/ +git commit -m "scaffold(permissions): empty @dawn-ai/permissions package + +Adds the package skeleton for the upcoming HITL permissions system. +No exports yet — types, pattern matching, and store land in subsequent +commits. + +Co-Authored-By: Claude Opus 4.7 " +``` + +--- + +### Task 2: Public types + +**Files:** +- Create: `packages/permissions/src/types.ts` +- Modify: `packages/permissions/src/index.ts` + +- [ ] **Step 1: Write `packages/permissions/src/types.ts`** + +```ts +/** + * Public types for the Dawn HITL permissions system. + * + * The workspace capability calls into a `PermissionsStore` before + * invoking its filesystem/exec backends. The store consults the + * runtime file at .dawn/permissions.json plus the config-seeded + * allow/deny lists and returns one of three decisions: "allow", + * "deny", or "unknown". On "unknown" in interactive mode the + * capability emits LangGraph's `interrupt()` with a `PermissionRequest` + * payload; the resume mechanism returns a `PermissionDecision`. + */ + +export type PermissionMode = "interactive" | "non-interactive" | "bypass" + +export type PermissionDecision = "once" | "always" | "deny" + +export interface PermissionsFile { + readonly version: 1 + readonly allow: Readonly> + readonly deny: Readonly> +} + +export interface CommandDetail { + readonly command: string + readonly suggestedPattern: string +} + +export interface PathDetail { + readonly path: string + readonly operation: "readFile" | "writeFile" | "listDir" + readonly suggestedPattern: string +} + +export interface PermissionRequest { + readonly interruptId: string + readonly kind: "command" | "path" + readonly detail: CommandDetail | PathDetail + readonly threadId: string + readonly callId?: string +} + +export interface PermissionsStore { + /** Loaded once at construction; subsequent loads not exposed in v1. */ + match(tool: string, candidate: string): "allow" | "deny" | "unknown" + /** Persists an allow entry to disk and updates the in-memory cache. */ + addAllow(tool: string, pattern: string): Promise + /** Active mode (resolved from config + env at construction). */ + readonly mode: PermissionMode +} +``` + +- [ ] **Step 2: Re-export from barrel** + +Edit `packages/permissions/src/index.ts`. Replace with: + +```ts +export type { + CommandDetail, + PathDetail, + PermissionDecision, + PermissionMode, + PermissionRequest, + PermissionsFile, + PermissionsStore, +} from "./types.js" +``` + +- [ ] **Step 3: Verify** + +```bash +cd /Users/blove/repos/dawn && pnpm --filter @dawn-ai/permissions build 2>&1 | tail -3 +cd /Users/blove/repos/dawn && pnpm --filter @dawn-ai/permissions typecheck 2>&1 | tail -3 +cd /Users/blove/repos/dawn && pnpm --filter @dawn-ai/permissions lint 2>&1 | tail -3 +``` + +Expect: all clean. + +- [ ] **Step 4: Commit** + +```bash +cd /Users/blove/repos/dawn +git add packages/permissions/src/types.ts packages/permissions/src/index.ts +git commit -m "feat(permissions): public types + +Co-Authored-By: Claude Opus 4.7 " +``` + +--- + +### Task 3: Suggested-pattern helpers + +**Files:** +- Create: `packages/permissions/src/suggested-pattern.ts` +- Create: `packages/permissions/test/suggested-pattern.test.ts` +- Modify: `packages/permissions/src/index.ts` + +- [ ] **Step 1: Write failing tests** + +Create `packages/permissions/test/suggested-pattern.test.ts`: + +```ts +import { describe, expect, it } from "vitest" +import { + suggestedCommandPattern, + suggestedPathPattern, +} from "../src/suggested-pattern.js" + +describe("suggestedCommandPattern", () => { + it("returns the first two tokens for a multi-word command", () => { + expect(suggestedCommandPattern("npm install react")).toBe("npm install") + }) + + it("returns the single token for a one-word command", () => { + expect(suggestedCommandPattern("ls")).toBe("ls") + }) + + it("returns first two tokens even when the second is short", () => { + expect(suggestedCommandPattern("git status")).toBe("git status") + expect(suggestedCommandPattern("git push origin main")).toBe("git push") + }) + + it("strips leading/trailing whitespace before tokenizing", () => { + expect(suggestedCommandPattern(" npm install react ")).toBe("npm install") + }) + + it("handles empty input as empty pattern", () => { + expect(suggestedCommandPattern("")).toBe("") + expect(suggestedCommandPattern(" ")).toBe("") + }) +}) + +describe("suggestedPathPattern", () => { + it("returns the parent directory with trailing slash", () => { + expect(suggestedPathPattern("/Users/blove/.zshrc")).toBe("/Users/blove/") + expect(suggestedPathPattern("/var/log/app.log")).toBe("/var/log/") + }) + + it("returns the dir itself with trailing slash when input ends with slash", () => { + expect(suggestedPathPattern("/Users/blove/Documents/")).toBe("/Users/blove/Documents/") + }) + + it("returns root when input is a top-level file", () => { + expect(suggestedPathPattern("/etc")).toBe("/") + }) + + it("handles relative paths", () => { + expect(suggestedPathPattern("notes/agenda.md")).toBe("notes/") + }) +}) +``` + +- [ ] **Step 2: Run to verify failure** + +```bash +cd /Users/blove/repos/dawn && pnpm --filter @dawn-ai/permissions test 2>&1 | tail -10 +``` +Expect: FAIL — module not found. + +- [ ] **Step 3: Implement** + +Create `packages/permissions/src/suggested-pattern.ts`: + +```ts +import { dirname } from "node:path" + +/** + * Default suggested pattern for a shell command. + * + * Returns the first two whitespace-separated tokens. `npm install react` + * → `npm install`. `ls` → `ls`. This is the sweet spot — covers + * `npm install ` and `npm test` as distinct patterns, vs lumping + * everything under `npm`. + */ +export function suggestedCommandPattern(command: string): string { + const trimmed = command.trim() + if (trimmed.length === 0) return "" + const tokens = trimmed.split(/\s+/) + return tokens.slice(0, 2).join(" ") +} + +/** + * Default suggested pattern for a filesystem path. + * + * Returns the parent directory of the path with a trailing slash. + * `/Users/blove/.zshrc` → `/Users/blove/`. Trailing slash makes + * prefix matching unambiguous (so `/var/log/` does not match + * `/var/logger/app.log`). + */ +export function suggestedPathPattern(path: string): string { + if (path.endsWith("/")) return path + const parent = dirname(path) + return parent === "/" ? "/" : `${parent}/` +} +``` + +- [ ] **Step 4: Re-export** + +Edit `packages/permissions/src/index.ts`. Append: + +```ts +export { suggestedCommandPattern, suggestedPathPattern } from "./suggested-pattern.js" +``` + +- [ ] **Step 5: Run tests** + +```bash +cd /Users/blove/repos/dawn && pnpm --filter @dawn-ai/permissions test 2>&1 | tail -5 +``` +Expect: PASS (9 tests). + +- [ ] **Step 6: Commit** + +```bash +cd /Users/blove/repos/dawn +git add packages/permissions/src/suggested-pattern.ts \ + packages/permissions/test/suggested-pattern.test.ts \ + packages/permissions/src/index.ts +git commit -m "feat(permissions): suggested-pattern helpers for commands and paths + +Co-Authored-By: Claude Opus 4.7 " +``` + +--- + +### Task 4: Pattern matching + +**Files:** +- Create: `packages/permissions/src/pattern-matching.ts` +- Create: `packages/permissions/test/pattern-matching.test.ts` +- Modify: `packages/permissions/src/index.ts` + +- [ ] **Step 1: Write failing tests** + +Create `packages/permissions/test/pattern-matching.test.ts`: + +```ts +import { describe, expect, it } from "vitest" +import { matchPermission } from "../src/pattern-matching.js" + +describe("matchPermission", () => { + it("returns unknown when no entries match", () => { + expect( + matchPermission("bash", "npm install", {}, {}), + ).toBe("unknown") + }) + + it("returns allow when candidate matches an allow prefix", () => { + expect( + matchPermission("bash", "npm install react", { bash: ["npm install"] }, {}), + ).toBe("allow") + }) + + it("returns deny when candidate matches a deny prefix", () => { + expect( + matchPermission("bash", "rm -rf /tmp", {}, { bash: ["rm -rf"] }), + ).toBe("deny") + }) + + it("deny wins over allow when both match", () => { + expect( + matchPermission( + "bash", + "rm -rf /tmp", + { bash: ["rm"] }, // would match "rm -rf /tmp" as prefix? No — "rm " vs "rm -rf" + { bash: ["rm -rf"] }, + ), + ).toBe("deny") + }) + + it("does NOT match an allow entry that is not a prefix", () => { + expect( + matchPermission("bash", "npm test", { bash: ["npm install"] }, {}), + ).toBe("unknown") + }) + + it("treats path candidates with absolute prefixes", () => { + expect( + matchPermission( + "readFile", + "/Users/blove/.zshrc", + { readFile: ["/Users/blove/"] }, + {}, + ), + ).toBe("allow") + }) + + it("does not cross directory boundary without trailing slash", () => { + // /var/logger/app.log should NOT match allow=/var/log (no trailing slash) + // because /var/log is a prefix string of /var/logger. With trailing slash + // it does NOT match. + expect( + matchPermission( + "readFile", + "/var/logger/app.log", + { readFile: ["/var/log/"] }, + {}, + ), + ).toBe("unknown") + }) + + it("returns unknown for a tool with no entries in either list", () => { + expect( + matchPermission( + "runUnknownTool", + "anything", + { bash: ["ls"] }, + { writeFile: ["/tmp/"] }, + ), + ).toBe("unknown") + }) +}) +``` + +- [ ] **Step 2: Run to verify failure** + +```bash +cd /Users/blove/repos/dawn && pnpm --filter @dawn-ai/permissions test 2>&1 | tail -8 +``` +Expect: FAIL — module not found. + +- [ ] **Step 3: Implement** + +Create `packages/permissions/src/pattern-matching.ts`: + +```ts +type PatternMap = Readonly> + +/** + * Match a tool+candidate against allow + deny pattern maps. + * + * Semantics: + * - deny wins over allow (a candidate that matches both returns "deny") + * - prefix matching: `candidate.startsWith(pattern)` + * - no entries for tool in either map → "unknown" + * + * Patterns are expected to encode any required boundary themselves (e.g., + * path patterns should end with "/" to prevent crossing directory + * boundaries; command patterns are first-N tokens already). + */ +export function matchPermission( + tool: string, + candidate: string, + allow: PatternMap, + deny: PatternMap, +): "allow" | "deny" | "unknown" { + const denyList = deny[tool] ?? [] + for (const pattern of denyList) { + if (candidate.startsWith(pattern)) return "deny" + } + const allowList = allow[tool] ?? [] + for (const pattern of allowList) { + if (candidate.startsWith(pattern)) return "allow" + } + return "unknown" +} +``` + +- [ ] **Step 4: Re-export** + +Edit `packages/permissions/src/index.ts`. Append: + +```ts +export { matchPermission } from "./pattern-matching.js" +``` + +- [ ] **Step 5: Run tests** + +```bash +cd /Users/blove/repos/dawn && pnpm --filter @dawn-ai/permissions test 2>&1 | tail -5 +``` +Expect: PASS (17 total: 9 + 8 new). + +- [ ] **Step 6: Commit** + +```bash +cd /Users/blove/repos/dawn +git add packages/permissions/src/pattern-matching.ts \ + packages/permissions/test/pattern-matching.test.ts \ + packages/permissions/src/index.ts +git commit -m "feat(permissions): pattern-matching engine (allow/deny/unknown) + +Co-Authored-By: Claude Opus 4.7 " +``` + +--- + +### Task 5: PermissionsStore + +**Files:** +- Create: `packages/permissions/src/permissions-store.ts` +- Create: `packages/permissions/test/permissions-store.test.ts` +- Modify: `packages/permissions/src/index.ts` + +- [ ] **Step 1: Write failing tests** + +Create `packages/permissions/test/permissions-store.test.ts`: + +```ts +import { afterEach, beforeEach, describe, expect, it } from "vitest" +import { mkdtempSync, readFileSync, rmSync, existsSync, writeFileSync } from "node:fs" +import { tmpdir } from "node:os" +import { join } from "node:path" + +import { createPermissionsStore } from "../src/permissions-store.js" +import type { PermissionsFile } from "../src/types.js" + +describe("createPermissionsStore — load + match", () => { + let appRoot: string + beforeEach(() => { + appRoot = mkdtempSync(join(tmpdir(), "dawn-perms-")) + }) + afterEach(() => { + rmSync(appRoot, { recursive: true, force: true }) + }) + + it("returns unknown when no file or config", async () => { + const store = createPermissionsStore({ appRoot, config: undefined, mode: "interactive" }) + await store.load() + expect(store.match("bash", "npm install")).toBe("unknown") + }) + + it("matches entries from .dawn/permissions.json", async () => { + writeFileSync( + join(appRoot, ".dawn", "permissions.json"), + JSON.stringify({ + version: 1, + allow: { bash: ["npm install"] }, + deny: {}, + }), + { encoding: "utf8", flag: "w" }, + ) + // mkdir is needed before writeFileSync — adjust: + // Actually the test should use mkdirSync first + }) +}) +``` + +(Wait — let me rewrite this test more carefully. The implementer should follow the corrected version below.) + +Use this corrected test file: + +```ts +import { afterEach, beforeEach, describe, expect, it } from "vitest" +import { mkdirSync, mkdtempSync, readFileSync, rmSync, writeFileSync } from "node:fs" +import { tmpdir } from "node:os" +import { join } from "node:path" + +import { createPermissionsStore } from "../src/permissions-store.js" + +describe("createPermissionsStore — load + match", () => { + let appRoot: string + beforeEach(() => { + appRoot = mkdtempSync(join(tmpdir(), "dawn-perms-")) + }) + afterEach(() => { + rmSync(appRoot, { recursive: true, force: true }) + }) + + it("returns unknown when no file and no config", async () => { + const store = createPermissionsStore({ appRoot, config: undefined, mode: "interactive" }) + await store.load() + expect(store.match("bash", "npm install")).toBe("unknown") + }) + + it("matches entries from .dawn/permissions.json", async () => { + mkdirSync(join(appRoot, ".dawn"), { recursive: true }) + writeFileSync( + join(appRoot, ".dawn", "permissions.json"), + JSON.stringify({ + version: 1, + allow: { bash: ["npm install"] }, + deny: {}, + }), + ) + const store = createPermissionsStore({ appRoot, config: undefined, mode: "interactive" }) + await store.load() + expect(store.match("bash", "npm install react")).toBe("allow") + expect(store.match("bash", "rm -rf /")).toBe("unknown") + }) + + it("merges config + runtime file (both contribute allows)", async () => { + mkdirSync(join(appRoot, ".dawn"), { recursive: true }) + writeFileSync( + join(appRoot, ".dawn", "permissions.json"), + JSON.stringify({ version: 1, allow: { bash: ["ls"] }, deny: {} }), + ) + const store = createPermissionsStore({ + appRoot, + config: { version: 1, allow: { bash: ["npm install"] }, deny: {} }, + mode: "interactive", + }) + await store.load() + expect(store.match("bash", "ls -la")).toBe("allow") + expect(store.match("bash", "npm install react")).toBe("allow") + }) + + it("deny from config wins over allow from runtime file", async () => { + mkdirSync(join(appRoot, ".dawn"), { recursive: true }) + writeFileSync( + join(appRoot, ".dawn", "permissions.json"), + JSON.stringify({ version: 1, allow: { bash: ["rm"] }, deny: {} }), + ) + const store = createPermissionsStore({ + appRoot, + config: { version: 1, allow: {}, deny: { bash: ["rm -rf"] } }, + mode: "interactive", + }) + await store.load() + expect(store.match("bash", "rm -rf /tmp")).toBe("deny") + }) + + it("ignores the runtime file in non-interactive mode", async () => { + mkdirSync(join(appRoot, ".dawn"), { recursive: true }) + writeFileSync( + join(appRoot, ".dawn", "permissions.json"), + JSON.stringify({ version: 1, allow: { bash: ["npm install"] }, deny: {} }), + ) + const store = createPermissionsStore({ + appRoot, + config: { version: 1, allow: { bash: ["ls"] }, deny: {} }, + mode: "non-interactive", + }) + await store.load() + expect(store.match("bash", "npm install react")).toBe("unknown") + expect(store.match("bash", "ls -la")).toBe("allow") + }) + + it("ignores everything in bypass mode", async () => { + mkdirSync(join(appRoot, ".dawn"), { recursive: true }) + writeFileSync( + join(appRoot, ".dawn", "permissions.json"), + JSON.stringify({ version: 1, allow: {}, deny: { bash: ["rm"] } }), + ) + const store = createPermissionsStore({ + appRoot, + config: { version: 1, allow: {}, deny: { bash: ["rm"] } }, + mode: "bypass", + }) + await store.load() + // bypass mode: store always returns "unknown" (which the capability interprets as "go ahead") + expect(store.match("bash", "rm -rf /")).toBe("unknown") + }) + + it("throws on malformed JSON in the runtime file", async () => { + mkdirSync(join(appRoot, ".dawn"), { recursive: true }) + writeFileSync(join(appRoot, ".dawn", "permissions.json"), "{ not valid json") + const store = createPermissionsStore({ appRoot, config: undefined, mode: "interactive" }) + await expect(store.load()).rejects.toThrow(/permissions\.json/i) + }) +}) + +describe("createPermissionsStore — addAllow", () => { + let appRoot: string + beforeEach(() => { + appRoot = mkdtempSync(join(tmpdir(), "dawn-perms-")) + }) + afterEach(() => { + rmSync(appRoot, { recursive: true, force: true }) + }) + + it("persists an allow entry and updates the in-memory cache atomically", async () => { + const store = createPermissionsStore({ appRoot, config: undefined, mode: "interactive" }) + await store.load() + expect(store.match("bash", "npm install")).toBe("unknown") + await store.addAllow("bash", "npm install") + expect(store.match("bash", "npm install react")).toBe("allow") + const raw = readFileSync(join(appRoot, ".dawn", "permissions.json"), "utf8") + const parsed = JSON.parse(raw) + expect(parsed.allow.bash).toContain("npm install") + }) + + it("appends .dawn/ to .gitignore on first write (idempotent)", async () => { + writeFileSync(join(appRoot, ".gitignore"), "node_modules/\n.next/\n") + const store = createPermissionsStore({ appRoot, config: undefined, mode: "interactive" }) + await store.load() + await store.addAllow("bash", "ls") + const gi = readFileSync(join(appRoot, ".gitignore"), "utf8") + expect(gi).toContain(".dawn/") + expect(gi).toContain("node_modules/") // preserved existing + }) + + it("creates .gitignore with .dawn/ when none exists", async () => { + const store = createPermissionsStore({ appRoot, config: undefined, mode: "interactive" }) + await store.load() + await store.addAllow("bash", "ls") + const gi = readFileSync(join(appRoot, ".gitignore"), "utf8") + expect(gi).toBe(".dawn/\n") + }) + + it("does not duplicate .dawn/ if already in .gitignore", async () => { + writeFileSync(join(appRoot, ".gitignore"), "node_modules/\n.dawn/\n") + const store = createPermissionsStore({ appRoot, config: undefined, mode: "interactive" }) + await store.load() + await store.addAllow("bash", "ls") + const gi = readFileSync(join(appRoot, ".gitignore"), "utf8") + expect(gi.match(/\.dawn\//g)?.length).toBe(1) + }) + + it("serializes concurrent addAllow calls", async () => { + const store = createPermissionsStore({ appRoot, config: undefined, mode: "interactive" }) + await store.load() + await Promise.all([ + store.addAllow("bash", "ls"), + store.addAllow("bash", "pwd"), + store.addAllow("bash", "cat"), + ]) + const raw = readFileSync(join(appRoot, ".dawn", "permissions.json"), "utf8") + const parsed = JSON.parse(raw) + expect([...parsed.allow.bash].sort()).toEqual(["cat", "ls", "pwd"]) + }) + + it("exposes the resolved mode", () => { + const store = createPermissionsStore({ appRoot, config: undefined, mode: "non-interactive" }) + expect(store.mode).toBe("non-interactive") + }) +}) +``` + +- [ ] **Step 2: Run to verify failure** + +```bash +cd /Users/blove/repos/dawn && pnpm --filter @dawn-ai/permissions test 2>&1 | tail -10 +``` +Expect: FAIL — `Cannot find module '../src/permissions-store.js'`. + +- [ ] **Step 3: Implement** + +Create `packages/permissions/src/permissions-store.ts`: + +```ts +import { existsSync, readFileSync } from "node:fs" +import { mkdir, readFile, writeFile } from "node:fs/promises" +import { join } from "node:path" + +import { matchPermission } from "./pattern-matching.js" +import type { + PermissionMode, + PermissionsFile, + PermissionsStore, +} from "./types.js" + +const PERMISSIONS_DIR = ".dawn" +const PERMISSIONS_FILE = "permissions.json" + +interface CreateOptions { + readonly appRoot: string + readonly config: PermissionsFile | undefined + readonly mode: PermissionMode +} + +type MutableMap = Record + +interface State { + configAllow: MutableMap + configDeny: MutableMap + runtimeAllow: MutableMap + runtimeDeny: MutableMap +} + +function emptyState(): State { + return { configAllow: {}, configDeny: {}, runtimeAllow: {}, runtimeDeny: {} } +} + +function cloneMap(src: Readonly>): MutableMap { + const out: MutableMap = {} + for (const [k, v] of Object.entries(src)) out[k] = [...v] + return out +} + +function effectiveAllow(state: State, mode: PermissionMode): Record { + if (mode === "bypass") return {} + const out: Record = {} + for (const [k, v] of Object.entries(state.configAllow)) out[k] = [...v] + if (mode === "interactive") { + for (const [k, v] of Object.entries(state.runtimeAllow)) { + out[k] = [...(out[k] ?? []), ...v] + } + } + return out +} + +function effectiveDeny(state: State, mode: PermissionMode): Record { + if (mode === "bypass") return {} + const out: Record = {} + for (const [k, v] of Object.entries(state.configDeny)) out[k] = [...v] + if (mode === "interactive") { + for (const [k, v] of Object.entries(state.runtimeDeny)) { + out[k] = [...(out[k] ?? []), ...v] + } + } + return out +} + +export function createPermissionsStore(opts: CreateOptions): PermissionsStore { + const { appRoot, config, mode } = opts + const state = emptyState() + if (config) { + state.configAllow = cloneMap(config.allow) + state.configDeny = cloneMap(config.deny) + } + + let writeQueue: Promise = Promise.resolve() + + async function loadRuntimeFile(): Promise { + const filePath = join(appRoot, PERMISSIONS_DIR, PERMISSIONS_FILE) + if (!existsSync(filePath)) return + let raw: string + try { + raw = await readFile(filePath, "utf8") + } catch (err) { + throw new Error(`Failed to read permissions.json: ${(err as Error).message}`) + } + let parsed: unknown + try { + parsed = JSON.parse(raw) + } catch (err) { + throw new Error(`Malformed permissions.json: ${(err as Error).message}`) + } + const p = parsed as Partial + if (p.allow && typeof p.allow === "object") state.runtimeAllow = cloneMap(p.allow as Record) + if (p.deny && typeof p.deny === "object") state.runtimeDeny = cloneMap(p.deny as Record) + } + + async function persistRuntimeFile(): Promise { + const dir = join(appRoot, PERMISSIONS_DIR) + await mkdir(dir, { recursive: true }) + const file: PermissionsFile = { + version: 1, + allow: state.runtimeAllow, + deny: state.runtimeDeny, + } + await writeFile(join(dir, PERMISSIONS_FILE), `${JSON.stringify(file, null, 2)}\n`, "utf8") + } + + async function ensureGitignoreEntry(): Promise { + const gitignorePath = join(appRoot, ".gitignore") + let content = "" + if (existsSync(gitignorePath)) { + content = await readFile(gitignorePath, "utf8") + if (content.split("\n").some((line) => line.trim() === ".dawn/")) return + if (!content.endsWith("\n") && content.length > 0) content += "\n" + content += ".dawn/\n" + } else { + content = ".dawn/\n" + } + await writeFile(gitignorePath, content, "utf8") + } + + return { + mode, + match(tool: string, candidate: string) { + return matchPermission(tool, candidate, effectiveAllow(state, mode), effectiveDeny(state, mode)) + }, + async load() { + if (mode === "interactive") { + await loadRuntimeFile() + } + }, + async addAllow(tool: string, pattern: string) { + const job = async () => { + const list = state.runtimeAllow[tool] ?? [] + if (!list.includes(pattern)) list.push(pattern) + state.runtimeAllow[tool] = list + await persistRuntimeFile() + await ensureGitignoreEntry() + } + writeQueue = writeQueue.then(job, job) + await writeQueue + }, + } +} +``` + +- [ ] **Step 4: Re-export** + +Edit `packages/permissions/src/index.ts`. Append: + +```ts +export { createPermissionsStore } from "./permissions-store.js" +``` + +- [ ] **Step 5: Run tests** + +```bash +cd /Users/blove/repos/dawn && pnpm --filter @dawn-ai/permissions test 2>&1 | tail -10 +cd /Users/blove/repos/dawn && pnpm --filter @dawn-ai/permissions build 2>&1 | tail -3 +cd /Users/blove/repos/dawn && pnpm --filter @dawn-ai/permissions lint 2>&1 | tail -3 +``` +Expect: PASS (~28 tests total), build + lint clean. + +- [ ] **Step 6: Commit** + +```bash +cd /Users/blove/repos/dawn +git add packages/permissions/src/permissions-store.ts \ + packages/permissions/test/permissions-store.test.ts \ + packages/permissions/src/index.ts +git commit -m "feat(permissions): PermissionsStore with file I/O + write queue + gitignore handling + +Co-Authored-By: Claude Opus 4.7 " +``` + +--- + +## Phase B — Config + capability changes + +### Task 6: Extend DawnConfig + CapabilityMarkerContext + +**Files:** +- Modify: `packages/core/package.json` — add `@dawn-ai/permissions` to devDependencies (type-only for now) +- Modify: `packages/core/src/types.ts` — extend `DawnConfig` +- Modify: `packages/core/src/capabilities/types.ts` — extend `CapabilityMarkerContext` + +- [ ] **Step 1: Add permissions package as type-only dep** + +Edit `packages/core/package.json`. Add to `devDependencies`: + +```json +"@dawn-ai/permissions": "workspace:*" +``` + +Run: `cd /Users/blove/repos/dawn && pnpm install --silent 2>&1 | tail -3` + +- [ ] **Step 2: Extend `DawnConfig`** + +Edit `packages/core/src/types.ts`. Add to the existing imports: + +```ts +import type { PermissionMode } from "@dawn-ai/permissions" +``` + +Find the `DawnConfig` interface and extend: + +```ts +export interface DawnConfig { + readonly appDir?: string + readonly backends?: { + readonly filesystem?: FilesystemBackend + readonly exec?: ExecBackend + } + readonly permissions?: { + readonly mode?: PermissionMode + readonly allow?: Readonly> + readonly deny?: Readonly> + } +} +``` + +- [ ] **Step 3: Extend `CapabilityMarkerContext`** + +Edit `packages/core/src/capabilities/types.ts`. Add to imports: + +```ts +import type { PermissionsStore } from "@dawn-ai/permissions" +``` + +Find `CapabilityMarkerContext` and extend: + +```ts +export interface CapabilityMarkerContext { + // ... existing fields + readonly permissions?: PermissionsStore +} +``` + +- [ ] **Step 4: Verify** + +```bash +cd /Users/blove/repos/dawn && pnpm --filter @dawn-ai/core typecheck 2>&1 | tail -3 +cd /Users/blove/repos/dawn && pnpm --filter @dawn-ai/core lint 2>&1 | tail -3 +cd /Users/blove/repos/dawn && pnpm test 2>&1 | tail -8 +``` +Expect: clean, full repo still green. + +- [ ] **Step 5: Commit** + +```bash +cd /Users/blove/repos/dawn +git add packages/core/package.json packages/core/src/types.ts packages/core/src/capabilities/types.ts +git commit -m "feat(core): extend DawnConfig + CapabilityMarkerContext with permissions + +Type-only edge to @dawn-ai/permissions. Workspace capability will read +context.permissions in a subsequent commit. + +Co-Authored-By: Claude Opus 4.7 " +``` + +--- + +### Task 7: Workspace capability gates through PermissionsStore + +**Files:** +- Modify: `packages/core/src/capabilities/built-in/workspace.ts` +- Modify: `packages/core/test/capabilities/workspace.test.ts` +- Modify: `packages/core/package.json` (promote permissions from devDep to dep — runtime use) + +- [ ] **Step 1: Promote permissions package to runtime dep** + +Edit `packages/core/package.json`. Move `@dawn-ai/permissions` from `devDependencies` to `dependencies`. Run `pnpm install`. + +- [ ] **Step 2: Add failing tests** + +Append to `packages/core/test/capabilities/workspace.test.ts`: + +```ts +import { createPermissionsStore } from "@dawn-ai/permissions" +import type { PermissionsStore } from "@dawn-ai/permissions" + +describe("createWorkspaceMarker — permissions gating", () => { + let routeDir: string + let appRoot: string + beforeEach(() => { + appRoot = mkdtempSync(join(tmpdir(), "dawn-perm-cap-")) + routeDir = appRoot + mkdirSync(join(appRoot, "workspace")) + process.chdir(appRoot) + }) + afterEach(() => { + process.chdir(originalCwd) + rmSync(appRoot, { recursive: true, force: true }) + }) + + async function makeStore(mode: "interactive" | "non-interactive" | "bypass", config?: { allow?: Record; deny?: Record }): Promise { + const store = createPermissionsStore({ + appRoot, + config: config + ? { version: 1, allow: config.allow ?? {}, deny: config.deny ?? {} } + : undefined, + mode, + }) + await store.load() + return store + } + + it("calls the backend normally when path matches allow", async () => { + writeFileSync(join(appRoot, "workspace", "ok.txt"), "ok", "utf8") + const permissions = await makeStore("non-interactive", { + allow: { readFile: [join(appRoot, "workspace") + "/"] }, + }) + const contribution = await createWorkspaceMarker().load(routeDir, ctx({ permissions })) + const readTool = contribution.tools!.find((t) => t.name === "readFile")! + const result = await readTool.run( + { path: "ok.txt" }, + { signal: new AbortController().signal }, + ) + expect(result).toBe("ok") + }) + + it("returns a deny error to the agent when path matches deny", async () => { + writeFileSync(join(appRoot, "workspace", "blocked.txt"), "x", "utf8") + const permissions = await makeStore("non-interactive", { + deny: { readFile: [join(appRoot, "workspace") + "/blocked"] }, + }) + const contribution = await createWorkspaceMarker().load(routeDir, ctx({ permissions })) + const readTool = contribution.tools!.find((t) => t.name === "readFile")! + await expect( + readTool.run({ path: "blocked.txt" }, { signal: new AbortController().signal }), + ).rejects.toThrow(/permission denied/i) + }) + + it("in non-interactive mode, unknown commands hard-refuse", async () => { + const permissions = await makeStore("non-interactive") + const contribution = await createWorkspaceMarker().load(routeDir, ctx({ permissions })) + const runBash = contribution.tools!.find((t) => t.name === "runBash")! + await expect( + runBash.run({ command: "ls" }, { signal: new AbortController().signal }), + ).rejects.toThrow(/permission denied|fail-closed/i) + }) + + it("in bypass mode, every operation proceeds (path-jail disabled)", async () => { + const permissions = await makeStore("bypass") + const contribution = await createWorkspaceMarker().load(routeDir, ctx({ permissions })) + const readTool = contribution.tools!.find((t) => t.name === "readFile")! + // In bypass mode, reading outside the workspace should NOT raise "outside workspace" + // (it might raise ENOENT instead because the file doesn't exist). + await expect( + readTool.run({ path: "../../etc/some-fake-file" }, { signal: new AbortController().signal }), + ).rejects.not.toThrow(/outside workspace/i) + }) +}) +``` + +(Note: this assumes `process.chdir` is already in the existing `workspace.test.ts` from sub-project 4 — verify by reading the file. Adjust the new tests to share the same setup.) + +- [ ] **Step 3: Run to verify failure** + +```bash +cd /Users/blove/repos/dawn && pnpm --filter @dawn-ai/core test -- workspace 2>&1 | tail -15 +``` +Expect: FAIL — capability ignores `context.permissions`. + +- [ ] **Step 4: Update the capability** + +Edit `packages/core/src/capabilities/built-in/workspace.ts`. Add imports: + +```ts +import type { PermissionsStore } from "@dawn-ai/permissions" +``` + +Change `buildWorkspaceTools` signature to accept the optional store: + +```ts +function buildWorkspaceTools( + workspaceRoot: string, + fs: FilesystemBackend, + exec: ExecBackend, + permissions: PermissionsStore | undefined, +): readonly OverridableTool[] { /* ... */ } +``` + +Add a helper for gating: + +```ts +async function gate( + permissions: PermissionsStore | undefined, + tool: string, + candidate: string, +): Promise<"allow" | "deny" | "unknown"> { + if (!permissions) return "allow" // capability used without permissions context = legacy behavior, allow + if (permissions.mode === "bypass") return "allow" + return permissions.match(tool, candidate) +} +``` + +In each tool's `run`: + +- For path tools (`readFile`/`writeFile`/`listDir`): resolve the path first, then check: + - If the path is INSIDE the workspace: allow silently (the workspace is the trusted area; no need to gate every read of `workspace/notes.md`). + - If the path is OUTSIDE the workspace OR if `permissions.mode === "bypass"`: skip the jail check; consult `gate()`. If `"deny"` → throw `Permission denied by user: ${path}`. If `"unknown"` AND mode === "interactive" → emit interrupt; AND mode === "non-interactive" → throw "Permission denied (fail-closed)". If `"allow"` → proceed. + +```ts +// readFile (rewritten): +run: async (input, ctx) => { + const { path } = READ_FILE_INPUT.parse(input) + const absPath = resolve(workspaceRoot, path) + const insideWorkspace = + absPath === workspaceRoot || absPath.startsWith(workspaceRoot + sep) + + if (!insideWorkspace || permissions?.mode === "bypass") { + // Consult permissions for the operation + const decision = await gate(permissions, "readFile", absPath) + if (decision === "deny") { + throw new Error(`Permission denied by user: ${path}`) + } + if (decision === "unknown") { + if (permissions?.mode === "non-interactive") { + throw new Error(`Permission denied (fail-closed): ${path}`) + } + // interactive: emit LangGraph interrupt() — handled by helper (see Task 8) + const result = await requestPermissionInterrupt({ + kind: "path", + operation: "readFile", + path: absPath, + permissions, + }) + if (result === "deny") { + throw new Error(`Permission denied by user: ${path}`) + } + // "allow" — proceed + } + } + + return fs.readFile(absPath, backendContext(workspaceRoot, ctx.signal)) +} +``` + +`requestPermissionInterrupt` is a helper imported from `@dawn-ai/langchain` — wait, that creates a core→langchain dep. Restructure: the helper lives in `@dawn-ai/permissions` and uses LangGraph's `interrupt()` directly (which is available via `import { interrupt } from "@langchain/langgraph"`). Add `@langchain/langgraph` to `@dawn-ai/permissions` as a peerDependency (or dependency). + +Actually, simpler: have the workspace capability import `interrupt` from `@langchain/langgraph` directly (it's the LangGraph primitive). Add `@langchain/langgraph` to `@dawn-ai/core` as a peerDependency if not already present. + +Verify: `cd /Users/blove/repos/dawn && grep "@langchain/langgraph" packages/core/package.json` + +If not present: add to peerDependencies. If `@dawn-ai/core` shouldn't take a runtime dep on langgraph, do the interrupt logic in `@dawn-ai/langchain` and pass it via the resolver — but that's heavier. **For v1, accept the core→langgraph dep**. + +```ts +import { interrupt } from "@langchain/langgraph" + +async function requestPermissionInterrupt(args: { + kind: "command" | "path" + command?: string + operation?: "readFile" | "writeFile" | "listDir" + path?: string + permissions: PermissionsStore | undefined +}): Promise<"allow" | "deny"> { + const interruptId = `perm-${Date.now()}-${Math.random().toString(36).slice(2, 8)}` + const suggestedPattern = + args.kind === "command" + ? suggestedCommandPattern(args.command!) + : suggestedPathPattern(args.path!) + const payload = { + interruptId, + type: "permission-request" as const, + kind: args.kind, + detail: + args.kind === "command" + ? { command: args.command!, suggestedPattern } + : { operation: args.operation!, path: args.path!, suggestedPattern }, + } + // LangGraph's interrupt() pauses the graph and yields the payload on the stream. + // The resume value comes back here when the resume endpoint fires. + const decision = interrupt(payload) as "once" | "always" | "deny" + if (decision === "deny") return "deny" + if (decision === "always" && args.permissions) { + const tool = args.kind === "command" ? "bash" : args.operation! + await args.permissions.addAllow(tool, suggestedPattern) + } + return "allow" +} +``` + +Apply the same pattern to `writeFile`, `listDir`, and `runBash`. For `runBash`, EVERY command is gated (no inside/outside-workspace short-circuit): + +```ts +// runBash (rewritten): +run: async (input, ctx) => { + const { command } = RUN_BASH_INPUT.parse(input) + const decision = await gate(permissions, "bash", command) + if (decision === "deny") { + throw new Error(`Permission denied by user: ${command}`) + } + if (decision === "unknown") { + if (permissions?.mode === "non-interactive") { + throw new Error(`Permission denied (fail-closed): ${command}`) + } + const result = await requestPermissionInterrupt({ + kind: "command", + command, + permissions, + }) + if (result === "deny") { + throw new Error(`Permission denied by user: ${command}`) + } + } + return exec.runCommand({ command }, backendContext(workspaceRoot, ctx.signal)) +} +``` + +Bypass mode for path-jail: the existing `pathJail()` call needs to be skipped when `permissions?.mode === "bypass"`. Wrap the jail call in a check OR remove the jail (since the gate handles the bypass case). + +Actually the cleanest restructure: the capability no longer calls `pathJail()` at all — it resolves the path with `resolve(workspaceRoot, path)`, checks "is inside workspace?" itself, and gates if not (or if bypass). The "Path is outside workspace" error becomes part of the deny path for non-interactive mode. + +- [ ] **Step 5: Pass permissions through `load()`** + +In the `load()` of the marker: + +```ts +load: async (_routeDir, context) => { + const root = workspaceRoot() + if (!existsSync(root)) return {} + const fs = context.backends?.filesystem ?? localFilesystem() + const exec = context.backends?.exec ?? localExec() + const permissions = context.permissions + // Warn on bypass mode + if (permissions?.mode === "bypass") { + console.warn( + "[dawn:permissions] mode=bypass — path-jail disabled, all bash unrestricted. Do not use in production.", + ) + } + return { tools: buildWorkspaceTools(root, fs, exec, permissions) } +} +``` + +- [ ] **Step 6: Run tests** + +```bash +cd /Users/blove/repos/dawn && pnpm --filter @dawn-ai/core test 2>&1 | tail -10 +cd /Users/blove/repos/dawn && pnpm test 2>&1 | tail -10 +cd /Users/blove/repos/dawn && pnpm build 2>&1 | tail -5 +cd /Users/blove/repos/dawn && pnpm lint 2>&1 | tail -5 +``` +Expect: all green. + +Note: the interrupt-flow tests (interactive mode → emits interrupt) cannot run in isolation because `interrupt()` is a LangGraph primitive that requires a live graph runtime. These tests should mock `interrupt()` to return a canned value, or be deferred to integration tests in `@dawn-ai/langchain`. + +Pragmatic approach for THIS task: only test the non-interactive + bypass paths in unit tests; defer interactive-flow testing to Task 8's agent-adapter integration test. + +- [ ] **Step 7: Commit** + +```bash +cd /Users/blove/repos/dawn +git add packages/core/package.json \ + packages/core/src/capabilities/built-in/workspace.ts \ + packages/core/test/capabilities/workspace.test.ts +git commit -m "feat(core): workspace capability gates through PermissionsStore + +Each of readFile/writeFile/listDir/runBash now consults the optional +PermissionsStore in CapabilityMarkerContext before invoking the +backend. Three modes: + +- interactive: unknown ops emit LangGraph interrupt() and pause the run +- non-interactive: unknown ops hard-refuse (fail-closed) +- bypass: path-jail disabled, every op proceeds (warn on capability load) + +Path-touching operations short-circuit (no gate) for paths INSIDE the +workspace. runBash gates every command regardless. + +Co-Authored-By: Claude Opus 4.7 " +``` + +--- + +## Phase C — Runtime: agent-adapter + resume endpoint + +### Task 8: Propagate interrupt events through the SSE stream + +**Files:** +- Modify: `packages/langchain/src/agent-adapter.ts` +- Create: `packages/langchain/test/agent-adapter-interrupt.test.ts` + +- [ ] **Step 1: Inspect existing streamFromRunnable** + +Read `packages/langchain/src/agent-adapter.ts` lines around `streamFromRunnable` and `streamEvents`. Note the case statements for `on_chat_model_stream`, `on_tool_start`, etc. The interrupt event from LangGraph v2 streamEvents has `event: "on_interrupt"` (verify by checking LangGraph 1.x docs or a quick smoke test). + +If `on_interrupt` doesn't exist in LangGraph's stream events, LangGraph 1.x surfaces interrupts as a special return value from `graph.invoke()` rather than a stream event. In that case the propagation happens differently — at the graph-return level rather than mid-stream. Verify and adjust. + +- [ ] **Step 2: Write a failing test** + +Create `packages/langchain/test/agent-adapter-interrupt.test.ts`: + +```ts +import { describe, expect, it, vi } from "vitest" +import { streamAgent } from "../src/agent-adapter.js" + +describe("streamAgent — interrupt propagation", () => { + it("yields {type: 'interrupt', data: ...} when the graph emits a LangGraph interrupt", async () => { + // Mock a graph that interrupts on its first tool call + const mockGraph = { + invoke: vi.fn(), + streamEvents: async function* () { + yield { + event: "on_chain_start", + name: "LangGraph", + data: { input: {} }, + } + // Simulate the LangGraph interrupt envelope shape + yield { + event: "on_interrupt", // or whatever LangGraph v2 actually emits + data: { value: { interruptId: "perm-x", type: "permission-request", kind: "command", detail: { command: "ls", suggestedPattern: "ls" } } }, + } + }, + } + + const chunks: unknown[] = [] + for await (const c of streamAgent({ + entry: mockGraph, + input: { messages: [{ role: "user", content: "x" }] }, + routeParamNames: [], + signal: new AbortController().signal, + tools: [], + })) { + chunks.push(c) + } + + const interruptChunk = chunks.find((c) => (c as { type: string }).type === "interrupt") + expect(interruptChunk).toBeDefined() + }) +}) +``` + +(Note: this test may need adjustment based on LangGraph's actual event shape. The implementer should investigate LangGraph 1.x's interrupt-related stream-events output before writing the assertion.) + +- [ ] **Step 3: Run to verify failure** + +```bash +cd /Users/blove/repos/dawn && pnpm --filter @dawn-ai/langchain test -- agent-adapter-interrupt 2>&1 | tail -10 +``` +Expect: FAIL — `streamFromRunnable` does not currently emit `interrupt` chunks. + +- [ ] **Step 4: Add the interrupt case** + +In `streamFromRunnable`, add a new case in the for-await switch: + +```ts +case "on_interrupt": { + hasYielded = true + yield { + type: "interrupt" as const, + data: (event.data as { value?: unknown }).value, + } + break +} +``` + +(If LangGraph emits interrupts via a different event name, use that.) + +- [ ] **Step 5: Run + verify** + +```bash +cd /Users/blove/repos/dawn && pnpm --filter @dawn-ai/langchain test 2>&1 | tail -10 +``` +Expect: PASS. + +- [ ] **Step 6: Commit** + +```bash +cd /Users/blove/repos/dawn +git add packages/langchain/src/agent-adapter.ts packages/langchain/test/agent-adapter-interrupt.test.ts +git commit -m "feat(langchain): propagate LangGraph interrupt events to the SSE stream + +When the graph emits an interrupt (via LangGraph's interrupt() primitive), +the agent-adapter yields a {type: 'interrupt', data: payload} chunk so +the SSE serializer can render it as 'event: interrupt' to clients. + +Co-Authored-By: Claude Opus 4.7 " +``` + +--- + +### Task 9: Resume endpoint in the dev HTTP server + +**Files:** +- Modify: `packages/cli/src/lib/dev/runtime-server.ts` +- Create: `packages/cli/test/resume-endpoint.test.ts` +- Possibly modify: `packages/cli/src/lib/runtime/execute-route.ts` (build PermissionsStore + thread it; also: maintain a per-thread "pending interrupt" map) + +- [ ] **Step 1: Inspect the existing dev server** + +Read `packages/cli/src/lib/dev/runtime-server.ts`. Note how `/runs/stream` is implemented. The new `/threads/:thread_id/resume` route follows the same pattern. + +- [ ] **Step 2: Build the in-memory thread-state map** + +In `runtime-server.ts` (or a new sibling file), maintain: + +```ts +interface PendingInterrupt { + interruptId: string + // Resume function bound to the parked graph; called when resume arrives. + resolve: (decision: "once" | "always" | "deny") => void +} + +const pendingByThread = new Map() +``` + +When the agent emits an interrupt during a streamed run, the runtime registers the pending interrupt: + +```ts +pendingByThread.set(threadId, { interruptId: payload.interruptId, resolve }) +``` + +The `resolve` function is the callback that, when invoked, returns the decision to the LangGraph `interrupt()` call (via `Command({resume})`). + +- [ ] **Step 3: Implement the resume route** + +In the request dispatch of `runtime-server.ts`, add a match for `POST /threads/:thread_id/resume`: + +```ts +if (request.method === "POST" && /^\/threads\/[^/]+\/resume$/.test(url.pathname)) { + const threadId = url.pathname.split("/")[2]! + const body = await readJsonBody(request) + const { interrupt_id, decision } = body as { interrupt_id: string; decision: "once" | "always" | "deny" } + + const pending = pendingByThread.get(threadId) + if (!pending) { + response.writeHead(400, { "content-type": "application/json" }) + response.end(JSON.stringify({ error: "no parked interrupt for thread" })) + return + } + if (pending.interruptId !== interrupt_id) { + response.writeHead(409, { "content-type": "application/json" }) + response.end(JSON.stringify({ error: "stale interrupt_id" })) + return + } + pending.resolve(decision) + pendingByThread.delete(threadId) + response.writeHead(200, { "content-type": "application/json" }) + response.end(JSON.stringify({ ok: true })) + return +} +``` + +(Helper `readJsonBody` collects stdin into a buffer + parses; should exist already from the existing `/runs/stream` POST handling — reuse it.) + +- [ ] **Step 4: Write endpoint tests** + +Create `packages/cli/test/resume-endpoint.test.ts`. The test should: + +1. Start the runtime server in isolation (find the existing test pattern in `dev-command.test.ts` or similar). +2. Pre-populate the `pendingByThread` map with a known thread + interrupt id. +3. POST a valid resume — expect 200, expect the resolve callback to fire with the right decision. +4. POST with stale `interrupt_id` — expect 409. +5. POST without a pending interrupt — expect 400. +6. Invalid JSON body — expect 400. + +(Mirror the existing dev-server test scaffolding.) + +- [ ] **Step 5: Run + verify** + +```bash +cd /Users/blove/repos/dawn && pnpm --filter @dawn-ai/cli test -- resume-endpoint 2>&1 | tail -10 +cd /Users/blove/repos/dawn && pnpm test 2>&1 | tail -10 +``` +Expect: PASS. + +- [ ] **Step 6: Wire PermissionsStore + interrupt-bridging into execute-route.ts** + +In `packages/cli/src/lib/runtime/execute-route.ts`, after loading `dawn.config.ts`: + +```ts +import { createPermissionsStore } from "@dawn-ai/permissions" + +// ... existing config load logic +const permissionsConfig = loaded?.config.permissions +const envMode = process.env.DAWN_PERMISSIONS_MODE as + | "interactive" + | "non-interactive" + | "bypass" + | undefined +const mode = envMode ?? permissionsConfig?.mode ?? "interactive" + +const permissionsStore = createPermissionsStore({ + appRoot, + config: permissionsConfig + ? { + version: 1, + allow: permissionsConfig.allow ?? {}, + deny: permissionsConfig.deny ?? {}, + } + : undefined, + mode, +}) +await permissionsStore.load() + +// Thread into capability context: +const applied = await applyCapabilities(registry, routeDir, { + routeManifest, + descriptor, + descriptorRouteMap, + ...(configBackends ? { backends: configBackends } : {}), + permissions: permissionsStore, +}) +``` + +The interrupt bridge — connecting `interrupt()` (in the capability) to the SSE stream + the `pendingByThread` map — requires that the streamed run's `interrupt` chunks (Task 8) get translated into `pendingByThread.set(...)` calls inside the SSE serializer. The serializer is in the same file region as the existing SSE event emitter. Add: + +```ts +// in the stream chunk loop, for an "interrupt" chunk: +if (chunk.type === "interrupt") { + pendingByThread.set(threadId, { + interruptId: chunk.data.interruptId, + resolve: (decision) => { + // This callback resumes the parked LangGraph. + // Implementation: re-invoke the graph with Command({resume: decision}). + // The complexity here is connecting the resolve function back to the + // LangGraph that's currently parked. Approach: when the graph emits + // an interrupt, the `interrupt()` call resolves to whatever value is + // passed in via Command({resume}) on the next invocation. So `resolve` + // here needs to trigger a SECOND graph invocation with the resume value. + // + // For v1: simplest is to keep a Deferred that the original + // graph.invoke() awaits inside its tool's run(). When resolve fires, + // it settles the Deferred, the tool's run() returns to LangGraph, and + // the graph continues. + // + // This means the agent's run() function in the capability needs to + // wrap interrupt() in a way that integrates with this Deferred pattern. + // See implementer note below. + }, + }) + // Forward the interrupt event to the SSE stream: + yield { type: "interrupt", data: chunk.data } +} +``` + +**Implementer note:** the actual mechanism by which `resolve` translates into a LangGraph resume is the trickiest piece of this task. The LangGraph `interrupt()` primitive expects to be re-invoked via `Command({resume})` on the SAME graph instance with the SAME thread_id. The runtime server needs to keep the graph instance alive between the initial `streamEvents` and the resume call. + +If this proves intractable for v1, the FALLBACK design is: +1. The initial run's stream ends when interrupt fires (return early). +2. The next call to `/runs/stream` with the same `thread_id` includes `{ resume_value: decision }` in the payload. +3. The runtime constructs the graph fresh, calls `graph.invoke(Command({resume: decision}), {configurable: {thread_id}})`, and resumes from the checkpoint. + +This "fall-back" requires LangGraph's checkpointer to be enabled (it should be, for thread continuity). Verify that the existing /runs/stream payload supports this OR add a new field for it. + +If the fall-back is too disruptive, mark this task as DONE_WITH_CONCERNS and document the limitation: "Resume mechanism functional for path-jail/bash interrupts in the same process; multi-process resume requires the future Agent Protocol implementation (sub-project 7)." + +- [ ] **Step 7: Commit** + +```bash +cd /Users/blove/repos/dawn +git add -A +git commit -m "$(cat <<'EOF' +feat(cli): resume endpoint + PermissionsStore wiring + +Adds POST /threads/:thread_id/resume to the dev HTTP server. Maintains +an in-memory pendingByThread map of parked interrupts. The resume +handler validates the interrupt_id, invokes the resolver bound to the +parked graph, and returns 200. + +execute-route.ts constructs the PermissionsStore from dawn.config.ts ++ DAWN_PERMISSIONS_MODE env var and threads it into the +CapabilityMarkerContext. The workspace capability reads it. + +The interrupt-to-resume bridging is the trickiest piece; v1 uses a +Deferred-per-pending-interrupt pattern that requires the graph to stay +alive between the initial /runs/stream and the resume POST. + +Co-Authored-By: Claude Opus 4.7 +EOF +)" +``` + +--- + +## Phase D — Chat demo + +### Task 10: Seed permissions in chat demo's dawn.config.ts + +**Files:** +- Modify: `examples/chat/server/dawn.config.ts` + +- [ ] **Step 1: Update the config** + +Replace contents with: + +```ts +export default { + appDir: "src/app", + permissions: { + // Default mode (omit means "interactive") + // Seed a few obviously-safe commands so prompt fatigue is reasonable on first run. + allow: { + bash: ["ls", "pwd", "cat", "echo", "head", "tail", "wc"], + }, + // Block obviously-destructive patterns even when interactive. + deny: { + bash: ["rm -rf", "sudo", "chmod 777"], + }, + }, +} +``` + +- [ ] **Step 2: Verify the example builds** + +```bash +cd /Users/blove/repos/dawn/examples/chat/server && pnpm build 2>&1 | tail -5 +``` +Expect: `4 route(s) compiled`. + +- [ ] **Step 3: Commit** + +```bash +cd /Users/blove/repos/dawn +git add examples/chat/server/dawn.config.ts +git commit -m "feat(examples/chat): seed permissions allow/deny in dawn.config.ts + +Co-Authored-By: Claude Opus 4.7 " +``` + +--- + +### Task 11: Chat-web inline permission panel + resume proxy + +**Files:** +- Create: `examples/chat/web/app/api/permission-resume/route.ts` +- Modify: `examples/chat/web/app/page.tsx` + +- [ ] **Step 1: Write the resume proxy** + +Create `examples/chat/web/app/api/permission-resume/route.ts`: + +```ts +import { NextRequest } from "next/server" + +export const runtime = "nodejs" +export const dynamic = "force-dynamic" + +export async function POST(req: NextRequest): Promise { + const serverUrl = process.env.DAWN_SERVER_URL ?? "http://127.0.0.1:3001" + const body = (await req.json()) as { + threadId: string + interruptId: string + decision: "once" | "always" | "deny" + } + + const upstream = await fetch(`${serverUrl}/threads/${encodeURIComponent(body.threadId)}/resume`, { + method: "POST", + headers: { "content-type": "application/json" }, + body: JSON.stringify({ + interrupt_id: body.interruptId, + decision: body.decision, + }), + }) + + return new Response(upstream.body, { + status: upstream.status, + headers: { "content-type": "application/json" }, + }) +} +``` + +- [ ] **Step 2: Add the inline panel to page.tsx** + +Edit `examples/chat/web/app/page.tsx`. Add state for pending interrupt + handlers; render an inline panel when present: + +```tsx +const [pendingInterrupt, setPendingInterrupt] = useState<{ + interruptId: string + kind: "command" | "path" + detail: any // shape from SSE +} | null>(null) + +// Inside the SSE read loop, parse "event: interrupt" lines: +// Detection: lines.match(/^event: interrupt$/) then read the following "data: ..." line. +// Parse the JSON, setPendingInterrupt(parsedData). + +async function resolveInterrupt(decision: "once" | "always" | "deny") { + if (!pendingInterrupt || !threadId) return + await fetch("/api/permission-resume", { + method: "POST", + headers: { "content-type": "application/json" }, + body: JSON.stringify({ + threadId, + interruptId: pendingInterrupt.interruptId, + decision, + }), + }) + setPendingInterrupt(null) +} + +// Render — above the event log, when pendingInterrupt is non-null: +{pendingInterrupt && ( +
+ ⚠️ Permission request +

+ The agent wants to {pendingInterrupt.kind === "command" ? "run command:" : `${pendingInterrupt.detail.operation}:`} +

+ + {pendingInterrupt.kind === "command" ? pendingInterrupt.detail.command : pendingInterrupt.detail.path} + +
+ + + +
+
+)} +``` + +- [ ] **Step 3: Verify** + +```bash +cd /Users/blove/repos/dawn && pnpm --filter @dawn-example/chat-web typecheck 2>&1 | tail -5 +cd /Users/blove/repos/dawn && pnpm --filter @dawn-example/chat-web build 2>&1 | tail -5 +``` +Expect: clean typecheck + build. + +- [ ] **Step 4: Commit** + +```bash +cd /Users/blove/repos/dawn +git add examples/chat/web/ +git commit -m "feat(examples/chat-web): inline permission panel + resume proxy + +Co-Authored-By: Claude Opus 4.7 " +``` + +--- + +## Phase E — Smoke + PR + +### Task 12: Manual Chrome MCP smoke + +**Files:** none modified. + +- [ ] **Step 1: Start both dev servers** + +```bash +cd /Users/blove/repos/dawn/examples/chat/server && OPENAI_API_KEY="$(grep OPENAI_API_KEY /Users/blove/repos/dawn/.env | cut -d= -f2-)" pnpm dev & +cd /Users/blove/repos/dawn/examples/chat/web && pnpm dev & +``` + +Wait for both ("Dawn dev ready" + "Ready in Nms"). + +- [ ] **Step 2: Drive `/chat` with a prompt that triggers bash gating** + +Navigate Chrome MCP to `http://localhost:3000`, ensure `/chat` is selected, send: `Run `ls -la` in the workspace.` + +Expected behavior: +- An `event: interrupt` envelope arrives on the SSE stream. +- The inline panel renders: "The agent wants to run command: `ls -la`", with three buttons. +- The "Allow always" button labels with the suggested pattern (`ls -la` → first two tokens, so `ls -la`). +- Clicking "Allow once" sends the resume; the SSE log resumes streaming; the agent's tool call completes. +- Send the SAME prompt again → another interrupt fires (Once didn't persist). +- Re-send and click "Allow always for `ls -la`" → SSE log resumes; verify `.dawn/permissions.json` now has `allow.bash: ["ls -la"]`. +- Re-send a THIRD time → no interrupt; runs silently. + +- [ ] **Step 3: Trigger a denied command** + +Prompt: `Run `rm -rf /tmp/` in the workspace.` + +Expected: NO interrupt fires (config.deny has `rm -rf`). The tool returns the deny error; agent responds something like "I cannot run that command, it's blocked." + +- [ ] **Step 4: Trigger a path-outside-workspace prompt** + +Prompt: `Read /etc/hostname please.` + +Expected: interrupt fires with `{kind: "path", operation: "readFile", path: "/etc/hostname", suggestedPattern: "/etc/"}`. Click "Deny". The tool returns the deny error; agent acknowledges. + +- [ ] **Step 5: Switch to bypass mode + verify** + +Edit `examples/chat/server/dawn.config.ts` to set `permissions: { mode: "bypass" }`. Restart the chat-server. Re-run: `Read /etc/hostname please.` + +Expected: NO interrupt. The tool actually reads `/etc/hostname` and returns its contents. (The path-jail is disabled.) + +Restore `dawn.config.ts` to interactive mode before continuing. + +- [ ] **Step 6: Kill dev servers** + +```bash +pkill -f "dawn.*dev" +pkill -f "next dev -p 3000" +``` + +- [ ] **Step 7: If any step failed** + +Debug. Likely candidates: +- The interrupt envelope isn't appearing on the SSE stream → check Task 8's propagation. +- The resume endpoint returns 200 but the run doesn't resume → check the Deferred/Command-resume mechanism from Task 9. +- `.dawn/permissions.json` doesn't get written on "always" → check the PermissionsStore.addAllow path. + +Iterate until smoke is clean. No move to Task 13 until all 5 substeps succeed. + +--- + +### Task 13: Update phase memory + open PR + +**Files:** +- Modify: `/Users/blove/.claude/projects/-Users-blove-repos-dawn/memory/project_phase_status.md` + +- [ ] **Step 1: Update phase status memory** + +Edit `project_phase_status.md`. Find the section for sub-project 4 (recently shipped) and ADD a new entry beneath it for 4.5: + +``` +4.5. ✅ **HITL permissions** — shipped in [PR #TBD](https://github.com/cacheplane/0/pull/TBD). +Three modes (interactive default / non-interactive / bypass) in +dawn.config.ts. Path-jail escapes + every first-occurrence bash command +trigger an interrupt prompt with three approval scopes (Once / +Always-for-pattern / Deny). Smart-default pattern inference (first 2 +tokens for commands, parent dir for paths). Persisted decisions live +in .dawn/permissions.json (project-local, gitignored, auto-appended to +.gitignore). New @dawn-ai/permissions package ships types + pattern- +matching + PermissionsStore. SSE envelope shape is Agent-Protocol- +compatible. +``` + +Also bump the top summary if applicable. + +- [ ] **Step 2: Push the branch + open the PR** + +```bash +cd /Users/blove/repos/dawn +git push -u origin claude/phase3-permissions +gh pr create --title "feat: phase 3 — HITL permissions (sub-project 4.5)" --body "$(cat <<'EOF' +## Summary + +Sub-project 4.5 of the Dawn opinionated agent harness. Builds on +sub-project 4 (workspace capability, PR #170): replaces the hard-refuse- +on-path-jail-escape with an interrupt prompt; adds the same gating to +runBash. Three modes: interactive (default), non-interactive +(production / CI), bypass (explicit trust). Persisted "always" +decisions live in `.dawn/permissions.json` (project-local, gitignored). + +Spec: `docs/superpowers/specs/2026-05-21-phase3-permissions-design.md` +Plan: `docs/superpowers/plans/2026-05-21-phase3-permissions.md` + +## Changes + +- New `@dawn-ai/permissions` package: types + pattern-matching + suggested-pattern + PermissionsStore. +- Workspace capability gates every tool's run() through PermissionsStore. + - readFile/writeFile/listDir: gate only when path is outside the workspace. + - runBash: gate every command on first occurrence. + - bypass mode disables the path-jail entirely. +- DawnConfig + CapabilityMarkerContext extend with permissions/PermissionsStore. +- Dev HTTP server adds POST /threads/:thread_id/resume. +- Agent-adapter propagates LangGraph interrupt() as `event: interrupt` SSE envelopes. +- Chat demo seeds permissions in dawn.config.ts; web client renders inline permission panel. + +## Test plan + +- [x] Unit tests across @dawn-ai/permissions (suggested-pattern, matching, store) +- [x] Workspace capability tests covering interactive/non-interactive/bypass paths +- [x] Resume endpoint tests +- [x] Agent-adapter interrupt propagation test +- [x] Manual Chrome MCP smoke (5 scenarios) + +🤖 Generated with [Claude Code](https://claude.com/claude-code) +EOF +)" +``` + +- [ ] **Step 3: Update memory with the real PR number** + +After PR URL prints, replace `#TBD` with the real number in the memory note. + +- [ ] **Step 4: Enable auto-merge** + +```bash +gh pr merge --squash --delete-branch --auto +``` + +Wait for validate-green. + +--- + +## Self-review notes + +- **Spec coverage:** Every section maps to a task. Architecture (T1–T7). Modes (T6, T7). Persistence (T5). SSE envelope (T8). Resume endpoint (T9). Web client UX (T11). Config seeding (T10). Smoke (T12). +- **Known sharp edge in T9:** the interrupt-to-resume bridging mechanism is the most uncertain piece — depends on LangGraph 1.x checkpointer behavior and how `interrupt()` interacts with `streamEvents()`. The plan documents a fallback if the in-process Deferred pattern doesn't work cleanly. The implementer may need to investigate LangGraph 1.x's actual interrupt semantics empirically before locking in the design. +- **Placeholder scan:** clean. The `LangGraph interrupt event name` (`on_interrupt` vs other) is flagged as something to verify, not a placeholder. +- **Type consistency:** `PermissionMode`, `PermissionDecision`, `PermissionsFile`, `PermissionsStore` consistent throughout. The capability's `gate()` helper signature stable. The SSE envelope payload shape stable across the spec + plan. diff --git a/docs/superpowers/specs/2026-05-21-phase3-permissions-design.md b/docs/superpowers/specs/2026-05-21-phase3-permissions-design.md new file mode 100644 index 0000000..49b1c78 --- /dev/null +++ b/docs/superpowers/specs/2026-05-21-phase3-permissions-design.md @@ -0,0 +1,347 @@ +# Phase 3 — HITL Permissions Design (sub-project 4.5) + +**Status:** Spec +**Date:** 2026-05-21 +**Builds on:** sub-project 4 (workspace capability + pluggable backends, PR #170) + +## Goal + +Replace the workspace capability's hard-refuse-on-path-jail-escape behavior with a human-in-the-loop permission flow, and add the same prompt-for-approval gating to `runBash`. The user sees a permission prompt on first occurrence of any non-pre-approved bash command or out-of-workspace path operation; they can grant once, always-for-pattern, or deny. Persisted "always" decisions live in a project-local `.dawn/permissions.json` that's gitignored by default. Production deployments run in non-interactive or bypass mode with a curated allow/deny list. + +## Architecture + +A new `@dawn-ai/permissions` package ships the pattern-matching engine, the persistence store, and the public types. The existing workspace capability gains a permission check between the path-jail / bash invocation and the actual backend call. When a check returns "unknown", the capability emits LangGraph's `interrupt()` with a `PermissionRequest` payload; the parent run pauses; the SSE stream surfaces `event: interrupt` to the client; the client resolves the prompt and POSTs to `/threads/{thread_id}/resume`; the runtime resumes the graph with `Command({resume})` and the capability acts on the decision. + +The three operating modes — `"interactive"`, `"non-interactive"`, `"bypass"` — encode the realistic deployment shapes (interactive dev, production with config-only enforcement, intentional bypass for trusted environments). Mode comes from `dawn.config.ts`'s new `permissions` field or the `DAWN_PERMISSIONS_MODE` env var. + +The persistence file format mirrors the runtime API: a tool-keyed `{allow, deny}` object. Same shape in `.dawn/permissions.json` (runtime additions, per-developer, gitignored) and in `dawn.config.ts`'s `permissions.allow` / `permissions.deny` (design-time baseline, checked in). Effective permissions = config + runtime, with deny always winning. + +## Design Decisions + +### Scope: path-jail escapes + every bash command + +Matches the industry-standard tool-call-level gating used by Claude Code, OpenAI Codex CLI, and Cursor. Bash gets prompted on every first occurrence (Claude Code parity); path escapes that would previously hard-refuse now prompt. Other tools (`readFile`, `writeFile`, `listDir`) only prompt when the resolved path is outside the workspace — staying inside is silent, matching today's behavior. + +Rejected: "risky-pattern only" gating for bash. The set of risky commands is impossible to enumerate completely; missing patterns become silent failures of judgment. Prompting every first-occurrence command and relying on prefix-matched "always" persistence is the industry-validated approach. + +Rejected: generalized capability-driven gating (any tool can declare itself gateable). Premature surface design without empirical signal on what authors need. Build on 4.5's interrupt/resume/persistence infrastructure later if real demand surfaces (sub-project 4.6 territory). + +### Three approval scopes: Once / Always-for-pattern / Deny + +The user sees three buttons on every prompt: + +- **Once** — allows this single call. Next equivalent call prompts again. +- **Always** — persists an allow entry using the suggested pattern (prefix-matched). Future matching calls are silent. +- **Deny** — refuses this call. The tool returns an error to the agent. The agent can recover (apologize, try a different approach). No persistent deny entry — that's deferred to 4.6. + +Rejected "for-session" as a fourth scope. Adds cognitive load with marginal value. If users need transient approvals, they can grant Once repeatedly. + +### Pattern matching: smart defaults, no DSL + +- **Bash:** suggested pattern is the first 1–2 whitespace-separated tokens. `npm install react` → `npm install`. `ls` → `ls`. `git status` → `git status`. (Two tokens is the sweet spot — covers `npm install ` and `npm test` separately, vs lumping them as `npm`.) +- **Path:** suggested pattern is the parent directory of the requested path, ending with `/`. `/Users/blove/.zshrc` → `/Users/blove/`. `/var/log/app.log` → `/var/log/`. +- **Matching:** candidate is a prefix-match against the stored pattern. A bash candidate matches if its first tokens equal the pattern; a path candidate matches if it starts with the pattern. + +Rejected: a glob / regex DSL. Industry standard is prefix-matching with smart defaults; complex pattern editors add surface without proportional value at this stage. + +Rejected: an interactive pattern-editor in the prompt UI (Claude Code does this). For Dawn's smoke client (throwaway), the suggested pattern is fixed. Power users can edit `.dawn/permissions.json` directly if they need a narrower or broader pattern. + +### Persistence: `.dawn/permissions.json`, project-local, gitignored + +```json +{ + "version": 1, + "allow": { + "bash": ["npm install", "ls", "git status"], + "readFile": ["/Users/blove/"], + "writeFile": ["/tmp/dawn-scratch/"], + "listDir": ["/Users/blove/Documents/"] + }, + "deny": {} +} +``` + +Tool-keyed top-level structure. Arrays of prefix patterns per tool. Forward-compatible (new tool category = new key, zero migration). More concise than Claude Code's `Tool(pattern)` notation; trivially parseable; easy to hand-edit. + +The store appends `.dawn/` to the project's `.gitignore` on first write (idempotent). Manual edits to `.dawn/permissions.json` while the dev server is running require a server restart — the store does not live-watch the file. + +### Three modes: `interactive` / `non-interactive` / `bypass` + +```ts +permissions: { + mode: "interactive" | "non-interactive" | "bypass" // default: "interactive" + allow: { bash: ["npm install"], readFile: ["/Users/blove/"] } + deny: { bash: ["rm -rf", "sudo"] } +} +``` + +| Mode | Prompts? | `config.allow` | `config.deny` | `.dawn/permissions.json` | Unknown commands | Path-jail | +|---|---|---|---|---|---|---| +| `interactive` (default) | Yes | Auto-allow | Hard-refuse | Auto-allow | Prompt | Triggers prompt on escape | +| `non-interactive` | No | Auto-allow | Hard-refuse | Ignored | Hard-refuse (fail-closed) | Intact, hard-refuse on escape | +| `bypass` | No | Ignored | Ignored | Ignored | Run unchecked | Disabled | + +Production should use `non-interactive` with a curated `config.allow` and `config.deny`. CI should use `non-interactive` as well. Local development uses the default `interactive`. `bypass` is for explicit "operator knows what they're doing" scenarios (screencast, internal admin tools) — using it disables Dawn's safety boundary entirely; the mode name + docs make that obvious. + +### Config-seeded baseline + runtime additions + +`config.allow` and `config.deny` form the design-time baseline (committed to git, shared across developers). `.dawn/permissions.json` is the per-developer runtime additive (gitignored, accumulated by clicking "Always"). Effective permissions: + +``` +effective.allow[tool] = (config.allow[tool] ?? []) ∪ (runtime.allow[tool] ?? []) +effective.deny[tool] = (config.deny[tool] ?? []) ∪ (runtime.deny[tool] ?? []) +``` + +Both files use the same shape — runtime entries can be promoted to config by hand-copying. + +### Env-var escape hatch: `DAWN_PERMISSIONS_MODE` + +Setting `DAWN_PERMISSIONS_MODE=non-interactive` (or `=bypass`, `=interactive`) overrides `dawn.config.ts`'s `permissions.mode` for the session. Useful for ad-hoc switching without editing config (e.g., `DAWN_PERMISSIONS_MODE=bypass pnpm dev` during a demo). + +### SSE envelope shape (forward-compatible with Agent Protocol) + +``` +event: interrupt +data: { + "interrupt_id": "perm-1779200000-x7y2z", + "type": "permission-request", + "kind": "command" | "path", + "detail": { + // for kind=="command": + "command": "npm install react", + "suggestedPattern": "npm install" + // for kind=="path": + "operation": "readFile" | "writeFile" | "listDir", + "path": "/Users/blove/.zshrc", + "suggestedPattern": "/Users/blove/" + }, + "thread_id": "smoke-coord-1", + "call_id": "task-abc" // present when the interrupt fires inside a subagent +} +``` + +`interrupt_id` correlates prompt-to-resume. `suggestedPattern` is what the capability will persist if the user clicks "Always" — surfaced in the envelope so the client can render transparent button labels (e.g., "Allow always for `npm install`"). + +### Resume endpoint + +``` +POST /threads/{thread_id}/resume +content-type: application/json + +{ + "interrupt_id": "perm-1779200000-x7y2z", + "decision": "once" | "always" | "deny" +} +``` + +Runtime invokes `graph.invoke(Command({resume: decision}), {configurable: {thread_id}})`. The parked graph resumes, the capability acts on the decision, downstream SSE events continue normally. + +**Failure modes:** + +- Client closes SSE stream before resuming → run stays parked in the LangGraph checkpoint. Next invocation of the thread re-surfaces the interrupt. +- Stale `interrupt_id` → 409 with `{ error: "no pending interrupt with that id" }`. +- Mismatched `thread_id` → 400. + +This shape is **Agent-Protocol-compatible** — sub-project 7 will implement the spec on top of this without refactoring 4.5. + +### Web client UX (chat demo only) + +The chat-web smoke client is throwaway, so the UX bar is just "make the prompt usable." When `event: interrupt` with `type: "permission-request"` arrives: + +1. Pause auto-scroll. +2. Render an inline panel above the event log showing the operation + three buttons (Once / Always for `` / Deny). +3. On click, POST to `/api/permission-resume` (a new Next.js route proxy) which forwards to Dawn's resume endpoint. +4. Hide the panel; event log resumes streaming. + +Multiple pending interrupts (e.g., subagent emits an interrupt while parent is parked): queue one at a time, oldest first. Subagent interrupts include the subagent name in the panel header ("research subagent wants to..."). + +### Path-jail in bypass mode + +`mode: "bypass"` disables the workspace capability's path-jail entirely. `readFile("/etc/passwd")` proceeds, `writeFile("/etc/hosts", ...)` writes. This is intentional — bypass mode means "I trust the agent fully" — but it's also dangerous, so: + +- The mode name + docs make the implication explicit +- A console.warn fires on capability load: `[dawn:permissions] mode=bypass — path-jail disabled, all bash unrestricted. Do not use in production.` + +## Component Contracts + +### `@dawn-ai/permissions` types + +```ts +export interface PermissionsFile { + readonly version: 1 + readonly allow: Readonly> + readonly deny: Readonly> +} + +export type PermissionMode = "interactive" | "non-interactive" | "bypass" + +export interface PermissionRequest { + readonly interruptId: string + readonly kind: "command" | "path" + readonly detail: CommandDetail | PathDetail + readonly threadId: string + readonly callId?: string // when emitted from inside a subagent +} + +export interface CommandDetail { + readonly command: string + readonly suggestedPattern: string // first 1-2 tokens +} + +export interface PathDetail { + readonly path: string + readonly operation: "readFile" | "writeFile" | "listDir" + readonly suggestedPattern: string // parent dir, trailing slash +} + +export type PermissionDecision = "once" | "always" | "deny" + +export interface PermissionsStore { + load(): Promise + match(tool: string, candidate: string): "allow" | "deny" | "unknown" + addAllow(tool: string, pattern: string): Promise + mode: PermissionMode +} + +export function createPermissionsStore(opts: { + readonly appRoot: string + readonly config: PermissionsFile | undefined + readonly mode: PermissionMode +}): PermissionsStore +``` + +### `CapabilityMarkerContext` extension + +```ts +export interface CapabilityMarkerContext { + // ... existing fields + readonly permissions?: PermissionsStore // present when workspace capability is active +} +``` + +### `DawnConfig` extension + +```ts +export interface DawnConfig { + readonly appDir?: string + readonly backends?: { /* unchanged */ } + readonly permissions?: { + readonly mode?: PermissionMode + readonly allow?: Readonly> + readonly deny?: Readonly> + } +} +``` + +### Workspace capability changes + +For each of the four tools, the `run()` function becomes: + +```ts +async (input, ctx) => { + const { path } = SCHEMA.parse(input) + + // 1. Resolve + jail + let safe: string + try { + safe = pathJail(path, workspaceRoot) + } catch { + // Jail escape. In bypass mode, proceed anyway. Otherwise, gate. + if (permissions.mode === "bypass") { + safe = resolve(workspaceRoot, path) // absolute, but outside workspace + } else { + const decision = await requestPermission(permissions, "readFile", path, ctx) + if (decision === "deny") { + throw new Error(`Permission denied by user: ${path}`) + } + safe = resolve(workspaceRoot, path) + } + } + + // 2. Backend call + return fs.readFile(safe, backendContext(workspaceRoot, ctx.signal)) +} +``` + +For `runBash`, the gate fires unconditionally before invoking the backend (every bash command is gated when mode is interactive). + +The `requestPermission` helper handles: matching against the store first (allow/deny short-circuits); emitting `interrupt()` on unknown; receiving the resume; calling `addAllow` on "always"; returning the final decision. + +### Resume endpoint registration + +Dawn's CLI dev server registers `POST /threads/:thread_id/resume` alongside the existing `/runs/stream`. Handler: + +```ts +async function handleResume(req): Promise { + const { thread_id } = req.params + const { interrupt_id, decision } = await req.json() + const result = await runtime.resume({ threadId: thread_id, interruptId: interrupt_id, decision }) + if (result.kind === "stale") return new Response(JSON.stringify({ error: "no pending interrupt" }), { status: 409 }) + if (result.kind === "no-thread") return new Response(null, { status: 400 }) + return new Response(null, { status: 200 }) +} +``` + +The runtime maintains an in-memory `Map` for active interrupts so it can validate `interrupt_id` and forward the `Command({resume})` to the right graph. + +## File structure + +### New package + +``` +packages/permissions/ +├── package.json # @dawn-ai/permissions +├── tsconfig.json +├── vitest.config.ts +├── src/ +│ ├── index.ts +│ ├── types.ts +│ ├── permissions-store.ts +│ ├── pattern-matching.ts +│ └── suggested-pattern.ts +└── test/ + ├── permissions-store.test.ts + ├── pattern-matching.test.ts + └── suggested-pattern.test.ts +``` + +### New + modified in existing packages + +``` +packages/core/src/capabilities/built-in/workspace.ts # modified — adds permission check; supports bypass mode +packages/core/src/capabilities/types.ts # adds `permissions` field to CapabilityMarkerContext +packages/core/src/types.ts # extends DawnConfig with permissions field +packages/core/test/capabilities/workspace.test.ts # adds interrupt-flow tests +packages/cli/src/lib/runtime/execute-route.ts # constructs PermissionsStore + threads into context +packages/cli/src/lib/runtime/resume-endpoint.ts # new — HTTP handler +packages/cli/src/lib/server/ # registers the resume route +packages/cli/test/resume-endpoint.test.ts # new +packages/langchain/src/agent-adapter.ts # propagates interrupt() → `event: interrupt`; handles Command({resume}) +examples/chat/server/dawn.config.ts # demo: seeded allow + deny +examples/chat/web/app/api/permission-resume/route.ts # new — proxy +examples/chat/web/app/page.tsx # adds inline permission panel +memory/project_phase_status.md # mark sub-project 4.5 in progress +``` + +## Testing strategy + +Per Section 7 of the brainstorm — unit tests for pattern matching, suggested-pattern, store; integration test for resume endpoint; extended workspace capability tests for interrupt flow; manual Chrome MCP smoke covering interactive prompts (once / always / deny), config-only mode, bypass mode, subagent-emitted interrupts. + +No new LLM-driven CI tests — same policy as existing capabilities. + +## Out of scope (deferred) + +- **Persistent "deny always" entries** (sub-project 4.6) — schema accommodates a `deny` array but no UI yet for setting one. Today's deny path is per-call. +- **Generalized capability-driven gating** (sub-project 4.6) — any capability or user tool can declare "this operation needs confirmation." Builds on 4.5's interrupt/resume/persistence infrastructure. +- **Interactive pattern editor in the prompt UI** — power users edit `.dawn/permissions.json` directly. Pattern-editing in the web client is throwaway-demo territory. +- **Two-tier config** (project + user-global `~/.dawn/permissions.json`) — single project-local file is sufficient until someone asks for it. +- **Per-route permission overrides** — global per-app for v1. +- **Polished web client** — current chat-web is throwaway. The eventual polished client (separate sub-project) will have a true modal, optimistic UI, etc. + +## Known risks + +- **`bypass` mode disables the path-jail.** This is the explicit semantic but it's also load-bearing safety. Mitigation: warn loudly on capability load when bypass is active; document the implication in every reference to the mode. +- **Concurrent `addAllow` calls** could race on disk write. Mitigation: single-flight write queue in `PermissionsStore`. +- **The resume endpoint requires a stable `thread_id`.** If the chat-web client generates a new `thread_id` per page load (current behavior), then closing the tab loses the parked run. Mitigation: document the limitation; sub-project 7's Agent Protocol implementation introduces thread persistence properly. +- **Pattern matching false-positives.** Approving `npm install` once allows `npm install --global some-malicious-package`. Mitigation: docs explicitly call this out; users who want strict matching add exact patterns to `dawn.config.ts`'s `allow` (no `:*` semantics yet — every entry is prefix). Future schema extension could add exact-match syntax. +- **Production deployments forgetting to switch from `interactive` to `non-interactive`** would block forever waiting for prompts no one sees. Mitigation: docs strongly recommend `non-interactive` for production; `DAWN_PERMISSIONS_MODE` env var lets infra set it without touching code. diff --git a/examples/chat/server/dawn.config.ts b/examples/chat/server/dawn.config.ts index b1c6ea4..16c12f8 100644 --- a/examples/chat/server/dawn.config.ts +++ b/examples/chat/server/dawn.config.ts @@ -1 +1,14 @@ -export default {} +export default { + appDir: "src/app", + permissions: { + // Default mode (omitted) is "interactive" — the demo shows the permission flow. + // Seed a few obviously-safe commands so prompt fatigue is reasonable on first run. + allow: { + bash: ["ls", "pwd", "cat", "echo", "head", "tail", "wc"], + }, + // Block obviously-destructive patterns even when interactive. + deny: { + bash: ["rm -rf", "sudo", "chmod 777"], + }, + }, +} diff --git a/examples/chat/web/app/api/permission-resume/route.ts b/examples/chat/web/app/api/permission-resume/route.ts new file mode 100644 index 0000000..be0681b --- /dev/null +++ b/examples/chat/web/app/api/permission-resume/route.ts @@ -0,0 +1,31 @@ +import { NextRequest } from "next/server" + +export const runtime = "nodejs" +export const dynamic = "force-dynamic" + +export async function POST(req: NextRequest): Promise { + const serverUrl = process.env.DAWN_SERVER_URL ?? "http://127.0.0.1:3001" + const body = (await req.json()) as { + threadId: string + interruptId: string + decision: "once" | "always" | "deny" + } + + const upstream = await fetch( + `${serverUrl}/threads/${encodeURIComponent(body.threadId)}/resume`, + { + method: "POST", + headers: { "content-type": "application/json" }, + body: JSON.stringify({ + interrupt_id: body.interruptId, + decision: body.decision, + }), + }, + ) + + const text = await upstream.text() + return new Response(text, { + status: upstream.status, + headers: { "content-type": "application/json" }, + }) +} diff --git a/examples/chat/web/app/page.tsx b/examples/chat/web/app/page.tsx index 67c4867..bfd77d9 100644 --- a/examples/chat/web/app/page.tsx +++ b/examples/chat/web/app/page.tsx @@ -8,12 +8,39 @@ function newThreadId(): string { type RouteId = "chat" | "coordinator" +type PendingInterrupt = { + interruptId: string + type: string + kind: "command" | "path" + detail: { + command?: string + operation?: string + path?: string + suggestedPattern: string + } +} + export default function Page() { const [threadId, setThreadId] = useState(null) const [input, setInput] = useState("") const [events, setEvents] = useState([]) const [busy, setBusy] = useState(false) const [route, setRoute] = useState("chat") + const [pendingInterrupt, setPendingInterrupt] = useState(null) + + async function resolveInterrupt(decision: "once" | "always" | "deny") { + if (!pendingInterrupt || !threadId) return + await fetch("/api/permission-resume", { + method: "POST", + headers: { "content-type": "application/json" }, + body: JSON.stringify({ + threadId, + interruptId: pendingInterrupt.interruptId, + decision, + }), + }) + setPendingInterrupt(null) + } function switchRoute(next: RouteId) { if (next === route) return @@ -47,6 +74,7 @@ export default function Page() { const reader = res.body.getReader() const decoder = new TextDecoder() let buf = "" + let nextLineIsInterruptData = false while (true) { const { value, done } = await reader.read() if (done) break @@ -54,7 +82,27 @@ export default function Page() { const lines = buf.split("\n") buf = lines.pop() ?? "" for (const line of lines) { - if (line.trim()) setEvents((e) => [...e, line]) + if (!line.trim()) continue + if (line === "event: interrupt") { + nextLineIsInterruptData = true + setEvents((e) => [...e, line]) + continue + } + if (nextLineIsInterruptData && line.startsWith("data: ")) { + try { + const payload = JSON.parse(line.slice("data: ".length)) + setPendingInterrupt({ + interruptId: payload.interruptId, + type: payload.type, + kind: payload.kind, + detail: payload.detail, + }) + } catch { + /* ignore parse errors */ + } + nextLineIsInterruptData = false + } + setEvents((e) => [...e, line]) } } if (buf.trim()) setEvents((e) => [...e, buf]) @@ -111,6 +159,52 @@ export default function Page() { > {busy ? "Streaming…" : "Send"} + {pendingInterrupt && ( +
+ ⚠️ Permission request +

+ {pendingInterrupt.kind === "command" + ? "The agent wants to run command:" + : `The agent wants to ${pendingInterrupt.detail.operation}:`} +

+ + {pendingInterrupt.kind === "command" + ? pendingInterrupt.detail.command + : pendingInterrupt.detail.path} + +
+ + + +
+
+ )}
 {
+  const { request, response, threadId } = options
+
+  if (!threadId) {
+    sendJson(response, 400, createRequestErrorBody("Missing thread_id in resume URL"))
+    return
+  }
+
+  const rawBody = await readRequestBody(request)
+  const parsedBody = parseJson(rawBody)
+  if (!parsedBody.ok || !isRecord(parsedBody.value)) {
+    sendJson(response, 400, createRequestErrorBody("Malformed resume request body"))
+    return
+  }
+
+  const body = parsedBody.value
+  const interruptId = typeof body.interrupt_id === "string" ? body.interrupt_id : undefined
+  const decision = body.decision
+  if (!interruptId) {
+    sendJson(response, 400, createRequestErrorBody("Missing interrupt_id"))
+    return
+  }
+  if (decision !== "once" && decision !== "always" && decision !== "deny") {
+    sendJson(response, 400, createRequestErrorBody("decision must be 'once', 'always', or 'deny'"))
+    return
+  }
+
+  const pending = getPending(threadId)
+  if (!pending) {
+    sendJson(response, 400, createRequestErrorBody("No parked interrupt for thread"))
+    return
+  }
+  if (pending.interruptId !== interruptId) {
+    sendJson(response, 409, createRequestErrorBody("Stale interrupt_id"))
+    return
+  }
+
+  pending.resolve(decision)
+  clearPending(threadId)
+  sendJson(response, 200, { ok: true })
+}
+
 const SHUTDOWN_ABORTED = Symbol("shutdown-aborted")
 
 async function raceRequestAgainstShutdown(
@@ -423,6 +493,7 @@ interface RunsWaitRequest {
       readonly mode: "agent" | "chain" | "graph" | "workflow"
       readonly route_id: string
       readonly route_path: string
+      readonly thread_id?: string
     }
   }
   readonly on_completion: "delete"
diff --git a/packages/cli/src/lib/runtime/execute-route.ts b/packages/cli/src/lib/runtime/execute-route.ts
index 0a6df83..90ff427 100644
--- a/packages/cli/src/lib/runtime/execute-route.ts
+++ b/packages/cli/src/lib/runtime/execute-route.ts
@@ -20,6 +20,11 @@ import {
   resolveStateFields,
 } from "@dawn-ai/core"
 import { executeAgent, type SubagentResolver, streamAgent } from "@dawn-ai/langchain"
+import {
+  createPermissionsStore,
+  type PermissionMode,
+  type PermissionsStore,
+} from "@dawn-ai/permissions"
 import { type DawnAgent, isDawnAgent } from "@dawn-ai/sdk"
 import type { ExecBackend, FilesystemBackend } from "@dawn-ai/workspace"
 import { checkToolNameUniqueness } from "./check-tool-name-uniqueness.js"
@@ -135,6 +140,14 @@ export async function* streamResolvedRoute(options: {
   readonly routeId: string
   readonly routePath: string
   readonly signal?: AbortSignal
+  /**
+   * Stable per-conversation identifier forwarded to the agent-adapter as
+   * LangGraph's `thread_id`. When set, `interrupt()` calls park graph
+   * state in the checkpointer and the `/threads/:thread_id/resume`
+   * endpoint can replay them by handing a `PermissionDecision` back to the
+   * adapter via the pending-interrupts map.
+   */
+  readonly threadId?: string
 }): AsyncGenerator {
   const prepared = await prepareRouteExecution(options)
 
@@ -171,6 +184,7 @@ export async function* streamResolvedRoute(options: {
     ...(promptFragments && promptFragments.length > 0 ? { promptFragments } : {}),
     ...(streamTransformers && streamTransformers.length > 0 ? { streamTransformers } : {}),
     ...(subagentResolver ? { subagentResolver } : {}),
+    ...(options.threadId ? { threadId: options.threadId } : {}),
   })) {
     switch (chunk.type) {
       case "token":
@@ -189,6 +203,14 @@ export async function* streamResolvedRoute(options: {
       case "done":
         yield { type: "done", output: chunk.data }
         break
+      case "interrupt": {
+        // The agent-adapter registers the pending entry in
+        // pending-interrupts so the /threads/:thread_id/resume endpoint
+        // can correlate the POST. We just forward the chunk to the SSE
+        // consumer.
+        yield { type: "interrupt", data: chunk.data }
+        break
+      }
       default: {
         // Capability-contributed event types (e.g. plan_update from the planning capability).
         // The langchain layer widened AgentStreamChunk["type"] to allow arbitrary strings;
@@ -293,19 +315,48 @@ async function prepareRouteExecution(options: {
     let configBackends:
       | { readonly filesystem?: FilesystemBackend; readonly exec?: ExecBackend }
       | undefined
+    let permissionsConfig:
+      | {
+          readonly mode?: PermissionMode
+          readonly allow?: Readonly>
+          readonly deny?: Readonly>
+        }
+      | undefined
     try {
       const loaded = await loadDawnConfig({ appRoot: options.appRoot })
       configBackends = loaded.config.backends
+      permissionsConfig = loaded.config.permissions
     } catch {
       // No dawn.config.ts (or unreadable). The workspace capability falls
-      // back to its defaults (localFilesystem + localExec).
+      // back to its defaults (localFilesystem + localExec); permissions
+      // defaults to "interactive" with empty allow/deny.
     }
 
+    const envMode = process.env.DAWN_PERMISSIONS_MODE
+    const mode: PermissionMode =
+      envMode === "interactive" || envMode === "non-interactive" || envMode === "bypass"
+        ? envMode
+        : (permissionsConfig?.mode ?? "interactive")
+
+    const permissionsStore: PermissionsStore = createPermissionsStore({
+      appRoot: options.appRoot,
+      config: permissionsConfig
+        ? {
+            version: 1,
+            allow: permissionsConfig.allow ?? {},
+            deny: permissionsConfig.deny ?? {},
+          }
+        : undefined,
+      mode,
+    })
+    await permissionsStore.load()
+
     const applied = await applyCapabilities(registry, routeDir, {
       routeManifest,
       descriptor,
       descriptorRouteMap,
       ...(configBackends ? { backends: configBackends } : {}),
+      permissions: permissionsStore,
     })
 
     if (applied.errors.length > 0) {
diff --git a/packages/cli/src/lib/runtime/pending-interrupts.ts b/packages/cli/src/lib/runtime/pending-interrupts.ts
new file mode 100644
index 0000000..e716a15
--- /dev/null
+++ b/packages/cli/src/lib/runtime/pending-interrupts.ts
@@ -0,0 +1,16 @@
+/**
+ * Re-exports the pending-interrupts registry from `@dawn-ai/langchain`.
+ *
+ * The map itself lives in the langchain package so the agent-adapter (which
+ * parks the stream on interrupt) and the CLI's resume endpoint (which
+ * dispatches the user's decision) share the same module-level state without
+ * introducing a circular dep cli <-> langchain.
+ */
+
+export type { PendingInterrupt, ResumeDecision } from "@dawn-ai/langchain"
+export {
+  __resetPendingForTests,
+  clearPending,
+  getPending,
+  setPending,
+} from "@dawn-ai/langchain"
diff --git a/packages/cli/test/resume-endpoint.test.ts b/packages/cli/test/resume-endpoint.test.ts
new file mode 100644
index 0000000..092b396
--- /dev/null
+++ b/packages/cli/test/resume-endpoint.test.ts
@@ -0,0 +1,132 @@
+import { mkdir, mkdtemp, rm, writeFile } from "node:fs/promises"
+import { tmpdir } from "node:os"
+import { join } from "node:path"
+
+import { afterEach, beforeEach, describe, expect, test } from "vitest"
+
+import { startRuntimeServer } from "../src/lib/dev/runtime-server.js"
+import { __resetPendingForTests, setPending } from "../src/lib/runtime/pending-interrupts.js"
+
+const tempDirs: string[] = []
+const servers: Array<{ close: () => Promise }> = []
+
+beforeEach(() => {
+  __resetPendingForTests()
+})
+
+afterEach(async () => {
+  await Promise.all(tempDirs.splice(0).map((dir) => rm(dir, { force: true, recursive: true })))
+  await Promise.all(servers.splice(0).map((server) => server.close()))
+})
+
+describe("POST /threads/:thread_id/resume", () => {
+  test("returns 200 and invokes resolve when interrupt_id matches", async () => {
+    const appRoot = await createFixtureApp({
+      "dawn.config.ts": "export default {};\n",
+      "package.json": "{}\n",
+      "src/app/noop/index.ts": "export const graph = async () => ({ ok: true });\n",
+    })
+    const server = await startRuntimeServer({ appRoot })
+    servers.push(server)
+
+    let resolvedWith: string | undefined
+    setPending("thread-1", {
+      interruptId: "perm-abc",
+      resolve: (decision) => {
+        resolvedWith = decision
+      },
+    })
+
+    const response = await fetch(new URL("/threads/thread-1/resume", server.url), {
+      body: JSON.stringify({ interrupt_id: "perm-abc", decision: "once" }),
+      headers: { "content-type": "application/json" },
+      method: "POST",
+    })
+
+    expect(response.status).toBe(200)
+    expect(await response.json()).toEqual({ ok: true })
+    expect(resolvedWith).toBe("once")
+  })
+
+  test("returns 409 when interrupt_id is stale", async () => {
+    const appRoot = await createFixtureApp({
+      "dawn.config.ts": "export default {};\n",
+      "package.json": "{}\n",
+      "src/app/noop/index.ts": "export const graph = async () => ({ ok: true });\n",
+    })
+    const server = await startRuntimeServer({ appRoot })
+    servers.push(server)
+
+    setPending("thread-2", {
+      interruptId: "perm-current",
+      resolve: () => {
+        throw new Error("resolve should not fire for stale interrupt_id")
+      },
+    })
+
+    const response = await fetch(new URL("/threads/thread-2/resume", server.url), {
+      body: JSON.stringify({ interrupt_id: "perm-old", decision: "once" }),
+      headers: { "content-type": "application/json" },
+      method: "POST",
+    })
+
+    expect(response.status).toBe(409)
+    const body = (await response.json()) as { error?: { message?: string } }
+    expect(body.error?.message).toMatch(/stale/i)
+  })
+
+  test("returns 400 when no pending interrupt exists for the thread", async () => {
+    const appRoot = await createFixtureApp({
+      "dawn.config.ts": "export default {};\n",
+      "package.json": "{}\n",
+      "src/app/noop/index.ts": "export const graph = async () => ({ ok: true });\n",
+    })
+    const server = await startRuntimeServer({ appRoot })
+    servers.push(server)
+
+    const response = await fetch(new URL("/threads/missing-thread/resume", server.url), {
+      body: JSON.stringify({ interrupt_id: "perm-x", decision: "deny" }),
+      headers: { "content-type": "application/json" },
+      method: "POST",
+    })
+
+    expect(response.status).toBe(400)
+    const body = (await response.json()) as { error?: { message?: string } }
+    expect(body.error?.message).toMatch(/no parked interrupt/i)
+  })
+
+  test("returns 400 when decision is not one of once/always/deny", async () => {
+    const appRoot = await createFixtureApp({
+      "dawn.config.ts": "export default {};\n",
+      "package.json": "{}\n",
+      "src/app/noop/index.ts": "export const graph = async () => ({ ok: true });\n",
+    })
+    const server = await startRuntimeServer({ appRoot })
+    servers.push(server)
+
+    setPending("thread-3", { interruptId: "p1", resolve: () => undefined })
+
+    const response = await fetch(new URL("/threads/thread-3/resume", server.url), {
+      body: JSON.stringify({ interrupt_id: "p1", decision: "bogus" }),
+      headers: { "content-type": "application/json" },
+      method: "POST",
+    })
+
+    expect(response.status).toBe(400)
+  })
+})
+
+async function createFixtureApp(files: Readonly>) {
+  const appRoot = await mkdtemp(join(tmpdir(), "dawn-cli-resume-"))
+  tempDirs.push(appRoot)
+
+  await Promise.all(
+    Object.entries(files).map(async ([relativePath, source]) => {
+      const filePath = join(appRoot, relativePath)
+      await mkdir(join(filePath, ".."), { recursive: true })
+      await writeFile(filePath, source, "utf8")
+    }),
+  )
+
+  return appRoot
+}
diff --git a/packages/cli/test/typegen-command.test.ts b/packages/cli/test/typegen-command.test.ts
index 260037a..b89c738 100644
--- a/packages/cli/test/typegen-command.test.ts
+++ b/packages/cli/test/typegen-command.test.ts
@@ -186,6 +186,7 @@ describe("dawn typegen", () => {
     const coreTarball = await packPackage("@dawn-ai/core", packsRoot)
     const langchainTarball = await packPackage("@dawn-ai/langchain", packsRoot)
     const langgraphTarball = await packPackage("@dawn-ai/langgraph", packsRoot)
+    const permissionsTarball = await packPackage("@dawn-ai/permissions", packsRoot)
     const sdkTarball = await packPackage("@dawn-ai/sdk", packsRoot)
     const workspaceTarball = await packPackage("@dawn-ai/workspace", packsRoot)
 
@@ -207,6 +208,7 @@ describe("dawn typegen", () => {
               "@dawn-ai/core": `file:${coreTarball}`,
               "@dawn-ai/langchain": `file:${langchainTarball}`,
               "@dawn-ai/langgraph": `file:${langgraphTarball}`,
+              "@dawn-ai/permissions": `file:${permissionsTarball}`,
               "@dawn-ai/sdk": `file:${sdkTarball}`,
               "@dawn-ai/workspace": `file:${workspaceTarball}`,
             },
diff --git a/packages/cli/tsconfig.build.json b/packages/cli/tsconfig.build.json
index bfb89bb..1278b9d 100644
--- a/packages/cli/tsconfig.build.json
+++ b/packages/cli/tsconfig.build.json
@@ -8,7 +8,8 @@
       "@dawn-ai/core": ["../core/src/index.ts"],
       "@dawn-ai/langchain": ["../langchain/src/index.ts"],
       "@dawn-ai/langgraph": ["../langgraph/src/index.ts"],
-      "@dawn-ai/langgraph/*": ["../langgraph/src/*.ts"]
+      "@dawn-ai/langgraph/*": ["../langgraph/src/*.ts"],
+      "@dawn-ai/permissions": ["../permissions/src/index.ts"]
     },
     "rootDir": "src"
   },
@@ -22,6 +23,9 @@
     },
     {
       "path": "../langgraph"
+    },
+    {
+      "path": "../permissions"
     }
   ]
 }
diff --git a/packages/core/package.json b/packages/core/package.json
index de78123..5c33687 100644
--- a/packages/core/package.json
+++ b/packages/core/package.json
@@ -36,8 +36,10 @@
     "typecheck": "tsc --noEmit"
   },
   "dependencies": {
+    "@dawn-ai/permissions": "workspace:*",
     "@dawn-ai/sdk": "workspace:*",
     "@dawn-ai/workspace": "workspace:*",
+    "@langchain/langgraph": "^1.3.0",
     "tsx": "^4.8.1",
     "typescript": "5.8.3",
     "zod": "^4.4.3"
diff --git a/packages/core/src/capabilities/built-in/workspace.ts b/packages/core/src/capabilities/built-in/workspace.ts
index 73a7d0e..752aebd 100644
--- a/packages/core/src/capabilities/built-in/workspace.ts
+++ b/packages/core/src/capabilities/built-in/workspace.ts
@@ -1,8 +1,10 @@
 import { existsSync } from "node:fs"
 import { join, resolve, sep } from "node:path"
+import type { PermissionsStore } from "@dawn-ai/permissions"
+import { suggestedCommandPattern, suggestedPathPattern } from "@dawn-ai/permissions"
 import type { BackendContext, ExecBackend, FilesystemBackend } from "@dawn-ai/workspace"
-
 import { localExec, localFilesystem } from "@dawn-ai/workspace"
+import { interrupt } from "@langchain/langgraph"
 import { z } from "zod"
 
 import type { CapabilityMarker, DawnToolDefinition } from "../types.js"
@@ -23,16 +25,110 @@ const WRITE_FILE_INPUT = z.object({ path: z.string().min(1), content: z.string()
 const LIST_DIR_INPUT = z.object({ path: z.string().default(".") })
 const RUN_BASH_INPUT = z.object({ command: z.string().min(1) })
 
-function pathJail(userPath: string, workspaceRoot: string): string {
-  const resolved = resolve(workspaceRoot, userPath)
-  if (resolved !== workspaceRoot && !resolved.startsWith(workspaceRoot + sep)) {
-    throw new Error(`Path is outside workspace: ${userPath}`)
+function backendContext(workspaceRoot: string, signal: AbortSignal): BackendContext {
+  return { signal, workspaceRoot }
+}
+
+type GateResult = { allowed: true } | { allowed: false; reason: string }
+
+async function gatePathOp(
+  permissions: PermissionsStore | undefined,
+  operation: "readFile" | "writeFile" | "listDir",
+  absPath: string,
+  workspaceRoot: string,
+): Promise {
+  // If permissions store is absent, allow (legacy behavior — capability used without permissions context).
+  if (!permissions) return { allowed: true }
+  if (permissions.mode === "bypass") return { allowed: true }
+
+  const insideWorkspace = absPath === workspaceRoot || absPath.startsWith(workspaceRoot + sep)
+
+  // Inside workspace: always allow silently.
+  if (insideWorkspace) return { allowed: true }
+
+  // Outside workspace: consult the store.
+  const decision = permissions.match(operation, absPath)
+  if (decision === "allow") return { allowed: true }
+  if (decision === "deny") {
+    return { allowed: false, reason: `Permission denied by user: ${absPath}` }
   }
-  return resolved
+  // decision === "unknown"
+  if (permissions.mode === "non-interactive") {
+    return { allowed: false, reason: `Permission denied (fail-closed): ${absPath}` }
+  }
+  // Interactive: emit LangGraph interrupt and await user decision.
+  const result = await emitPermissionInterrupt({
+    kind: "path",
+    operation,
+    path: absPath,
+    permissions,
+  })
+  if (result === "deny") {
+    return { allowed: false, reason: `Permission denied by user: ${absPath}` }
+  }
+  return { allowed: true }
 }
 
-function backendContext(workspaceRoot: string, signal: AbortSignal): BackendContext {
-  return { signal, workspaceRoot }
+async function gateBashOp(
+  permissions: PermissionsStore | undefined,
+  command: string,
+): Promise {
+  if (!permissions) return { allowed: true }
+  if (permissions.mode === "bypass") return { allowed: true }
+
+  const decision = permissions.match("bash", command)
+  if (decision === "allow") return { allowed: true }
+  if (decision === "deny") {
+    return { allowed: false, reason: `Permission denied by user: ${command}` }
+  }
+  if (permissions.mode === "non-interactive") {
+    return { allowed: false, reason: `Permission denied (fail-closed): ${command}` }
+  }
+  const result = await emitPermissionInterrupt({
+    kind: "command",
+    command,
+    permissions,
+  })
+  if (result === "deny") {
+    return { allowed: false, reason: `Permission denied by user: ${command}` }
+  }
+  return { allowed: true }
+}
+
+interface InterruptArgs {
+  kind: "command" | "path"
+  command?: string
+  operation?: "readFile" | "writeFile" | "listDir"
+  path?: string
+  permissions: PermissionsStore
+}
+
+async function emitPermissionInterrupt(args: InterruptArgs): Promise<"allow" | "deny"> {
+  const interruptId = `perm-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`
+  const suggestedPattern =
+    args.kind === "command"
+      ? suggestedCommandPattern(args.command ?? "")
+      : suggestedPathPattern(args.path ?? "")
+  const payload = {
+    interruptId,
+    type: "permission-request" as const,
+    kind: args.kind,
+    detail:
+      args.kind === "command"
+        ? { command: args.command ?? "", suggestedPattern }
+        : {
+            operation: args.operation ?? "readFile",
+            path: args.path ?? "",
+            suggestedPattern,
+          },
+  }
+  const decision = interrupt(payload) as "once" | "always" | "deny"
+  if (decision === "deny") return "deny"
+  if (decision === "always") {
+    const tool = args.kind === "command" ? "bash" : (args.operation ?? "readFile")
+    await args.permissions.addAllow(tool, suggestedPattern)
+  }
+  return "allow"
 }
 
 interface OverridableTool extends DawnToolDefinition {
@@ -43,6 +139,7 @@ function buildWorkspaceTools(
   workspaceRoot: string,
   fs: FilesystemBackend,
   exec: ExecBackend,
+  permissions: PermissionsStore | undefined,
 ): readonly OverridableTool[] {
   const readFile: OverridableTool = {
     name: "readFile",
@@ -51,8 +148,12 @@ function buildWorkspaceTools(
     overridable: true,
     run: async (input, ctx) => {
       const { path } = READ_FILE_INPUT.parse(input)
-      const safe = pathJail(path, workspaceRoot)
-      return fs.readFile(safe, backendContext(workspaceRoot, ctx.signal))
+      const absPath = resolve(workspaceRoot, path)
+      const gate = await gatePathOp(permissions, "readFile", absPath, workspaceRoot)
+      if (!gate.allowed) {
+        throw new Error(gate.reason)
+      }
+      return fs.readFile(absPath, backendContext(workspaceRoot, ctx.signal))
     },
   }
   const writeFile: OverridableTool = {
@@ -62,8 +163,12 @@ function buildWorkspaceTools(
     overridable: true,
     run: async (input, ctx) => {
       const { path, content } = WRITE_FILE_INPUT.parse(input)
-      const safe = pathJail(path, workspaceRoot)
-      const result = await fs.writeFile(safe, content, backendContext(workspaceRoot, ctx.signal))
+      const absPath = resolve(workspaceRoot, path)
+      const gate = await gatePathOp(permissions, "writeFile", absPath, workspaceRoot)
+      if (!gate.allowed) {
+        throw new Error(gate.reason)
+      }
+      const result = await fs.writeFile(absPath, content, backendContext(workspaceRoot, ctx.signal))
       return `wrote ${result.bytesWritten} bytes to ${path}`
     },
   }
@@ -74,8 +179,12 @@ function buildWorkspaceTools(
     overridable: true,
     run: async (input, ctx) => {
       const { path } = LIST_DIR_INPUT.parse(input)
-      const safe = pathJail(path, workspaceRoot)
-      const entries = await fs.listDir(safe, backendContext(workspaceRoot, ctx.signal))
+      const absPath = resolve(workspaceRoot, path)
+      const gate = await gatePathOp(permissions, "listDir", absPath, workspaceRoot)
+      if (!gate.allowed) {
+        throw new Error(gate.reason)
+      }
+      const entries = await fs.listDir(absPath, backendContext(workspaceRoot, ctx.signal))
       return [...entries]
     },
   }
@@ -86,6 +195,10 @@ function buildWorkspaceTools(
     overridable: true,
     run: async (input, ctx) => {
       const { command } = RUN_BASH_INPUT.parse(input)
+      const gate = await gateBashOp(permissions, command)
+      if (!gate.allowed) {
+        throw new Error(gate.reason)
+      }
       return exec.runCommand({ command }, backendContext(workspaceRoot, ctx.signal))
     },
   }
@@ -101,7 +214,15 @@ export function createWorkspaceMarker(): CapabilityMarker {
       if (!existsSync(root)) return {}
       const fs = context.backends?.filesystem ?? localFilesystem()
       const exec = context.backends?.exec ?? localExec()
-      return { tools: buildWorkspaceTools(root, fs, exec) }
+      const permissions = context.permissions
+
+      if (permissions?.mode === "bypass") {
+        console.warn(
+          "[dawn:permissions] mode=bypass — path-jail disabled, all bash unrestricted. Do not use in production.",
+        )
+      }
+
+      return { tools: buildWorkspaceTools(root, fs, exec, permissions) }
     },
   }
 }
diff --git a/packages/core/src/capabilities/types.ts b/packages/core/src/capabilities/types.ts
index 74d6783..1f366bd 100644
--- a/packages/core/src/capabilities/types.ts
+++ b/packages/core/src/capabilities/types.ts
@@ -1,3 +1,4 @@
+import type { PermissionsStore } from "@dawn-ai/permissions"
 import type { DawnAgent } from "@dawn-ai/sdk"
 import type { ExecBackend, FilesystemBackend } from "@dawn-ai/workspace"
 import type { ResolvedStateField, RouteManifest } from "../types.js"
@@ -10,6 +11,7 @@ export interface CapabilityMarkerContext {
     readonly filesystem?: FilesystemBackend
     readonly exec?: ExecBackend
   }
+  readonly permissions?: PermissionsStore
 }
 
 export interface DawnToolDefinition {
diff --git a/packages/core/src/types.ts b/packages/core/src/types.ts
index 88c86ed..672dcc0 100644
--- a/packages/core/src/types.ts
+++ b/packages/core/src/types.ts
@@ -1,3 +1,4 @@
+import type { PermissionMode } from "@dawn-ai/permissions"
 import type { RouteKind } from "@dawn-ai/sdk"
 import type { ExecBackend, FilesystemBackend } from "@dawn-ai/workspace"
 
@@ -9,6 +10,11 @@ export interface DawnConfig {
     readonly filesystem?: FilesystemBackend
     readonly exec?: ExecBackend
   }
+  readonly permissions?: {
+    readonly mode?: PermissionMode
+    readonly allow?: Readonly>
+    readonly deny?: Readonly>
+  }
 }
 
 export type RouteSegment =
diff --git a/packages/core/test/capabilities/workspace.test.ts b/packages/core/test/capabilities/workspace.test.ts
index 041aa3a..3d2d785 100644
--- a/packages/core/test/capabilities/workspace.test.ts
+++ b/packages/core/test/capabilities/workspace.test.ts
@@ -1,6 +1,7 @@
 import { mkdirSync, mkdtempSync, rmSync, writeFileSync } from "node:fs"
 import { tmpdir } from "node:os"
 import { join } from "node:path"
+import { createPermissionsStore } from "@dawn-ai/permissions"
 import { afterEach, beforeEach, describe, expect, it, vi } from "vitest"
 
 import { createWorkspaceMarker } from "../../src/capabilities/built-in/workspace.js"
@@ -107,12 +108,63 @@ describe("createWorkspaceMarker — load", () => {
     expect(firstCall[0]).toBe(join(process.cwd(), "workspace", "hello.txt"))
   })
 
-  it("rejects path-jail escapes with a clear error", async () => {
-    const contribution = await createWorkspaceMarker().load(routeDir, ctx())
+  it("rejects path-jail escapes when permissions store is present (non-interactive mode)", async () => {
+    const permissions = createPermissionsStore({
+      appRoot,
+      config: undefined,
+      mode: "non-interactive",
+    })
+    await permissions.load()
+    const contribution = await createWorkspaceMarker().load(routeDir, ctx({ permissions }))
     const readTool = findTool(contribution.tools, "readFile")
     await expect(
       readTool.run({ path: "../../etc/passwd" }, { signal: new AbortController().signal }),
-    ).rejects.toThrow(/outside workspace/i)
+    ).rejects.toThrow(/permission denied/i)
+  })
+
+  it("in bypass mode, every operation proceeds (path-jail disabled)", async () => {
+    const permissions = createPermissionsStore({
+      appRoot,
+      config: undefined,
+      mode: "bypass",
+    })
+    await permissions.load()
+    const contribution = await createWorkspaceMarker().load(routeDir, ctx({ permissions }))
+    const readTool = findTool(contribution.tools, "readFile")
+    // The file doesn't exist outside the workspace, so we expect ENOENT, NOT "outside workspace"
+    await expect(
+      readTool.run({ path: "../../etc/some-fake-file" }, { signal: new AbortController().signal }),
+    ).rejects.not.toThrow(/outside workspace|permission denied/i)
+  })
+
+  it("in non-interactive mode, unknown bash commands hard-refuse", async () => {
+    const permissions = createPermissionsStore({
+      appRoot,
+      config: undefined,
+      mode: "non-interactive",
+    })
+    await permissions.load()
+    const contribution = await createWorkspaceMarker().load(routeDir, ctx({ permissions }))
+    const runBash = findTool(contribution.tools, "runBash")
+    await expect(
+      runBash.run({ command: "ls" }, { signal: new AbortController().signal }),
+    ).rejects.toThrow(/permission denied|fail-closed/i)
+  })
+
+  it("config-seeded allow lets a bash command through in non-interactive mode", async () => {
+    const permissions = createPermissionsStore({
+      appRoot,
+      config: { version: 1, allow: { bash: ["echo"] }, deny: {} },
+      mode: "non-interactive",
+    })
+    await permissions.load()
+    const contribution = await createWorkspaceMarker().load(routeDir, ctx({ permissions }))
+    const runBash = findTool(contribution.tools, "runBash")
+    const result = await runBash.run(
+      { command: "echo hi" },
+      { signal: new AbortController().signal },
+    )
+    expect((result as { stdout: string }).stdout.trim()).toBe("hi")
   })
 
   it("uses the default local backends when none configured", async () => {
diff --git a/packages/langchain/src/agent-adapter.ts b/packages/langchain/src/agent-adapter.ts
index 11148d8..cf63cee 100644
--- a/packages/langchain/src/agent-adapter.ts
+++ b/packages/langchain/src/agent-adapter.ts
@@ -2,8 +2,15 @@ import type { PromptFragment, StreamTransformer } from "@dawn-ai/core"
 import type { DawnAgent, RetryConfig } from "@dawn-ai/sdk"
 import { isDawnAgent } from "@dawn-ai/sdk"
 import { type BaseMessageLike, HumanMessage } from "@langchain/core/messages"
+import { Command, MemorySaver } from "@langchain/langgraph"
 import { createChatModel } from "./chat-model-factory.js"
 import { resolveProvider } from "./model-provider-resolver.js"
+import {
+  clearPending,
+  type PendingInterrupt,
+  type ResumeDecision,
+  setPending,
+} from "./pending-interrupts.js"
 import { isRetryableError, withRetry } from "./retry.js"
 import { materializeStateSchema, type ResolvedStateField } from "./state-adapter.js"
 import {
@@ -50,6 +57,20 @@ function assertAgentLike(entry: unknown): asserts entry is AgentLike {
 // changes, the cache key must include a hash of the fragments/transformers.
 const materializedAgents = new WeakMap()
 
+/**
+ * Process-level checkpointer shared by every materialized agent. LangGraph
+ * requires a checkpointer + a stable `thread_id` for `interrupt()` to park
+ * graph state and for `new Command({resume})` to replay from the parked
+ * step. The dev/runtime server passes the client-supplied
+ * `metadata.dawn.thread_id` through to `streamAgent`, which forwards it to
+ * `config.configurable.thread_id`.
+ *
+ * Single shared instance is fine for in-process runtimes; revisit if the
+ * runtime ever runs across processes (each would have its own saver and
+ * resume would need a distributed checkpointer like SQLite/Postgres).
+ */
+const sharedCheckpointer = new MemorySaver()
+
 export function composePromptMessages(
   systemPrompt: string,
   promptFragments: readonly PromptFragment[],
@@ -104,6 +125,9 @@ async function materializeAgent(
         ? (state: Record) =>
             composePromptMessages(descriptor.systemPrompt, fragments, state)
         : descriptor.systemPrompt,
+    // Required so `interrupt()` can park graph state and `Command({resume})`
+    // can replay it. Paired with `config.configurable.thread_id`.
+    checkpointer: sharedCheckpointer,
   }
 
   if (stateFields && stateFields.length > 0) {
@@ -135,10 +159,130 @@ export async function materializeAgentGraph(options: {
 }
 
 export interface AgentStreamChunk {
-  readonly type: "token" | "tool_call" | "tool_result" | "done" | (string & {})
+  readonly type: "token" | "tool_call" | "tool_result" | "interrupt" | "done" | (string & {})
   readonly data: unknown
 }
 
+/**
+ * LangGraph 1.x's `interrupt()` throws a `GraphInterrupt` from inside the tool
+ * node. Under `streamEvents` v2 this surfaces as an `on_tool_error` whose
+ * `event.data.error` is the `GraphInterrupt` instance — its `.name` is
+ * `"GraphInterrupt"` and its `.interrupts` array carries the `{ id, value }`
+ * entries we need. The top-level `on_chain_end` for `LangGraph` does NOT
+ * include `__interrupt__` in this code path (that key appears only on the
+ * `invoke`/`stream` return value), so detection must happen at the tool error.
+ *
+ * We still keep the `__interrupt__` extractor for `on_chain_end` as a
+ * defensive fallback in case a future LangGraph version surfaces interrupts
+ * via the chain output too.
+ */
+const INTERRUPT_KEY = "__interrupt__"
+
+interface RawInterruptEntry {
+  readonly value?: unknown
+  readonly id?: string
+  readonly when?: string
+  readonly resumable?: boolean
+}
+
+function extractInterrupts(output: unknown): readonly RawInterruptEntry[] | undefined {
+  if (!output || typeof output !== "object") return undefined
+  const maybe = (output as Record)[INTERRUPT_KEY]
+  if (!Array.isArray(maybe)) return undefined
+  return maybe as readonly RawInterruptEntry[]
+}
+
+/**
+ * Detects a thrown `GraphInterrupt` surfaced via `on_tool_error`.
+ *
+ * LangGraph's `interrupt()` throws a `GraphInterrupt` whose `.message` is
+ * `JSON.stringify(interrupts)` and whose `.interrupts` array carries the
+ * `{ id, value }` entries. By the time the error reaches `streamEvents`'
+ * `data.error` it has already been stringified — typically into
+ * `\n\nGraphInterrupt: \n    at ...stack`.
+ *
+ * We handle three shapes defensively:
+ *   - object with `.name === "GraphInterrupt"` and `.interrupts` array
+ *     (in case a future LangGraph version surfaces the live error)
+ *   - object/Error whose stringified message starts with a JSON array
+ *   - bare string with the `GraphInterrupt:` marker
+ */
+function extractInterruptsFromError(error: unknown): readonly RawInterruptEntry[] | undefined {
+  if (!error) return undefined
+
+  if (typeof error === "object") {
+    const e = error as { name?: unknown; interrupts?: unknown; message?: unknown }
+    if (
+      e.name === "GraphInterrupt" &&
+      Array.isArray(e.interrupts) &&
+      e.interrupts.length > 0
+    ) {
+      return e.interrupts as readonly RawInterruptEntry[]
+    }
+    if (typeof e.message === "string") {
+      const parsed = parseInterruptStringMessage(e.message)
+      if (parsed) return parsed
+    }
+  }
+
+  if (typeof error === "string") {
+    const parsed = parseInterruptStringMessage(error)
+    if (parsed) return parsed
+  }
+
+  return undefined
+}
+
+/**
+ * Parses the stringified form of a GraphInterrupt's message. The string
+ * begins with `JSON.stringify(interrupts, null, 2)` and is followed by
+ * `\n\nGraphInterrupt: ...\n    at ...` stack metadata. We slice the leading
+ * JSON array up to the first `]` followed by a newline + non-JSON sentinel
+ * and parse it.
+ */
+function parseInterruptStringMessage(
+  text: string,
+): readonly RawInterruptEntry[] | undefined {
+  const trimmed = text.trimStart()
+  if (!trimmed.startsWith("[")) return undefined
+  // Find the matching closing bracket by bracket counting at depth 0 — robust
+  // against nested arrays in the interrupt payloads.
+  let depth = 0
+  let inString = false
+  let escape = false
+  let end = -1
+  for (let i = 0; i < trimmed.length; i++) {
+    const ch = trimmed[i]
+    if (escape) {
+      escape = false
+      continue
+    }
+    if (inString) {
+      if (ch === "\\") escape = true
+      else if (ch === '"') inString = false
+      continue
+    }
+    if (ch === '"') inString = true
+    else if (ch === "[") depth++
+    else if (ch === "]") {
+      depth--
+      if (depth === 0) {
+        end = i
+        break
+      }
+    }
+  }
+  if (end === -1) return undefined
+  const json = trimmed.slice(0, end + 1)
+  try {
+    const parsed = JSON.parse(json)
+    if (!Array.isArray(parsed) || parsed.length === 0) return undefined
+    return parsed as readonly RawInterruptEntry[]
+  } catch {
+    return undefined
+  }
+}
+
 export interface AgentOptions {
   readonly entry: unknown
   readonly input: unknown
@@ -158,6 +302,14 @@ export interface AgentOptions {
    * drained alongside normal stream chunks (no module-level mutable state).
    */
   readonly subagentResolver?: SubagentResolver
+  /**
+   * Stable per-conversation identifier used as LangGraph's `thread_id`. When
+   * set, the agent-adapter wires it into `config.configurable.thread_id` so
+   * the checkpointer can park interrupted state. Required for resume to work
+   * — without a thread_id, an interrupt ends the stream with no way to
+   * replay.
+   */
+  readonly threadId?: string
 }
 
 export async function executeAgent(options: AgentOptions): Promise {
@@ -235,6 +387,7 @@ export async function* streamAgent(options: AgentOptions): AsyncGenerator 0) {
-    config.configurable = params
+  const configurable: Record = { ...params }
+  if (options.threadId !== undefined && options.threadId.length > 0) {
+    configurable.thread_id = options.threadId
+  }
+  if (Object.keys(configurable).length > 0) {
+    config.configurable = configurable
   }
 
   return { agentInput, config }
@@ -295,6 +453,7 @@ async function* streamFromRunnable(
   streamTransformers?: readonly StreamTransformer[],
   subagentEvents?: AgentStreamChunk[],
   streamContext?: SubagentStreamContext,
+  threadId?: string,
 ): AsyncGenerator {
   // Drains any pending subagent events queued by the bridge. Called before
   // each normal yield to keep ordering predictable on the single event loop.
@@ -311,7 +470,7 @@ async function* streamFromRunnable(
       options: Record,
     ) => AsyncIterable<{
       event: string
-      data: { chunk?: unknown; output?: unknown }
+      data: { chunk?: unknown; output?: unknown; error?: unknown }
       name: string
     }>
   }
@@ -332,101 +491,193 @@ async function* streamFromRunnable(
     return
   }
 
-  let finalOutput: unknown
-  let hasYielded = false
-  let lastStreamError: Error | undefined
-
-  // Retry the entire stream if it fails before producing any output
-  const maxStreamAttempts = retryConfig?.maxAttempts ?? 3
-  for (let attempt = 0; attempt < maxStreamAttempts; attempt++) {
-    hasYielded = false
-    lastStreamError = undefined
-    finalOutput = undefined
-
-    try {
-      for await (const event of streamable.streamEvents(input, {
-        ...config,
-        version: "v2",
-      })) {
-        // Drain any subagent.* events queued by the bridge's writer before
-        // emitting the next normal stream chunk, so ordering is predictable.
-        yield* drainSubagentEvents()
-        switch (event.event) {
-          case "on_chat_model_stream": {
-            // Suppress while a child subagent run is active — child token
-            // events leak onto the parent's streamEvents listener via
-            // LangChain v2 async-local-storage tracing. The dispatcher
-            // already emits a `subagent.message` envelope for each child
-            // token, so emitting the raw token here would duplicate.
-            if (streamContext && streamContext.activeChildRuns > 0) break
-            const content = (event.data.chunk as { content?: unknown })?.content
-            if (content && typeof content === "string" && content.length > 0) {
-              hasYielded = true
-              yield { type: "token" as const, data: content }
+  // Capture into a typed const so TS narrowing survives across the nested
+  // async-generator closure below. Bind to `streamable` — LangGraph's
+  // Pregel.streamEvents reads `this.config?.recursionLimit`, so calling it
+  // unbound throws "Cannot read properties of undefined (reading 'config')".
+  const streamEventsFn = streamable.streamEvents.bind(streamable)
+
+  // Tracks the most recent invocation's outcome. The outer resume loop
+  // inspects this to decide whether to park + replay or finish.
+  interface PassResult {
+    readonly finalOutput: unknown
+    readonly interrupts: readonly RawInterruptEntry[]
+  }
+
+  // Process a single streamEvents iterator: yield AgentStreamChunks and
+  // return whatever __interrupt__ entries appeared in the graph's final
+  // on_chain_end output. Shared between the initial invocation and any
+  // resume re-invocations so the chunk-shaping logic stays in one place.
+  async function* processEventStream(
+    invocationInput: unknown,
+    invocationConfig: Record,
+    allowRetryOnError: boolean,
+  ): AsyncGenerator {
+    let finalOutput: unknown
+    let capturedInterrupts: readonly RawInterruptEntry[] = []
+    let hasYielded = false
+
+    const maxStreamAttempts = allowRetryOnError ? (retryConfig?.maxAttempts ?? 3) : 1
+
+    for (let attempt = 0; attempt < maxStreamAttempts; attempt++) {
+      hasYielded = false
+      finalOutput = undefined
+      capturedInterrupts = []
+
+      try {
+        for await (const event of streamEventsFn(invocationInput, {
+          ...invocationConfig,
+          version: "v2",
+        })) {
+          yield* drainSubagentEvents()
+          switch (event.event) {
+            case "on_chat_model_stream": {
+              if (streamContext && streamContext.activeChildRuns > 0) break
+              const content = (event.data.chunk as { content?: unknown })?.content
+              if (content && typeof content === "string" && content.length > 0) {
+                hasYielded = true
+                yield { type: "token" as const, data: content }
+              }
+              break
             }
-            break
-          }
-          case "on_tool_start": {
-            hasYielded = true
-            yield {
-              type: "tool_call" as const,
-              data: {
-                name: event.name,
-                input: event.data.chunk ?? event.data.output,
-              },
+            case "on_tool_start": {
+              hasYielded = true
+              yield {
+                type: "tool_call" as const,
+                data: {
+                  name: event.name,
+                  input: event.data.chunk ?? event.data.output,
+                },
+              }
+              break
             }
-            break
-          }
-          case "on_tool_end": {
-            hasYielded = true
-            yield {
-              type: "tool_result" as const,
-              data: { name: event.name, output: event.data.output },
+            case "on_tool_end": {
+              hasYielded = true
+              yield {
+                type: "tool_result" as const,
+                data: { name: event.name, output: event.data.output },
+              }
+              for (const transformer of streamTransformers ?? []) {
+                if (transformer.observes !== "tool_result") continue
+                for await (const out of transformer.transform({
+                  toolName: event.name,
+                  toolOutput: event.data.output,
+                })) {
+                  yield {
+                    type: out.event as AgentStreamChunk["type"],
+                    data: out.data,
+                  }
+                }
+              }
+              break
             }
-            for (const transformer of streamTransformers ?? []) {
-              if (transformer.observes !== "tool_result") continue
-              for await (const out of transformer.transform({
-                toolName: event.name,
-                toolOutput: event.data.output,
-              })) {
-                yield {
-                  type: out.event as AgentStreamChunk["type"],
-                  data: out.data,
+            case "on_tool_error": {
+              // LangGraph's interrupt() throws a GraphInterrupt from inside
+              // the tool node. The error bubbles through streamEvents as
+              // on_tool_error with the GraphInterrupt instance on data.error.
+              // LangGraph itself catches it to park the checkpointer state,
+              // so the outer iterator continues normally afterwards.
+              const interrupts = extractInterruptsFromError(event.data.error)
+              if (interrupts && interrupts.length > 0) {
+                capturedInterrupts = interrupts
+                for (const entry of interrupts) {
+                  hasYielded = true
+                  yield {
+                    type: "interrupt" as const,
+                    // The capability's interrupt() payload is wrapped in
+                    // entry.value by LangGraph — surface it verbatim so the
+                    // SSE consumer sees the original {interruptId, kind, ...}
+                    // envelope the workspace capability emitted.
+                    data: entry.value,
+                  }
                 }
               }
+              break
             }
-            break
-          }
-          case "on_chain_end": {
-            if (event.name === "LangGraph") {
-              finalOutput = event.data.output
+            case "on_chain_end": {
+              if (event.name === "LangGraph") {
+                finalOutput = event.data.output
+                const interrupts = extractInterrupts(event.data.output)
+                if (interrupts && interrupts.length > 0) {
+                  capturedInterrupts = interrupts
+                  for (const entry of interrupts) {
+                    hasYielded = true
+                    yield {
+                      type: "interrupt" as const,
+                      // The capability's interrupt() payload is wrapped in
+                      // entry.value by LangGraph — surface it verbatim so the
+                      // SSE consumer sees the original {interruptId, kind, ...}
+                      // envelope the workspace capability emitted.
+                      data: entry.value,
+                    }
+                  }
+                }
+              }
+              break
             }
-            break
           }
         }
+        // Stream completed successfully
+        return { finalOutput, interrupts: capturedInterrupts }
+      } catch (error) {
+        const err = error instanceof Error ? error : new Error(String(error))
+        if (hasYielded || !isRetryableError(error) || attempt === maxStreamAttempts - 1) {
+          throw err
+        }
+        const delay = Math.min(1000 * 2 ** attempt + Math.random() * 500, 10_000)
+        await new Promise((resolve) => setTimeout(resolve, delay))
       }
+    }
+    // Unreachable: the loop either returns or throws.
+    return { finalOutput, interrupts: capturedInterrupts }
+  }
 
-      // Stream completed successfully
+  // Initial invocation. Retries on transient errors before any chunk yields.
+  let pass = yield* processEventStream(input, config, /* allowRetryOnError */ true)
+
+  // Resume loop. Each interrupt → park → await decision → re-invoke with
+  // Command({resume}). The resume invocation may itself interrupt (e.g. a
+  // capability gates another tool call mid-run) — loop until either no
+  // interrupt remains or we cannot resume (no threadId / no resolved
+  // decision).
+  while (pass.interrupts.length > 0) {
+    if (!threadId) {
+      // Without a thread_id there is no checkpointer key to replay from;
+      // the parked state will be discarded. End the stream cleanly so the
+      // SSE consumer can surface the interrupt to the user, but they have
+      // no way to resume this run.
       break
-    } catch (error) {
-      lastStreamError = error instanceof Error ? error : new Error(String(error))
-
-      // If we already yielded chunks, we can't retry (client has partial data)
-      // Or if the error isn't retryable, rethrow immediately
-      if (hasYielded || !isRetryableError(error) || attempt === maxStreamAttempts - 1) {
-        throw lastStreamError
-      }
-
-      // Backoff before retry
-      const delay = Math.min(1000 * 2 ** attempt + Math.random() * 500, 10_000)
-      await new Promise((resolve) => setTimeout(resolve, delay))
     }
+
+    // We only resume the first interrupt — if a capability ever fans out
+    // multiple parallel interrupts in a single step, this becomes lossy
+    // and we'd need to await N decisions. None of today's capabilities do
+    // that; revisit when one does.
+    const entry = pass.interrupts[0]
+    const interruptId =
+      (typeof entry?.id === "string" ? entry.id : undefined) ?? `generated-${Date.now()}`
+
+    const decision = await new Promise((resolve) => {
+      const pending: PendingInterrupt = { interruptId, resolve }
+      setPending(threadId, pending)
+    })
+    clearPending(threadId)
+
+    // Resume invocations reuse the same config (same thread_id, signal,
+    // configurable). Retry-on-error is disabled because we have already
+    // yielded the interrupt chunk; if the resume call fails we surface
+    // the error rather than silently restarting.
+    pass = yield* processEventStream(
+      new Command({ resume: decision }),
+      config,
+      /* allowRetryOnError */ false,
+    )
   }
 
   // Final drain in case the last tool call was the bridged task tool —
   // its events would otherwise be stranded after the stream ends.
   yield* drainSubagentEvents()
-  yield { type: "done", data: finalOutput }
+  yield { type: "done", data: pass.finalOutput }
 }
 
 interface InputMessage {
diff --git a/packages/langchain/src/index.ts b/packages/langchain/src/index.ts
index 76105b1..d0f4d6c 100644
--- a/packages/langchain/src/index.ts
+++ b/packages/langchain/src/index.ts
@@ -12,6 +12,13 @@ export {
 export { chainAdapter } from "./chain-adapter.js"
 export { createChatModel } from "./chat-model-factory.js"
 export { inferProvider, resolveProvider } from "./model-provider-resolver.js"
+export type { PendingInterrupt, ResumeDecision } from "./pending-interrupts.js"
+export {
+  __resetPendingForTests,
+  clearPending,
+  getPending,
+  setPending,
+} from "./pending-interrupts.js"
 export type { RetryOptions } from "./retry.js"
 export { isRetryableError, withRetry } from "./retry.js"
 export { materializeStateSchema } from "./state-adapter.js"
diff --git a/packages/langchain/src/pending-interrupts.ts b/packages/langchain/src/pending-interrupts.ts
new file mode 100644
index 0000000..09dd61b
--- /dev/null
+++ b/packages/langchain/src/pending-interrupts.ts
@@ -0,0 +1,43 @@
+/**
+ * Module-level registry of parked LangGraph interrupts, keyed by thread_id.
+ *
+ * Lives in `@dawn-ai/langchain` so that the agent-adapter (which detects
+ * the interrupt and parks the stream) and the CLI's resume endpoint (which
+ * dispatches the user's decision) both reference the same map. Putting it
+ * here avoids a circular dep cli <-> langchain.
+ *
+ * The decision string ("once" | "always" | "deny") is the value passed to
+ * `new Command({resume})` when the agent-adapter re-invokes the graph.
+ * The langchain package intentionally does not depend on
+ * `@dawn-ai/permissions`; the resume endpoint validates the decision shape
+ * before calling `resolve()`.
+ */
+
+export type ResumeDecision = "once" | "always" | "deny"
+
+export interface PendingInterrupt {
+  readonly interruptId: string
+  /** Settles the Promise awaited by the parked agent-adapter generator. */
+  resolve(decision: ResumeDecision): void
+}
+
+const pendingByThread = new Map()
+
+export function getPending(threadId: string): PendingInterrupt | undefined {
+  return pendingByThread.get(threadId)
+}
+
+export function setPending(threadId: string, entry: PendingInterrupt): void {
+  pendingByThread.set(threadId, entry)
+}
+
+export function clearPending(threadId: string): void {
+  pendingByThread.delete(threadId)
+}
+
+/**
+ * Test-only: reset all entries.
+ */
+export function __resetPendingForTests(): void {
+  pendingByThread.clear()
+}
diff --git a/packages/langchain/test/agent-adapter-interrupt.test.ts b/packages/langchain/test/agent-adapter-interrupt.test.ts
new file mode 100644
index 0000000..353b3a6
--- /dev/null
+++ b/packages/langchain/test/agent-adapter-interrupt.test.ts
@@ -0,0 +1,337 @@
+import { Command } from "@langchain/langgraph"
+import { afterEach, describe, expect, test } from "vitest"
+import { streamAgent } from "../src/agent-adapter.js"
+import { __resetPendingForTests, getPending } from "../src/pending-interrupts.js"
+
+/**
+ * These tests mimic the real LangGraph 1.x streamEvents v2 shape:
+ *
+ *   When a tool calls `interrupt(payload)` inside a node, LangGraph throws a
+ *   `GraphInterrupt`. The tool error surfaces via streamEvents as an
+ *   `on_tool_error` event whose `data.error` is a *stringified* form of the
+ *   error — `JSON.stringify(interrupts, null, 2) + "\n\nGraphInterrupt: ..."`.
+ *   The `on_chain_end` for the top-level `LangGraph` chain that follows does
+ *   NOT include `__interrupt__` in this code path (that key only appears on
+ *   the invoke/stream return value, not in streamEvents).
+ *
+ * The adapter must detect the interrupt from the `on_tool_error` event by
+ * parsing the leading JSON array out of the error string. The legacy
+ * `__interrupt__`-on-chain-end path is still supported as a defensive
+ * fallback in case a future LangGraph version surfaces interrupts that way.
+ */
+
+function makeInterruptErrorString(
+  entries: ReadonlyArray<{ id?: string; value: unknown }>,
+): string {
+  return `${JSON.stringify(entries, null, 2)}\n\nGraphInterrupt: ${JSON.stringify(
+    entries,
+    null,
+    2,
+  )}\n    at interrupt (file:///.../interrupt.js:70:8)\n    at processTicksAndRejections (node:internal/process/task_queues:105:5)`
+}
+
+describe("streamAgent — interrupt propagation", () => {
+  afterEach(() => {
+    __resetPendingForTests()
+  })
+
+  test("yields {type: 'interrupt', data} when on_tool_error surfaces a stringified GraphInterrupt", async () => {
+    const interruptPayload = {
+      interruptId: "perm-test-1",
+      type: "permission-request",
+      kind: "command",
+      detail: { command: "ls", suggestedPattern: "ls" },
+    }
+
+    const mockRunnable = {
+      invoke: async () => ({}),
+      streamEvents: async function* (_input: unknown, _options: Record) {
+        yield {
+          event: "on_tool_start",
+          name: "runBash",
+          data: { input: { command: "ls" } },
+        }
+        yield {
+          event: "on_tool_error",
+          name: "runBash",
+          data: {
+            error: makeInterruptErrorString([{ id: "abc", value: interruptPayload }]),
+          },
+        }
+        // LangGraph keeps the iterator alive after parking — the final
+        // on_chain_end fires with the regular output (no __interrupt__).
+        yield {
+          event: "on_chain_end",
+          name: "LangGraph",
+          data: { output: { messages: [] } },
+        }
+      },
+    }
+
+    const chunks: Array<{ type: string; data: unknown }> = []
+    for await (const chunk of streamAgent({
+      entry: mockRunnable,
+      input: { messages: [{ role: "user", content: "test" }] },
+      routeParamNames: [],
+      signal: new AbortController().signal,
+      tools: [],
+    })) {
+      chunks.push({ type: chunk.type, data: chunk.data })
+    }
+
+    const interruptChunks = chunks.filter((c) => c.type === "interrupt")
+    expect(interruptChunks).toHaveLength(1)
+    expect(interruptChunks[0]?.data).toEqual(interruptPayload)
+
+    // The final `done` chunk should still fire (no threadId → no resume).
+    const doneChunks = chunks.filter((c) => c.type === "done")
+    expect(doneChunks).toHaveLength(1)
+  })
+
+  test("yields interrupt when GraphInterrupt is surfaced as a live error object", async () => {
+    // Defensive: if a future LangGraph version stops stringifying the error
+    // and passes the live GraphInterrupt instance through, we must still
+    // detect it via .name + .interrupts.
+    const interruptPayload = { interruptId: "live-1", type: "permission-request" }
+    const liveError = Object.assign(new Error("GraphInterrupt"), {
+      name: "GraphInterrupt",
+      interrupts: [{ id: "live-a", value: interruptPayload }],
+    })
+
+    const mockRunnable = {
+      invoke: async () => ({}),
+      streamEvents: async function* (_input: unknown, _options: Record) {
+        yield {
+          event: "on_tool_error",
+          name: "runBash",
+          data: { error: liveError },
+        }
+      },
+    }
+
+    const chunks: Array<{ type: string; data: unknown }> = []
+    for await (const chunk of streamAgent({
+      entry: mockRunnable,
+      input: { messages: [{ role: "user", content: "test" }] },
+      routeParamNames: [],
+      signal: new AbortController().signal,
+      tools: [],
+    })) {
+      chunks.push({ type: chunk.type, data: chunk.data })
+    }
+
+    expect(chunks.filter((c) => c.type === "interrupt")).toHaveLength(1)
+    expect(chunks.find((c) => c.type === "interrupt")?.data).toEqual(interruptPayload)
+  })
+
+  test("yields interrupt when __interrupt__ appears on on_chain_end output (legacy fallback)", async () => {
+    const interruptPayload = { interruptId: "legacy-1", type: "permission-request" }
+    const mockRunnable = {
+      invoke: async () => ({}),
+      streamEvents: async function* (_input: unknown, _options: Record) {
+        yield {
+          event: "on_chain_end",
+          name: "LangGraph",
+          data: {
+            output: { __interrupt__: [{ value: interruptPayload, id: "legacy-a" }] },
+          },
+        }
+      },
+    }
+
+    const chunks: Array<{ type: string; data: unknown }> = []
+    for await (const chunk of streamAgent({
+      entry: mockRunnable,
+      input: { messages: [{ role: "user", content: "test" }] },
+      routeParamNames: [],
+      signal: new AbortController().signal,
+      tools: [],
+    })) {
+      chunks.push({ type: chunk.type, data: chunk.data })
+    }
+
+    expect(chunks.filter((c) => c.type === "interrupt")).toHaveLength(1)
+    expect(chunks.find((c) => c.type === "interrupt")?.data).toEqual(interruptPayload)
+  })
+
+  test("does not yield an interrupt chunk when no interrupt is surfaced", async () => {
+    const mockRunnable = {
+      invoke: async () => ({ messages: [] }),
+      streamEvents: async function* (_input: unknown, _options: Record) {
+        yield {
+          event: "on_chain_end",
+          name: "LangGraph",
+          data: { output: { messages: [{ content: "hi" }] } },
+        }
+      },
+    }
+
+    const chunks: Array<{ type: string }> = []
+    for await (const chunk of streamAgent({
+      entry: mockRunnable,
+      input: { messages: [{ role: "user", content: "test" }] },
+      routeParamNames: [],
+      signal: new AbortController().signal,
+      tools: [],
+    })) {
+      chunks.push({ type: chunk.type })
+    }
+
+    expect(chunks.filter((c) => c.type === "interrupt")).toHaveLength(0)
+  })
+
+  test("does not treat ordinary tool errors (non-GraphInterrupt) as interrupts", async () => {
+    const mockRunnable = {
+      invoke: async () => ({}),
+      streamEvents: async function* (_input: unknown, _options: Record) {
+        yield {
+          event: "on_tool_error",
+          name: "runBash",
+          data: { error: "Error: boom\n    at foo (bar.js:1:1)" },
+        }
+        yield {
+          event: "on_chain_end",
+          name: "LangGraph",
+          data: { output: { messages: [] } },
+        }
+      },
+    }
+
+    const chunks: Array<{ type: string }> = []
+    for await (const chunk of streamAgent({
+      entry: mockRunnable,
+      input: { messages: [{ role: "user", content: "test" }] },
+      routeParamNames: [],
+      signal: new AbortController().signal,
+      tools: [],
+    })) {
+      chunks.push({ type: chunk.type })
+    }
+
+    expect(chunks.filter((c) => c.type === "interrupt")).toHaveLength(0)
+  })
+
+  test("resume: parks on interrupt, re-invokes with Command({resume}) when pending.resolve fires", async () => {
+    const interruptPayload = {
+      interruptId: "perm-resume-1",
+      type: "permission-request",
+      kind: "command",
+      detail: { command: "ls", suggestedPattern: "ls" },
+    }
+
+    // Mock graph: first streamEvents call emits the stringified GraphInterrupt
+    // via on_tool_error; the resume call emits a normal token + done.
+    let callCount = 0
+    let observedResumeInput: unknown
+    const mockRunnable = {
+      invoke: async () => ({ messages: [] }),
+      streamEvents: async function* (input: unknown, _options: Record) {
+        callCount++
+        if (callCount === 1) {
+          yield {
+            event: "on_tool_error",
+            name: "runBash",
+            data: {
+              error: makeInterruptErrorString([{ id: "abc", value: interruptPayload }]),
+            },
+          }
+          yield {
+            event: "on_chain_end",
+            name: "LangGraph",
+            data: { output: { messages: [] } },
+          }
+          return
+        }
+        observedResumeInput = input
+        yield {
+          event: "on_chat_model_stream",
+          name: "model",
+          data: { chunk: { content: "ok" } },
+        }
+        yield {
+          event: "on_chain_end",
+          name: "LangGraph",
+          data: { output: { messages: [{ content: "done" }] } },
+        }
+      },
+    }
+
+    const threadId = "thread-resume-test"
+
+    const chunks: Array<{ type: string; data?: unknown }> = []
+    const consumer = (async () => {
+      for await (const chunk of streamAgent({
+        entry: mockRunnable,
+        input: { messages: [{ role: "user", content: "test" }] },
+        routeParamNames: [],
+        signal: new AbortController().signal,
+        threadId,
+        tools: [],
+      })) {
+        chunks.push({ type: chunk.type, data: chunk.data })
+      }
+    })()
+
+    // Poll for the pending entry to appear after the interrupt yields.
+    for (let i = 0; i < 50 && !getPending(threadId); i++) {
+      await new Promise((r) => setTimeout(r, 0))
+    }
+
+    const pending = getPending(threadId)
+    expect(pending).toBeDefined()
+    expect(pending?.interruptId).toBe("abc")
+
+    pending?.resolve("once")
+    await consumer
+
+    expect(callCount).toBe(2)
+    expect(observedResumeInput).toBeInstanceOf(Command)
+    expect((observedResumeInput as Command).resume).toBe("once")
+
+    expect(getPending(threadId)).toBeUndefined()
+
+    const types = chunks.map((c) => c.type)
+    expect(types).toContain("interrupt")
+    expect(types).toContain("token")
+    expect(types[types.length - 1]).toBe("done")
+  })
+
+  test("resume without threadId ends the stream after interrupt (no replay)", async () => {
+    const interruptPayload = { interruptId: "p-noresume", type: "x" }
+    let callCount = 0
+    const mockRunnable = {
+      invoke: async () => ({}),
+      streamEvents: async function* (_input: unknown, _options: Record) {
+        callCount++
+        yield {
+          event: "on_tool_error",
+          name: "runBash",
+          data: {
+            error: makeInterruptErrorString([{ id: "x", value: interruptPayload }]),
+          },
+        }
+        yield {
+          event: "on_chain_end",
+          name: "LangGraph",
+          data: { output: { messages: [] } },
+        }
+      },
+    }
+
+    const chunks: Array<{ type: string }> = []
+    for await (const chunk of streamAgent({
+      entry: mockRunnable,
+      input: { messages: [{ role: "user", content: "test" }] },
+      routeParamNames: [],
+      signal: new AbortController().signal,
+      tools: [],
+      // intentionally no threadId
+    })) {
+      chunks.push({ type: chunk.type })
+    }
+
+    expect(callCount).toBe(1)
+    expect(chunks.filter((c) => c.type === "interrupt")).toHaveLength(1)
+    expect(chunks[chunks.length - 1]?.type).toBe("done")
+  })
+})
diff --git a/packages/permissions/package.json b/packages/permissions/package.json
new file mode 100644
index 0000000..43bd2ba
--- /dev/null
+++ b/packages/permissions/package.json
@@ -0,0 +1,42 @@
+{
+  "name": "@dawn-ai/permissions",
+  "version": "0.1.8",
+  "private": false,
+  "type": "module",
+  "license": "MIT",
+  "homepage": "https://github.com/cacheplane/dawnai/tree/main/packages/permissions#readme",
+  "repository": {
+    "type": "git",
+    "url": "git+https://github.com/cacheplane/dawnai.git",
+    "directory": "packages/permissions"
+  },
+  "bugs": {
+    "url": "https://github.com/cacheplane/dawnai/issues"
+  },
+  "engines": {
+    "node": ">=22.12.0"
+  },
+  "files": [
+    "dist"
+  ],
+  "types": "./dist/index.d.ts",
+  "exports": {
+    ".": {
+      "types": "./dist/index.d.ts",
+      "default": "./dist/index.js"
+    }
+  },
+  "publishConfig": {
+    "access": "public"
+  },
+  "scripts": {
+    "build": "tsc -b tsconfig.json",
+    "lint": "biome check --config-path ../config-biome/biome.json package.json src tsconfig.json vitest.config.ts",
+    "test": "vitest --run --config vitest.config.ts --passWithNoTests",
+    "typecheck": "tsc --noEmit"
+  },
+  "devDependencies": {
+    "@dawn-ai/config-typescript": "workspace:*",
+    "@types/node": "25.6.0"
+  }
+}
diff --git a/packages/permissions/src/index.ts b/packages/permissions/src/index.ts
new file mode 100644
index 0000000..dfbc180
--- /dev/null
+++ b/packages/permissions/src/index.ts
@@ -0,0 +1,12 @@
+export { matchPermission } from "./pattern-matching.js"
+export { createPermissionsStore } from "./permissions-store.js"
+export { suggestedCommandPattern, suggestedPathPattern } from "./suggested-pattern.js"
+export type {
+  CommandDetail,
+  PathDetail,
+  PermissionDecision,
+  PermissionMode,
+  PermissionRequest,
+  PermissionsFile,
+  PermissionsStore,
+} from "./types.js"
diff --git a/packages/permissions/src/pattern-matching.ts b/packages/permissions/src/pattern-matching.ts
new file mode 100644
index 0000000..9d2ca5d
--- /dev/null
+++ b/packages/permissions/src/pattern-matching.ts
@@ -0,0 +1,26 @@
+type PatternMap = Readonly>
+
+/**
+ * Match a tool+candidate against allow + deny pattern maps.
+ *
+ * Semantics:
+ *   - deny wins over allow
+ *   - prefix matching: `candidate.startsWith(pattern)`
+ *   - no entries for tool → "unknown"
+ */
+export function matchPermission(
+  tool: string,
+  candidate: string,
+  allow: PatternMap,
+  deny: PatternMap,
+): "allow" | "deny" | "unknown" {
+  const denyList = deny[tool] ?? []
+  for (const pattern of denyList) {
+    if (candidate.startsWith(pattern)) return "deny"
+  }
+  const allowList = allow[tool] ?? []
+  for (const pattern of allowList) {
+    if (candidate.startsWith(pattern)) return "allow"
+  }
+  return "unknown"
+}
diff --git a/packages/permissions/src/permissions-store.ts b/packages/permissions/src/permissions-store.ts
new file mode 100644
index 0000000..e79feb9
--- /dev/null
+++ b/packages/permissions/src/permissions-store.ts
@@ -0,0 +1,146 @@
+import { existsSync } from "node:fs"
+import { mkdir, readFile, writeFile } from "node:fs/promises"
+import { join } from "node:path"
+
+import { matchPermission } from "./pattern-matching.js"
+import type { PermissionMode, PermissionsFile, PermissionsStore } from "./types.js"
+
+const PERMISSIONS_DIR = ".dawn"
+const PERMISSIONS_FILE = "permissions.json"
+
+interface CreateOptions {
+  readonly appRoot: string
+  readonly config: PermissionsFile | undefined
+  readonly mode: PermissionMode
+}
+
+type MutableMap = Record
+
+interface State {
+  configAllow: MutableMap
+  configDeny: MutableMap
+  runtimeAllow: MutableMap
+  runtimeDeny: MutableMap
+}
+
+function emptyState(): State {
+  return { configAllow: {}, configDeny: {}, runtimeAllow: {}, runtimeDeny: {} }
+}
+
+function cloneMap(src: Readonly>): MutableMap {
+  const out: MutableMap = {}
+  for (const [k, v] of Object.entries(src)) out[k] = [...v]
+  return out
+}
+
+function effectiveAllow(state: State, mode: PermissionMode): Record {
+  if (mode === "bypass") return {}
+  const out: Record = {}
+  for (const [k, v] of Object.entries(state.configAllow)) out[k] = [...v]
+  if (mode === "interactive") {
+    for (const [k, v] of Object.entries(state.runtimeAllow)) {
+      out[k] = [...(out[k] ?? []), ...v]
+    }
+  }
+  return out
+}
+
+function effectiveDeny(state: State, mode: PermissionMode): Record {
+  if (mode === "bypass") return {}
+  const out: Record = {}
+  for (const [k, v] of Object.entries(state.configDeny)) out[k] = [...v]
+  if (mode === "interactive") {
+    for (const [k, v] of Object.entries(state.runtimeDeny)) {
+      out[k] = [...(out[k] ?? []), ...v]
+    }
+  }
+  return out
+}
+
+export function createPermissionsStore(opts: CreateOptions): PermissionsStore {
+  const { appRoot, config, mode } = opts
+  const state = emptyState()
+  if (config) {
+    state.configAllow = cloneMap(config.allow)
+    state.configDeny = cloneMap(config.deny)
+  }
+
+  let writeQueue: Promise = Promise.resolve()
+
+  async function loadRuntimeFile(): Promise {
+    const filePath = join(appRoot, PERMISSIONS_DIR, PERMISSIONS_FILE)
+    if (!existsSync(filePath)) return
+    let raw: string
+    try {
+      raw = await readFile(filePath, "utf8")
+    } catch (err) {
+      throw new Error(`Failed to read permissions.json: ${(err as Error).message}`)
+    }
+    let parsed: unknown
+    try {
+      parsed = JSON.parse(raw)
+    } catch (err) {
+      throw new Error(`Malformed permissions.json: ${(err as Error).message}`)
+    }
+    const p = parsed as Partial
+    if (p.allow && typeof p.allow === "object") {
+      state.runtimeAllow = cloneMap(p.allow as Record)
+    }
+    if (p.deny && typeof p.deny === "object") {
+      state.runtimeDeny = cloneMap(p.deny as Record)
+    }
+  }
+
+  async function persistRuntimeFile(): Promise {
+    const dir = join(appRoot, PERMISSIONS_DIR)
+    await mkdir(dir, { recursive: true })
+    const file: PermissionsFile = {
+      version: 1,
+      allow: state.runtimeAllow,
+      deny: state.runtimeDeny,
+    }
+    await writeFile(join(dir, PERMISSIONS_FILE), `${JSON.stringify(file, null, 2)}\n`, "utf8")
+  }
+
+  async function ensureGitignoreEntry(): Promise {
+    const gitignorePath = join(appRoot, ".gitignore")
+    let content = ""
+    if (existsSync(gitignorePath)) {
+      content = await readFile(gitignorePath, "utf8")
+      if (content.split("\n").some((line) => line.trim() === ".dawn/")) return
+      if (!content.endsWith("\n") && content.length > 0) content += "\n"
+      content += ".dawn/\n"
+    } else {
+      content = ".dawn/\n"
+    }
+    await writeFile(gitignorePath, content, "utf8")
+  }
+
+  return {
+    mode,
+    match(tool: string, candidate: string) {
+      return matchPermission(
+        tool,
+        candidate,
+        effectiveAllow(state, mode),
+        effectiveDeny(state, mode),
+      )
+    },
+    async load() {
+      if (mode === "interactive") {
+        await loadRuntimeFile()
+      }
+    },
+    async addAllow(tool: string, pattern: string) {
+      const job = async () => {
+        const list = state.runtimeAllow[tool] ?? []
+        if (!list.includes(pattern)) list.push(pattern)
+        state.runtimeAllow[tool] = list
+        await persistRuntimeFile()
+        await ensureGitignoreEntry()
+      }
+      writeQueue = writeQueue.then(job, job)
+      await writeQueue
+    },
+  }
+}
diff --git a/packages/permissions/src/suggested-pattern.ts b/packages/permissions/src/suggested-pattern.ts
new file mode 100644
index 0000000..cacab8e
--- /dev/null
+++ b/packages/permissions/src/suggested-pattern.ts
@@ -0,0 +1,22 @@
+import { dirname } from "node:path"
+
+/**
+ * Default suggested pattern for a shell command.
+ * Returns the first two whitespace-separated tokens.
+ */
+export function suggestedCommandPattern(command: string): string {
+  const trimmed = command.trim()
+  if (trimmed.length === 0) return ""
+  const tokens = trimmed.split(/\s+/)
+  return tokens.slice(0, 2).join(" ")
+}
+
+/**
+ * Default suggested pattern for a filesystem path.
+ * Returns the parent directory with trailing slash.
+ */
+export function suggestedPathPattern(path: string): string {
+  if (path.endsWith("/")) return path
+  const parent = dirname(path)
+  return parent === "/" ? "/" : `${parent}/`
+}
diff --git a/packages/permissions/src/types.ts b/packages/permissions/src/types.ts
new file mode 100644
index 0000000..dcfff90
--- /dev/null
+++ b/packages/permissions/src/types.ts
@@ -0,0 +1,50 @@
+/**
+ * Public types for the Dawn HITL permissions system.
+ *
+ * The workspace capability calls into a `PermissionsStore` before
+ * invoking its filesystem/exec backends. The store consults the
+ * runtime file at .dawn/permissions.json plus the config-seeded
+ * allow/deny lists and returns one of three decisions: "allow",
+ * "deny", or "unknown". On "unknown" in interactive mode the
+ * capability emits LangGraph's `interrupt()` with a `PermissionRequest`
+ * payload; the resume mechanism returns a `PermissionDecision`.
+ */
+
+export type PermissionMode = "interactive" | "non-interactive" | "bypass"
+
+export type PermissionDecision = "once" | "always" | "deny"
+
+export interface PermissionsFile {
+  readonly version: 1
+  readonly allow: Readonly>
+  readonly deny: Readonly>
+}
+
+export interface CommandDetail {
+  readonly command: string
+  readonly suggestedPattern: string
+}
+
+export interface PathDetail {
+  readonly path: string
+  readonly operation: "readFile" | "writeFile" | "listDir"
+  readonly suggestedPattern: string
+}
+
+export interface PermissionRequest {
+  readonly interruptId: string
+  readonly kind: "command" | "path"
+  readonly detail: CommandDetail | PathDetail
+  readonly threadId: string
+  readonly callId?: string
+}
+
+export interface PermissionsStore {
+  /** Loaded once at construction; subsequent loads not exposed in v1. */
+  load(): Promise
+  match(tool: string, candidate: string): "allow" | "deny" | "unknown"
+  /** Persists an allow entry to disk and updates the in-memory cache. */
+  addAllow(tool: string, pattern: string): Promise
+  /** Active mode (resolved from config + env at construction). */
+  readonly mode: PermissionMode
+}
diff --git a/packages/permissions/test/pattern-matching.test.ts b/packages/permissions/test/pattern-matching.test.ts
new file mode 100644
index 0000000..0d4482e
--- /dev/null
+++ b/packages/permissions/test/pattern-matching.test.ts
@@ -0,0 +1,29 @@
+import { describe, expect, it } from "vitest"
+import { matchPermission } from "../src/pattern-matching.js"
+
+describe("matchPermission", () => {
+  it("returns unknown when no entries match", () => {
+    expect(matchPermission("bash", "npm install", {}, {})).toBe("unknown")
+  })
+  it("returns allow when candidate matches an allow prefix", () => {
+    expect(matchPermission("bash", "npm install react", { bash: ["npm install"] }, {})).toBe("allow")
+  })
+  it("returns deny when candidate matches a deny prefix", () => {
+    expect(matchPermission("bash", "rm -rf /tmp", {}, { bash: ["rm -rf"] })).toBe("deny")
+  })
+  it("deny wins over allow when both match", () => {
+    expect(matchPermission("bash", "rm -rf /tmp", { bash: ["rm -rf"] }, { bash: ["rm -rf"] })).toBe("deny")
+  })
+  it("does NOT match an allow entry that is not a prefix", () => {
+    expect(matchPermission("bash", "npm test", { bash: ["npm install"] }, {})).toBe("unknown")
+  })
+  it("treats path candidates with absolute prefixes", () => {
+    expect(matchPermission("readFile", "/Users/blove/.zshrc", { readFile: ["/Users/blove/"] }, {})).toBe("allow")
+  })
+  it("does not cross directory boundary when pattern ends with slash", () => {
+    expect(matchPermission("readFile", "/var/logger/app.log", { readFile: ["/var/log/"] }, {})).toBe("unknown")
+  })
+  it("returns unknown for a tool with no entries in either list", () => {
+    expect(matchPermission("runUnknownTool", "anything", { bash: ["ls"] }, { writeFile: ["/tmp/"] })).toBe("unknown")
+  })
+})
diff --git a/packages/permissions/test/permissions-store.test.ts b/packages/permissions/test/permissions-store.test.ts
new file mode 100644
index 0000000..25709e9
--- /dev/null
+++ b/packages/permissions/test/permissions-store.test.ts
@@ -0,0 +1,169 @@
+import { afterEach, beforeEach, describe, expect, it } from "vitest"
+import { mkdirSync, mkdtempSync, readFileSync, rmSync, writeFileSync } from "node:fs"
+import { tmpdir } from "node:os"
+import { join } from "node:path"
+
+import { createPermissionsStore } from "../src/permissions-store.js"
+
+describe("createPermissionsStore — load + match", () => {
+  let appRoot: string
+  beforeEach(() => {
+    appRoot = mkdtempSync(join(tmpdir(), "dawn-perms-"))
+  })
+  afterEach(() => {
+    rmSync(appRoot, { recursive: true, force: true })
+  })
+
+  it("returns unknown when no file and no config", async () => {
+    const store = createPermissionsStore({ appRoot, config: undefined, mode: "interactive" })
+    await store.load()
+    expect(store.match("bash", "npm install")).toBe("unknown")
+  })
+
+  it("matches entries from .dawn/permissions.json", async () => {
+    mkdirSync(join(appRoot, ".dawn"), { recursive: true })
+    writeFileSync(
+      join(appRoot, ".dawn", "permissions.json"),
+      JSON.stringify({ version: 1, allow: { bash: ["npm install"] }, deny: {} }),
+    )
+    const store = createPermissionsStore({ appRoot, config: undefined, mode: "interactive" })
+    await store.load()
+    expect(store.match("bash", "npm install react")).toBe("allow")
+    expect(store.match("bash", "rm -rf /")).toBe("unknown")
+  })
+
+  it("merges config + runtime file (both contribute allows)", async () => {
+    mkdirSync(join(appRoot, ".dawn"), { recursive: true })
+    writeFileSync(
+      join(appRoot, ".dawn", "permissions.json"),
+      JSON.stringify({ version: 1, allow: { bash: ["ls"] }, deny: {} }),
+    )
+    const store = createPermissionsStore({
+      appRoot,
+      config: { version: 1, allow: { bash: ["npm install"] }, deny: {} },
+      mode: "interactive",
+    })
+    await store.load()
+    expect(store.match("bash", "ls -la")).toBe("allow")
+    expect(store.match("bash", "npm install react")).toBe("allow")
+  })
+
+  it("deny from config wins over allow from runtime file", async () => {
+    mkdirSync(join(appRoot, ".dawn"), { recursive: true })
+    writeFileSync(
+      join(appRoot, ".dawn", "permissions.json"),
+      JSON.stringify({ version: 1, allow: { bash: ["rm"] }, deny: {} }),
+    )
+    const store = createPermissionsStore({
+      appRoot,
+      config: { version: 1, allow: {}, deny: { bash: ["rm -rf"] } },
+      mode: "interactive",
+    })
+    await store.load()
+    expect(store.match("bash", "rm -rf /tmp")).toBe("deny")
+  })
+
+  it("ignores the runtime file in non-interactive mode", async () => {
+    mkdirSync(join(appRoot, ".dawn"), { recursive: true })
+    writeFileSync(
+      join(appRoot, ".dawn", "permissions.json"),
+      JSON.stringify({ version: 1, allow: { bash: ["npm install"] }, deny: {} }),
+    )
+    const store = createPermissionsStore({
+      appRoot,
+      config: { version: 1, allow: { bash: ["ls"] }, deny: {} },
+      mode: "non-interactive",
+    })
+    await store.load()
+    expect(store.match("bash", "npm install react")).toBe("unknown")
+    expect(store.match("bash", "ls -la")).toBe("allow")
+  })
+
+  it("ignores everything in bypass mode", async () => {
+    mkdirSync(join(appRoot, ".dawn"), { recursive: true })
+    writeFileSync(
+      join(appRoot, ".dawn", "permissions.json"),
+      JSON.stringify({ version: 1, allow: {}, deny: { bash: ["rm"] } }),
+    )
+    const store = createPermissionsStore({
+      appRoot,
+      config: { version: 1, allow: {}, deny: { bash: ["rm"] } },
+      mode: "bypass",
+    })
+    await store.load()
+    expect(store.match("bash", "rm -rf /")).toBe("unknown")
+  })
+
+  it("throws on malformed JSON in the runtime file", async () => {
+    mkdirSync(join(appRoot, ".dawn"), { recursive: true })
+    writeFileSync(join(appRoot, ".dawn", "permissions.json"), "{ not valid json")
+    const store = createPermissionsStore({ appRoot, config: undefined, mode: "interactive" })
+    await expect(store.load()).rejects.toThrow(/permissions\.json/i)
+  })
+})
+
+describe("createPermissionsStore — addAllow", () => {
+  let appRoot: string
+  beforeEach(() => {
+    appRoot = mkdtempSync(join(tmpdir(), "dawn-perms-"))
+  })
+  afterEach(() => {
+    rmSync(appRoot, { recursive: true, force: true })
+  })
+
+  it("persists an allow entry and updates the in-memory cache atomically", async () => {
+    const store = createPermissionsStore({ appRoot, config: undefined, mode: "interactive" })
+    await store.load()
+    expect(store.match("bash", "npm install")).toBe("unknown")
+    await store.addAllow("bash", "npm install")
+    expect(store.match("bash", "npm install react")).toBe("allow")
+    const raw = readFileSync(join(appRoot, ".dawn", "permissions.json"), "utf8")
+    const parsed = JSON.parse(raw)
+    expect(parsed.allow.bash).toContain("npm install")
+  })
+
+  it("appends .dawn/ to .gitignore on first write (idempotent)", async () => {
+    writeFileSync(join(appRoot, ".gitignore"), "node_modules/\n.next/\n")
+    const store = createPermissionsStore({ appRoot, config: undefined, mode: "interactive" })
+    await store.load()
+    await store.addAllow("bash", "ls")
+    const gi = readFileSync(join(appRoot, ".gitignore"), "utf8")
+    expect(gi).toContain(".dawn/")
+    expect(gi).toContain("node_modules/")
+  })
+
+  it("creates .gitignore with .dawn/ when none exists", async () => {
+    const store = createPermissionsStore({ appRoot, config: undefined, mode: "interactive" })
+    await store.load()
+    await store.addAllow("bash", "ls")
+    const gi = readFileSync(join(appRoot, ".gitignore"), "utf8")
+    expect(gi).toBe(".dawn/\n")
+  })
+
+  it("does not duplicate .dawn/ if already in .gitignore", async () => {
+    writeFileSync(join(appRoot, ".gitignore"), "node_modules/\n.dawn/\n")
+    const store = createPermissionsStore({ appRoot, config: undefined, mode: "interactive" })
+    await store.load()
+    await store.addAllow("bash", "ls")
+    const gi = readFileSync(join(appRoot, ".gitignore"), "utf8")
+    expect(gi.match(/\.dawn\//g)?.length).toBe(1)
+  })
+
+  it("serializes concurrent addAllow calls", async () => {
+    const store = createPermissionsStore({ appRoot, config: undefined, mode: "interactive" })
+    await store.load()
+    await Promise.all([
+      store.addAllow("bash", "ls"),
+      store.addAllow("bash", "pwd"),
+      store.addAllow("bash", "cat"),
+    ])
+    const raw = readFileSync(join(appRoot, ".dawn", "permissions.json"), "utf8")
+    const parsed = JSON.parse(raw)
+    expect([...parsed.allow.bash].sort()).toEqual(["cat", "ls", "pwd"])
+  })
+
+  it("exposes the resolved mode", () => {
+    const store = createPermissionsStore({ appRoot, config: undefined, mode: "non-interactive" })
+    expect(store.mode).toBe("non-interactive")
+  })
+})
diff --git a/packages/permissions/test/suggested-pattern.test.ts b/packages/permissions/test/suggested-pattern.test.ts
new file mode 100644
index 0000000..0a55f12
--- /dev/null
+++ b/packages/permissions/test/suggested-pattern.test.ts
@@ -0,0 +1,41 @@
+import { describe, expect, it } from "vitest"
+import {
+  suggestedCommandPattern,
+  suggestedPathPattern,
+} from "../src/suggested-pattern.js"
+
+describe("suggestedCommandPattern", () => {
+  it("returns the first two tokens for a multi-word command", () => {
+    expect(suggestedCommandPattern("npm install react")).toBe("npm install")
+  })
+  it("returns the single token for a one-word command", () => {
+    expect(suggestedCommandPattern("ls")).toBe("ls")
+  })
+  it("returns first two tokens even when the second is short", () => {
+    expect(suggestedCommandPattern("git status")).toBe("git status")
+    expect(suggestedCommandPattern("git push origin main")).toBe("git push")
+  })
+  it("strips leading/trailing whitespace before tokenizing", () => {
+    expect(suggestedCommandPattern("  npm  install  react  ")).toBe("npm install")
+  })
+  it("handles empty input as empty pattern", () => {
+    expect(suggestedCommandPattern("")).toBe("")
+    expect(suggestedCommandPattern("   ")).toBe("")
+  })
+})
+
+describe("suggestedPathPattern", () => {
+  it("returns the parent directory with trailing slash", () => {
+    expect(suggestedPathPattern("/Users/blove/.zshrc")).toBe("/Users/blove/")
+    expect(suggestedPathPattern("/var/log/app.log")).toBe("/var/log/")
+  })
+  it("returns the dir itself with trailing slash when input ends with slash", () => {
+    expect(suggestedPathPattern("/Users/blove/Documents/")).toBe("/Users/blove/Documents/")
+  })
+  it("returns root when input is a top-level file", () => {
+    expect(suggestedPathPattern("/etc")).toBe("/")
+  })
+  it("handles relative paths", () => {
+    expect(suggestedPathPattern("notes/agenda.md")).toBe("notes/")
+  })
+})
diff --git a/packages/permissions/tsconfig.json b/packages/permissions/tsconfig.json
new file mode 100644
index 0000000..0681480
--- /dev/null
+++ b/packages/permissions/tsconfig.json
@@ -0,0 +1,9 @@
+{
+  "$schema": "https://json.schemastore.org/tsconfig",
+  "extends": "../config-typescript/node.json",
+  "compilerOptions": {
+    "outDir": "dist",
+    "rootDir": "src"
+  },
+  "include": ["src/**/*.ts"]
+}
diff --git a/packages/permissions/vitest.config.ts b/packages/permissions/vitest.config.ts
new file mode 100644
index 0000000..c19dea2
--- /dev/null
+++ b/packages/permissions/vitest.config.ts
@@ -0,0 +1,9 @@
+import { defineConfig } from "vitest/config"
+
+export default defineConfig({
+  test: {
+    environment: "node",
+    include: ["test/**/*.test.ts"],
+    passWithNoTests: true,
+  },
+})
diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml
index f91d6eb..539c211 100644
--- a/pnpm-lock.yaml
+++ b/pnpm-lock.yaml
@@ -177,6 +177,9 @@ importers:
       '@dawn-ai/langgraph':
         specifier: workspace:*
         version: link:../langgraph
+      '@dawn-ai/permissions':
+        specifier: workspace:*
+        version: link:../permissions
       commander:
         specifier: 14.0.3
         version: 14.0.3
@@ -216,12 +219,18 @@ importers:
 
   packages/core:
     dependencies:
+      '@dawn-ai/permissions':
+        specifier: workspace:*
+        version: link:../permissions
       '@dawn-ai/sdk':
         specifier: workspace:*
         version: link:../sdk
       '@dawn-ai/workspace':
         specifier: workspace:*
         version: link:../workspace
+      '@langchain/langgraph':
+        specifier: ^1.3.0
+        version: 1.3.0(@langchain/core@1.1.47(openai@6.37.0(ws@8.20.1)(zod@4.4.3))(ws@8.20.1))(openai@6.37.0(ws@8.20.1)(zod@4.4.3))(react-dom@19.2.0(react@19.2.0))(react@19.2.0)(ws@8.20.1)(zod-to-json-schema@3.25.2(zod@4.4.3))(zod@4.4.3)
       tsx:
         specifier: ^4.8.1
         version: 4.21.0
@@ -323,6 +332,15 @@ importers:
         specifier: 25.6.0
         version: 25.6.0
 
+  packages/permissions:
+    devDependencies:
+      '@dawn-ai/config-typescript':
+        specifier: workspace:*
+        version: link:../config-typescript
+      '@types/node':
+        specifier: 25.6.0
+        version: 25.6.0
+
   packages/sdk:
     devDependencies:
       '@dawn-ai/config-typescript':

From 20f36ea5e4a0a7086125996bcc02f6607d277238 Mon Sep 17 00:00:00 2001
From: Brian Love 
Date: Thu, 21 May 2026 13:43:59 -0700
Subject: [PATCH 20/23] =?UTF-8?q?style(langchain):=20rename=20escape?=
 =?UTF-8?q?=E2=86=92escaped=20+=20biome=20auto-format?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

CI lint failed on two issues:
- Shadow of global `escape` in the interrupt-error JSON parser
- Line-length formatting in a test helper signature

Co-Authored-By: Claude Opus 4.7 
---
 packages/langchain/src/agent-adapter.ts        | 18 ++++++------------
 .../test/agent-adapter-interrupt.test.ts       |  4 +---
 2 files changed, 7 insertions(+), 15 deletions(-)

diff --git a/packages/langchain/src/agent-adapter.ts b/packages/langchain/src/agent-adapter.ts
index cf63cee..5e46619 100644
--- a/packages/langchain/src/agent-adapter.ts
+++ b/packages/langchain/src/agent-adapter.ts
@@ -212,11 +212,7 @@ function extractInterruptsFromError(error: unknown): readonly RawInterruptEntry[
 
   if (typeof error === "object") {
     const e = error as { name?: unknown; interrupts?: unknown; message?: unknown }
-    if (
-      e.name === "GraphInterrupt" &&
-      Array.isArray(e.interrupts) &&
-      e.interrupts.length > 0
-    ) {
+    if (e.name === "GraphInterrupt" && Array.isArray(e.interrupts) && e.interrupts.length > 0) {
       return e.interrupts as readonly RawInterruptEntry[]
     }
     if (typeof e.message === "string") {
@@ -240,25 +236,23 @@ function extractInterruptsFromError(error: unknown): readonly RawInterruptEntry[
  * JSON array up to the first `]` followed by a newline + non-JSON sentinel
  * and parse it.
  */
-function parseInterruptStringMessage(
-  text: string,
-): readonly RawInterruptEntry[] | undefined {
+function parseInterruptStringMessage(text: string): readonly RawInterruptEntry[] | undefined {
   const trimmed = text.trimStart()
   if (!trimmed.startsWith("[")) return undefined
   // Find the matching closing bracket by bracket counting at depth 0 — robust
   // against nested arrays in the interrupt payloads.
   let depth = 0
   let inString = false
-  let escape = false
+  let escaped = false
   let end = -1
   for (let i = 0; i < trimmed.length; i++) {
     const ch = trimmed[i]
-    if (escape) {
-      escape = false
+    if (escaped) {
+      escaped = false
       continue
     }
     if (inString) {
-      if (ch === "\\") escape = true
+      if (ch === "\\") escaped = true
       else if (ch === '"') inString = false
       continue
     }
diff --git a/packages/langchain/test/agent-adapter-interrupt.test.ts b/packages/langchain/test/agent-adapter-interrupt.test.ts
index 353b3a6..5b1c0c4 100644
--- a/packages/langchain/test/agent-adapter-interrupt.test.ts
+++ b/packages/langchain/test/agent-adapter-interrupt.test.ts
@@ -20,9 +20,7 @@ import { __resetPendingForTests, getPending } from "../src/pending-interrupts.js
  * fallback in case a future LangGraph version surfaces interrupts that way.
  */
 
-function makeInterruptErrorString(
-  entries: ReadonlyArray<{ id?: string; value: unknown }>,
-): string {
+function makeInterruptErrorString(entries: ReadonlyArray<{ id?: string; value: unknown }>): string {
   return `${JSON.stringify(entries, null, 2)}\n\nGraphInterrupt: ${JSON.stringify(
     entries,
     null,

From 5bad7f881f903724d5390ca38890cb682f3b13dd Mon Sep 17 00:00:00 2001
From: Brian Love 
Date: Thu, 21 May 2026 14:02:40 -0700
Subject: [PATCH 21/23] test(harness): pack @dawn-ai/workspace +
 @dawn-ai/permissions in framework verify

Adds the two new workspace packages (introduced in sub-projects 4 and
4.5) to the framework-verification harness's pack list, override maps,
and fixture snapshots so the generated-app contract tests can install
them locally instead of trying the npm registry (404).

Also extends create-dawn-ai-app's internal-mode overrides to point
@dawn-ai/permissions and @dawn-ai/workspace at the in-repo packages so
the contributor-local lifecycle resolves transitive workspace:* deps.

Co-Authored-By: Claude Opus 4.7 
---
 packages/create-dawn-app/src/index.ts         | 10 ++++++++++
 test/generated/cli-testing-export.test.ts     | 14 ++++++++++++-
 test/generated/fixtures/basic.expected.json   |  4 +++-
 .../fixtures/custom-app-dir.expected.json     |  4 +++-
 test/generated/harness.ts                     |  8 ++++++++
 test/generated/run-generated-app.test.ts      | 20 ++++++++++++++++++-
 6 files changed, 56 insertions(+), 4 deletions(-)

diff --git a/packages/create-dawn-app/src/index.ts b/packages/create-dawn-app/src/index.ts
index ea72469..d5bbc4f 100644
--- a/packages/create-dawn-app/src/index.ts
+++ b/packages/create-dawn-app/src/index.ts
@@ -178,7 +178,9 @@ function createTemplateReplacements(
   readonly dawnCoreSpecifier: string
   readonly dawnLangchainSpecifier: string
   readonly dawnLanggraphSpecifier: string
+  readonly dawnPermissionsSpecifier: string
   readonly dawnSdkSpecifier: string
+  readonly dawnWorkspaceSpecifier: string
 } {
   if (options.mode === "internal") {
     return {
@@ -190,7 +192,11 @@ function createTemplateReplacements(
       dawnCoreSpecifier: createAbsoluteFileSpecifier(resolve(repoRoot, "packages/core")),
       dawnLangchainSpecifier: createAbsoluteFileSpecifier(resolve(repoRoot, "packages/langchain")),
       dawnLanggraphSpecifier: createAbsoluteFileSpecifier(resolve(repoRoot, "packages/langgraph")),
+      dawnPermissionsSpecifier: createAbsoluteFileSpecifier(
+        resolve(repoRoot, "packages/permissions"),
+      ),
       dawnSdkSpecifier: createAbsoluteFileSpecifier(resolve(repoRoot, "packages/sdk")),
+      dawnWorkspaceSpecifier: createAbsoluteFileSpecifier(resolve(repoRoot, "packages/workspace")),
     }
   }
 
@@ -201,7 +207,9 @@ function createTemplateReplacements(
     dawnCoreSpecifier: options.distTag,
     dawnLangchainSpecifier: options.distTag,
     dawnLanggraphSpecifier: options.distTag,
+    dawnPermissionsSpecifier: options.distTag,
     dawnSdkSpecifier: options.distTag,
+    dawnWorkspaceSpecifier: options.distTag,
   }
 }
 
@@ -227,7 +235,9 @@ async function applyInternalModePackageOverrides(
       "@dawn-ai/core": replacements.dawnCoreSpecifier,
       "@dawn-ai/langchain": replacements.dawnLangchainSpecifier,
       "@dawn-ai/langgraph": replacements.dawnLanggraphSpecifier,
+      "@dawn-ai/permissions": replacements.dawnPermissionsSpecifier,
       "@dawn-ai/sdk": replacements.dawnSdkSpecifier,
+      "@dawn-ai/workspace": replacements.dawnWorkspaceSpecifier,
     },
   }
 
diff --git a/test/generated/cli-testing-export.test.ts b/test/generated/cli-testing-export.test.ts
index 254792d..f677d3b 100644
--- a/test/generated/cli-testing-export.test.ts
+++ b/test/generated/cli-testing-export.test.ts
@@ -26,7 +26,15 @@ describe.each([
   }, async () => {
     const tempRoot = await createTrackedTempDir(`dawn-${label}-testing-pack-`, tempDirs)
     const { installerDir, tarballs } = await createPackagedInstaller({
-      packageNames: ["@dawn-ai/core", "@dawn-ai/langchain", "@dawn-ai/langgraph", "@dawn-ai/sdk", "@dawn-ai/cli"],
+      packageNames: [
+        "@dawn-ai/core",
+        "@dawn-ai/langchain",
+        "@dawn-ai/langgraph",
+        "@dawn-ai/permissions",
+        "@dawn-ai/sdk",
+        "@dawn-ai/workspace",
+        "@dawn-ai/cli",
+      ],
       tempRoot,
     })
 
@@ -38,7 +46,9 @@ describe.each([
         requiredTarball(tarballs, "@dawn-ai/core"),
         requiredTarball(tarballs, "@dawn-ai/langchain"),
         requiredTarball(tarballs, "@dawn-ai/langgraph"),
+        requiredTarball(tarballs, "@dawn-ai/permissions"),
         requiredTarball(tarballs, "@dawn-ai/sdk"),
+        requiredTarball(tarballs, "@dawn-ai/workspace"),
         requiredTarball(tarballs, "@dawn-ai/cli"),
       ],
       installerDir,
@@ -136,7 +146,9 @@ async function writeInstallerOverrides(
     "@dawn-ai/core": requiredTarball(tarballs, "@dawn-ai/core"),
     "@dawn-ai/langchain": requiredTarball(tarballs, "@dawn-ai/langchain"),
     "@dawn-ai/langgraph": requiredTarball(tarballs, "@dawn-ai/langgraph"),
+    "@dawn-ai/permissions": requiredTarball(tarballs, "@dawn-ai/permissions"),
     "@dawn-ai/sdk": requiredTarball(tarballs, "@dawn-ai/sdk"),
+    "@dawn-ai/workspace": requiredTarball(tarballs, "@dawn-ai/workspace"),
   }
 
   await writeFile(
diff --git a/test/generated/fixtures/basic.expected.json b/test/generated/fixtures/basic.expected.json
index 410460f..fd9b94b 100644
--- a/test/generated/fixtures/basic.expected.json
+++ b/test/generated/fixtures/basic.expected.json
@@ -27,7 +27,9 @@
         "@dawn-ai/core": "",
         "@dawn-ai/langchain": "",
         "@dawn-ai/langgraph": "",
-        "@dawn-ai/sdk": ""
+        "@dawn-ai/permissions": "",
+        "@dawn-ai/sdk": "",
+        "@dawn-ai/workspace": ""
       }
     }
   },
diff --git a/test/generated/fixtures/custom-app-dir.expected.json b/test/generated/fixtures/custom-app-dir.expected.json
index fa1f6f1..5263e01 100644
--- a/test/generated/fixtures/custom-app-dir.expected.json
+++ b/test/generated/fixtures/custom-app-dir.expected.json
@@ -27,7 +27,9 @@
         "@dawn-ai/core": "",
         "@dawn-ai/langchain": "",
         "@dawn-ai/langgraph": "",
-        "@dawn-ai/sdk": ""
+        "@dawn-ai/permissions": "",
+        "@dawn-ai/sdk": "",
+        "@dawn-ai/workspace": ""
       }
     }
   },
diff --git a/test/generated/harness.ts b/test/generated/harness.ts
index deb31c8..e1f99b4 100644
--- a/test/generated/harness.ts
+++ b/test/generated/harness.ts
@@ -39,7 +39,9 @@ interface PackedTarballs {
   readonly devkit: string
   readonly langchain: string
   readonly langgraph: string
+  readonly permissions: string
   readonly sdk: string
+  readonly workspace: string
 }
 
 interface RuntimeFixtureSpec {
@@ -173,7 +175,9 @@ export async function prepareGeneratedRuntimeApp(options: {
           "@dawn-ai/core",
           "@dawn-ai/langchain",
           "@dawn-ai/langgraph",
+          "@dawn-ai/permissions",
           "@dawn-ai/sdk",
+          "@dawn-ai/workspace",
         ],
         tempRoot: options.tempRoot,
         transcriptPath,
@@ -469,7 +473,9 @@ async function rewriteDependenciesToTarballs(options: {
       "@dawn-ai/core": options.tarballs.core,
       "@dawn-ai/langchain": options.tarballs.langchain,
       "@dawn-ai/langgraph": options.tarballs.langgraph,
+      "@dawn-ai/permissions": options.tarballs.permissions,
       "@dawn-ai/sdk": options.tarballs.sdk,
+      "@dawn-ai/workspace": options.tarballs.workspace,
     },
   }
 
@@ -663,7 +669,9 @@ function toPackedTarballs(tarballs: Readonly>): PackedTar
     devkit: tarballs["@dawn-ai/devkit"],
     langchain: tarballs["@dawn-ai/langchain"],
     langgraph: tarballs["@dawn-ai/langgraph"],
+    permissions: tarballs["@dawn-ai/permissions"]!,
     sdk: tarballs["@dawn-ai/sdk"],
+    workspace: tarballs["@dawn-ai/workspace"]!,
   }
 }
 
diff --git a/test/generated/run-generated-app.test.ts b/test/generated/run-generated-app.test.ts
index d6275e4..ca10e40 100644
--- a/test/generated/run-generated-app.test.ts
+++ b/test/generated/run-generated-app.test.ts
@@ -31,7 +31,9 @@ interface PackedTarballs {
   readonly devkit: string
   readonly langchain: string
   readonly langgraph: string
+  readonly permissions: string
   readonly sdk: string
+  readonly workspace: string
 }
 
 interface GeneratedAppScenarioResult {
@@ -170,7 +172,9 @@ async function runGeneratedAppScenario(
           "@dawn-ai/core",
           "@dawn-ai/langchain",
           "@dawn-ai/langgraph",
+          "@dawn-ai/permissions",
           "@dawn-ai/sdk",
+          "@dawn-ai/workspace",
         ],
         tempRoot,
         transcriptPath,
@@ -310,7 +314,9 @@ async function rewriteDependenciesToTarballs(options: {
       "@dawn-ai/core": options.tarballs.core,
       "@dawn-ai/langchain": options.tarballs.langchain,
       "@dawn-ai/langgraph": options.tarballs.langgraph,
+      "@dawn-ai/permissions": options.tarballs.permissions,
       "@dawn-ai/sdk": options.tarballs.sdk,
+      "@dawn-ai/workspace": options.tarballs.workspace,
     },
   }
 
@@ -522,7 +528,9 @@ async function createExpectedInternalFixture(
           "@dawn-ai/core": "",
           "@dawn-ai/langchain": "",
           "@dawn-ai/langgraph": "",
+          "@dawn-ai/permissions": "",
           "@dawn-ai/sdk": "",
+          "@dawn-ai/workspace": "",
         },
       },
     },
@@ -538,7 +546,9 @@ function toPackedTarballs(tarballs: Readonly>): PackedTar
     devkit: tarballs["@dawn-ai/devkit"],
     langchain: tarballs["@dawn-ai/langchain"],
     langgraph: tarballs["@dawn-ai/langgraph"],
+    permissions: tarballs["@dawn-ai/permissions"]!,
     sdk: tarballs["@dawn-ai/sdk"],
+    workspace: tarballs["@dawn-ai/workspace"]!,
   }
 }
 
@@ -556,7 +566,9 @@ function normalizeForFixture(
     [context.tarballs.devkit, ""],
     [context.tarballs.langchain, ""],
     [context.tarballs.langgraph, ""],
+    [context.tarballs.permissions, ""],
     [context.tarballs.sdk, ""],
+    [context.tarballs.workspace, ""],
     [`/private${dirname(context.tarballs.cli)}`, ""],
     [dirname(context.tarballs.cli), ""],
     ["25.6.0", ""],
@@ -576,7 +588,9 @@ function normalizeForInternalFixture(
     [pathToRepoPackageFileSpecifier("@dawn-ai/core"), ""],
     [pathToRepoPackageFileSpecifier("@dawn-ai/langchain"), ""],
     [pathToRepoPackageFileSpecifier("@dawn-ai/langgraph"), ""],
+    [pathToRepoPackageFileSpecifier("@dawn-ai/permissions"), ""],
     [pathToRepoPackageFileSpecifier("@dawn-ai/sdk"), ""],
+    [pathToRepoPackageFileSpecifier("@dawn-ai/workspace"), ""],
     ["25.6.0", ""],
     ["6.0.2", ""],
   ]) as GeneratedAppScenarioResult
@@ -589,7 +603,9 @@ function pathToRepoPackageFileSpecifier(
     | "@dawn-ai/core"
     | "@dawn-ai/langchain"
     | "@dawn-ai/langgraph"
-    | "@dawn-ai/sdk",
+    | "@dawn-ai/permissions"
+    | "@dawn-ai/sdk"
+    | "@dawn-ai/workspace",
 ): string {
   const packageDirByName = {
     "@dawn-ai/cli": "packages/cli",
@@ -597,7 +613,9 @@ function pathToRepoPackageFileSpecifier(
     "@dawn-ai/core": "packages/core",
     "@dawn-ai/langchain": "packages/langchain",
     "@dawn-ai/langgraph": "packages/langgraph",
+    "@dawn-ai/permissions": "packages/permissions",
     "@dawn-ai/sdk": "packages/sdk",
+    "@dawn-ai/workspace": "packages/workspace",
   } as const
 
   return pathToFileURL(resolve(REPO_ROOT, packageDirByName[packageName])).toString()

From a11027986e4b82b746aff80b68ce65908a1d8fb0 Mon Sep 17 00:00:00 2001
From: Brian Love 
Date: Thu, 21 May 2026 14:11:03 -0700
Subject: [PATCH 22/23] test(harness): pack workspace + permissions in runtime
 contract verify

Co-Authored-By: Claude Opus 4.7 
---
 test/runtime/run-runtime-contract.test.ts | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/test/runtime/run-runtime-contract.test.ts b/test/runtime/run-runtime-contract.test.ts
index 4b6951d..cb17c79 100644
--- a/test/runtime/run-runtime-contract.test.ts
+++ b/test/runtime/run-runtime-contract.test.ts
@@ -482,7 +482,9 @@ async function withRuntimeScenario(
           "@dawn-ai/core",
           "@dawn-ai/langchain",
           "@dawn-ai/langgraph",
+          "@dawn-ai/permissions",
           "@dawn-ai/sdk",
+          "@dawn-ai/workspace",
         ],
         tempRoot,
         transcriptPath,
@@ -712,7 +714,9 @@ async function rewriteDependenciesToTarballs(options: {
     "@dawn-ai/cli": options.tarballs["@dawn-ai/cli"],
     "@dawn-ai/core": options.tarballs["@dawn-ai/core"],
     "@dawn-ai/langchain": options.tarballs["@dawn-ai/langchain"],
+    "@dawn-ai/permissions": options.tarballs["@dawn-ai/permissions"],
     "@dawn-ai/sdk": options.tarballs["@dawn-ai/sdk"],
+    "@dawn-ai/workspace": options.tarballs["@dawn-ai/workspace"],
   }
   packageJson.devDependencies = {
     ...packageJson.devDependencies,
@@ -727,7 +731,9 @@ async function rewriteDependenciesToTarballs(options: {
       "@dawn-ai/core": options.tarballs["@dawn-ai/core"],
       "@dawn-ai/langchain": options.tarballs["@dawn-ai/langchain"],
       "@dawn-ai/langgraph": options.tarballs["@dawn-ai/langgraph"],
+      "@dawn-ai/permissions": options.tarballs["@dawn-ai/permissions"],
       "@dawn-ai/sdk": options.tarballs["@dawn-ai/sdk"],
+      "@dawn-ai/workspace": options.tarballs["@dawn-ai/workspace"],
     },
   }
 

From 3bbbb8006bb4462c73ae7468c2377a4cff29f5d1 Mon Sep 17 00:00:00 2001
From: Brian Love 
Date: Thu, 21 May 2026 14:22:51 -0700
Subject: [PATCH 23/23] test(harness): pack workspace + permissions in runtime
 smoke verify

Co-Authored-By: Claude Opus 4.7 
---
 test/smoke/run-smoke.test.ts | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/test/smoke/run-smoke.test.ts b/test/smoke/run-smoke.test.ts
index 210ef25..a6f78d2 100644
--- a/test/smoke/run-smoke.test.ts
+++ b/test/smoke/run-smoke.test.ts
@@ -159,7 +159,9 @@ async function runSmokeScenario(fixtureName: SmokeFixtureName): Promise