diff --git a/app/src/components/ChatKit/ChatKitPanel.test.tsx b/app/src/components/ChatKit/ChatKitPanel.test.tsx
index d61bc322..7016341f 100644
--- a/app/src/components/ChatKit/ChatKitPanel.test.tsx
+++ b/app/src/components/ChatKit/ChatKitPanel.test.tsx
@@ -103,6 +103,7 @@ vi.mock("../../contexts/NotebookContext", () => ({
   useNotebookContext: () => ({
     getNotebookData: () => undefined,
     useNotebookSnapshot: () => ({ notebook: { cells: [] } }),
+    useNotebookList: () => [],
   }),
 }));
 
@@ -272,6 +273,60 @@ describe("ChatKitPanel codex harness routing", () => {
     expect(bridgeMock.connect).not.toHaveBeenCalled();
   });
 
+  it("handles ExecuteCode params with top-level code for NotebookService tool names", async () => {
+    render(<ChatKitPanel />);
+    const config = useChatKitMock.mock.calls.at(0)?.[0];
+    // Call the captured onClientTool handler with `code` at the top level of params (no executeCode wrapper).
+    const result = await config.onClientTool({
+      name: "agent_tools_v1_NotebookService_ExecuteCode",
+      params: {
+        call_id: "call-top-level",
+        previous_response_id: "resp-1",
+        code: "console.log('ok')",
+      },
+    });
+    // Both ids must be echoed back, and clientError must not contain either decode-failure message.
+    expect(result.callId).toBe("call-top-level");
+    expect(result.previousResponseId).toBe("resp-1");
+    expect(String(result.clientError ?? "")).not.toContain("Failed to decode tool params");
+    expect(String(result.clientError ?? "")).not.toContain('key "code" is unknown');
+  });
+
+  it("handles ExecuteCode params for direct ExecuteCode tool name", async () => {
+    render(<ChatKitPanel />);
+    const config = useChatKitMock.mock.calls.at(0)?.[0];
+    // Same top-level `code` payload, but addressed by the bare "ExecuteCode" name and without previous_response_id.
+    const result = await config.onClientTool({
+      name: "ExecuteCode",
+      params: {
+        call_id: "call-direct",
+        code: "console.log('direct')",
+      },
+    });
+    // The call id must be echoed back, and clientError must not contain either decode-failure message.
+    expect(result.callId).toBe("call-direct");
+    expect(String(result.clientError ?? "")).not.toContain("Failed to decode tool params");
+    expect(String(result.clientError ?? "")).not.toContain('key "code" is unknown');
+  });
+
+  it("returns unknown tool error for unrecognized tool names", async () => {
+    render(<ChatKitPanel />);
+    const config = useChatKitMock.mock.calls.at(0)?.[0];
+    // ExecuteCode-shaped params (empty call_id) sent under a tool name the handler is not expected to recognize.
+    const result = await config.onClientTool({
+      name: "unknown_tool_name",
+      params: {
+        call_id: "",
+        previous_response_id: "resp-1",
+        code: "console.log('payload-shape')",
+      },
+    });
+    // clientError must name the unknown tool and must not contain either decode-failure message.
+    expect(String(result.clientError ?? "")).toContain("Unknown tool unknown_tool_name");
+    expect(String(result.clientError ?? "")).not.toContain("Failed to decode tool params");
+    expect(String(result.clientError ?? "")).not.toContain('key "code" is unknown');
+  });
+
   it("routes ChatKit to /codex/app-server/ws and connects codex bridge + proxy websocket", async () => {
     harnessState.defaultHarness.adapter = "codex";
 
diff --git a/app/src/components/ChatKit/ChatKitPanel.tsx b/app/src/components/ChatKit/ChatKitPanel.tsx
index 51a149e3..dba94da4 100644
--- a/app/src/components/ChatKit/ChatKitPanel.tsx
+++ b/app/src/components/ChatKit/ChatKitPanel.tsx
@@ -1,33 +1,41 @@
-import { useCallback, useEffect, useMemo, useRef, useState } from "react";
-import { ChatKit, useChatKit, ChatKitIcon } from "@openai/chatkit-react";
-import { parser_pb, RunmeMetadataKey, useCell } from "../../contexts/CellContext";
-import { useNotebookContext } from "../../contexts/NotebookContext";
-import { useOutput } from "../../contexts/OutputContext";
-import { useCurrentDoc } from "../../contexts/CurrentDocContext";
-import { create, fromJsonString, toJson } from "@bufbuild/protobuf";
+import { useCallback, useEffect, useMemo, useRef, useState } from 'react'
+import { ChatKit, useChatKit, ChatKitIcon } from '@openai/chatkit-react'
+import {
+  parser_pb,
+  RunmeMetadataKey,
+  useCell,
+} from '../../contexts/CellContext'
+import { useNotebookContext } from '../../contexts/NotebookContext'
+import { useOutput } from '../../contexts/OutputContext'
+import { useCurrentDoc } from '../../contexts/CurrentDocContext'
+import { create, fromJsonString, toJson } from '@bufbuild/protobuf'
 import {
   useHarness,
   buildChatkitUrl,
   buildCodexAppServerWsUrl,
   buildCodexBridgeWsUrl,
   type HarnessProfile,
-} from "../../lib/runtime/harnessManager";
-import { getCodexToolBridge } from "../../lib/runtime/codexToolBridge";
-import { getCodexExecuteApprovalManager } from "../../lib/runtime/codexExecuteApprovalManager";
-import { getCodexAppServerProxyClient } from "../../lib/runtime/codexAppServerProxyClient";
-import { createCodexChatkitFetch } from "../../lib/runtime/codexChatkitFetch";
-import { createResponsesDirectChatkitFetch } from "../../lib/runtime/responsesDirectChatkitFetch";
+} from '../../lib/runtime/harnessManager'
+import { getCodexToolBridge } from '../../lib/runtime/codexToolBridge'
+import { getCodexExecuteApprovalManager } from '../../lib/runtime/codexExecuteApprovalManager'
+import { getCodexAppServerProxyClient } from '../../lib/runtime/codexAppServerProxyClient'
+import { createCodexChatkitFetch } from '../../lib/runtime/codexChatkitFetch'
+import { createResponsesDirectChatkitFetch } from '../../lib/runtime/responsesDirectChatkitFetch'
+import {
+  createCodeModeExecutor,
+  getCodeModeErrorOutput,
+} from '../../lib/runtime/codeModeExecutor'
 import {
   getCodexConversationController,
   useCodexConversationSnapshot,
-} from "../../lib/runtime/codexConversationController";
-import { useCodexProjects } from "../../lib/runtime/codexProjectManager";
-import { appLogger } from "../../lib/logging/runtime";
-import { responsesDirectConfigManager } from "../../lib/runtime/responsesDirectConfigManager";
-
-import { getAccessToken, getAuthData } from "../../token";
-import { getBrowserAdapter } from "../../browserAdapter.client";
-import { type Cell } from "../../protogen/runme/parser/v1/parser_pb.js";
+} from '../../lib/runtime/codexConversationController'
+import { useCodexProjects } from '../../lib/runtime/codexProjectManager'
+import { appLogger } from '../../lib/logging/runtime'
+import { responsesDirectConfigManager } from '../../lib/runtime/responsesDirectConfigManager'
+
+import { getAccessToken, getAuthData } from '../../token'
+import { getBrowserAdapter } from '../../browserAdapter.client'
+import { type Cell } from '../../protogen/runme/parser/v1/parser_pb.js'
 import {
   ToolCallInputSchema,
   ToolCallOutputSchema,
@@ -37,156 +45,217 @@ import {
   ListCellsResponseSchema,
   ChatkitStateSchema,
   NotebookServiceExecuteCellsResponseSchema,
-} from "../../protogen/oaiproto/aisre/notebooks_pb.js";
-import { getConfiguredChatKitDomainKey } from "../../lib/appConfig";
+} from '../../protogen/oaiproto/aisre/notebooks_pb.js'
+import { getConfiguredChatKitDomainKey } from '../../lib/appConfig'
 class UserNotLoggedInError extends Error {
-  constructor(message = "You must log in to use runme chat.") {
-    super(message);
-    this.name = "UserNotLoggedInError";
+  constructor(message = 'You must log in to use runme chat.') {
+    super(message)
+    this.name = 'UserNotLoggedInError'
   }
 }
 
-const CHATKIT_GREETING = "How can runme help you today?";
+const CHATKIT_GREETING = 'How can runme help you today?'
 
 const CHATKIT_PLACEHOLDER =
-  "Describe the production issue or question you are investigating";
+  'Describe the production issue or question you are investigating'
 
 const CHATKIT_STARTER_PROMPTS = [
   {
-    label: "Setup a local runner for runme to execute code",
-    prompt: "How do I setup a local runner to execute code with runme?",
-    icon: "circle-question",
+    label: 'Setup a local runner for runme to execute code',
+    prompt: 'How do I setup a local runner to execute code with runme?',
+    icon: 'circle-question',
   },
   {
-    label: "Plot metrics",
-    prompt: "Plot the requests for the o3 model.",
-    icon: "book-open",
+    label: 'Plot metrics',
+    prompt: 'Plot the requests for the o3 model.',
+    icon: 'book-open',
   },
   {
-    label: "Handle an alert or incident",
+    label: 'Handle an alert or incident',
     prompt:
-      "I just got paged for TBT (time between tokens) being high. Search notion, the mono-repo, and slack for runbooks for dealing with this alert and give me instrunctions for dealing with it?",
-    icon: "search",
+      'I just got paged for TBT (time between tokens) being high. Search notion, the mono-repo, and slack for runbooks for dealing with this alert and give me instructions for dealing with it?',
+    icon: 'search',
   },
-] as const;
+] as const
 
 // Transitional: NotebookService currently exposes multiple notebook-specific tools.
 // Design direction is to simplify the agent-facing surface to a single
 // "execute JavaScript" capability and route notebook mutations through the
 // sandbox NotebooksApi.
-const TOOL_PREFIX = "agent_tools_v1_NotebookService_";
+const TOOL_PREFIX = 'agent_tools_v1_NotebookService_'
+
+const UPDATE_CELLS_TOOL = TOOL_PREFIX + 'UpdateCells'
+const LIST_CELLS_TOOL = TOOL_PREFIX + 'ListCells'
+const GET_CELLS_TOOL = TOOL_PREFIX + 'GetCells'
+const EXECUTE_CODE_TOOL = TOOL_PREFIX + 'ExecuteCode'
+const EXECUTE_CODE_DIRECT_TOOL = 'ExecuteCode'
 
-const UPDATE_CELLS_TOOL = TOOL_PREFIX + "UpdateCells";
-const LIST_CELLS_TOOL = TOOL_PREFIX + "ListCells";
-const GET_CELLS_TOOL = TOOL_PREFIX + "GetCells";
+/** Returns `value` when it is a non-null, non-array object; otherwise `{}`. */
+function asRecord(value: unknown): Record<string, unknown> {
+  const isObjectMap =
+    typeof value === 'object' && value !== null && !Array.isArray(value)
+  return isObjectMap ? (value as Record<string, unknown>) : {}
+}
+
+function asString(value: unknown): string {
+  return typeof value !== 'string' ? '' : value // non-strings collapse to ''
+}
+
+/**
+ * Extracts the ExecuteCode fields from a tool payload.
+ * Accepts a JSON string or an object, camelCase or snake_case keys, and
+ * `code` either top-level or nested under executeCode / execute_code.
+ * Returns null for unparseable JSON or when no non-empty string `code` exists.
+ */
+function parseExecuteCodePayload(value: unknown): {
+  callId: string
+  previousResponseId: string
+  code: string
+} | null {
+  if (typeof value === 'string') {
+    try {
+      return parseExecuteCodePayload(JSON.parse(value))
+    } catch {
+      return null
+    }
+  }
+  const envelope = asRecord(value)
+  const nested = envelope.executeCode ?? envelope.execute_code ?? envelope
+  const source = asString(asRecord(nested).code)
+  if (source === '') return null
+  const callId = asString(envelope.callId ?? envelope.call_id)
+  const priorId =
+    envelope.previousResponseId ?? envelope.previous_response_id
+  return { callId, previousResponseId: asString(priorId), code: source }
+}
+
+// Builds the JSON-shaped result object for an ExecuteCode tool call.
+// status is 'STATUS_FAILED' only when clientError holds non-whitespace text;
+// clientError itself is passed through as given ('' when omitted).
+function buildExecuteCodeToolOutput(args: {
+  callId: string
+  previousResponseId: string
+  output: string
+  clientError?: string
+}): Record<string, unknown> {
+  const { callId, previousResponseId, output, clientError } = args
+  const failed = (clientError ?? '').trim() !== ''
+  return {
+    callId,
+    previousResponseId,
+    status: failed ? 'STATUS_FAILED' : 'STATUS_SUCCESS',
+    clientError: clientError ?? '',
+    executeCode: { output },
+  }
+}
 
-type SSEInterceptor = (rawEvent: string) => void;
+type SSEInterceptor = (rawEvent: string) => void
 
 const useAuthorizedFetch = (
-  getChatkitState: () => ReturnType<(typeof ChatkitStateSchema)["create"]>,
+  getChatkitState: () => ReturnType<(typeof ChatkitStateSchema)['create']>,
   options?: {
-    onSSEEvent?: SSEInterceptor;
-    baseFetch?: typeof fetch;
-    includeRunmeHeaders?: boolean;
-    includeChatkitState?: boolean;
-  },
+    onSSEEvent?: SSEInterceptor
+    baseFetch?: typeof fetch
+    includeRunmeHeaders?: boolean
+    includeChatkitState?: boolean
+  }
 ) => {
   const {
     onSSEEvent,
     baseFetch,
     includeRunmeHeaders = true,
     includeChatkitState = true,
-  } = options ?? {};
+  } = options ?? {}
   return useMemo(() => {
-    const fetchImpl = baseFetch ?? fetch;
+    const fetchImpl = baseFetch ?? fetch
     const resolveRequestBody = async (
       input: RequestInfo | URL,
-      init?: RequestInit,
+      init?: RequestInit
     ): Promise<BodyInit | null | undefined> => {
       if (init?.body != null) {
-        return init.body;
+        return init.body
       }
       if (!(input instanceof Request)) {
-        return init?.body;
+        return init?.body
       }
-      const clone = input.clone();
-      const contentType = clone.headers.get("content-type")?.toLowerCase() ?? "";
-      if (contentType.includes("multipart/form-data")) {
+      const clone = input.clone()
+      const contentType = clone.headers.get('content-type')?.toLowerCase() ?? ''
+      if (contentType.includes('multipart/form-data')) {
         try {
-          return await clone.formData();
+          return await clone.formData()
         } catch {
-          return null;
+          return null
         }
       }
-      if (contentType.includes("application/x-www-form-urlencoded")) {
+      if (contentType.includes('application/x-www-form-urlencoded')) {
         try {
-          return new URLSearchParams(await clone.text());
+          return new URLSearchParams(await clone.text())
         } catch {
-          return null;
+          return null
         }
       }
       try {
-        return await clone.text();
+        return await clone.text()
       } catch {
-        return null;
+        return null
       }
-    };
+    }
     const authorizedFetch: typeof fetch = async (
       input: RequestInfo | URL,
-      init?: RequestInit,
+      init?: RequestInit
     ) => {
       try {
         const headers = new Headers(
           init?.headers ??
-            (input instanceof Request ? input.headers : undefined),
-        );
+            (input instanceof Request ? input.headers : undefined)
+        )
 
         if (includeRunmeHeaders) {
-          const authData = await getAuthData();
-          const idToken = authData?.idToken ?? undefined;
-          const oaiAccessToken = await getAccessToken();
+          const authData = await getAuthData()
+          const idToken = authData?.idToken ?? undefined
+          const oaiAccessToken = await getAccessToken()
           if (!oaiAccessToken) {
-            throw new UserNotLoggedInError();
+            throw new UserNotLoggedInError()
           }
           if (idToken) {
-            headers.set("Authorization", `Bearer ${idToken}`);
+            headers.set('Authorization', `Bearer ${idToken}`)
           }
-          headers.set("OpenAIAccessToken", oaiAccessToken);
+          headers.set('OpenAIAccessToken', oaiAccessToken)
         }
 
-        let body = await resolveRequestBody(input, init);
+        let body = await resolveRequestBody(input, init)
         const method =
-          init?.method ?? (input instanceof Request ? input.method : "GET");
-        if (includeChatkitState && method.toUpperCase() === "POST") {
-          const state = getChatkitState();
-          const chatkitStateJson = toJson(ChatkitStateSchema, state);
+          init?.method ?? (input instanceof Request ? input.method : 'GET')
+        if (includeChatkitState && method.toUpperCase() === 'POST') {
+          const state = getChatkitState()
+          const chatkitStateJson = toJson(ChatkitStateSchema, state)
           if (body == null) {
-            body = JSON.stringify({ chatkit_state: chatkitStateJson });
-            headers.set("Content-Type", "application/json");
+            body = JSON.stringify({ chatkit_state: chatkitStateJson })
+            headers.set('Content-Type', 'application/json')
           } else {
-            if (typeof body === "string") {
+            if (typeof body === 'string') {
               try {
-                const parsed = JSON.parse(body);
-                parsed.chatkit_state = chatkitStateJson;
-                body = JSON.stringify(parsed);
+                const parsed = JSON.parse(body)
+                parsed.chatkit_state = chatkitStateJson
+                body = JSON.stringify(parsed)
               } catch {
-                const payload = new FormData();
-                payload.append("payload", body);
+                const payload = new FormData()
+                payload.append('payload', body)
                 payload.append(
-                  "chatkit_state",
-                  JSON.stringify(chatkitStateJson),
-                );
-                body = payload;
+                  'chatkit_state',
+                  JSON.stringify(chatkitStateJson)
+                )
+                body = payload
               }
             } else if (body instanceof FormData) {
-              body.set("chatkit_state", JSON.stringify(chatkitStateJson));
+              body.set('chatkit_state', JSON.stringify(chatkitStateJson))
             } else if (body instanceof URLSearchParams) {
-              body.set("chatkit_state", JSON.stringify(chatkitStateJson));
+              body.set('chatkit_state', JSON.stringify(chatkitStateJson))
             } else if (body instanceof Blob || body instanceof ArrayBuffer) {
-              const payload = new FormData();
-              payload.append("payload", new Blob([body]));
-              payload.append("chatkit_state", JSON.stringify(chatkitStateJson));
-              body = payload;
+              const payload = new FormData()
+              payload.append('payload', new Blob([body]))
+              payload.append('chatkit_state', JSON.stringify(chatkitStateJson))
+              body = payload
             }
           }
         }
@@ -195,521 +264,646 @@ const useAuthorizedFetch = (
           ...init,
           headers,
           body,
-        };
+        }
 
-        const response = await fetchImpl(input, nextInit);
+        const response = await fetchImpl(input, nextInit)
         //return response;
         const isSSE =
           onSSEEvent &&
           response.headers
-            .get("content-type")
+            .get('content-type')
             ?.toLowerCase()
-            .includes("text/event-stream") &&
-          response.body;
+            .includes('text/event-stream') &&
+          response.body
 
         if (!isSSE || !response.body) {
-          return response;
+          return response
         }
 
-        const decoder = new TextDecoder();
-        const encoder = new TextEncoder();
-        const reader = response.body.getReader();
-        let buffer = "";
+        const decoder = new TextDecoder()
+        const encoder = new TextEncoder()
+        const reader = response.body.getReader()
+        let buffer = ''
 
         const stream = new ReadableStream<Uint8Array>({
           async pull(controller) {
-            const { done, value } = await reader.read();
+            const { done, value } = await reader.read()
             if (done) {
-              const tail = decoder.decode();
+              const tail = decoder.decode()
               if (tail) {
-                buffer += tail;
+                buffer += tail
               }
               if (buffer.length > 0) {
                 try {
-                  onSSEEvent?.(buffer);
+                  onSSEEvent?.(buffer)
                 } catch (eventError) {
-                  console.error("SSE interceptor error", eventError);
+                  console.error('SSE interceptor error', eventError)
                 }
-                controller.enqueue(encoder.encode(buffer));
+                controller.enqueue(encoder.encode(buffer))
               }
-              controller.close();
-              return;
+              controller.close()
+              return
             }
 
             if (value) {
-              buffer += decoder.decode(value, { stream: true });
-              let boundary;
-              while ((boundary = buffer.indexOf("\n\n")) !== -1) {
-                const rawEvent = buffer.slice(0, boundary + 2);
-                buffer = buffer.slice(boundary + 2);
+              buffer += decoder.decode(value, { stream: true })
+              let boundary
+              while ((boundary = buffer.indexOf('\n\n')) !== -1) {
+                const rawEvent = buffer.slice(0, boundary + 2)
+                buffer = buffer.slice(boundary + 2)
                 try {
-                  onSSEEvent?.(rawEvent);
+                  onSSEEvent?.(rawEvent)
                 } catch (eventError) {
-                  console.error("SSE interceptor error", eventError);
+                  console.error('SSE interceptor error', eventError)
                 }
-                controller.enqueue(encoder.encode(rawEvent));
+                controller.enqueue(encoder.encode(rawEvent))
               }
             }
           },
           async cancel(reason) {
             try {
-              await reader.cancel(reason);
+              await reader.cancel(reason)
             } catch (cancelError) {
-              console.error("Failed to cancel SSE reader", cancelError);
+              console.error('Failed to cancel SSE reader', cancelError)
             }
           },
-        });
+        })
 
         const interceptedResponse = new Response(stream, {
           status: response.status,
           statusText: response.statusText,
           headers: new Headers(response.headers),
-        });
+        })
 
-        return interceptedResponse;
+        return interceptedResponse
       } catch (error) {
-        console.error("ChatKit authorized fetch failed", error);
-        appLogger.error("ChatKit authorized fetch failed", {
+        console.error('ChatKit authorized fetch failed', error)
+        appLogger.error('ChatKit authorized fetch failed', {
           attrs: {
-            scope: "chatkit.fetch",
+            scope: 'chatkit.fetch',
             error: String(error),
           },
-        });
-        throw error;
+        })
+        throw error
       }
-    };
+    }
 
-    return authorizedFetch;
+    return authorizedFetch
   }, [
     baseFetch,
     onSSEEvent,
     getChatkitState,
     includeRunmeHeaders,
     includeChatkitState,
-  ]);
-};
+  ])
+}
 
 type ChatKitPanelInnerProps = {
-  defaultHarness: HarnessProfile;
-};
+  defaultHarness: HarnessProfile
+}
 
 function ChatKitPanelInner({ defaultHarness }: ChatKitPanelInnerProps) {
-  const [showLoginPrompt, setShowLoginPrompt] = useState(false);
-  const [codexStreamError, setCodexStreamError] = useState<string | null>(null);
-  const [codexThreadBootstrapComplete, setCodexThreadBootstrapComplete] = useState(
-    defaultHarness.adapter !== "codex",
-  );
-  const chatkitDomainKey = getConfiguredChatKitDomainKey();
-  const [showCodexDrawer, setShowCodexDrawer] = useState(false);
+  const [showLoginPrompt, setShowLoginPrompt] = useState(false)
+  const [codexStreamError, setCodexStreamError] = useState<string | null>(null)
+  const [codexThreadBootstrapComplete, setCodexThreadBootstrapComplete] =
+    useState(defaultHarness.adapter !== 'codex')
+  const chatkitDomainKey = getConfiguredChatKitDomainKey()
+  const [showCodexDrawer, setShowCodexDrawer] = useState(false)
   const syncedCodexStateRef = useRef<{
-    threadId: string | null;
-    previousResponseId: string | null;
+    threadId: string | null
+    previousResponseId: string | null
   }>({
     threadId: null,
     previousResponseId: null,
-  });
+  })
   const chatkitActionsRef = useRef<{
-    setThreadId: (threadId: string | null, source?: string) => Promise<void>;
-    fetchUpdates: (source?: string) => Promise<void>;
-  } | null>(null);
-  const lastAppliedCodexThreadRef = useRef<string | null>(null);
-  const { getChatkitState } = useCell();
-  const { getNotebookData, useNotebookSnapshot } = useNotebookContext();
-  const { getCurrentDoc } = useCurrentDoc();
-  const { getAllRenderers } = useOutput();
-  const codexProjects = useCodexProjects();
-  const { defaultProject } = codexProjects;
-  const codexConversation = useCodexConversationSnapshot();
-  const currentDocUri = getCurrentDoc();
-  const notebookSnapshot = useNotebookSnapshot(currentDocUri ?? "");
+    setThreadId: (threadId: string | null, source?: string) => Promise<void>
+    fetchUpdates: (source?: string) => Promise<void>
+  } | null>(null)
+  const lastAppliedCodexThreadRef = useRef<string | null>(null)
+  const { getChatkitState } = useCell()
+  const { getNotebookData, useNotebookSnapshot, useNotebookList } =
+    useNotebookContext()
+  const { getCurrentDoc } = useCurrentDoc()
+  const { getAllRenderers } = useOutput()
+  const codexProjects = useCodexProjects()
+  const { defaultProject } = codexProjects
+  const codexConversation = useCodexConversationSnapshot()
+  const currentDocUri = getCurrentDoc()
+  const openNotebookList = useNotebookList()
+  const notebookSnapshot = useNotebookSnapshot(currentDocUri ?? '')
   const orderedCells = useMemo(
     () => notebookSnapshot?.notebook.cells ?? [],
-    [notebookSnapshot],
-  );
+    [notebookSnapshot]
+  )
   const updateCell = useCallback(
     (cell: Cell) => {
       if (!cell?.refId || !currentDocUri) {
-        return;
+        return
       }
-      const data = getNotebookData(currentDocUri);
+      const data = getNotebookData(currentDocUri)
       if (!data) {
-        return;
+        return
       }
       for (const renderer of getAllRenderers().values()) {
-        renderer.onCellUpdate(cell as unknown as parser_pb.Cell);
+        renderer.onCellUpdate(cell as unknown as parser_pb.Cell)
       }
-      data.updateCell(cell as unknown as parser_pb.Cell);
+      data.updateCell(cell as unknown as parser_pb.Cell)
     },
-    [currentDocUri, getAllRenderers, getNotebookData],
-  );
+    [currentDocUri, getAllRenderers, getNotebookData]
+  )
 
   const getLatestCells = useCallback((): Cell[] => {
     if (!currentDocUri) {
-      return orderedCells;
+      return orderedCells
     }
-    const data = getNotebookData(currentDocUri);
-    return (data?.getNotebook().cells ?? orderedCells) as unknown as Cell[];
-  }, [currentDocUri, getNotebookData, orderedCells]);
+    const data = getNotebookData(currentDocUri)
+    return (data?.getNotebook().cells ?? orderedCells) as unknown as Cell[]
+  }, [currentDocUri, getNotebookData, orderedCells])
+
+  const resolveCodeModeNotebook = useCallback(
+    (target?: unknown) => {
+      const targetUri =
+        typeof target === 'string'
+          ? target
+          : typeof target === 'object' && target && 'uri' in target
+            ? (target as { uri?: string }).uri
+            : typeof target === 'object' &&
+                target &&
+                'handle' in target &&
+                (target as { handle?: { uri?: string } }).handle?.uri
+              ? (target as { handle?: { uri?: string } }).handle?.uri
+              : currentDocUri
+      if (!targetUri) {
+        return null
+      }
+      const data = getNotebookData(targetUri)
+      if (!data) {
+        return null
+      }
+
+      return {
+        getUri: () => data.getUri(),
+        getName: () => data.getName(),
+        getNotebook: () => data.getNotebook(),
+        updateCell: (cell: parser_pb.Cell) => {
+          for (const renderer of getAllRenderers().values()) {
+            renderer.onCellUpdate(cell)
+          }
+          data.updateCell(cell)
+        },
+        getCell: (refId: string) => data.getCell(refId),
+        appendCodeCell: data.appendCodeCell?.bind(data),
+        addCodeCellAfter: data.addCodeCellAfter?.bind(data),
+        addCodeCellBefore: data.addCodeCellBefore?.bind(data),
+        removeCell: data.removeCell?.bind(data),
+      }
+    },
+    [currentDocUri, getAllRenderers, getNotebookData]
+  )
+
+  const codeModeExecutor = useMemo(
+    () =>
+      createCodeModeExecutor({
+        mode: 'sandbox',
+        resolveNotebook: resolveCodeModeNotebook,
+        listNotebooks: () => {
+          const uris = new Set<string>()
+          for (const notebook of openNotebookList) {
+            if (typeof notebook?.uri === 'string' && notebook.uri.trim()) {
+              uris.add(notebook.uri)
+            }
+          }
+          if (currentDocUri) {
+            uris.add(currentDocUri)
+          }
+          return Array.from(uris)
+            .map((uri) => resolveCodeModeNotebook(uri))
+            .filter(
+              (
+                notebook
+              ): notebook is NonNullable<
+                ReturnType<typeof resolveCodeModeNotebook>
+              > => Boolean(notebook)
+            )
+        },
+      }),
+    [currentDocUri, openNotebookList, resolveCodeModeNotebook]
+  )
 
   const waitForCellExecutionToComplete = useCallback(
     async (refId: string, timeoutMs = 60_000): Promise<void> => {
       if (!currentDocUri) {
-        throw new Error("No active notebook for ExecuteCells");
+        throw new Error('No active notebook for ExecuteCells')
       }
-      const startedAt = Date.now();
+      const startedAt = Date.now()
       while (Date.now() - startedAt < timeoutMs) {
-        const data = getNotebookData(currentDocUri);
-        const updatedCell = data?.getNotebook().cells.find((cell) => cell.refId === refId);
-        const exitCode = updatedCell?.metadata?.[RunmeMetadataKey.ExitCode];
-        if (typeof exitCode === "string") {
-          return;
+        const data = getNotebookData(currentDocUri)
+        const updatedCell = data
+          ?.getNotebook()
+          .cells.find((cell) => cell.refId === refId)
+        const exitCode = updatedCell?.metadata?.[RunmeMetadataKey.ExitCode]
+        if (typeof exitCode === 'string') {
+          return
         }
-        await new Promise((resolve) => setTimeout(resolve, 100));
+        await new Promise((resolve) => setTimeout(resolve, 100))
       }
-      throw new Error(`Timed out waiting for cell execution to finish: ${refId}`);
+      throw new Error(
+        `Timed out waiting for cell execution to finish: ${refId}`
+      )
     },
-    [currentDocUri, getNotebookData],
-  );
+    [currentDocUri, getNotebookData]
+  )
 
   const executeCellsWithApproval = useCallback(
     async (bridgeCallId: string, refIds: string[]): Promise<Cell[]> => {
       if (!currentDocUri) {
-        throw new Error("No active notebook for ExecuteCells");
+        throw new Error('No active notebook for ExecuteCells')
       }
-      const notebookData = getNotebookData(currentDocUri);
+      const notebookData = getNotebookData(currentDocUri)
       if (!notebookData) {
-        throw new Error("No active notebook data for ExecuteCells");
+        throw new Error('No active notebook data for ExecuteCells')
       }
-      const normalizedRefIds = refIds.filter((id) => typeof id === "string" && id.trim() !== "");
+      const normalizedRefIds = refIds.filter(
+        (id) => typeof id === 'string' && id.trim() !== ''
+      )
       if (normalizedRefIds.length === 0) {
-        throw new Error("ExecuteCells request missing refIds");
+        throw new Error('ExecuteCells request missing refIds')
       }
 
-      await getCodexExecuteApprovalManager().requestApproval(bridgeCallId, normalizedRefIds);
+      await getCodexExecuteApprovalManager().requestApproval(
+        bridgeCallId,
+        normalizedRefIds
+      )
 
       for (const refId of normalizedRefIds) {
-        const cellData = notebookData.getCell(refId);
+        const cellData = notebookData.getCell(refId)
         if (!cellData) {
-          throw new Error(`Cell not found for ExecuteCells: ${refId}`);
+          throw new Error(`Cell not found for ExecuteCells: ${refId}`)
         }
-        cellData.run();
+        cellData.run()
       }
 
       for (const refId of normalizedRefIds) {
-        await waitForCellExecutionToComplete(refId);
+        await waitForCellExecutionToComplete(refId)
       }
 
-      const latestCells = notebookData.getNotebook().cells as unknown as Cell[];
+      const latestCells = notebookData.getNotebook().cells as unknown as Cell[]
       return normalizedRefIds
         .map((refId) => latestCells.find((cell) => cell.refId === refId))
-        .filter((cell): cell is Cell => Boolean(cell));
+        .filter((cell): cell is Cell => Boolean(cell))
     },
-    [currentDocUri, getNotebookData, waitForCellExecutionToComplete],
-  );
+    [currentDocUri, getNotebookData, waitForCellExecutionToComplete]
+  )
 
   const handleCodexBridgeToolCall = useCallback(
     async ({
       bridgeCallId,
       toolCallInput,
     }: {
-      bridgeCallId: string;
-      toolCallInput: unknown;
+      bridgeCallId: string
+      toolCallInput: unknown
     }): Promise<unknown> => {
-      let decodedInput;
+      const rawExecuteCodePayload = parseExecuteCodePayload(toolCallInput)
+      if (rawExecuteCodePayload) {
+        const callId = rawExecuteCodePayload.callId || bridgeCallId
+        try {
+          const result = await codeModeExecutor.execute({
+            code: rawExecuteCodePayload.code,
+            source: 'codex',
+          })
+          return buildExecuteCodeToolOutput({
+            callId,
+            previousResponseId: rawExecuteCodePayload.previousResponseId,
+            output: result.output,
+          })
+        } catch (error) {
+          return buildExecuteCodeToolOutput({
+            callId,
+            previousResponseId: rawExecuteCodePayload.previousResponseId,
+            output: getCodeModeErrorOutput(error),
+            clientError: String(error),
+          })
+        }
+      }
+
+      let decodedInput
       try {
         const payload =
-          typeof toolCallInput === "string"
+          typeof toolCallInput === 'string'
             ? toolCallInput
-            : JSON.stringify(toolCallInput ?? {});
-        decodedInput = fromJsonString(ToolCallInputSchema, payload);
+            : JSON.stringify(toolCallInput ?? {})
+        decodedInput = fromJsonString(ToolCallInputSchema, payload)
       } catch (error) {
         const failedOutput = create(ToolCallOutputSchema, {
           status: ToolCallOutput_Status.FAILED,
           clientError: `Failed to decode tool params: ${error}`,
-        });
-        return toJson(ToolCallOutputSchema, failedOutput);
+        })
+        return toJson(ToolCallOutputSchema, failedOutput)
       }
 
       const toolOutput = create(ToolCallOutputSchema, {
         callId: decodedInput.callId,
         previousResponseId: decodedInput.previousResponseId,
         status: ToolCallOutput_Status.SUCCESS,
-        clientError: "",
-      });
-      const latestCells = getLatestCells();
-      const cellMap = new Map<string, Cell>();
+        clientError: '',
+      })
+      const latestCells = getLatestCells()
+      const cellMap = new Map<string, Cell>()
       latestCells.forEach((cell) => {
-        cellMap.set(cell.refId, cell);
-      });
+        cellMap.set(cell.refId, cell)
+      })
 
-      const inputCase = decodedInput.input?.case;
+      const inputCase = String(decodedInput.input?.case ?? '')
       switch (inputCase) {
-        case "updateCells": {
-          const cells = decodedInput.input.value?.cells ?? [];
+        case 'updateCells': {
+          const cells = decodedInput.input.value?.cells ?? []
           if (cells.length === 0) {
-            toolOutput.status = ToolCallOutput_Status.FAILED;
-            toolOutput.clientError = "UpdateCells invoked without cells payload";
-            break;
+            toolOutput.status = ToolCallOutput_Status.FAILED
+            toolOutput.clientError = 'UpdateCells invoked without cells payload'
+            break
           }
-          cells.forEach((updatedCell: Cell) => updateCell(updatedCell));
+          cells.forEach((updatedCell: Cell) => updateCell(updatedCell))
           toolOutput.output = {
-            case: "updateCells",
+            case: 'updateCells',
             value: create(UpdateCellsResponseSchema, { cells }),
-          };
-          break;
+          }
+          break
         }
-        case "listCells": {
+        case 'listCells': {
           toolOutput.output = {
-            case: "listCells",
+            case: 'listCells',
             value: create(ListCellsResponseSchema, { cells: getLatestCells() }),
-          };
-          break;
+          }
+          break
         }
-        case "getCells": {
-          const requestedRefs = decodedInput.input.value?.refIds ?? [];
+        case 'getCells': {
+          const requestedRefs = decodedInput.input.value?.refIds ?? []
           const foundCells = requestedRefs
             .map((id: string) => cellMap.get(id))
-            .filter((cell): cell is Cell => Boolean(cell));
+            .filter((cell): cell is Cell => Boolean(cell))
           toolOutput.output = {
-            case: "getCells",
+            case: 'getCells',
             value: create(GetCellsResponseSchema, { cells: foundCells }),
-          };
-          break;
+          }
+          break
         }
-        case "executeCells": {
+        case 'executeCells': {
           try {
             const executedCells = await executeCellsWithApproval(
               bridgeCallId,
-              decodedInput.input.value?.refIds ?? [],
-            );
+              decodedInput.input.value?.refIds ?? []
+            )
             toolOutput.output = {
-              case: "executeCells",
+              case: 'executeCells',
               value: create(NotebookServiceExecuteCellsResponseSchema, {
                 cells: executedCells,
               }),
-            };
+            }
+          } catch (error) {
+            toolOutput.status = ToolCallOutput_Status.FAILED
+            toolOutput.clientError = String(error)
+          }
+          break
+        }
+        case 'executeCode': {
+          const code = decodedInput.input.value?.code ?? ''
+          try {
+            const result = await codeModeExecutor.execute({
+              code,
+              source: 'codex',
+            })
+            return buildExecuteCodeToolOutput({
+              callId: decodedInput.callId || bridgeCallId,
+              previousResponseId: decodedInput.previousResponseId ?? '',
+              output: result.output,
+            })
           } catch (error) {
-            toolOutput.status = ToolCallOutput_Status.FAILED;
-            toolOutput.clientError = String(error);
+            return buildExecuteCodeToolOutput({
+              callId: decodedInput.callId || bridgeCallId,
+              previousResponseId: decodedInput.previousResponseId ?? '',
+              output: getCodeModeErrorOutput(error),
+              clientError: String(error),
+            })
           }
-          break;
         }
         default: {
-          toolOutput.status = ToolCallOutput_Status.FAILED;
-          toolOutput.clientError = `Unsupported codex notebook tool input: ${String(inputCase)}`;
-          break;
+          toolOutput.status = ToolCallOutput_Status.FAILED
+          toolOutput.clientError = `Unsupported codex notebook tool input: ${String(inputCase)}`
+          break
         }
       }
-      return toJson(ToolCallOutputSchema, toolOutput);
+      return toJson(ToolCallOutputSchema, toolOutput) as Record<string, unknown>
     },
-    [executeCellsWithApproval, getLatestCells, updateCell],
-  );
+    [codeModeExecutor, executeCellsWithApproval, getLatestCells, updateCell]
+  )
   const handleSseEvent = useCallback(
     (rawEvent: string) => {
       const lines = rawEvent
         .split(/\r?\n/)
         .map((line) => line.trim())
-        .filter(Boolean);
+        .filter(Boolean)
 
       for (const line of lines) {
-        if (!line.startsWith("data:")) {
-          continue;
+        if (!line.startsWith('data:')) {
+          continue
         }
-        const payload = line.slice("data:".length).trim();
+        const payload = line.slice('data:'.length).trim()
         if (!payload) {
-          continue;
+          continue
         }
 
         try {
-          const parsed = JSON.parse(payload);
-          if (parsed?.type === "response.failed") {
+          const parsed = JSON.parse(payload)
+          if (parsed?.type === 'response.failed') {
             const message =
-              typeof parsed?.error?.message === "string"
+              typeof parsed?.error?.message === 'string'
                 ? parsed.error.message
-                : "Codex request failed.";
-            appLogger.error("ChatKit response stream failed", {
+                : 'Codex request failed.'
+            appLogger.error('ChatKit response stream failed', {
               attrs: {
-                scope: "chatkit.panel",
+                scope: 'chatkit.panel',
                 adapter: defaultHarness.adapter,
                 error: message,
               },
-            });
-            setCodexStreamError(message);
-            continue;
+            })
+            setCodexStreamError(message)
+            continue
           }
           if (
-            parsed?.type === "response.created" ||
-            parsed?.type === "response.output_text.delta" ||
-            parsed?.type === "response.completed"
+            parsed?.type === 'response.created' ||
+            parsed?.type === 'response.output_text.delta' ||
+            parsed?.type === 'response.completed'
           ) {
-            setCodexStreamError(null);
+            setCodexStreamError(null)
           }
-          if (parsed?.type !== "aisre.chatkit.state") {
-            continue;
+          if (parsed?.type !== 'aisre.chatkit.state') {
+            continue
           }
 
-          const item = parsed?.item ?? parsed?.Item;
+          const item = parsed?.item ?? parsed?.Item
           if (!item) {
-            continue;
+            continue
           }
 
-          const stateData = item.state ?? item.State ?? item;
+          const stateData = item.state ?? item.State ?? item
           if (!stateData) {
-            continue;
+            continue
           }
 
           const state = fromJsonString(
             ChatkitStateSchema,
-            JSON.stringify(stateData),
-          );
-          if (defaultHarness.adapter === "codex") {
-            appLogger.info("Ignoring Codex ChatKit state event", {
+            JSON.stringify(stateData)
+          )
+          if (defaultHarness.adapter === 'codex') {
+            appLogger.info('Ignoring Codex ChatKit state event', {
               attrs: {
-                scope: "chatkit.panel",
+                scope: 'chatkit.panel',
                 adapter: defaultHarness.adapter,
                 threadId: state.threadId ?? null,
                 previousResponseId: state.previousResponseId ?? null,
               },
-            });
-            continue;
+            })
+            continue
           }
-          appLogger.info("Received ChatKit state event", {
+          appLogger.info('Received ChatKit state event', {
             attrs: {
-              scope: "chatkit.panel",
+              scope: 'chatkit.panel',
               adapter: defaultHarness.adapter,
               threadId: state.threadId ?? null,
               previousResponseId: state.previousResponseId ?? null,
             },
-          });
+          })
           // setChatkitState(state);
           if (state.previousResponseId || state.threadId) {
             console.log(
-              "ChatKit state update",
+              'ChatKit state update',
               JSON.stringify(
                 {
                   previous_response_id: state.previousResponseId,
                   thread_id: state.threadId,
                 },
                 null,
-                2,
-              ),
-            );
+                2
+              )
+            )
           }
         } catch (error) {
-          console.error("Failed to parse SSE state event", error, payload);
+          console.error('Failed to parse SSE state event', error, payload)
         }
       }
     },
-    [defaultHarness.adapter],
-  );
-  const codexFetch = useMemo(() => createCodexChatkitFetch(), []);
+    [defaultHarness.adapter]
+  )
+  const codexFetch = useMemo(() => createCodexChatkitFetch(), [])
   const responsesDirectFetch = useMemo(
     () =>
       createResponsesDirectChatkitFetch({
         responsesApiBaseUrl:
-          defaultHarness.adapter === "responses-direct" ? defaultHarness.baseUrl : "",
+          defaultHarness.adapter === 'responses-direct'
+            ? defaultHarness.baseUrl
+            : '',
       }),
-    [defaultHarness.adapter, defaultHarness.baseUrl],
-  );
+    [defaultHarness.adapter, defaultHarness.baseUrl]
+  )
   const getAuthorizedChatkitState = useCallback(() => {
-    if (defaultHarness.adapter !== "codex") {
-      return getChatkitState();
+    if (defaultHarness.adapter !== 'codex') {
+      return getChatkitState()
     }
-    const controllerSnapshot = getCodexConversationController().getSnapshot();
+    const controllerSnapshot = getCodexConversationController().getSnapshot()
     return create(ChatkitStateSchema, {
       threadId:
         syncedCodexStateRef.current.threadId ??
         controllerSnapshot.currentThreadId ??
-        "",
-      previousResponseId: syncedCodexStateRef.current.previousResponseId ?? "",
-    });
-  }, [defaultHarness.adapter, getChatkitState]);
+        '',
+      previousResponseId: syncedCodexStateRef.current.previousResponseId ?? '',
+    })
+  }, [defaultHarness.adapter, getChatkitState])
   const authorizedFetch = useAuthorizedFetch(getAuthorizedChatkitState, {
     onSSEEvent: handleSseEvent,
     baseFetch:
-      defaultHarness.adapter === "codex"
-        ? codexFetch
-        : responsesDirectFetch,
-    includeRunmeHeaders: defaultHarness.adapter === "codex",
-    includeChatkitState: defaultHarness.adapter === "codex",
-  });
+      defaultHarness.adapter === 'codex' ? codexFetch : responsesDirectFetch,
+    includeRunmeHeaders: defaultHarness.adapter === 'codex',
+    includeChatkitState: defaultHarness.adapter === 'codex',
+  })
 
   const chatkitApiUrl = useMemo(() => {
-    return buildChatkitUrl(defaultHarness.baseUrl, defaultHarness.adapter);
-  }, [defaultHarness.adapter, defaultHarness.baseUrl]);
+    return buildChatkitUrl(defaultHarness.baseUrl, defaultHarness.adapter)
+  }, [defaultHarness.adapter, defaultHarness.baseUrl])
   const codexProxyWsUrl = useMemo(() => {
-    return buildCodexAppServerWsUrl(defaultHarness.baseUrl);
-  }, [defaultHarness.baseUrl]);
+    return buildCodexAppServerWsUrl(defaultHarness.baseUrl)
+  }, [defaultHarness.baseUrl])
   const codexBridgeUrl = useMemo(() => {
-    return buildCodexBridgeWsUrl(defaultHarness.baseUrl);
-  }, [defaultHarness.baseUrl]);
+    return buildCodexBridgeWsUrl(defaultHarness.baseUrl)
+  }, [defaultHarness.baseUrl])
   useEffect(() => {
-    appLogger.info("ChatKit host configured", {
+    appLogger.info('ChatKit host configured', {
       attrs: {
-        scope: "chatkit.panel",
+        scope: 'chatkit.panel',
         adapter: defaultHarness.adapter,
         apiUrl: chatkitApiUrl,
         domainKeyConfigured: Boolean(chatkitDomainKey),
         selectedProjectId:
-          defaultHarness.adapter === "codex"
+          defaultHarness.adapter === 'codex'
             ? codexConversation.selectedProject.id
             : null,
       },
-    });
+    })
   }, [
     chatkitApiUrl,
     chatkitDomainKey,
     codexConversation.selectedProject.id,
     defaultHarness.adapter,
-  ]);
+  ])
   const resolveCodexAuthorization = useCallback(async (): Promise<string> => {
-    const authData = await getAuthData();
-    const idToken = authData?.idToken?.trim();
+    const authData = await getAuthData()
+    const idToken = authData?.idToken?.trim()
     if (!idToken) {
-      const message = "Codex websocket auth requires an OIDC id token";
+      const message = 'Codex websocket auth requires an OIDC id token'
       appLogger.error(message, {
         attrs: {
-          scope: "chatkit.codex_auth",
+          scope: 'chatkit.codex_auth',
           adapter: defaultHarness.adapter,
           baseUrl: defaultHarness.baseUrl,
         },
-      });
-      setShowLoginPrompt(true);
-      throw new UserNotLoggedInError(message);
+      })
+      setShowLoginPrompt(true)
+      throw new UserNotLoggedInError(message)
     }
-    return `Bearer ${idToken}`;
-  }, [defaultHarness.adapter, defaultHarness.baseUrl]);
+    return `Bearer ${idToken}`
+  }, [defaultHarness.adapter, defaultHarness.baseUrl])
 
   useEffect(() => {
-    if (defaultHarness.adapter !== "codex") {
-      return;
+    if (defaultHarness.adapter !== 'codex') {
+      return
     }
-    const controller = getCodexConversationController();
-    controller.setSelectedProject(defaultProject.id);
-  }, [defaultHarness.adapter, defaultProject.id]);
+    const controller = getCodexConversationController()
+    controller.setSelectedProject(defaultProject.id)
+  }, [defaultHarness.adapter, defaultProject.id])
 
   useEffect(() => {
-    const proxy = getCodexAppServerProxyClient();
-    if (defaultHarness.adapter !== "codex") {
-      setCodexThreadBootstrapComplete(true);
-      proxy.setAuthorizationResolver(null);
-      proxy.disconnect();
-      return;
+    const proxy = getCodexAppServerProxyClient()
+    if (defaultHarness.adapter !== 'codex') {
+      setCodexThreadBootstrapComplete(true)
+      proxy.setAuthorizationResolver(null)
+      proxy.disconnect()
+      return
     }
-    setCodexThreadBootstrapComplete(false);
-    proxy.setAuthorizationResolver(resolveCodexAuthorization);
-    let canceled = false;
+    setCodexThreadBootstrapComplete(false)
+    setCodexStreamError(null)
+    proxy.setAuthorizationResolver(resolveCodexAuthorization)
+    let canceled = false
     void (async () => {
       try {
-        const authorization = await resolveCodexAuthorization();
+        const authorization = await resolveCodexAuthorization()
         if (canceled) {
-          return;
+          return
         }
-        await proxy.connect(codexProxyWsUrl, authorization);
+        await proxy.connect(codexProxyWsUrl, authorization)
         if (!canceled) {
-          const controller = getCodexConversationController();
-          await controller.refreshHistory();
-          const thread = await controller.ensureActiveThread();
+          const controller = getCodexConversationController()
+          await controller.refreshHistory()
+          const thread = await controller.ensureActiveThread()
           // setChatkitState(
           //   create(ChatkitStateSchema, {
           //     threadId: thread.id,
@@ -719,103 +913,106 @@ function ChatKitPanelInner({ defaultHarness }: ChatKitPanelInnerProps) {
           syncedCodexStateRef.current = {
             threadId: thread.id,
             previousResponseId: thread.previousResponseId ?? null,
-          };
-          setCodexThreadBootstrapComplete(true);
+          }
+          setCodexStreamError(null)
+          setCodexThreadBootstrapComplete(true)
         }
       } catch (error) {
         if (canceled) {
-          return;
+          return
         }
-        appLogger.error("Failed to connect codex app-server websocket", {
+        appLogger.error('Failed to connect codex app-server websocket', {
           attrs: {
-            scope: "chatkit.codex_proxy",
+            scope: 'chatkit.codex_proxy',
             error: String(error),
             url: codexProxyWsUrl,
           },
-        });
-        setCodexStreamError(`Failed to initialize Codex thread: ${String(error)}`);
-        setCodexThreadBootstrapComplete(true);
+        })
+        setCodexStreamError(
+          `Failed to initialize Codex thread: ${String(error)}`
+        )
+        setCodexThreadBootstrapComplete(true)
       }
-    })();
+    })()
     return () => {
-      canceled = true;
-      proxy.setAuthorizationResolver(null);
-      proxy.disconnect();
-    };
-  }, [codexProxyWsUrl, defaultHarness.adapter, resolveCodexAuthorization]);
+      canceled = true
+      proxy.setAuthorizationResolver(null)
+      proxy.disconnect()
+    }
+  }, [codexProxyWsUrl, defaultHarness.adapter, resolveCodexAuthorization])
 
   useEffect(() => {
-    if (defaultHarness.adapter !== "codex" || !codexThreadBootstrapComplete) {
-      lastAppliedCodexThreadRef.current = null;
-      return;
+    if (defaultHarness.adapter !== 'codex' || !codexThreadBootstrapComplete) {
+      lastAppliedCodexThreadRef.current = null
+      return
     }
-    const threadId = syncedCodexStateRef.current.threadId;
+    const threadId = syncedCodexStateRef.current.threadId
     if (!threadId || lastAppliedCodexThreadRef.current === threadId) {
-      return;
+      return
     }
-    lastAppliedCodexThreadRef.current = threadId;
-    void chatkitActionsRef.current?.setThreadId(threadId, "bootstrap_sync");
-  }, [codexThreadBootstrapComplete, defaultHarness.adapter]);
+    lastAppliedCodexThreadRef.current = threadId
+    void chatkitActionsRef.current?.setThreadId(threadId, 'bootstrap_sync')
+  }, [codexThreadBootstrapComplete, defaultHarness.adapter])
 
   useEffect(() => {
-    const bridge = getCodexToolBridge();
+    const bridge = getCodexToolBridge()
     return bridge.subscribe(() => {
-      const snapshot = bridge.getSnapshot();
+      const snapshot = bridge.getSnapshot()
       if (
-        defaultHarness.adapter === "codex" &&
-        (snapshot.state === "closed" || snapshot.state === "error")
+        defaultHarness.adapter === 'codex' &&
+        (snapshot.state === 'closed' || snapshot.state === 'error')
       ) {
-        getCodexExecuteApprovalManager().failAll("Codex bridge disconnected");
+        getCodexExecuteApprovalManager().failAll('Codex bridge disconnected')
       }
-    });
-  }, [defaultHarness.adapter]);
+    })
+  }, [defaultHarness.adapter])
 
   useEffect(() => {
-    const bridge = getCodexToolBridge();
-    if (defaultHarness.adapter !== "codex") {
-      bridge.setHandler(null);
-      return;
+    const bridge = getCodexToolBridge()
+    if (defaultHarness.adapter !== 'codex') {
+      bridge.setHandler(null)
+      return
     }
-    bridge.setHandler(handleCodexBridgeToolCall);
+    bridge.setHandler(handleCodexBridgeToolCall)
     return () => {
-      bridge.setHandler(null);
-    };
-  }, [defaultHarness.adapter, handleCodexBridgeToolCall]);
+      bridge.setHandler(null)
+    }
+  }, [defaultHarness.adapter, handleCodexBridgeToolCall])
 
   useEffect(() => {
-    const bridge = getCodexToolBridge();
-    if (defaultHarness.adapter !== "codex") {
-      bridge.disconnect();
-      getCodexExecuteApprovalManager().failAll("Codex bridge disabled");
-      return;
+    const bridge = getCodexToolBridge()
+    if (defaultHarness.adapter !== 'codex') {
+      bridge.disconnect()
+      getCodexExecuteApprovalManager().failAll('Codex bridge disabled')
+      return
     }
-    let canceled = false;
+    let canceled = false
     void (async () => {
       try {
-        const authorization = await resolveCodexAuthorization();
+        const authorization = await resolveCodexAuthorization()
         if (canceled) {
-          return;
+          return
         }
-        await bridge.connect(codexBridgeUrl, authorization);
+        await bridge.connect(codexBridgeUrl, authorization)
       } catch (error) {
         if (canceled) {
-          return;
+          return
         }
-        appLogger.error("Failed to connect codex bridge websocket", {
+        appLogger.error('Failed to connect codex bridge websocket', {
           attrs: {
-            scope: "chatkit.codex_bridge",
+            scope: 'chatkit.codex_bridge',
             error: String(error),
             url: codexBridgeUrl,
           },
-        });
+        })
       }
-    })();
+    })()
     return () => {
-      canceled = true;
-      bridge.disconnect();
-      getCodexExecuteApprovalManager().failAll("Codex bridge disconnected");
-    };
-  }, [codexBridgeUrl, defaultHarness.adapter, resolveCodexAuthorization]);
+      canceled = true
+      bridge.disconnect()
+      getCodexExecuteApprovalManager().failAll('Codex bridge disconnected')
+    }
+  }, [codexBridgeUrl, defaultHarness.adapter, resolveCodexAuthorization])
 
   const chatkit = useChatKit({
     api: {
@@ -824,10 +1021,12 @@ function ChatKitPanelInner({ defaultHarness }: ChatKitPanelInnerProps) {
       fetch: authorizedFetch,
     },
     initialThread:
-      defaultHarness.adapter === "codex" ? codexConversation.currentThreadId : undefined,
+      defaultHarness.adapter === 'codex'
+        ? codexConversation.currentThreadId
+        : undefined,
     theme: {
-      colorScheme: "light",
-      radius: "round",
+      colorScheme: 'light',
+      radius: 'round',
     },
     startScreen: {
       greeting: CHATKIT_GREETING,
@@ -838,29 +1037,29 @@ function ChatKitPanelInner({ defaultHarness }: ChatKitPanelInnerProps) {
       placeholder: CHATKIT_PLACEHOLDER,
       models: [
         {
-          id: "gpt-4o-mini",
-          label: "GPT-4o Mini",
+          id: 'gpt-4o-mini',
+          label: 'GPT-4o Mini',
         },
         {
-          id: "gpt-5",
-          label: "GPT-5",
+          id: 'gpt-5',
+          label: 'GPT-5',
         },
         // gpt-5.2 appears to be about 2x as slow as gpt-4.1-mini-2025-04-14
         // but for a simple query that's 2s vs 1s so not a huge difference
         // This is still 10x faster than gpt 5 which took about 10x and felt like
         // molasses.
         {
-          id: "gpt-5.2",
-          label: "GPT-5.2",
+          id: 'gpt-5.2',
+          label: 'GPT-5.2',
           default: true,
         },
         {
-          id: "gpt-5-mini",
-          label: "GPT-5 Mini",          
+          id: 'gpt-5-mini',
+          label: 'GPT-5 Mini',
         },
         {
-          id: "gpt-5-nano",
-          label: "GPT-5 Nano",
+          id: 'gpt-5-nano',
+          label: 'GPT-5 Nano',
         },
       ],
       // TODO(jlewi): We want to make the company knowledge tool optional but on by default.
@@ -879,33 +1078,33 @@ function ChatKitPanelInner({ defaultHarness }: ChatKitPanelInnerProps) {
     header: {
       enabled: true,
       title:
-        defaultHarness.adapter === "codex"
+        defaultHarness.adapter === 'codex'
           ? {
               enabled: true,
               text: codexConversation.selectedProject.name,
             }
           : undefined,
       leftAction:
-        defaultHarness.adapter === "codex"
+        defaultHarness.adapter === 'codex'
           ? {
-              icon: showCodexDrawer ? "close" : "menu",
+              icon: showCodexDrawer ? 'close' : 'menu',
               onClick: () => setShowCodexDrawer((previous) => !previous),
             }
           : undefined,
       rightAction:
-        defaultHarness.adapter === "codex"
+        defaultHarness.adapter === 'codex'
           ? {
-              icon: "compose",
+              icon: 'compose',
               onClick: () => {
                 void (async () => {
-                  const controller = getCodexConversationController();
-                  controller.startNewChat();
-                  setCodexStreamError(null);
+                  const controller = getCodexConversationController()
+                  controller.startNewChat()
+                  setCodexStreamError(null)
                   syncedCodexStateRef.current = {
                     threadId: null,
                     previousResponseId: null,
-                  };
-                  const thread = await controller.ensureActiveThread();
+                  }
+                  const thread = await controller.ensureActiveThread()
                   // setChatkitState(
                   //   create(ChatkitStateSchema, {
                   //     threadId: thread.id,
@@ -915,233 +1114,308 @@ function ChatKitPanelInner({ defaultHarness }: ChatKitPanelInnerProps) {
                   syncedCodexStateRef.current = {
                     threadId: thread.id,
                     previousResponseId: thread.previousResponseId ?? null,
-                  };
+                  }
                   await chatkitActionsRef.current?.setThreadId(
                     thread.id,
-                    "header_new_chat",
-                  );
-                })();
+                    'header_new_chat'
+                  )
+                })()
               },
             }
           : undefined,
     },
     history: {
-      enabled: defaultHarness.adapter !== "codex",
+      enabled: defaultHarness.adapter !== 'codex',
     },
     onClientTool: async (invocation) => {
       const toolOutput = create(ToolCallOutputSchema, {
-        callId: "",
-        previousResponseId: "",
+        callId: '',
+        previousResponseId: '',
         status: ToolCallOutput_Status.SUCCESS,
-        clientError: "",
-      });
+        clientError: '',
+      })
 
-      let decodedInput;
+      switch (invocation.name) {
+        case EXECUTE_CODE_DIRECT_TOOL:
+        case EXECUTE_CODE_TOOL: {
+          const executeCodePayload = parseExecuteCodePayload(invocation.params)
+          if (!executeCodePayload) {
+            toolOutput.status = ToolCallOutput_Status.FAILED
+            toolOutput.clientError =
+              'ExecuteCode tool invoked without valid code payload'
+            return toJson(ToolCallOutputSchema, toolOutput) as Record<
+              string,
+              unknown
+            >
+          }
+          if (!executeCodePayload.callId) {
+            toolOutput.status = ToolCallOutput_Status.FAILED
+            toolOutput.clientError =
+              'ExecuteCode is missing call_id in tool params'
+            return toJson(ToolCallOutputSchema, toolOutput) as Record<
+              string,
+              unknown
+            >
+          }
+
+          const callId = executeCodePayload.callId
+          const previousResponseId = executeCodePayload.previousResponseId
+          try {
+            const result = await codeModeExecutor.execute({
+              code: executeCodePayload.code,
+              source: 'chatkit',
+            })
+            if (invocation.name === EXECUTE_CODE_DIRECT_TOOL) {
+              return {
+                callId,
+                previousResponseId,
+                output: result.output,
+              }
+            }
+            return buildExecuteCodeToolOutput({
+              callId,
+              previousResponseId,
+              output: result.output,
+            })
+          } catch (error) {
+            if (invocation.name === EXECUTE_CODE_DIRECT_TOOL) {
+              return {
+                callId,
+                previousResponseId,
+                output: getCodeModeErrorOutput(error),
+                clientError: String(error),
+              }
+            }
+            return buildExecuteCodeToolOutput({
+              callId,
+              previousResponseId,
+              output: getCodeModeErrorOutput(error),
+              clientError: String(error),
+            })
+          }
+        }
+        case UPDATE_CELLS_TOOL:
+        case GET_CELLS_TOOL:
+        case LIST_CELLS_TOOL:
+          break
+        default: {
+          toolOutput.status = ToolCallOutput_Status.FAILED
+          toolOutput.clientError = `Unknown tool ${invocation.name}`
+          return toJson(ToolCallOutputSchema, toolOutput) as Record<
+            string,
+            unknown
+          >
+        }
+      }
+
+      let decodedInput
       try {
         const payload =
-          typeof invocation.params === "string"
+          typeof invocation.params === 'string'
             ? invocation.params
-            : JSON.stringify(invocation.params ?? {});
-        decodedInput = fromJsonString(ToolCallInputSchema, payload);
+            : JSON.stringify(invocation.params ?? {})
+        decodedInput = fromJsonString(ToolCallInputSchema, payload)
       } catch (error) {
-        console.error("Failed to decode tool params", error, invocation.params);
-        toolOutput.status = ToolCallOutput_Status.FAILED;
-        toolOutput.clientError = `Failed to decode tool params: ${error}`;
+        console.error('Failed to decode tool params', error, invocation.params)
+        toolOutput.status = ToolCallOutput_Status.FAILED
+        toolOutput.clientError = `Failed to decode tool params: ${error}`
         return {
           success: false,
           result: toJson(ToolCallOutputSchema, toolOutput),
-        };
+        }
       }
 
-      toolOutput.callId = decodedInput.callId;
-      toolOutput.previousResponseId = decodedInput.previousResponseId;
+      toolOutput.callId = decodedInput.callId
+      toolOutput.previousResponseId = decodedInput.previousResponseId
 
-      const inputCase = decodedInput.input?.case;
-      const cellMap = new Map<string, Cell>();
+      const inputCase = String(decodedInput.input?.case ?? '')
+      const cellMap = new Map<string, Cell>()
       orderedCells.forEach((cell) => {
-        cellMap.set(cell.refId, cell);
-      });
+        cellMap.set(cell.refId, cell)
+      })
 
       switch (invocation.name) {
         case UPDATE_CELLS_TOOL: {
-          console.log(`[ChatKit tool] ${invocation.name}`, decodedInput);
-          if (inputCase !== "updateCells") {
-            toolOutput.status = ToolCallOutput_Status.FAILED;
+          console.log(`[ChatKit tool] ${invocation.name}`, decodedInput)
+          if (inputCase !== 'updateCells') {
+            toolOutput.status = ToolCallOutput_Status.FAILED
             toolOutput.clientError =
-              "UpdateCells tool invoked without updateCells payload";
-            break;
+              'UpdateCells tool invoked without updateCells payload'
+            break
           }
 
-          const updateCellsRequest = decodedInput.input.value;
+          const updateCellsRequest = decodedInput.input.value
           if (!updateCellsRequest) {
-            toolOutput.status = ToolCallOutput_Status.FAILED;
-            toolOutput.clientError = "UpdateCells request missing payload";
-            break;
+            toolOutput.status = ToolCallOutput_Status.FAILED
+            toolOutput.clientError = 'UpdateCells request missing payload'
+            break
           }
 
-          const cells: Cell[] = updateCellsRequest.cells ?? [];
+          const cells: Cell[] = updateCellsRequest.cells ?? []
           if (cells.length === 0) {
-            toolOutput.status = ToolCallOutput_Status.FAILED;
-            toolOutput.clientError =
-              "UpdateCells invoked without cells payload";
+            toolOutput.status = ToolCallOutput_Status.FAILED
+            toolOutput.clientError = 'UpdateCells invoked without cells payload'
           }
 
           cells.forEach((updatedCell: Cell) => {
             try {
               if (!updatedCell?.refId) {
-                console.warn("Received cell without refId", updatedCell);
-                return;
+                console.warn('Received cell without refId', updatedCell)
+                return
               }
 
-              updateCell(updatedCell);
+              updateCell(updatedCell)
             } catch (error) {
               console.error(
-                "Failed to process UpdateCell payload",
+                'Failed to process UpdateCell payload',
                 error,
-                updatedCell,
-              );
-              toolOutput.status = ToolCallOutput_Status.FAILED;
-              toolOutput.clientError = `Failed to process UpdateCell payload: ${error}`;
+                updatedCell
+              )
+              toolOutput.status = ToolCallOutput_Status.FAILED
+              toolOutput.clientError = `Failed to process UpdateCell payload: ${error}`
             }
-          });
+          })
 
           toolOutput.output = {
-            case: "updateCells",
+            case: 'updateCells',
             value: create(UpdateCellsResponseSchema, {
               cells,
             }),
-          };
-          break;
+          }
+          break
         }
         case GET_CELLS_TOOL: {
-          console.log(`[ChatKit tool] ${invocation.name}`, decodedInput);
-          if (inputCase !== "getCells") {
-            toolOutput.status = ToolCallOutput_Status.FAILED;
+          console.log(`[ChatKit tool] ${invocation.name}`, decodedInput)
+          if (inputCase !== 'getCells') {
+            toolOutput.status = ToolCallOutput_Status.FAILED
             toolOutput.clientError =
-              "GetCells tool invoked without getCells payload";
-            break;
+              'GetCells tool invoked without getCells payload'
+            break
           }
 
-          const getCellsRequest = decodedInput.input.value;
+          const getCellsRequest = decodedInput.input.value
           if (!getCellsRequest) {
-            toolOutput.status = ToolCallOutput_Status.FAILED;
-            toolOutput.clientError = "GetCells request missing payload";
-            break;
+            toolOutput.status = ToolCallOutput_Status.FAILED
+            toolOutput.clientError = 'GetCells request missing payload'
+            break
           }
 
-          const requestedRefs = getCellsRequest.refIds ?? [];
+          const requestedRefs = getCellsRequest.refIds ?? []
           const foundCells = requestedRefs
             .map((id) => {
-              const cell = cellMap.get(id);
+              const cell = cellMap.get(id)
               if (!cell) {
-                console.warn(`Requested cell ${id} not found`);
+                console.warn(`Requested cell ${id} not found`)
               }
-              return cell;
+              return cell
             })
-            .filter((cell): cell is Cell => Boolean(cell));
+            .filter((cell): cell is Cell => Boolean(cell))
 
           toolOutput.output = {
-            case: "getCells",
+            case: 'getCells',
             value: create(GetCellsResponseSchema, {
               cells: foundCells,
             }),
-          };
-          break;
+          }
+          break
         }
         case LIST_CELLS_TOOL: {
-          console.log(`[ChatKit tool] ${invocation.name}`, decodedInput);
+          console.log(`[ChatKit tool] ${invocation.name}`, decodedInput)
           toolOutput.output = {
-            case: "listCells",
+            case: 'listCells',
             value: create(ListCellsResponseSchema, {
               cells: orderedCells,
             }),
-          };
-          break;
+          }
+          break
         }
         default: {
-          toolOutput.status = ToolCallOutput_Status.FAILED;
-          toolOutput.clientError = `Unknown tool ${invocation.name}`;
-          return toJson(ToolCallOutputSchema, toolOutput);
+          toolOutput.status = ToolCallOutput_Status.FAILED
+          toolOutput.clientError = `Unknown tool ${invocation.name}`
+          return toJson(ToolCallOutputSchema, toolOutput) as Record<
+            string,
+            unknown
+          >
         }
       }
 
-      return toJson(ToolCallOutputSchema, toolOutput);
+      return toJson(ToolCallOutputSchema, toolOutput) as Record<string, unknown>
     },
     onError: ({ error }) => {
-      const promptForLogin = () => setShowLoginPrompt(true);
+      const promptForLogin = () => setShowLoginPrompt(true)
       const errorText =
-        typeof error === "string"
+        typeof error === 'string'
           ? error
-          : error && typeof error === "object" && "message" in error
+          : error && typeof error === 'object' && 'message' in error
             ? String((error as { message?: unknown }).message)
-            : String(error);
+            : String(error)
 
       // This is a bit of a hacky way to check for authentication errors.
-      // Chatkit throws a StreamError if the user isn't logged in. 
+      // ChatKit throws a StreamError if the user isn't logged in.
       void (async () => {
         if (
-          defaultHarness.adapter === "responses-direct" &&
-          responsesDirectConfigManager.getSnapshot().authMethod !== "oauth"
+          defaultHarness.adapter === 'responses-direct' &&
+          responsesDirectConfigManager.getSnapshot().authMethod !== 'oauth'
         ) {
-          return;
+          return
         }
-        const token = await getAccessToken();
+        const token = await getAccessToken()
         if (!token) {
-          promptForLogin();
+          promptForLogin()
         }
-      })();
+      })()
 
-      console.error("ChatKit error", error);
-      appLogger.error("ChatKit error", {
+      console.error('ChatKit error', error)
+      appLogger.error('ChatKit error', {
         attrs: {
-          scope: "chatkit.panel",
+          scope: 'chatkit.panel',
           adapter: defaultHarness.adapter,
           baseUrl: defaultHarness.baseUrl,
           error: errorText,
         },
-      });
+      })
     },
     onThreadLoadStart: ({ threadId }) => {
-      console.log("[chatkit] thread load start", JSON.stringify({ threadId }));
-      appLogger.info("ChatKit thread load start", {
+      console.log('[chatkit] thread load start', JSON.stringify({ threadId }))
+      appLogger.info('ChatKit thread load start', {
         attrs: {
-          scope: "chatkit.panel",
+          scope: 'chatkit.panel',
           adapter: defaultHarness.adapter,
           threadId,
         },
-      });
+      })
     },
     onThreadLoadEnd: ({ threadId }) => {
-      console.log("[chatkit] thread load end", JSON.stringify({ threadId }));
-      appLogger.info("ChatKit thread load end", {
+      console.log('[chatkit] thread load end', JSON.stringify({ threadId }))
+      appLogger.info('ChatKit thread load end', {
         attrs: {
-          scope: "chatkit.panel",
+          scope: 'chatkit.panel',
           adapter: defaultHarness.adapter,
           threadId,
         },
-      });
+      })
     },
     onLog: ({ name, data }) => {
-      console.log("[chatkit] log", JSON.stringify({ name, data }));
-      appLogger.info("ChatKit diagnostic log", {
+      console.log('[chatkit] log', JSON.stringify({ name, data }))
+      appLogger.info('ChatKit diagnostic log', {
         attrs: {
-          scope: "chatkit.panel",
+          scope: 'chatkit.panel',
           adapter: defaultHarness.adapter,
           name,
           data: data ?? null,
         },
-      });
+      })
     },
     onThreadChange: ({ threadId }) => {
-      const localChatkitState = getChatkitState();
+      const localChatkitState = getChatkitState()
       const codexSnapshot =
-        defaultHarness.adapter === "codex"
+        defaultHarness.adapter === 'codex'
           ? getCodexConversationController().getSnapshot()
-          : null;
-      const stack = new Error("chatkit thread change").stack ?? null;
-      appLogger.info("ChatKit thread changed", {
+          : null
+      const stack = new Error('chatkit thread change').stack ?? null
+      appLogger.info('ChatKit thread changed', {
         attrs: {
-          scope: "chatkit.panel",
+          scope: 'chatkit.panel',
           adapter: defaultHarness.adapter,
           threadId: threadId ?? null,
           localThreadId: localChatkitState.threadId ?? null,
@@ -1150,38 +1424,43 @@ function ChatKitPanelInner({ defaultHarness }: ChatKitPanelInnerProps) {
           codexCurrentTurnId: codexSnapshot?.currentTurnId ?? null,
           stack,
         },
-      });
-      if (defaultHarness.adapter !== "codex") {
-        return;
+      })
+      if (defaultHarness.adapter !== 'codex') {
+        return
       }
       if (!threadId) {
-        const codexThreadId = codexSnapshot?.currentThreadId;
+        const codexThreadId = codexSnapshot?.currentThreadId
         if (codexThreadId) {
           const codexThread = codexConversation.threads.find(
-            (thread) => thread.id === codexThreadId,
-          );
-          appLogger.info("Ignoring null ChatKit thread change while Codex thread is active", {
-            attrs: {
-              scope: "chatkit.panel",
-              adapter: defaultHarness.adapter,
-              threadId: null,
-              codexCurrentThreadId: codexThreadId,
-              codexCurrentTurnId: codexSnapshot?.currentTurnId ?? null,
-            },
-          });
+            (thread) => thread.id === codexThreadId
+          )
+          appLogger.info(
+            'Ignoring null ChatKit thread change while Codex thread is active',
+            {
+              attrs: {
+                scope: 'chatkit.panel',
+                adapter: defaultHarness.adapter,
+                threadId: null,
+                codexCurrentThreadId: codexThreadId,
+                codexCurrentTurnId: codexSnapshot?.currentTurnId ?? null,
+              },
+            }
+          )
           // setChatkitState(
           //   create(ChatkitStateSchema, {
           //     threadId: codexThreadId,
           //     previousResponseId: codexThread?.previousResponseId ?? "",
           //   }),
           // );
-          return;
+          return
         }
         // setChatkitState(create(ChatkitStateSchema, {}));
-        return;
+        return
       }
-      const existing = codexConversation.threads.find((thread) => thread.id === threadId);
-      void existing;
+      const existing = codexConversation.threads.find(
+        (thread) => thread.id === threadId
+      )
+      void existing
       // setChatkitState(
       //   create(ChatkitStateSchema, {
       //     threadId,
@@ -1189,88 +1468,88 @@ function ChatKitPanelInner({ defaultHarness }: ChatKitPanelInnerProps) {
       //   }),
       // );
     },
-  });
+  })
   const setChatkitThreadId = useCallback(
-    async (threadId: string | null, source = "panel_ref") => {
-      appLogger.info("Calling ChatKit setThreadId", {
+    async (threadId: string | null, source = 'panel_ref') => {
+      appLogger.info('Calling ChatKit setThreadId', {
         attrs: {
-          scope: "chatkit.panel",
+          scope: 'chatkit.panel',
           adapter: defaultHarness.adapter,
           threadId,
           source,
         },
-      });
+      })
       try {
-        await chatkit.setThreadId(threadId);
-        appLogger.info("ChatKit setThreadId completed", {
+        await chatkit.setThreadId(threadId)
+        appLogger.info('ChatKit setThreadId completed', {
           attrs: {
-            scope: "chatkit.panel",
+            scope: 'chatkit.panel',
             adapter: defaultHarness.adapter,
             threadId,
             source,
           },
-        });
+        })
       } catch (error) {
-        appLogger.error("ChatKit setThreadId failed", {
+        appLogger.error('ChatKit setThreadId failed', {
           attrs: {
-            scope: "chatkit.panel",
+            scope: 'chatkit.panel',
             adapter: defaultHarness.adapter,
             threadId,
             source,
             error: String(error),
           },
-        });
-        throw error;
+        })
+        throw error
       }
     },
-    [chatkit, defaultHarness.adapter],
-  );
+    [chatkit, defaultHarness.adapter]
+  )
   const fetchChatkitUpdates = useCallback(
-    async (source = "panel_ref") => {
-      appLogger.info("Calling ChatKit fetchUpdates", {
+    async (source = 'panel_ref') => {
+      appLogger.info('Calling ChatKit fetchUpdates', {
         attrs: {
-          scope: "chatkit.panel",
+          scope: 'chatkit.panel',
           adapter: defaultHarness.adapter,
           source,
         },
-      });
+      })
       try {
-        await chatkit.fetchUpdates();
-        appLogger.info("ChatKit fetchUpdates completed", {
+        await chatkit.fetchUpdates()
+        appLogger.info('ChatKit fetchUpdates completed', {
           attrs: {
-            scope: "chatkit.panel",
+            scope: 'chatkit.panel',
             adapter: defaultHarness.adapter,
             source,
           },
-        });
+        })
       } catch (error) {
-        appLogger.error("ChatKit fetchUpdates failed", {
+        appLogger.error('ChatKit fetchUpdates failed', {
           attrs: {
-            scope: "chatkit.panel",
+            scope: 'chatkit.panel',
             adapter: defaultHarness.adapter,
             source,
             error: String(error),
           },
-        });
-        throw error;
+        })
+        throw error
       }
     },
-    [chatkit, defaultHarness.adapter],
-  );
+    [chatkit, defaultHarness.adapter]
+  )
   chatkitActionsRef.current = {
     setThreadId: setChatkitThreadId,
     fetchUpdates: fetchChatkitUpdates,
-  };
+  }
 
   const handleCodexNewChat = useCallback(async () => {
-    const controller = getCodexConversationController();
-    controller.startNewChat();
-    setCodexStreamError(null);
+    const controller = getCodexConversationController()
+    controller.startNewChat()
+    setCodexStreamError(null)
     syncedCodexStateRef.current = {
       threadId: null,
       previousResponseId: null,
-    };
-    const thread = await controller.ensureActiveThread();
+    }
+    const thread = await controller.ensureActiveThread()
     // setChatkitState(
     //   create(ChatkitStateSchema, {
     //     threadId: thread.id,
@@ -1280,70 +1559,64 @@ function ChatKitPanelInner({ defaultHarness }: ChatKitPanelInnerProps) {
     syncedCodexStateRef.current = {
       threadId: thread.id,
       previousResponseId: thread.previousResponseId ?? null,
-    };
-    await chatkitActionsRef.current?.setThreadId(thread.id, "new_chat");
-  }, []);
-
-  const handleCodexSelectThread = useCallback(
-    async (threadId: string) => {
-      const controller = getCodexConversationController();
-      const thread = await controller.selectThread(threadId);
-      // setChatkitState(
-      //   create(ChatkitStateSchema, {
-      //     threadId: thread.id,
-      //     previousResponseId: thread.previousResponseId ?? "",
-      //   }),
-      // );
-      syncedCodexStateRef.current = {
-        threadId: thread.id,
-        previousResponseId: thread.previousResponseId ?? null,
-      };
-      await chatkitActionsRef.current?.setThreadId(thread.id, "select_thread");
-      await chatkitActionsRef.current?.fetchUpdates("select_thread");
-      setShowCodexDrawer(false);
-    },
-    [],
-  );
-
-  const handleCodexProjectChange = useCallback(
-    async (projectId: string) => {
-      const controller = getCodexConversationController();
-      controller.setSelectedProject(projectId);
-      controller.startNewChat();
-      setCodexStreamError(null);
-      syncedCodexStateRef.current = {
-        threadId: null,
-        previousResponseId: null,
-      };
-      await controller.refreshHistory();
-      const thread = await controller.ensureActiveThread();
-      // setChatkitState(
-      //   create(ChatkitStateSchema, {
-      //     threadId: thread.id,
-      //     previousResponseId: thread.previousResponseId ?? "",
-      //   }),
-      // );
-      syncedCodexStateRef.current = {
-        threadId: thread.id,
-        previousResponseId: thread.previousResponseId ?? null,
-      };
-      await chatkitActionsRef.current?.setThreadId(thread.id, "project_change");
-    },
-    [],
-  );
+    }
+    await chatkitActionsRef.current?.setThreadId(thread.id, 'new_chat')
+  }, [])
+
+  const handleCodexSelectThread = useCallback(async (threadId: string) => {
+    const controller = getCodexConversationController()
+    const thread = await controller.selectThread(threadId)
+    // setChatkitState(
+    //   create(ChatkitStateSchema, {
+    //     threadId: thread.id,
+    //     previousResponseId: thread.previousResponseId ?? "",
+    //   }),
+    // );
+    syncedCodexStateRef.current = {
+      threadId: thread.id,
+      previousResponseId: thread.previousResponseId ?? null,
+    }
+    await chatkitActionsRef.current?.setThreadId(thread.id, 'select_thread')
+    await chatkitActionsRef.current?.fetchUpdates('select_thread')
+    setShowCodexDrawer(false)
+  }, [])
+
+  const handleCodexProjectChange = useCallback(async (projectId: string) => {
+    const controller = getCodexConversationController()
+    controller.setSelectedProject(projectId)
+    controller.startNewChat()
+    setCodexStreamError(null)
+    syncedCodexStateRef.current = {
+      threadId: null,
+      previousResponseId: null,
+    }
+    await controller.refreshHistory()
+    const thread = await controller.ensureActiveThread()
+    // setChatkitState(
+    //   create(ChatkitStateSchema, {
+    //     threadId: thread.id,
+    //     previousResponseId: thread.previousResponseId ?? "",
+    //   }),
+    // );
+    syncedCodexStateRef.current = {
+      threadId: thread.id,
+      previousResponseId: thread.previousResponseId ?? null,
+    }
+    await chatkitActionsRef.current?.setThreadId(thread.id, 'project_change')
+  }, [])
 
   const handleLogin = useCallback(() => {
-    setShowLoginPrompt(false);
-    getBrowserAdapter().loginWithRedirect();
-  }, []);
+    setShowLoginPrompt(false)
+    getBrowserAdapter().loginWithRedirect()
+  }, [])
 
   const handleDismissPrompt = useCallback(() => {
-    setShowLoginPrompt(false);
-  }, []);
+    setShowLoginPrompt(false)
+  }, [])
 
   return (
     <div className="relative h-full w-full">
-      {defaultHarness.adapter !== "codex" || codexThreadBootstrapComplete ? (
+      {defaultHarness.adapter !== 'codex' || codexThreadBootstrapComplete ? (
         <ChatKit control={chatkit.control} className="block h-full w-full" />
       ) : (
         <div
@@ -1353,7 +1626,7 @@ function ChatKitPanelInner({ defaultHarness }: ChatKitPanelInnerProps) {
           Initializing Codex thread...
         </div>
       )}
-      {defaultHarness.adapter === "codex" && codexStreamError ? (
+      {defaultHarness.adapter === 'codex' && codexStreamError ? (
         <div
           data-testid="codex-stream-error"
           className="absolute left-3 right-3 top-3 z-30 rounded border border-red-200 bg-red-50 px-3 py-2 text-sm text-red-800 shadow-sm"
@@ -1361,7 +1634,7 @@ function ChatKitPanelInner({ defaultHarness }: ChatKitPanelInnerProps) {
           {codexStreamError}
         </div>
       ) : null}
-      {defaultHarness.adapter === "codex" && showCodexDrawer ? (
+      {defaultHarness.adapter === 'codex' && showCodexDrawer ? (
         <div
           data-testid="codex-project-drawer"
           className="absolute inset-y-0 left-0 z-40 flex w-[280px] flex-col border-r border-nb-cell-border bg-white/95 shadow-lg"
@@ -1375,7 +1648,7 @@ function ChatKitPanelInner({ defaultHarness }: ChatKitPanelInnerProps) {
               className="w-full rounded border border-nb-cell-border bg-white px-2 py-1 text-sm text-nb-text"
               value={codexConversation.selectedProject.id}
               onChange={(event) => {
-                void handleCodexProjectChange(event.target.value);
+                void handleCodexProjectChange(event.target.value)
               }}
             >
               {codexProjects.projects.map((project) => (
@@ -1388,7 +1661,7 @@ function ChatKitPanelInner({ defaultHarness }: ChatKitPanelInnerProps) {
               type="button"
               className="mt-2 w-full rounded border border-nb-cell-border px-2 py-1 text-sm text-nb-text hover:bg-nb-surface-2"
               onClick={() => {
-                void handleCodexNewChat();
+                void handleCodexNewChat()
               }}
             >
               New chat
@@ -1401,7 +1674,9 @@ function ChatKitPanelInner({ defaultHarness }: ChatKitPanelInnerProps) {
             {codexConversation.loadingHistory ? (
               <div className="text-sm text-nb-text-muted">Loading...</div>
             ) : codexConversation.threads.length === 0 ? (
-              <div className="text-sm text-nb-text-muted">No conversations yet.</div>
+              <div className="text-sm text-nb-text-muted">
+                No conversations yet.
+              </div>
             ) : (
               <div className="space-y-2">
                 {codexConversation.threads.map((thread) => (
@@ -1411,16 +1686,20 @@ function ChatKitPanelInner({ defaultHarness }: ChatKitPanelInnerProps) {
                     data-testid={`codex-thread-${thread.id}`}
                     className={`w-full rounded border px-2 py-2 text-left text-sm ${
                       codexConversation.currentThreadId === thread.id
-                        ? "border-nb-accent bg-nb-surface-2"
-                        : "border-nb-cell-border bg-white"
+                        ? 'border-nb-accent bg-nb-surface-2'
+                        : 'border-nb-cell-border bg-white'
                     }`}
                     onClick={() => {
-                      void handleCodexSelectThread(thread.id);
+                      void handleCodexSelectThread(thread.id)
                     }}
                   >
-                    <div className="font-medium text-nb-text">{thread.title}</div>
+                    <div className="font-medium text-nb-text">
+                      {thread.title}
+                    </div>
                     {thread.updatedAt ? (
-                      <div className="mt-1 text-xs text-nb-text-muted">{thread.updatedAt}</div>
+                      <div className="mt-1 text-xs text-nb-text-muted">
+                        {thread.updatedAt}
+                      </div>
                     ) : null}
                   </button>
                 ))}
@@ -1435,7 +1714,7 @@ function ChatKitPanelInner({ defaultHarness }: ChatKitPanelInnerProps) {
             <p className="mb-4 text-nb-text">
               Please log in to use runme chat features.
             </p>
-            <div className="flex justify-end gap-2">              
+            <div className="flex justify-end gap-2">
               <button
                 type="button"
                 className="rounded border border-nb-text px-3 py-1 text-nb-text hover:bg-nb-surface-2"
@@ -1455,18 +1734,18 @@ function ChatKitPanelInner({ defaultHarness }: ChatKitPanelInnerProps) {
         </div>
       ) : null}
     </div>
-  );
+  )
 }
 
 function ChatKitPanel() {
-  const { defaultHarness } = useHarness();
-  const harnessSessionKey = `${defaultHarness.name}:${defaultHarness.baseUrl}:${defaultHarness.adapter}`;
+  const { defaultHarness } = useHarness() // ChatKitPanel wraps ChatKitPanelInner, passing it the default harness from useHarness()
+  const harnessSessionKey = `${defaultHarness.name}:${defaultHarness.baseUrl}:${defaultHarness.adapter}` // used as the React key below, so a change in harness name, baseUrl, or adapter remounts ChatKitPanelInner
   return (
     <ChatKitPanelInner
       key={harnessSessionKey}
       defaultHarness={defaultHarness}
     />
-  );
+  )
 }
 
-export default ChatKitPanel;
+export default ChatKitPanel
diff --git a/app/src/contexts/NotebookContext.tsx b/app/src/contexts/NotebookContext.tsx
index 4e9a1fe5..3adf13c5 100644
--- a/app/src/contexts/NotebookContext.tsx
+++ b/app/src/contexts/NotebookContext.tsx
@@ -13,6 +13,7 @@ import {
 import { create } from "@bufbuild/protobuf";
 import { NotebookData, type NotebookSnapshot } from "../lib/notebookData";
 import { parser_pb } from "./CellContext";
+import { type NotebookDataLike } from "../lib/runtime/runmeConsole";
 import { useNotebookStore } from "./NotebookStoreContext";
 import { useContentsStore } from "./ContentsStoreContext";
 import { useFilesystemStore } from "./FilesystemStoreContext";
@@ -172,7 +173,14 @@ export function NotebookProvider({ children }: { children: ReactNode }) {
   const ensureNotebook = useCallback(
     ({ uri, name, notebook, loaded = false }: EnsureNotebookArgs) => {
       const existing = storeRef.current.get(uri);
+      const effectiveStore = resolveStore(
+        uri,
+        notebookStore,
+        fsStore,
+        contentsStore,
+      );
       if (existing) {
+        existing.data.setNotebookStore(effectiveStore ?? null);
         return existing.data;
       }
       const resolvedName =
@@ -184,18 +192,74 @@ export function NotebookProvider({ children }: { children: ReactNode }) {
           cells: [],
           metadata: {},
         });
-      const effectiveStore = resolveStore(
-        uri,
-        notebookStore,
-        fsStore,
-        contentsStore,
-      );
+      const resolveTargetUri = (target?: unknown): string | null => {
+        // Accept a candidate only when it is a string with non-blank content.
+        const clean = (candidate: unknown): string | null => {
+          if (typeof candidate !== "string") {
+            return null;
+          }
+          const trimmed = candidate.trim();
+          return trimmed === "" ? null : trimmed;
+        };
+        // A bare string is taken to be the notebook URI itself.
+        if (typeof target === "string") {
+          return clean(target);
+        }
+        if (typeof target !== "object" || !target) {
+          return null;
+        }
+        // Objects may expose the URI directly or nested under `handle.uri`.
+        const direct =
+          "uri" in target ? clean((target as { uri?: unknown }).uri) : null;
+        if (direct !== null) {
+          return direct;
+        }
+        if (!("handle" in target)) {
+          return null;
+        }
+        return clean((target as { handle?: { uri?: unknown } }).handle?.uri);
+      };
       const data = new NotebookData({
         uri,
         name: resolvedName,
         notebook: initialNotebook,
         notebookStore: effectiveStore ?? null,
         loaded,
+        resolveNotebookForAppKernel: (target?: unknown) => {
+          // With no usable target, fall back to this notebook's own entry.
+          const lookupUri = resolveTargetUri(target) || uri;
+          const entry = storeRef.current.get(lookupUri);
+          // URIs without a registered entry resolve to null.
+          return entry?.data ?? null;
+        },
+        listNotebooksForAppKernel: () => {
+          // Lists every notebook the AppKernel may see, deduplicated by URI:
+          // live models from storeRef first, then placeholders for cached
+          // list entries that have no live model.
+          const notebooksByUri = new Map<string, NotebookDataLike>();
+          for (const entry of storeRef.current.values()) {
+            notebooksByUri.set(entry.data.getUri(), entry.data);
+          }
+          // The notebook stored under `uri` is one of the entries iterated
+          // above, so it needs no separate fallback insertion.
+          for (const item of listCacheRef.current) {
+            if (!item?.uri || notebooksByUri.has(item.uri)) {
+              continue;
+            }
+            const emptyNotebook = create(parser_pb.NotebookSchema, {
+              cells: [],
+              metadata: {},
+            });
+            notebooksByUri.set(item.uri, {
+              getUri: () => item.uri,
+              getName: () => item.name ?? item.uri,
+              getNotebook: () => emptyNotebook,
+              updateCell: () => {},
+              getCell: () => null,
+            } satisfies NotebookDataLike);
+          }
+          return Array.from(notebooksByUri.values());
+        },
       });
       const unsubscribe = data.subscribe(() => emit());
       storeRef.current.set(uri, { data, unsubscribe, loaded });
@@ -243,7 +307,7 @@ export function NotebookProvider({ children }: { children: ReactNode }) {
   // On first availability of notebookStore, restore placeholders for any
   // notebooks persisted in localStorage so subscribers can attach immediately.
   useEffect(() => {
-    if (hasRestoredNotebooks.current || !notebookStore) {
+    if (hasRestoredNotebooks.current) {
       return;
     }
     const stored = loadStoredOpenNotebooks();
@@ -264,7 +328,7 @@ export function NotebookProvider({ children }: { children: ReactNode }) {
     });
     //setOpenNotebooks(stored);
     hasRestoredNotebooks.current = true;
-  }, [ensureNotebook, notebookStore]);
+  }, [ensureNotebook, notebookStore, contentsStore, fsStore]);
 
   const useNotebookList = useCallback(() => {
     return openNotebooks;
@@ -317,6 +381,20 @@ export function NotebookProvider({ children }: { children: ReactNode }) {
     persistOpenNotebooks(openNotebooks);
   }, [openNotebooks]);
 
+  // If notebooks are open but no current document is selected, promote the first
+  // open notebook to be the active document so notebook helpers can resolve the
+  // UI's visible notebook by default.
+  useEffect(() => {
+    if (getCurrentDoc() || openNotebooks.length === 0) {
+      return;
+    }
+    const fallbackUri = openNotebooks[0]?.uri?.trim();
+    if (!fallbackUri) {
+      return;
+    }
+    setCurrentDoc(fallbackUri);
+  }, [getCurrentDoc, openNotebooks, setCurrentDoc]);
+
   // Load any notebooks that were open last session once a store is available.
   useEffect(() => {
     if (!notebookStore && !contentsStore) {
@@ -339,6 +417,7 @@ export function NotebookProvider({ children }: { children: ReactNode }) {
         if (!store) {
           continue;
         }
+        entry.data.setNotebookStore(store);
         try {
           const metadata = await store.getMetadata(item.uri);
           if (!metadata || metadata.type !== NotebookStoreItemType.File) {
diff --git a/app/src/lib/notebookData.test.ts b/app/src/lib/notebookData.test.ts
index 96aa7b01..8d078b1e 100644
--- a/app/src/lib/notebookData.test.ts
+++ b/app/src/lib/notebookData.test.ts
@@ -517,6 +517,36 @@ describe("NotebookData cell defaults", () => {
   });
 });
 
+describe("NotebookData persistence", () => {
+  it("adopts a notebook store after construction and persists later edits", async () => {
+    // The model starts without a store, which is only supplied afterwards.
+    const restoredUri = "local://file/restored-before-store.md";
+    const saveSpy = vi.fn().mockResolvedValue(undefined);
+    const model = new NotebookData({
+      notebook: create(parser_pb.NotebookSchema, { cells: [] }),
+      uri: restoredUri,
+      name: "restored-before-store.md",
+      notebookStore: null,
+      loaded: true,
+    });
+
+    // Attach the store late; the edit that follows is expected to reach it.
+    model.setNotebookStore({ save: saveSpy });
+    model.appendCodeCell("javascript");
+
+    // The save happens asynchronously, so poll until it has been called.
+    await waitForCondition(() => saveSpy.mock.calls.length > 0, 1500);
+
+    const javascriptCell = expect.objectContaining({ languageId: "javascript" });
+    expect(saveSpy).toHaveBeenCalledWith(
+      restoredUri,
+      expect.objectContaining({
+        cells: expect.arrayContaining([javascriptCell]),
+      }),
+    );
+  });
+});
+
 describe("NotebookData.runCodeCell", () => {
   it("returns empty run id when no runner is available", () => {
     getWithFallback.mockReturnValueOnce(undefined);
@@ -580,6 +610,40 @@ describe("NotebookData.runCodeCell", () => {
     expect(new TextDecoder().decode(stdoutItem!.data)).toContain("hello");
   });
 
+  it("awaits AppKernel completion from CellData.run before returning", async () => {
+    const refId = "cell-await-run";
+    const appKernelCell = create(parser_pb.CellSchema, {
+      refId,
+      kind: parser_pb.CellKind.CODE,
+      languageId: "javascript",
+      outputs: [],
+      metadata: { [RunmeMetadataKey.RunnerName]: APPKERNEL_RUNNER_NAME },
+      value: 'console.log("awaited appkernel output");',
+    });
+    const model = new NotebookData({
+      notebook: create(parser_pb.NotebookSchema, { cells: [appKernelCell] }),
+      uri: "nb://test",
+      name: "test-notebook.runme.md",
+      notebookStore: null,
+      loaded: true,
+    });
+    const cellData = model.getCell(refId);
+    expect(cellData).toBeTruthy();
+
+    // No polling here: run() itself must not resolve until the cell finished.
+    await cellData!.run();
+
+    const finished = model.getCellSnapshot(refId);
+    expect(finished?.metadata?.[RunmeMetadataKey.ExitCode]).toBe("0");
+    // Decode every stdout item and concatenate them into one string.
+    const decoder = new TextDecoder();
+    const stdoutItems = (finished?.outputs ?? [])
+      .flatMap((output) => output.items)
+      .filter((item) => item.mime === MimeType.VSCodeNotebookStdOut);
+    const stdoutText = stdoutItems.map((item) => decoder.decode(item.data)).join("");
+    expect(stdoutText).toContain("awaited appkernel output");
+  });
+
   it("executes javascript with AppKernel even when runner metadata is not set", async () => {
     getWithFallback.mockReturnValueOnce(undefined);
     const cell = create(parser_pb.CellSchema, {
@@ -736,6 +800,111 @@ describe("NotebookData.runCodeCell", () => {
     expect(stdoutText).toContain("1");
   });
 
+  it("exposes notebooks.list across open notebooks inside browser appkernel javascript cells", async () => {
+    // Cell under test: prints the sorted notebook names, then the list length.
+    const listingSource = [
+      "const list = await notebooks.list();",
+      "console.log(JSON.stringify(list.map((item) => item.name).sort()));",
+      "console.log(list.length);",
+    ].join("\n");
+    const cell = create(parser_pb.CellSchema, {
+      refId: "cell-appkernel-notebooks-list-browser",
+      kind: parser_pb.CellKind.CODE,
+      languageId: "javascript",
+      outputs: [],
+      metadata: { [RunmeMetadataKey.RunnerName]: APPKERNEL_RUNNER_NAME },
+      value: listingSource,
+    });
+    const primaryNotebook = create(parser_pb.NotebookSchema, { cells: [cell] });
+    // A second notebook so the listing has more than one entry to report.
+    const secondaryCell = create(parser_pb.CellSchema, {
+      refId: "cell-secondary",
+      kind: parser_pb.CellKind.CODE,
+      languageId: "javascript",
+      outputs: [],
+      metadata: {},
+      value: "console.log('secondary')",
+    });
+    const secondaryNotebook = create(parser_pb.NotebookSchema, {
+      cells: [secondaryCell],
+    });
+
+    // Registry of models by URI, shared by both resolver callbacks below.
+    const byUri = new Map<string, InstanceType<typeof NotebookData>>();
+    let primaryModel: InstanceType<typeof NotebookData> | null = null;
+    const resolveTargetUri = (target?: unknown): string | null => {
+      // Accept a candidate only when it is a string with non-blank content.
+      const clean = (candidate: unknown): string | null => {
+        if (typeof candidate !== "string") {
+          return null;
+        }
+        const trimmed = candidate.trim();
+        return trimmed === "" ? null : trimmed;
+      };
+      // A bare string is taken to be the notebook URI itself.
+      if (typeof target === "string") {
+        return clean(target);
+      }
+      if (typeof target !== "object" || !target) {
+        return null;
+      }
+      // Objects may expose the URI directly or nested under `handle.uri`.
+      const direct =
+        "uri" in target ? clean((target as { uri?: unknown }).uri) : null;
+      if (direct !== null) {
+        return direct;
+      }
+      if (!("handle" in target)) {
+        return null;
+      }
+      return clean((target as { handle?: { uri?: unknown } }).handle?.uri);
+    };
+    // Without a usable target the primary notebook is the implicit default.
+    const resolveNotebook = (target?: unknown) => {
+      const targetUri = resolveTargetUri(target);
+      return targetUri ? (byUri.get(targetUri) ?? null) : primaryModel;
+    };
+    // Evaluated on each call, so models registered later are still listed.
+    const listNotebooks = () => Array.from(byUri.values());
+
+    const sharedOptions = {
+      notebookStore: null,
+      loaded: true,
+      resolveNotebookForAppKernel: resolveNotebook,
+      listNotebooksForAppKernel: listNotebooks,
+    };
+    primaryModel = new NotebookData({
+      ...sharedOptions,
+      notebook: primaryNotebook,
+      uri: "nb://primary",
+      name: "primary-notebooks-list.runme.md",
+    });
+    const secondaryModel = new NotebookData({
+      ...sharedOptions,
+      notebook: secondaryNotebook,
+      uri: "nb://secondary",
+      name: "secondary-notebooks-list.runme.md",
+    });
+    byUri.set(primaryModel.getUri(), primaryModel);
+    byUri.set(secondaryModel.getUri(), secondaryModel);
+
+    primaryModel.runCodeCell(cell);
+    await waitForCondition(() => {
+      const snapshot = primaryModel?.getCellSnapshot(cell.refId);
+      return snapshot?.metadata?.[RunmeMetadataKey.ExitCode] === "0";
+    });
+
+    const updated = primaryModel.getCellSnapshot(cell.refId);
+    const stdoutText = (updated?.outputs ?? [])
+      .flatMap((output) => output.items)
+      .filter((item) => item.mime === MimeType.VSCodeNotebookStdOut)
+      .map((item) => new TextDecoder().decode(item.data))
+      .join("");
+    expect(stdoutText).toContain("primary-notebooks-list.runme.md");
+    expect(stdoutText).toContain("secondary-notebooks-list.runme.md");
+    expect(stdoutText).toContain("2");
+  });
+
   it("exposes drive and google helper namespaces in appkernel cells", async () => {
     const cell = create(parser_pb.CellSchema, {
       refId: "cell-appkernel-drive-helpers",
diff --git a/app/src/lib/notebookData.ts b/app/src/lib/notebookData.ts
index 4b4d1fcb..d0f328bf 100644
--- a/app/src/lib/notebookData.ts
+++ b/app/src/lib/notebookData.ts
@@ -27,11 +27,15 @@ import {
 } from "./runtime/jupyterManager";
 import { createAppJsGlobals } from "./runtime/appJsGlobals";
 import {
-  createNotebooksApi,
   createRunmeConsoleApi,
-  type NotebooksApi,
+  type NotebookDataLike,
   type RunmeConsoleApi,
 } from "./runtime/runmeConsole";
+import {
+  type NotebooksApiBridgeServer,
+  createHostNotebooksApi,
+  createNotebooksApiBridgeServer,
+} from "./runtime/notebooksApiBridge";
 import { JSKernel } from "./runtime/jsKernel";
 import { SandboxJSKernel } from "./runtime/sandboxJsKernel";
 import {
@@ -426,6 +430,10 @@ export class NotebookData {
   // (and in dev, a Vite page reload) on every change.
   private persistTimer: ReturnType<typeof setTimeout> | null = null;
   private readonly persistDelayMs = 750;
+  private readonly resolveNotebookForAppKernel: (
+    target?: unknown,
+  ) => NotebookDataLike | null;
+  private readonly listNotebooksForAppKernel: () => NotebookDataLike[];
 
   constructor({
     notebook,
@@ -433,12 +441,16 @@ export class NotebookData {
     name,
     notebookStore,
     loaded = false,
+    resolveNotebookForAppKernel,
+    listNotebooksForAppKernel,
   }: {
     notebook: parser_pb.Notebook;
     uri: string;
     name: string;
     notebookStore: NotebookSaveStore | null;
     loaded?: boolean;
+    resolveNotebookForAppKernel?: (target?: unknown) => NotebookDataLike | null;
+    listNotebooksForAppKernel?: () => NotebookDataLike[];
   }) {
     this.uri = uri;
     this.name = name;
@@ -448,6 +460,42 @@ export class NotebookData {
     this.sequence = this.computeHighestSequence();
     this.rebuildIndex();
     this.snapshotCache = this.buildSnapshot();
+    this.resolveNotebookForAppKernel =
+      resolveNotebookForAppKernel ??
+      ((target?: unknown) => {
+        if (!target || this.matchesNotebookTarget(target)) {
+          return this;
+        }
+        return null;
+      });
+    this.listNotebooksForAppKernel =
+      listNotebooksForAppKernel ??
+      (() => {
+        return [this];
+      });
+  }
+
+  /**
+   * Reports whether `target` refers to this notebook. A target may be the URI
+   * string itself, an object with a `uri` property, or an object whose
+   * `handle.uri` equals this notebook's URI. Comparison is exact (no trimming).
+   */
+  private matchesNotebookTarget(target: unknown): boolean {
+    const ownUri = this.getUri();
+    if (typeof target === "string") {
+      return target === ownUri;
+    }
+    if (typeof target !== "object" || !target) {
+      return false;
+    }
+    // A direct `uri` property is checked before the nested handle.
+    if ("uri" in target && (target as { uri?: string }).uri === ownUri) {
+      return true;
+    }
+    return (
+      "handle" in target &&
+      (target as { handle?: { uri?: string } }).handle?.uri === ownUri
+    );
   }
 
   /** Subscribe to changes. Returns an unsubscribe function. */
@@ -611,6 +659,13 @@ export class NotebookData {
     this.schedulePersist();
   }
 
+  /** Adopts `notebookStore` only when none is set yet; null is ignored. */
+  setNotebookStore(notebookStore: NotebookSaveStore | null): void {
+    if (notebookStore && !this.notebookStore) {
+      this.notebookStore = notebookStore;
+    }
+  }
+
   // Returns the runID if the cell was started successfully.
   // empty string otherwise
   runCodeCell(cell: parser_pb.Cell): string {
@@ -822,33 +877,11 @@ export class NotebookData {
     const runmeApi = createRunmeConsoleApi({
       resolveNotebook: () => this,
     });
-    const notebooksApi = createNotebooksApi({
-      resolveNotebook: (target?: unknown) => {
-        if (!target) {
-          return this;
-        }
-        if (typeof target === "string" && target === this.getUri()) {
-          return this;
-        }
-        if (
-          typeof target === "object" &&
-          target &&
-          "uri" in target &&
-          (target as { uri?: string }).uri === this.getUri()
-        ) {
-          return this;
-        }
-        if (
-          typeof target === "object" &&
-          target &&
-          "handle" in target &&
-          (target as { handle?: { uri?: string } }).handle?.uri === this.getUri()
-        ) {
-          return this;
-        }
-        return null;
-      },
-      listNotebooks: () => [this],
+    const notebooksApiBridgeServer = createNotebooksApiBridgeServer({
+      notebooksApi: createHostNotebooksApi({
+        resolveNotebook: this.resolveNotebookForAppKernel,
+        listNotebooks: this.listNotebooksForAppKernel,
+      }),
     });
 
     let stdout = "";
@@ -887,7 +920,7 @@ export class NotebookData {
                     method,
                     args,
                     runmeApi,
-                    notebooksApi,
+                    notebooksApiBridgeServer,
                   ),
               },
               hooks,
@@ -895,33 +928,8 @@ export class NotebookData {
           : new JSKernel({
               globals: createAppJsGlobals({
                 runme: runmeApi,
-                resolveNotebook: (target?: unknown) => {
-                  if (!target) {
-                    return this;
-                  }
-                  if (typeof target === "string" && target === this.getUri()) {
-                    return this;
-                  }
-                  if (
-                    typeof target === "object" &&
-                    target &&
-                    "uri" in target &&
-                    (target as { uri?: string }).uri === this.getUri()
-                  ) {
-                    return this;
-                  }
-                  if (
-                    typeof target === "object" &&
-                    target &&
-                    "handle" in target &&
-                    (target as { handle?: { uri?: string } }).handle?.uri ===
-                      this.getUri()
-                  ) {
-                    return this;
-                  }
-                  return null;
-                },
-                listNotebooks: () => [this],
+                resolveNotebook: this.resolveNotebookForAppKernel,
+                listNotebooks: this.listNotebooksForAppKernel,
               }),
               hooks,
             }).run(source)
@@ -995,7 +1003,7 @@ export class NotebookData {
     method: string,
     args: unknown[],
     runmeApi: RunmeConsoleApi,
-    notebooksApi: NotebooksApi,
+    notebooksApiBridgeServer: NotebooksApiBridgeServer,
   ): Promise<unknown> {
     const target = args[0];
     switch (method) {
@@ -1020,19 +1028,13 @@ export class NotebookData {
           cellCount: notebook.getNotebook().cells.length,
         };
       }
-      case "notebooks.help":
-        return notebooksApi.help(args[0] as any);
-      case "notebooks.list":
-        return notebooksApi.list((args[0] as any) ?? undefined);
-      case "notebooks.get":
-        return notebooksApi.get((args[0] as any) ?? undefined);
-      case "notebooks.update":
-        return notebooksApi.update((args[0] as any) ?? { operations: [] });
-      case "notebooks.delete":
-        return notebooksApi.delete(args[0] as any);
-      case "notebooks.execute":
-        return notebooksApi.execute((args[0] as any) ?? { refIds: [] });
       default:
+        if (method.startsWith("notebooks.")) {
+          return notebooksApiBridgeServer.handleMessage({
+            method,
+            args,
+          });
+        }
         throw new Error(`Unsupported sandbox AppKernel method: ${method}`);
     }
   }
@@ -1632,13 +1634,27 @@ export class CellData {
     this.notebook.removeCell(this.refId);
   }
 
-  run(): void {
+  async run(): Promise<void> {
     const cell = this.snapshot;
-    if (!cell) return;
+    if (!cell) {
+      return;
+    }
     const runID = this.notebook.runCodeCell(cell);
     // Update the snapshot after running to pick up any metadata changes.
     this.cachedSnapshot = this.notebook.getCellSnapshot(this.refId);
     this.emitRunIDChange(runID ?? "");
+    if (!runID || this.hasRunCompleted(runID)) {
+      return;
+    }
+    await new Promise<void>((resolve) => {
+      const unsubscribe = this.subscribeToContentChange(() => {
+        this.cachedSnapshot = this.notebook.getCellSnapshot(this.refId);
+        if (this.hasRunCompleted(runID)) {
+          unsubscribe();
+          resolve();
+        }
+      });
+    });
   }
 
   getStreams(): StreamsLike | undefined {
@@ -1724,4 +1740,14 @@ export class CellData {
     }
     return DEFAULT_RUNNER_PLACEHOLDER;
   }
+
+  /** True once `runID` is superseded, has an exit code, or the cell is gone. */
+  private hasRunCompleted(runID: string): boolean {
+    const metadata = this.notebook.getCellSnapshot(this.refId)?.metadata;
+    // Missing snapshot/metadata gives undefined, which never equals a run id.
+    if (metadata?.[RunmeMetadataKey.LastRunID] !== runID) {
+      return true;
+    }
+    return typeof metadata?.[RunmeMetadataKey.ExitCode] === "string";
+  }
 }
diff --git a/app/src/lib/runtime/codeModeExecutor.test.ts b/app/src/lib/runtime/codeModeExecutor.test.ts
new file mode 100644
index 00000000..29db13a4
--- /dev/null
+++ b/app/src/lib/runtime/codeModeExecutor.test.ts
@@ -0,0 +1,99 @@
+// @vitest-environment jsdom
+import { create } from '@bufbuild/protobuf'
+import { describe, expect, it, vi } from 'vitest'
+
+import { parser_pb } from '../../runme/client'
+import { appLogger } from '../logging/runtime'
+import {
+  createCodeModeExecutor,
+  getCodeModeErrorOutput,
+} from './codeModeExecutor'
+
+// Minimal notebook stub: fixed URI and name, an empty notebook, inert cell API.
+const createNotebook = () => {
+  const emptyNotebook = create(parser_pb.NotebookSchema, { cells: [] })
+  const stub = {
+    getUri: () => 'local://test.runme.md',
+    getName: () => 'test.runme.md',
+    getNotebook: () => emptyNotebook,
+    updateCell: () => {},
+    getCell: () => null,
+  }
+  return stub
+}
+
+describe('codeModeExecutor', () => {
+  it('merges stdout and stderr into one ordered output string', async () => {
+    const infoSpy = vi.spyOn(appLogger, 'info')
+    try {
+      const notebook = createNotebook()
+      const executor = createCodeModeExecutor({
+        mode: 'browser',
+        resolveNotebook: () => notebook,
+        listNotebooks: () => [notebook],
+      })
+
+      const { output } = await executor.execute({
+        source: 'chatkit',
+        code: "console.log('one'); console.error('two'); console.log('three');",
+      })
+
+      for (const marker of ['one', 'two', 'three']) {
+        expect(output).toContain(marker)
+      }
+      expect(output.indexOf('one')).toBeLessThan(output.indexOf('two'))
+      expect(output.indexOf('two')).toBeLessThan(output.indexOf('three'))
+      const findLog = (message: string) =>
+        infoSpy.mock.calls.find(([logged]) => logged === message)
+      const started = findLog('Code mode execution started')
+      const completed = findLog('Code mode execution completed')
+      expect(started?.[1]?.attrs?.code).toContain("console.log('one')")
+      for (const marker of ['one', 'two', 'three']) {
+        expect(completed?.[1]?.attrs?.output).toContain(marker)
+      }
+    } finally {
+      // Restore even when an assertion above throws, so the spy cannot leak.
+      infoSpy.mockRestore()
+    }
+  })
+
+  it('truncates output when it exceeds the configured output budget', async () => {
+    const notebook = createNotebook()
+    const executor = createCodeModeExecutor({
+      mode: 'browser',
+      maxOutputBytes: 20, // smaller than the 26-letter line printed below
+      resolveNotebook: () => notebook,
+      listNotebooks: () => [notebook],
+    })
+
+    const { output } = await executor.execute({
+      source: 'chatkit',
+      code: "console.log('abcdefghijklmnopqrstuvwxyz');",
+    })
+
+    expect(output).toContain('[output truncated]')
+  })
+
+  it('returns partial output when execution times out', async () => {
+    const notebook = createNotebook()
+    const executor = createCodeModeExecutor({
+      mode: 'browser',
+      timeoutMs: 20,
+      resolveNotebook: () => notebook,
+      listNotebooks: () => [notebook],
+    })
+
+    // Assert outside any catch so a "did not throw" failure is not swallowed.
+    const error: unknown = await executor
+      .execute({
+        source: 'codex',
+        code: "console.log('started'); await new Promise(() => {});",
+      })
+      .then(
+        () => expect.fail('expected timeout error'),
+        (reason: unknown) => reason
+      )
+    expect(String(error)).toMatch(/timed out/)
+    expect(getCodeModeErrorOutput(error)).toContain('started')
+  })
+})
diff --git a/app/src/lib/runtime/codeModeExecutor.ts b/app/src/lib/runtime/codeModeExecutor.ts
new file mode 100644
index 00000000..0d3ab176
--- /dev/null
+++ b/app/src/lib/runtime/codeModeExecutor.ts
@@ -0,0 +1,255 @@
+import { appLogger } from '../logging/runtime'
+import { createAppJsGlobals } from './appJsGlobals'
+import { JSKernel } from './jsKernel'
+import {
+  type NotebookDataLike,
+  createRunmeConsoleApi,
+} from './runmeConsole'
+import {
+  type NotebooksApiBridgeServer,
+  createHostNotebooksApi,
+  createNotebooksApiBridgeServer,
+} from './notebooksApiBridge'
+import { SandboxJSKernel } from './sandboxJsKernel'
+
+export type CodeModeSource = 'chatkit' | 'codex'
+export type CodeModeRunnerMode = 'browser' | 'sandbox'
+
+const DEFAULT_TIMEOUT_MS = 15_000
+const DEFAULT_MAX_OUTPUT_BYTES = 256 * 1024
+const DEFAULT_MAX_CODE_BYTES = 64 * 1024
+const OUTPUT_TRUNCATED_SUFFIX = '\n[output truncated]\n'
+
+export type CodeModeExecutionError = Error & { output: string }
+
+function withOutput(error: unknown, output: string): CodeModeExecutionError {
+  // Wrap non-Error throwables so the captured output always rides on an Error.
+  const base = error instanceof Error ? error : new Error(String(error))
+  // The error is mutated in place rather than copied, so the same object is returned.
+  return Object.assign(base, { output })
+}
+
+export function getCodeModeErrorOutput(error: unknown): string {
+  // Reads the `output` string that withOutput attaches to thrown errors.
+  // Anything that is not an object carrying a string `output` yields ''.
+  const isObject = typeof error === 'object' && error !== null
+  const attached = isObject ? (error as { output?: unknown }).output : undefined
+  return typeof attached === 'string' ? attached : ''
+}
+
+export type CodeModeExecutor = {
+  execute(args: {
+    code: string
+    source: CodeModeSource
+  }): Promise<{ output: string }>
+}
+
+/**
+ * Builds an executor that runs code in a JS kernel ('sandbox' unless `mode`
+ * says 'browser') and returns stdout and stderr merged into one string.
+ * Oversized code is rejected, output is capped at `maxOutputBytes`, and runs
+ * longer than `timeoutMs` reject; every rejection carries `.output` so far.
+ */
+export function createCodeModeExecutor(options: {
+  mode?: CodeModeRunnerMode
+  timeoutMs?: number
+  maxOutputBytes?: number
+  maxCodeBytes?: number
+  resolveNotebook: (target?: unknown) => NotebookDataLike | null
+  listNotebooks?: () => NotebookDataLike[]
+}): CodeModeExecutor {
+  const mode = options.mode ?? 'sandbox'
+  const timeoutMs = options.timeoutMs ?? DEFAULT_TIMEOUT_MS
+  const maxOutputBytes = options.maxOutputBytes ?? DEFAULT_MAX_OUTPUT_BYTES
+  const maxCodeBytes = options.maxCodeBytes ?? DEFAULT_MAX_CODE_BYTES
+
+  const resolveNotebook = options.resolveNotebook
+  const listNotebooks =
+    options.listNotebooks ??
+    (() => {
+      const current = resolveNotebook()
+      return current ? [current] : []
+    })
+
+  return {
+    execute: async ({ code, source }) => {
+      const normalizedCode =
+        typeof code === 'string' ? code : String(code ?? '')
+      const codeBytes = new TextEncoder().encode(normalizedCode).length
+      if (codeBytes > maxCodeBytes) {
+        throw withOutput(
+          new Error(
+            `ExecuteCode rejected code payload larger than ${maxCodeBytes} bytes`
+          ),
+          ''
+        )
+      }
+
+      appLogger.info('Code mode execution started', {
+        attrs: {
+          scope: 'chatkit.code_mode',
+          source,
+          mode,
+          timeoutMs,
+          maxOutputBytes,
+          maxCodeBytes,
+          code: normalizedCode,
+          codeBytes,
+        },
+      })
+
+      const runmeApi = createRunmeConsoleApi({ resolveNotebook })
+      const notebooksApiBridgeServer = createNotebooksApiBridgeServer({
+        notebooksApi: createHostNotebooksApi({
+          resolveNotebook,
+          listNotebooks,
+        }),
+      })
+
+      const chunks: string[] = []
+      let outputBytes = 0
+      let truncated = false
+
+      const appendOutput = (data: string) => {
+        if (truncated || !data) {
+          return
+        }
+        const bytes = new TextEncoder().encode(data)
+        if (outputBytes + bytes.length <= maxOutputBytes) {
+          chunks.push(data)
+          outputBytes += bytes.length
+          return
+        }
+        // Clip by UTF-8 bytes (not UTF-16 units) and back off past any
+        // continuation bytes (10xxxxxx) so no character is split.
+        let cut = Math.max(0, maxOutputBytes - outputBytes)
+        while (cut > 0 && ((bytes[cut] ?? 0) & 0xc0) === 0x80) {
+          cut -= 1
+        }
+        if (cut > 0) {
+          chunks.push(new TextDecoder().decode(bytes.subarray(0, cut)))
+        }
+        chunks.push(OUTPUT_TRUNCATED_SUFFIX)
+        outputBytes += cut
+        truncated = true
+      }
+      const hooks = { onStdout: appendOutput, onStderr: appendOutput }
+
+      const globals = createAppJsGlobals({
+        runme: runmeApi,
+        sendOutput: appendOutput,
+        resolveNotebook,
+        listNotebooks,
+      })
+
+      const kernelRun =
+        mode === 'sandbox'
+          ? new SandboxJSKernel({
+              hooks,
+              bridge: {
+                call: (method, args) =>
+                  handleSandboxAppKernelBridgeCall({
+                    method,
+                    args,
+                    runmeApi,
+                    notebooksApiBridgeServer,
+                  }),
+              },
+            }).run(normalizedCode)
+          : new JSKernel({ globals, hooks }).run(normalizedCode)
+
+      let timer: ReturnType<typeof setTimeout> | undefined
+      try {
+        // Only the wait is abandoned on timeout: nothing here cancels the
+        // kernel, so a timed-out snippet is not guaranteed to have stopped.
+        await Promise.race([
+          kernelRun,
+          new Promise<void>((_resolve, reject) => {
+            timer = setTimeout(() => {
+              reject(new Error(`ExecuteCode timed out after ${timeoutMs}ms`))
+            }, timeoutMs)
+          }),
+        ])
+      } catch (error) {
+        const output = chunks.join('')
+        appLogger.error('Code mode execution failed', {
+          attrs: {
+            scope: 'chatkit.code_mode',
+            source,
+            mode,
+            timeoutMs,
+            maxOutputBytes,
+            code: normalizedCode,
+            output,
+            error: String(error),
+          },
+        })
+        throw withOutput(error, output)
+      } finally {
+        if (timer !== undefined) {
+          clearTimeout(timer)
+        }
+      }
+
+      const output = chunks.join('')
+      appLogger.info('Code mode execution completed', {
+        attrs: {
+          scope: 'chatkit.code_mode',
+          source,
+          mode,
+          code: normalizedCode,
+          output,
+          outputBytes,
+          truncated,
+        },
+      })
+
+      return { output }
+    },
+  }
+}
+
+// Routes one sandbox bridge call: `runme.*` methods go to the runme console
+// API, `notebooks.*` to the notebooks bridge server; anything else throws.
+async function handleSandboxAppKernelBridgeCall(call: {
+  method: string
+  args: unknown[]
+  runmeApi: ReturnType<typeof createRunmeConsoleApi>
+  notebooksApiBridgeServer: NotebooksApiBridgeServer
+}): Promise<unknown> {
+  const { method, args, runmeApi, notebooksApiBridgeServer } = call
+  const firstArg = args[0]
+  if (method === 'runme.clear') {
+    return runmeApi.clear(firstArg)
+  }
+  if (method === 'runme.clearOutputs') {
+    return runmeApi.clearOutputs(firstArg)
+  }
+  if (method === 'runme.runAll') {
+    return runmeApi.runAll(firstArg)
+  }
+  if (method === 'runme.rerun') {
+    return runmeApi.rerun(firstArg)
+  }
+  if (method === 'runme.help') {
+    return runmeApi.help()
+  }
+  if (method === 'runme.getCurrentNotebook') {
+    const notebook = runmeApi.getCurrentNotebook()
+    // Only a plain summary is returned, never the notebook object itself.
+    return notebook
+      ? {
+          uri: notebook.getUri(),
+          name: notebook.getName(),
+          cellCount: notebook.getNotebook().cells.length,
+        }
+      : null
+  }
+  if (method.startsWith('notebooks.')) {
+    return notebooksApiBridgeServer.handleMessage({
+      method,
+      args,
+    })
+  }
+  throw new Error(`Unsupported sandbox AppKernel method: ${method}`)
+}
diff --git a/app/src/lib/runtime/codexConversationController.test.ts b/app/src/lib/runtime/codexConversationController.test.ts
index f4caa040..2348a3cf 100644
--- a/app/src/lib/runtime/codexConversationController.test.ts
+++ b/app/src/lib/runtime/codexConversationController.test.ts
@@ -473,6 +473,72 @@ describe("CodexConversationController", () => {
     expect(controller.getSnapshot().currentTurnId).toBeNull();
   });
 
+  it("ignores a stale ChatKit thread id when the controller already has a current thread", async () => {
+    proxyClient.sendRequest.mockImplementation(async (method: string, params?: unknown) => {
+      if (method === "thread/start") {
+        return { threadId: "thread-fresh", title: "Runme Repo" }; // id handed out by the mocked thread/start
+      }
+      if (method === "turn/start") {
+        expect(params).toEqual( // the turn must target the fresh thread, not "thread-stale"
+          expect.objectContaining({
+            threadId: "thread-fresh",
+          }),
+        );
+        queueMicrotask(() => { // notifications fire in a microtask, i.e. after this mock body has finished running
+          notificationHandlers.forEach((handler) => {
+            handler({
+              jsonrpc: "2.0",
+              method: "turn.message.started",
+              params: {
+                threadId: "thread-fresh",
+                turnId: "turn-1",
+                responseId: "resp-1",
+                itemId: "msg-1",
+              },
+            });
+            handler({
+              jsonrpc: "2.0",
+              method: "turn.output_text.done",
+              params: {
+                threadId: "thread-fresh",
+                turnId: "turn-1",
+                responseId: "resp-1",
+                itemId: "msg-1",
+                text: "done",
+              },
+            });
+            handler({
+              jsonrpc: "2.0",
+              method: "turn.completed",
+              params: {
+                threadId: "thread-fresh",
+                turnId: "turn-1",
+              },
+            });
+          });
+        });
+        return { turnId: "turn-1", itemId: "msg-1" };
+      }
+      return {}; // every other proxy request gets an empty result
+    });
+
+    const controller = createCodexConversationControllerForTests();
+    const events: any[] = []; // NOTE(review): events are collected via emit below but never asserted in this test
+    const nextState = await controller.streamUserMessage(
+      "hello",
+      { threadId: "thread-stale", previousResponseId: "resp-stale" }, // stale ChatKit state the controller is expected to ignore
+      {
+        emit: (payload) => events.push(payload),
+      },
+    );
+
+    expect(nextState).toEqual({
+      threadId: "thread-fresh",
+      previousResponseId: "resp-1", // the responseId carried by the mocked notifications
+    });
+    expect(controller.getSnapshot().currentThreadId).toBe("thread-fresh");
+  });
+
   it("maps item-based codex notifications into ChatKit-compatible events", async () => {
     proxyClient.sendRequest.mockImplementation(async (method: string) => {
       if (method === "thread/start") {
diff --git a/app/src/lib/runtime/codexConversationController.ts b/app/src/lib/runtime/codexConversationController.ts
index 6da1c7e0..0d6e710c 100644
--- a/app/src/lib/runtime/codexConversationController.ts
+++ b/app/src/lib/runtime/codexConversationController.ts
@@ -610,10 +610,21 @@ class CodexConversationController {
     const proxy = getCodexAppServerProxyClient();
     const project = this.getSnapshot().selectedProject;
     const activeThread = await this.ensureActiveThread();
-    let threadId = chatkitState.threadId ?? this.currentThreadId ?? activeThread.id;
+    let threadId = this.currentThreadId ?? activeThread.id;
     if (!threadId) {
       throw new Error("No active Codex thread available before turn/start");
     }
+    if (chatkitState.threadId && chatkitState.threadId !== threadId) {
+      appLogger.info("Ignoring stale Codex ChatKit thread id", {
+        attrs: {
+          scope: "chatkit.codex_controller",
+          chatkitStateThreadId: chatkitState.threadId,
+          currentThreadId: this.currentThreadId,
+          activeThreadId: activeThread.id,
+          selectedThreadId: threadId,
+        },
+      });
+    }
 
     if (this.resumeRequired.has(threadId)) {
       const project = this.getSnapshot().selectedProject;
diff --git a/app/src/lib/runtime/jsKernel.ts b/app/src/lib/runtime/jsKernel.ts
index c5da60b8..c1417126 100644
--- a/app/src/lib/runtime/jsKernel.ts
+++ b/app/src/lib/runtime/jsKernel.ts
@@ -119,9 +119,9 @@ export class JSKernel {
               "- app.codex.project.list(): list configured codex projects",
               '- app.runCells(["cellID"]): approve pending codex ExecuteCells',
               "- notebooks.list(): list known notebooks",
-              "- notebooks.get([target]): get notebook document and handle",
-              "- notebooks.update({...}): apply notebook mutations",
-              "- notebooks.execute({ refIds }): run selected cells",
+              "- notebooks.get([target]): get notebook document and handle; omitted target = current UI notebook",
+              "- notebooks.update({ target, operations, ... }): apply notebook mutations",
+              "- notebooks.execute({ target, refIds }): run selected cells",
               "- help(): show this message",
             ].join("\n") + "\n",
           )),
diff --git a/app/src/lib/runtime/notebooksApiBridge.test.ts b/app/src/lib/runtime/notebooksApiBridge.test.ts
new file mode 100644
index 00000000..b37d21ed
--- /dev/null
+++ b/app/src/lib/runtime/notebooksApiBridge.test.ts
@@ -0,0 +1,71 @@
+import { describe, expect, it, vi } from 'vitest'
+
+import {
+  createNotebooksApiBridgeServer,
+  type NotebooksApiBridgeServer,
+} from './notebooksApiBridge'
+import type { NotebooksApi } from './runmeConsole'
+
+/**
+ * Test helper: wraps a stub NotebooksApi in a bridge server.
+ * help/list/delete resolve; get/update/execute reject with 'not implemented'.
+ * Any member can be replaced through `overrides`.
+ */
+function createBridgeServer(
+  overrides: Partial<NotebooksApi> = {}
+): NotebooksApiBridgeServer {
+  const notImplemented = () =>
+    vi.fn(async () => {
+      throw new Error('not implemented')
+    })
+  const defaults: NotebooksApi = {
+    help: vi.fn(async () => 'help text'),
+    list: vi.fn(async () => []),
+    get: notImplemented(),
+    update: notImplemented(),
+    delete: vi.fn(async () => {}),
+    execute: notImplemented(),
+  }
+  const notebooksApi: NotebooksApi = { ...defaults, ...overrides }
+  return createNotebooksApiBridgeServer({ notebooksApi })
+}
+
+// Covers delegation to the host NotebooksApi and rejection of unknown methods.
+describe('createNotebooksApiBridgeServer', () => {
+  it('delegates sandbox notebook RPCs to the host NotebooksApi implementation', async () => {
+    const summary = {
+      uri: 'local://file/demo',
+      name: 'demo.json',
+      isOpen: true,
+      source: 'local' as const,
+    }
+    const list = vi.fn(async () => [summary])
+    const bridgeServer = createBridgeServer({ list })
+    const request = {
+      method: 'notebooks.list',
+      args: [{ openOnly: true }],
+    }
+
+    // The stub's return value must come back through the bridge unchanged.
+    await expect(bridgeServer.handleMessage(request)).resolves.toEqual([
+      {
+        uri: 'local://file/demo',
+        name: 'demo.json',
+        isOpen: true,
+        source: 'local',
+      },
+    ])
+    // list() must receive the query object as its single argument.
+    expect(list).toHaveBeenCalledWith({ openOnly: true })
+  })
+
+  it('rejects unknown notebook RPC methods', async () => {
+    const bridgeServer = createBridgeServer()
+    const request = { method: 'notebooks.unknown', args: [] }
+
+    // A method name the dispatcher does not handle must reject, not resolve.
+    await expect(bridgeServer.handleMessage(request)).rejects.toThrow(
+      'Unsupported sandbox NotebooksApi method: notebooks.unknown'
+    )
+  })
+})
diff --git a/app/src/lib/runtime/notebooksApiBridge.ts b/app/src/lib/runtime/notebooksApiBridge.ts
new file mode 100644
index 00000000..321cbb3f
--- /dev/null
+++ b/app/src/lib/runtime/notebooksApiBridge.ts
@@ -0,0 +1,73 @@
+import {
+  type NotebookDataLike,
+  type NotebookQuery,
+  type NotebookTarget,
+  type NotebooksApi,
+  createNotebooksApi,
+} from './runmeConsole'
+
+export type NotebooksApiBridgeRequest = { // one RPC request handled by the bridge server
+  method: string // e.g. 'notebooks.list'
+  args?: unknown[] // positional arguments; the bridge server in this file reads only args[0]
+}
+
+export type NotebooksApiBridgeServer = { // shape returned by createNotebooksApiBridgeServer
+  handleMessage: (request: NotebooksApiBridgeRequest) => Promise<unknown> // rejects on an unsupported method
+}
+
+export const SANDBOX_NOTEBOOKS_API_METHODS = [ // the same six names createNotebooksApiBridgeServer dispatches on
+  'notebooks.help',
+  'notebooks.list',
+  'notebooks.get',
+  'notebooks.update',
+  'notebooks.delete',
+  'notebooks.execute',
+] as const
+
+/**
+ * Builds a NotebooksApi by handing the supplied notebook resolver and the
+ * optional notebook lister to createNotebooksApi.
+ */
+export function createHostNotebooksApi(options: {
+  resolveNotebook: (target?: unknown) => NotebookDataLike | null
+  listNotebooks?: () => NotebookDataLike[]
+}): NotebooksApi {
+  // Forward only the two known fields, not the whole options object.
+  const { resolveNotebook, listNotebooks } = options
+  return createNotebooksApi({ resolveNotebook, listNotebooks })
+}
+
+/**
+ * Creates the dispatcher that routes `notebooks.*` bridge requests to
+ * `notebooksApi`, forwarding only args[0]. Nullish requests default to
+ * `{ operations: [] }` (update) / `{ refIds: [] }` (execute); unknown methods reject.
+ */
+export function createNotebooksApiBridgeServer({
+  notebooksApi,
+}: {
+  notebooksApi: NotebooksApi
+}): NotebooksApiBridgeServer {
+  type HelpArg = Parameters<NotebooksApi['help']>[0]
+  type UpdateArg = Parameters<NotebooksApi['update']>[0]
+  type ExecuteArg = Parameters<NotebooksApi['execute']>[0]
+  return {
+    handleMessage: async ({ method, args = [] }) => {
+      switch (method) {
+        case 'notebooks.help':
+          return notebooksApi.help(args[0] as HelpArg)
+        case 'notebooks.list':
+          return notebooksApi.list((args[0] as NotebookQuery | undefined) ?? undefined)
+        case 'notebooks.get':
+          return notebooksApi.get((args[0] as NotebookTarget | undefined) ?? undefined)
+        case 'notebooks.update':
+          return notebooksApi.update((args[0] as UpdateArg | undefined) ?? { operations: [] })
+        case 'notebooks.delete':
+          return notebooksApi.delete(args[0] as NotebookTarget)
+        case 'notebooks.execute':
+          return notebooksApi.execute((args[0] as ExecuteArg | undefined) ?? { refIds: [] })
+        default:
+          throw new Error(`Unsupported sandbox NotebooksApi method: ${method}`)
+      }
+    },
+  }
+}
diff --git a/app/src/lib/runtime/responsesDirectChatkitFetch.test.ts b/app/src/lib/runtime/responsesDirectChatkitFetch.test.ts
index 7293f0aa..04dc7f73 100644
--- a/app/src/lib/runtime/responsesDirectChatkitFetch.test.ts
+++ b/app/src/lib/runtime/responsesDirectChatkitFetch.test.ts
@@ -169,4 +169,320 @@ describe("responsesDirectChatkitFetch", () => {
       }),
     );
   });
+
+  it("includes ExecuteCode function tool in Responses request payload", async () => {
+    const fetchMock = vi.spyOn(globalThis, "fetch").mockResolvedValueOnce(
+      sseResponse([
+        { type: "response.created", response: { id: "resp-tools" } },
+        { type: "response.completed", response: { id: "resp-tools" } },
+      ]),
+    );
+
+    const fetchFn = createResponsesDirectChatkitFetch();
+    const response = await fetchFn("/responses/direct/chatkit", {
+      method: "POST",
+      headers: {
+        "content-type": "application/json",
+      },
+      body: JSON.stringify({
+        type: "threads.create",
+        params: {
+          input: {
+            content: [{ type: "input_text", text: "run code" }],
+            attachments: [],
+            inference_options: { model: "gpt-5.2" },
+          },
+        },
+      }),
+    });
+
+    await response.text();
+
+    const requestInit = fetchMock.mock.calls.at(0)?.[1];
+    const requestBody = JSON.parse(String(requestInit?.body ?? "{}")) as {
+      tools?: Array<Record<string, unknown>>;
+      instructions?: string;
+    };
+    const executeCodeTool = (requestBody.tools ?? []).find(
+      (tool) =>
+        tool?.type === "function" &&
+        tool?.name === "ExecuteCode",
+    ) as Record<string, unknown> | undefined;
+
+    expect(executeCodeTool).toBeDefined();
+    expect(executeCodeTool?.strict).toBe(true);
+    expect(executeCodeTool?.parameters).toEqual({
+      type: "object",
+      additionalProperties: false,
+      properties: {
+        code: { type: "string" },
+      },
+      required: ["code"],
+    });
+    expect(requestBody.instructions).toContain("single tool: ExecuteCode");
+    expect(requestBody.instructions).toContain("embedded in the Runme app ChatKit panel");
+    expect(requestBody.instructions).toContain("agent harnesses");
+    expect(requestBody.instructions).toContain(
+      "https://drive.google.com/drive/folders/1Qdg_VA4ZBlOKojJqW2CqSVuJ2p2I4yS5",
+    );
+    expect(requestBody.instructions).toContain("console.log(explorer.mountDrive(");
+    expect(requestBody.instructions).toContain("call notebooks.get({ handle: result.handle }) to verify the new cell exists");
+    expect(requestBody.instructions).toContain('report the new cell refId');
+    expect(requestBody.instructions).toContain(
+      "tell the user to click Run on that cell manually",
+    );
+    expect(requestBody.instructions).toContain('"runme.dev/runnerName": "appkernel-js"');
+    expect(requestBody.instructions).toContain("await help()");
+    expect(requestBody.instructions).toContain("notebooks.help");
+    expect(requestBody.instructions).toContain("Always await helper calls");
+    expect(requestBody.instructions).toContain("doc.notebook.cells");
+    expect(requestBody.instructions).toContain("new TextDecoder().decode(item.data)");
+    expect(requestBody.instructions).toContain('op="insert"');
+    expect(requestBody.instructions).toContain('Do not use JSON Patch style mutations');
+  });
+
+  it("propagates call_id and previous_response_id on tool-call items", async () => {
+    vi.spyOn(globalThis, "fetch").mockResolvedValueOnce(
+      sseResponse([
+        { type: "response.created", response: { id: "resp-prev" } },
+        {
+          type: "response.function_call_arguments.done",
+          item_id: "tool-item-1",
+          call_id: "call-1",
+          name: "ExecuteCode",
+          arguments: "{\"code\":\"console.log('hi')\"}",
+        },
+        { type: "response.completed", response: { id: "resp-prev" } },
+      ]),
+    );
+
+    const fetchFn = createResponsesDirectChatkitFetch();
+    const response = await fetchFn("/responses/direct/chatkit", {
+      method: "POST",
+      headers: {
+        "content-type": "application/json",
+      },
+      body: JSON.stringify({
+        type: "threads.create",
+        params: {
+          input: {
+            content: [{ type: "input_text", text: "run code" }],
+            attachments: [],
+            inference_options: { model: "gpt-5.2" },
+          },
+        },
+      }),
+    });
+
+    const body = await response.text();
+    expect(body).toContain("\"type\":\"client_tool_call\"");
+    expect(body).toContain("\"call_id\":\"call-1\"");
+    expect(body).toContain("\"previous_response_id\":\"resp-prev\"");
+  });
+
+  it("falls back call_id to item_id only when item_id already looks like a call id", async () => {
+    vi.spyOn(globalThis, "fetch").mockResolvedValueOnce(
+      sseResponse([
+        { type: "response.created", response: { id: "resp-prev" } },
+        {
+          type: "response.function_call_arguments.done",
+          item_id: "call_fallback_from_item_id",
+          name: "ExecuteCode",
+          arguments: "{\"code\":\"console.log('hi')\"}",
+        },
+        { type: "response.completed", response: { id: "resp-prev" } },
+      ]),
+    );
+
+    const fetchFn = createResponsesDirectChatkitFetch();
+    const response = await fetchFn("/responses/direct/chatkit", {
+      method: "POST",
+      headers: {
+        "content-type": "application/json",
+      },
+      body: JSON.stringify({
+        type: "threads.create",
+        params: {
+          input: {
+            content: [{ type: "input_text", text: "run code" }],
+            attachments: [],
+            inference_options: { model: "gpt-5.2" },
+          },
+        },
+      }),
+    });
+
+    const body = await response.text();
+    expect(body).toContain("\"type\":\"client_tool_call\"");
+    expect(body).toContain("\"call_id\":\"call_fallback_from_item_id\"");
+    expect(body).toContain("\"previous_response_id\":\"resp-prev\"");
+  });
+
+  it("recovers call_id from function_call output item when arguments.done omits call_id", async () => {
+    vi.spyOn(globalThis, "fetch").mockResolvedValueOnce(
+      sseResponse([
+        { type: "response.created", response: { id: "resp-prev" } },
+        {
+          type: "response.output_item.added",
+          item: {
+            id: "fc_abc123",
+            type: "function_call",
+            name: "ExecuteCode",
+            call_id: "call_84MJLvWD9WwoH8CO9DPT2CNy",
+          },
+        },
+        {
+          type: "response.function_call_arguments.done",
+          item_id: "fc_abc123",
+          arguments: "{\"code\":\"console.log('hi')\"}",
+        },
+        { type: "response.completed", response: { id: "resp-prev" } },
+      ]),
+    );
+
+    const fetchFn = createResponsesDirectChatkitFetch();
+    const response = await fetchFn("/responses/direct/chatkit", {
+      method: "POST",
+      headers: {
+        "content-type": "application/json",
+      },
+      body: JSON.stringify({
+        type: "threads.create",
+        params: {
+          input: {
+            content: [{ type: "input_text", text: "run code" }],
+            attachments: [],
+            inference_options: { model: "gpt-5.2" },
+          },
+        },
+      }),
+    });
+
+    const body = await response.text();
+    expect(body).toContain("\"type\":\"client_tool_call\"");
+    expect(body).toContain("\"call_id\":\"call_84MJLvWD9WwoH8CO9DPT2CNy\"");
+    expect(body).not.toContain("\"call_id\":\"fc_abc123\"");
+  });
+
+  it("does not treat function-call item ids as call_id when no call_id is provided", async () => {
+    vi.spyOn(globalThis, "fetch").mockResolvedValueOnce(
+      sseResponse([
+        { type: "response.created", response: { id: "resp-prev" } },
+        {
+          type: "response.function_call_arguments.done",
+          item_id: "fc_no_call_id_present",
+          name: "ExecuteCode",
+          arguments: "{\"code\":\"console.log('hi')\"}",
+        },
+        { type: "response.completed", response: { id: "resp-prev" } },
+      ]),
+    );
+
+    const fetchFn = createResponsesDirectChatkitFetch();
+    const response = await fetchFn("/responses/direct/chatkit", {
+      method: "POST",
+      headers: {
+        "content-type": "application/json",
+      },
+      body: JSON.stringify({
+        type: "threads.create",
+        params: {
+          input: {
+            content: [{ type: "input_text", text: "run code" }],
+            attachments: [],
+            inference_options: { model: "gpt-5.2" },
+          },
+        },
+      }),
+    });
+
+    const body = await response.text();
+    expect(body).toContain("\"type\":\"client_tool_call\"");
+    expect(body).not.toContain("\"call_id\":\"fc_no_call_id_present\"");
+  });
+
+  it("recovers tool name from function_call output item when arguments.done omits name", async () => {
+    vi.spyOn(globalThis, "fetch").mockResolvedValueOnce(
+      sseResponse([
+        { type: "response.created", response: { id: "resp-prev" } },
+        {
+          type: "response.output_item.added",
+          item: {
+            id: "tool-item-name-fallback",
+            type: "function_call",
+            name: "ExecuteCode",
+          },
+        },
+        {
+          type: "response.function_call_arguments.done",
+          item_id: "tool-item-name-fallback",
+          call_id: "call-name-fallback",
+          arguments: "{\"code\":\"console.log('hi')\"}",
+        },
+        { type: "response.completed", response: { id: "resp-prev" } },
+      ]),
+    );
+
+    const fetchFn = createResponsesDirectChatkitFetch();
+    const response = await fetchFn("/responses/direct/chatkit", {
+      method: "POST",
+      headers: {
+        "content-type": "application/json",
+      },
+      body: JSON.stringify({
+        type: "threads.create",
+        params: {
+          input: {
+            content: [{ type: "input_text", text: "run code" }],
+            attachments: [],
+            inference_options: { model: "gpt-5.2" },
+          },
+        },
+      }),
+    });
+
+    const body = await response.text();
+    expect(body).toContain("\"type\":\"client_tool_call\"");
+    expect(body).toContain("\"name\":\"ExecuteCode\"");
+    expect(body).not.toContain("\"name\":\"unknown_tool\"");
+  });
+
+  it("includes code mode instructions in tool-output requests", async () => {
+    const fetchMock = vi.spyOn(globalThis, "fetch").mockResolvedValueOnce(
+      sseResponse([
+        { type: "response.created", response: { id: "resp-tool-output" } },
+        { type: "response.completed", response: { id: "resp-tool-output" } },
+      ]),
+    );
+
+    const fetchFn = createResponsesDirectChatkitFetch();
+    const response = await fetchFn("/responses/direct/chatkit", {
+      method: "POST",
+      headers: {
+        "content-type": "application/json",
+      },
+      body: JSON.stringify({
+        type: "threads.add_client_tool_output",
+        params: {
+          id: "thread-test",
+          result: {
+            call_id: "call-1",
+            previous_response_id: "resp-prev",
+            output: "ok",
+          },
+        },
+      }),
+    });
+
+    await response.text();
+    const requestInit = fetchMock.mock.calls.at(0)?.[1];
+    const requestBody = JSON.parse(String(requestInit?.body ?? "{}")) as {
+      instructions?: string;
+      input?: Array<Record<string, unknown>>;
+    };
+    expect(requestBody.instructions).toContain("single tool: ExecuteCode");
+    expect(requestBody.instructions).toContain("notebooks.update");
+    expect(requestBody.instructions).toContain("Always await helper calls");
+    expect(requestBody.input?.[0]?.type).toBe("function_call_output");
+  });
 });
diff --git a/app/src/lib/runtime/responsesDirectChatkitFetch.ts b/app/src/lib/runtime/responsesDirectChatkitFetch.ts
index 04f4551c..1841b51d 100644
--- a/app/src/lib/runtime/responsesDirectChatkitFetch.ts
+++ b/app/src/lib/runtime/responsesDirectChatkitFetch.ts
@@ -1,203 +1,279 @@
-import { appLogger } from "../logging/runtime";
-import { getAccessToken } from "../../token";
-import { responsesDirectConfigManager } from "./responsesDirectConfigManager";
-import type { ChatKitThreadDetail } from "./chatkitProtocol";
+import { getAccessToken } from '../../token'
+import { appLogger } from '../logging/runtime'
+import type { ChatKitThreadDetail } from './chatkitProtocol'
+import { responsesDirectConfigManager } from './responsesDirectConfigManager'
 
-type JsonRecord = Record<string, unknown>;
+type JsonRecord = Record<string, unknown>
 
 type BodyPayload = {
-  raw: unknown;
-  json: JsonRecord;
-};
+  raw: unknown
+  json: JsonRecord
+}
 
 type StoredThread = {
-  id: string;
-  title: string;
-  createdAt: string;
-  updatedAt: string;
-  previousResponseId?: string;
-  model: string;
-  items: JsonRecord[];
-};
-
-const DEFAULT_OPENAI_RESPONSES_URL = "https://api.openai.com/v1/responses";
-const DEFAULT_MODEL = "gpt-5.2";
+  id: string
+  title: string
+  createdAt: string
+  updatedAt: string
+  previousResponseId?: string
+  model: string
+  items: JsonRecord[]
+}
+
+const DEFAULT_OPENAI_RESPONSES_URL = 'https://api.openai.com/v1/responses'
+const DEFAULT_MODEL = 'gpt-5.2'
+const EXECUTE_CODE_TOOL_NAME = 'ExecuteCode'
+const RUNME_PUBLIC_DOCS_DRIVE_FOLDER_URL =
+  'https://drive.google.com/drive/folders/1Qdg_VA4ZBlOKojJqW2CqSVuJ2p2I4yS5'
+const CODE_MODE_INSTRUCTIONS = [
+  'You are embedded in the Runme app ChatKit panel. When the user asks "What is Runme?" or asks about "Runme", assume they mean this app unless they say otherwise.',
+  'For high-level Runme questions, give a concise product overview, list key features (open notebooks from local files/Google Drive, execute notebook cells, share notebooks with collaborators), and explain core concepts such as notebooks, runners, and agent harnesses. Mention that Runme public docs are available in this Google Drive folder and users can add that folder in Explorer to browse docs in-app: ' +
+    RUNME_PUBLIC_DOCS_DRIVE_FOLDER_URL +
+    '. Ask whether they want you to check if the docs folder is mounted and help mount it if needed.',
+  'You are operating a Runme notebook through a single tool: ExecuteCode.',
+  'ExecuteCode runs JavaScript in sandboxed AppKernel and exposes helpers: runme, notebooks, and help.',
+  'Sandbox ExecuteCode does not expose explorer/drive helpers directly. If the user asks you to mount a Google Drive docs folder (or another Explorer-only operation), do not claim you completed the mount from sandbox. Instead, use ExecuteCode to append a browser JavaScript cell to the current notebook via notebooks.get() + notebooks.update(), then call notebooks.get({ handle: result.handle }) to verify the new cell exists, report the new cell refId, and tell the user to click Run on that cell manually. Example cell source: console.log(explorer.mountDrive("' +
+    RUNME_PUBLIC_DOCS_DRIVE_FOLDER_URL +
+    '")); console.log(explorer.listFolders()); Set the inserted cell languageId to "javascript" and include metadata { "runme.dev/runnerName": "appkernel-js" } so it runs in browser AppKernel.',
+  'Always await helper calls before reading or logging their results: await runme.getCurrentNotebook(), await runme.help(), await notebooks.help(...), await notebooks.list(...), await notebooks.get(...), await notebooks.update(...), await notebooks.execute(...). If console.log(...) prints {} for one of these helpers, you probably forgot await.',
+  'When you need notebook API details, inspect the runtime contract with await help(), await notebooks.help(), or await notebooks.help("update" | "get" | "execute").',
+  'notebooks.get(target?) returns { summary, handle, notebook }. If target is omitted, it returns the notebook currently selected in the UI. Read cell arrays from doc.notebook.cells, not doc.cells. notebooks.list(query?) returns NotebookSummary[]. notebooks.update({ target, expectedRevision?, operations }) and notebooks.execute({ target, refIds }) require an explicit target.',
+  'For notebook edits, first call const doc = await notebooks.get(); const cells = doc.notebook.cells ?? []; then call await notebooks.update({ target: { handle: doc.handle }, expectedRevision: doc.handle.revision, operations: [...] }). Re-read with await notebooks.get({ handle: result.handle }) after mutations when you need to report the final notebook state.',
+  'Supported notebooks.update operations are op="insert" with at={ index | beforeRefId | afterRefId } and cells=[{ kind, languageId?, value?, metadata? }], op="update" with refId and patch={ value?, languageId?, metadata?, outputs? }, and op="remove" with refIds=[...]. To append a cell, use at: { index: -1 }. To prepend, use at: { index: 0 }.',
+  'Notebook execution and cell outputs are binary payloads in cell.outputs[*].items[*].data. Decode stdout/stderr with new TextDecoder().decode(item.data) and filter by mime "application/vnd.code.notebook.stdout" or "application/vnd.code.notebook.stderr". Do not expect a direct item.text field.',
+  'Do not use JSON Patch style mutations such as { op: "add", path: "/cells/-", value: ... }. Do not construct raw protobuf cells with $typeName, numeric kind values, or numeric role values. Let notebooks.update create/normalize cells from the SDK-shaped insert/update payload.',
+  'Use notebooks.list(...) to enumerate open notebooks, notebooks.get(target?) to inspect notebook contents, notebooks.update({ target, ... }) to modify notebook cells, and notebooks.execute({ target, refIds }) only when execution is explicitly requested.',
+  'Use console.log for concise progress/output and prefer small, deterministic code snippets.',
+].join('\n')
+
+/** Describes the ExecuteCode function tool: strict schema, one required string `code`. */
+function buildCodeModeToolDefinition(): JsonRecord {
+  const parameters: JsonRecord = {
+    type: 'object',
+    additionalProperties: false,
+    properties: {
+      code: { type: 'string' },
+    },
+    required: ['code'],
+  }
+  return {
+    type: 'function',
+    name: EXECUTE_CODE_TOOL_NAME,
+    description:
+      'Execute JavaScript in AppKernel and return one merged stdout/stderr output string.',
+    strict: true,
+    parameters,
+  }
+}
+
+/** Tool list: ExecuteCode always; file_search (max 5 results) only when vector stores are given. */
+function buildResponsesTools(vectorStores: string[]): JsonRecord[] {
+  const codeTool = buildCodeModeToolDefinition()
+  if (vectorStores.length === 0) return [codeTool]
+  const fileSearch: JsonRecord = {
+    type: 'file_search',
+    max_num_results: 5,
+    vector_store_ids: vectorStores,
+  }
+  return [codeTool, fileSearch]
+}
 
 function resolveResponsesApiUrl(responsesApiBaseUrl: string): string {
-  const normalized = responsesApiBaseUrl.trim().replace(/\/+$/, "");
+  const normalized = responsesApiBaseUrl.trim().replace(/\/+$/, '')
   if (!normalized) {
-    return DEFAULT_OPENAI_RESPONSES_URL;
+    return DEFAULT_OPENAI_RESPONSES_URL
   }
   try {
-    const url = new URL(normalized);
-    if (!url.pathname || url.pathname === "/") {
-      url.pathname = "/v1/responses";
-      url.search = "";
-      url.hash = "";
+    const url = new URL(normalized)
+    if (!url.pathname || url.pathname === '/') {
+      url.pathname = '/v1/responses'
+      url.search = ''
+      url.hash = ''
     }
-    return url.toString();
+    return url.toString()
   } catch (error) {
-    appLogger.warn("Invalid responses-direct baseUrl override; using OpenAI default", {
-      attrs: {
-        scope: "chatkit.responses_direct",
-        baseUrl: normalized,
-        error: String(error),
-      },
-    });
-    return DEFAULT_OPENAI_RESPONSES_URL;
+    appLogger.warn(
+      'Invalid responses-direct baseUrl override; using OpenAI default',
+      {
+        attrs: {
+          scope: 'chatkit.responses_direct',
+          baseUrl: normalized,
+          error: String(error),
+        },
+      }
+    )
+    return DEFAULT_OPENAI_RESPONSES_URL
   }
 }
 
-async function resolveBody(input: RequestInfo | URL, init?: RequestInit): Promise<BodyInit | null | undefined> {
+async function resolveBody(
+  input: RequestInfo | URL,
+  init?: RequestInit
+): Promise<BodyInit | null | undefined> {
   if (init?.body != null) {
-    return init.body;
+    return init.body
   }
   if (!(input instanceof Request)) {
-    return init?.body;
+    return init?.body
   }
-  const clone = input.clone();
-  const contentType = clone.headers.get("content-type")?.toLowerCase() ?? "";
-  if (contentType.includes("multipart/form-data")) {
+  const clone = input.clone()
+  const contentType = clone.headers.get('content-type')?.toLowerCase() ?? ''
+  if (contentType.includes('multipart/form-data')) {
     try {
-      return await clone.formData();
+      return await clone.formData()
     } catch {
-      return null;
+      return null
     }
   }
-  if (contentType.includes("application/x-www-form-urlencoded")) {
+  if (contentType.includes('application/x-www-form-urlencoded')) {
     try {
-      return new URLSearchParams(await clone.text());
+      return new URLSearchParams(await clone.text())
     } catch {
-      return null;
+      return null
     }
   }
   try {
-    return await clone.text();
+    return await clone.text()
   } catch {
-    return null;
+    return null
   }
 }
 
-async function readBody(input: RequestInfo | URL, init?: RequestInit): Promise<BodyPayload> {
-  const body = await resolveBody(input, init);
-  if (typeof body === "string") {
+async function readBody(
+  input: RequestInfo | URL,
+  init?: RequestInit
+): Promise<BodyPayload> {
+  const body = await resolveBody(input, init)
+  if (typeof body === 'string') {
     try {
-      const parsed = JSON.parse(body) as JsonRecord;
-      return { raw: parsed, json: parsed };
+      const parsed = JSON.parse(body) as JsonRecord
+      return { raw: parsed, json: parsed }
     } catch {
-      return { raw: body, json: {} };
+      return { raw: body, json: {} }
     }
   }
   if (body instanceof FormData) {
-    const json: JsonRecord = {};
+    const json: JsonRecord = {}
     body.forEach((value, key) => {
-      if (typeof value === "string") {
+      if (typeof value === 'string') {
         try {
-          json[key] = JSON.parse(value);
+          json[key] = JSON.parse(value)
         } catch {
-          json[key] = value;
+          json[key] = value
         }
       }
-    });
-    return { raw: json, json };
+    })
+    return { raw: json, json }
   }
   if (body instanceof URLSearchParams) {
-    const json: JsonRecord = {};
+    const json: JsonRecord = {}
     body.forEach((value, key) => {
-      json[key] = value;
-    });
-    return { raw: json, json };
+      json[key] = value
+    })
+    return { raw: json, json }
   }
-  return { raw: null, json: {} };
+  return { raw: null, json: {} }
 }
 
 function asRecord(value: unknown): JsonRecord {
-  if (!value || typeof value !== "object" || Array.isArray(value)) {
-    return {};
+  if (!value || typeof value !== 'object' || Array.isArray(value)) {
+    return {}
   }
-  return value as JsonRecord;
+  return value as JsonRecord
 }
 
 function asString(value: unknown): string | undefined {
-  return typeof value === "string" && value.trim().length > 0 ? value : undefined;
+  return typeof value === 'string' && value.trim().length > 0
+    ? value
+    : undefined
 }
 
 function randomId(prefix: string): string {
-  if (typeof crypto !== "undefined" && typeof crypto.randomUUID === "function") {
-    return `${prefix}_${crypto.randomUUID()}`;
+  if (
+    typeof crypto !== 'undefined' &&
+    typeof crypto.randomUUID === 'function'
+  ) {
+    return `${prefix}_${crypto.randomUUID()}`
   }
-  return `${prefix}_${Math.random().toString(36).slice(2, 12)}`;
+  return `${prefix}_${Math.random().toString(36).slice(2, 12)}`
 }
 
 function jsonResponse(payload: unknown): Response {
   return new Response(JSON.stringify(payload), {
     status: 200,
     headers: {
-      "content-type": "application/json",
+      'content-type': 'application/json',
     },
-  });
+  })
 }
 
 function getPayloadRecord(payload: JsonRecord): JsonRecord {
   const params =
-    payload.params && typeof payload.params === "object" && !Array.isArray(payload.params)
+    payload.params &&
+    typeof payload.params === 'object' &&
+    !Array.isArray(payload.params)
       ? (payload.params as JsonRecord)
-      : null;
-  return params ?? payload;
+      : null
+  return params ?? payload
 }
 
 function extractInput(payload: JsonRecord): {
-  text: string;
-  model: string;
+  text: string
+  model: string
 } {
-  const source = getPayloadRecord(payload);
-  const inputRecord = asRecord(source.input);
-  const content = Array.isArray(inputRecord.content) ? inputRecord.content : [];
+  const source = getPayloadRecord(payload)
+  const inputRecord = asRecord(source.input)
+  const content = Array.isArray(inputRecord.content) ? inputRecord.content : []
   const text = content
     .map((item) => {
-      const part = asRecord(item);
-      return asString(part.text) ?? asString(part.value) ?? "";
+      const part = asRecord(item)
+      return asString(part.text) ?? asString(part.value) ?? ''
     })
-    .join("")
-    .trim();
-  const inference = asRecord(inputRecord.inference_options);
-  const model = asString(inference.model) ?? DEFAULT_MODEL;
-  return { text, model };
+    .join('')
+    .trim()
+  const inference = asRecord(inputRecord.inference_options)
+  const model = asString(inference.model) ?? DEFAULT_MODEL
+  return { text, model }
 }
 
 function toOutputString(value: unknown): string {
-  if (typeof value === "string") {
-    return value;
+  if (typeof value === 'string') {
+    return value
   }
   if (value == null) {
-    return "";
+    return ''
   }
   try {
-    return JSON.stringify(value);
+    return JSON.stringify(value)
   } catch {
-    return String(value);
+    return String(value)
   }
 }
 
 function extractToolOutput(payload: JsonRecord): {
-  callId: string;
-  previousResponseId: string;
-  output: string;
+  callId: string
+  previousResponseId: string
+  output: string
 } {
-  const source = getPayloadRecord(payload);
-  const result = asRecord(source.result);
-  const callId = asString(result.callId) ?? asString(result.call_id) ?? "";
+  const source = getPayloadRecord(payload)
+  const result = asRecord(source.result)
+  const callId = asString(result.callId) ?? asString(result.call_id) ?? ''
   const previousResponseId =
-    asString(result.previousResponseId) ?? asString(result.previous_response_id) ?? "";
-  const clientError = asString(result.clientError) ?? asString(result.client_error) ?? "";
-  const outputValue = result.output ?? result.result;
+    asString(result.previousResponseId) ??
+    asString(result.previous_response_id) ??
+    ''
+  const clientError =
+    asString(result.clientError) ?? asString(result.client_error) ?? ''
+  const outputValue = result.output ?? result.result
   const output =
-    clientError.length > 0 ? `Tool execution failed: ${clientError}` : toOutputString(outputValue);
-  return { callId, previousResponseId, output };
+    clientError.length > 0
+      ? `Tool execution failed: ${clientError}`
+      : toOutputString(outputValue)
+  return { callId, previousResponseId, output }
 }
 
 function readThreadId(payload: JsonRecord): string {
-  const source = getPayloadRecord(payload);
+  const source = getPayloadRecord(payload)
   return (
     asString(source.id) ??
     asString(source.thread_id) ??
@@ -205,519 +281,548 @@ function readThreadId(payload: JsonRecord): string {
     asString(payload.id) ??
     asString(payload.thread_id) ??
     asString(payload.threadId) ??
-    ""
-  );
+    ''
+  )
 }
 
 function buildThreadDetail(thread: StoredThread): ChatKitThreadDetail {
   const messages = {
     data: thread.items,
     has_more: false,
-  };
+  }
   return {
     id: thread.id,
     title: thread.title,
     created_at: thread.createdAt,
     updated_at: thread.updatedAt,
-    status: { type: "active" },
+    status: { type: 'active' },
     metadata: {},
     items: messages,
     messages,
-  };
+  }
 }
 
 function withUpdatedThreadTitle(thread: StoredThread, text: string): void {
-  if (thread.title !== "New conversation") {
-    return;
+  if (thread.title !== 'New conversation') {
+    return
   }
-  const trimmed = text.trim();
+  const trimmed = text.trim()
   if (!trimmed) {
-    return;
+    return
   }
-  const shortened = trimmed.slice(0, 80);
-  thread.title = shortened;
+  const shortened = trimmed.slice(0, 80)
+  thread.title = shortened
 }
 
 async function resolveResponsesDirectHeaders(): Promise<Headers> {
-  const config = responsesDirectConfigManager.getSnapshot();
+  const config = responsesDirectConfigManager.getSnapshot()
   const headers = new Headers({
-    "content-type": "application/json",
-  });
+    'content-type': 'application/json',
+  })
 
-  if (config.authMethod === "api_key") {
-    const apiKey = config.apiKey.trim();
+  if (config.authMethod === 'api_key') {
+    const apiKey = config.apiKey.trim()
     if (!apiKey) {
       throw new Error(
-        "Direct Responses API key auth selected but no key is configured. Run app.responsesDirect.setAPIKey(...).",
-      );
+        'Direct Responses API key auth selected but no key is configured. Run app.responsesDirect.setAPIKey(...).'
+      )
     }
-    headers.set("Authorization", `Bearer ${apiKey}`);
-    return headers;
+    headers.set('Authorization', `Bearer ${apiKey}`)
+    return headers
   }
 
-  const oauthToken = (await getAccessToken()).trim();
+  const oauthToken = (await getAccessToken()).trim()
   if (!oauthToken) {
-    throw new Error("Direct Responses OAuth requires ChatGPT sign-in.");
+    throw new Error('Direct Responses OAuth requires ChatGPT sign-in.')
   }
   if (!config.openaiOrganization) {
     throw new Error(
-      "Direct Responses OAuth requires OpenAI organization. Set agent.openai.organization in app-configs.yaml or use app.responsesDirect.setOpenAIOrganization(...).",
-    );
+      'Direct Responses OAuth requires OpenAI organization. Set agent.openai.organization in app-configs.yaml or use app.responsesDirect.setOpenAIOrganization(...).'
+    )
   }
   if (!config.openaiProject) {
     throw new Error(
-      "Direct Responses OAuth requires OpenAI project. Set agent.openai.project in app-configs.yaml or use app.responsesDirect.setOpenAIProject(...).",
-    );
+      'Direct Responses OAuth requires OpenAI project. Set agent.openai.project in app-configs.yaml or use app.responsesDirect.setOpenAIProject(...).'
+    )
   }
 
-  headers.set("Authorization", `Bearer ${oauthToken}`);
-  headers.set("OpenAI-Organization", config.openaiOrganization);
-  headers.set("OpenAI-Project", config.openaiProject);
-  return headers;
+  headers.set('Authorization', `Bearer ${oauthToken}`)
+  headers.set('OpenAI-Organization', config.openaiOrganization)
+  headers.set('OpenAI-Project', config.openaiProject)
+  return headers
 }
 
 function buildOpenAIResponsesRequestForInput(options: {
-  text: string;
-  model: string;
-  previousResponseId?: string;
-  vectorStores: string[];
+  text: string
+  model: string
+  previousResponseId?: string
+  vectorStores: string[]
 }): JsonRecord {
   const payload: JsonRecord = {
     model: options.model || DEFAULT_MODEL,
     stream: true,
+    instructions: CODE_MODE_INSTRUCTIONS,
     input: [
       {
-        role: "user",
+        role: 'user',
         content: [
           {
-            type: "input_text",
+            type: 'input_text',
             text: options.text,
           },
         ],
       },
     ],
     parallel_tool_calls: false,
-  };
+  }
 
   if (options.previousResponseId) {
-    payload.previous_response_id = options.previousResponseId;
+    payload.previous_response_id = options.previousResponseId
   }
 
-  if (options.vectorStores.length > 0) {
-    payload.tools = [
-      {
-        type: "file_search",
-        max_num_results: 5,
-        vector_store_ids: options.vectorStores,
-      },
-    ];
-  }
+  payload.tools = buildResponsesTools(options.vectorStores)
 
-  return payload;
+  return payload
 }
 
 function buildOpenAIResponsesRequestForToolOutput(options: {
-  callId: string;
-  output: string;
-  model: string;
-  previousResponseId?: string;
-  vectorStores: string[];
+  callId: string
+  output: string
+  model: string
+  previousResponseId?: string
+  vectorStores: string[]
 }): JsonRecord {
   const payload: JsonRecord = {
     model: options.model || DEFAULT_MODEL,
     stream: true,
+    instructions: CODE_MODE_INSTRUCTIONS,
     input: [
       {
-        type: "function_call_output",
+        type: 'function_call_output',
         call_id: options.callId,
         output: options.output,
       },
     ],
     parallel_tool_calls: false,
-  };
+  }
 
   if (options.previousResponseId) {
-    payload.previous_response_id = options.previousResponseId;
+    payload.previous_response_id = options.previousResponseId
   }
 
-  if (options.vectorStores.length > 0) {
-    payload.tools = [
-      {
-        type: "file_search",
-        max_num_results: 5,
-        vector_store_ids: options.vectorStores,
-      },
-    ];
-  }
+  payload.tools = buildResponsesTools(options.vectorStores)
 
-  return payload;
+  return payload
 }
 
 async function readErrorBody(response: Response): Promise<string> {
   try {
-    const text = await response.text();
-    return text || `${response.status} ${response.statusText}`;
+    const text = await response.text()
+    return text || `${response.status} ${response.statusText}`
   } catch {
-    return `${response.status} ${response.statusText}`;
+    return `${response.status} ${response.statusText}`
   }
 }
 
 async function consumeSSE(
   response: Response,
   onEvent: (event: JsonRecord) => void,
-  signal?: AbortSignal | null,
+  signal?: AbortSignal | null
 ): Promise<void> {
   if (!response.body) {
-    throw new Error("Responses API returned no stream body");
+    throw new Error('Responses API returned no stream body')
   }
-  const reader = response.body.getReader();
-  const decoder = new TextDecoder();
-  let buffer = "";
+  const reader = response.body.getReader()
+  const decoder = new TextDecoder()
+  let buffer = ''
 
   while (true) {
     if (signal?.aborted) {
-      throw new Error(String(signal.reason ?? "Request aborted"));
+      throw new Error(String(signal.reason ?? 'Request aborted'))
     }
-    const { done, value } = await reader.read();
+    const { done, value } = await reader.read()
     if (done) {
-      break;
+      break
     }
-    buffer += decoder.decode(value, { stream: true });
-    let boundary = buffer.indexOf("\n\n");
+    buffer += decoder.decode(value, { stream: true })
+    let boundary = buffer.indexOf('\n\n')
     while (boundary !== -1) {
-      const block = buffer.slice(0, boundary);
-      buffer = buffer.slice(boundary + 2);
-      const lines = block.split("\n");
+      const block = buffer.slice(0, boundary)
+      buffer = buffer.slice(boundary + 2)
+      const lines = block.split('\n')
       const dataLines = lines
         .map((line) => line.trim())
-        .filter((line) => line.startsWith("data:"))
+        .filter((line) => line.startsWith('data:'))
         .map((line) => line.slice(5).trim())
-        .filter((line) => line.length > 0);
+        .filter((line) => line.length > 0)
       if (dataLines.length > 0) {
-        const data = dataLines.join("\n");
-        if (data !== "[DONE]") {
+        const data = dataLines.join('\n')
+        if (data !== '[DONE]') {
           try {
-            onEvent(JSON.parse(data) as JsonRecord);
+            onEvent(JSON.parse(data) as JsonRecord)
           } catch (error) {
-            appLogger.warn("Failed to parse OpenAI responses SSE event", {
+            appLogger.warn('Failed to parse OpenAI responses SSE event', {
               attrs: {
-                scope: "chatkit.responses_direct",
+                scope: 'chatkit.responses_direct',
                 error: String(error),
                 payload: data,
               },
-            });
+            })
           }
         }
       }
-      boundary = buffer.indexOf("\n\n");
+      boundary = buffer.indexOf('\n\n')
     }
   }
 }
 
 function buildStreamResponse(
   producer: (sink: { emit: (payload: unknown) => void }) => Promise<void>,
-  options?: { signal?: AbortSignal | null },
+  options?: { signal?: AbortSignal | null }
 ): Response {
-  const encoder = new TextEncoder();
+  const encoder = new TextEncoder()
   const stream = new ReadableStream<Uint8Array>({
     start(controller) {
-      let closed = false;
+      let closed = false
       const emit = (payload: unknown) => {
         if (closed) {
-          return;
+          return
         }
-        controller.enqueue(encoder.encode(`data: ${JSON.stringify(payload)}\n\n`));
-      };
+        controller.enqueue(
+          encoder.encode(`data: ${JSON.stringify(payload)}\n\n`)
+        )
+      }
       const close = () => {
         if (closed) {
-          return;
+          return
         }
-        closed = true;
-        controller.close();
-      };
+        closed = true
+        controller.close()
+      }
 
       if (options?.signal?.aborted) {
         emit({
-          type: "response.failed",
-          error: { message: String(options.signal.reason ?? "Request aborted") },
-        });
-        close();
-        return;
+          type: 'response.failed',
+          error: {
+            message: String(options.signal.reason ?? 'Request aborted'),
+          },
+        })
+        close()
+        return
       }
 
       if (options?.signal) {
         options.signal.addEventListener(
-          "abort",
+          'abort',
           () => {
             if (!closed) {
               emit({
-                type: "response.failed",
-                error: { message: String(options.signal?.reason ?? "Request aborted") },
-              });
+                type: 'response.failed',
+                error: {
+                  message: String(options.signal?.reason ?? 'Request aborted'),
+                },
+              })
             }
-            close();
+            close()
           },
-          { once: true },
-        );
+          { once: true }
+        )
       }
 
       void producer({ emit })
         .catch((error) => {
           emit({
-            type: "response.failed",
+            type: 'response.failed',
             error: { message: String(error) },
-          });
-          appLogger.error("Direct Responses stream producer failed", {
+          })
+          appLogger.error('Direct Responses stream producer failed', {
             attrs: {
-              scope: "chatkit.responses_direct",
+              scope: 'chatkit.responses_direct',
               error: String(error),
             },
-          });
+          })
         })
         .finally(() => {
-          close();
-        });
+          close()
+        })
     },
-  });
+  })
 
   return new Response(stream, {
     status: 200,
     headers: {
-      "content-type": "text/event-stream",
-      "cache-control": "no-cache",
-      connection: "keep-alive",
+      'content-type': 'text/event-stream',
+      'cache-control': 'no-cache',
+      connection: 'keep-alive',
     },
-  });
+  })
 }
 
 export function createResponsesDirectChatkitFetch(options?: {
-  responsesApiBaseUrl?: string;
+  responsesApiBaseUrl?: string
 }): typeof fetch {
-  const responsesApiUrl = resolveResponsesApiUrl(options?.responsesApiBaseUrl ?? "");
-  const threads = new Map<string, StoredThread>();
+  const responsesApiUrl = resolveResponsesApiUrl(
+    options?.responsesApiBaseUrl ?? ''
+  )
+  const threads = new Map<string, StoredThread>()
 
   const ensureThread = (threadId?: string): StoredThread => {
-    const normalized = threadId?.trim() ?? "";
+    const normalized = threadId?.trim() ?? ''
     if (normalized && threads.has(normalized)) {
-      return threads.get(normalized)!;
+      return threads.get(normalized)!
     }
-    const now = new Date().toISOString();
+    const now = new Date().toISOString()
     const created: StoredThread = {
-      id: normalized || randomId("thread"),
-      title: "New conversation",
+      id: normalized || randomId('thread'),
+      title: 'New conversation',
       createdAt: now,
       updatedAt: now,
       model: DEFAULT_MODEL,
       items: [],
-    };
-    threads.set(created.id, created);
-    return created;
-  };
+    }
+    threads.set(created.id, created)
+    return created
+  }
 
   const streamOpenAI = async (options: {
-    thread: StoredThread;
-    responsesApiUrl: string;
-    requestPayload: JsonRecord;
-    emit: (payload: unknown) => void;
-    signal?: AbortSignal | null;
+    thread: StoredThread
+    responsesApiUrl: string
+    requestPayload: JsonRecord
+    emit: (payload: unknown) => void
+    signal?: AbortSignal | null
   }): Promise<void> => {
-    const headers = await resolveResponsesDirectHeaders();
+    const headers = await resolveResponsesDirectHeaders()
     const response = await fetch(options.responsesApiUrl, {
-      method: "POST",
+      method: 'POST',
       headers,
       body: JSON.stringify(options.requestPayload),
       signal: options.signal ?? undefined,
-    });
+    })
     if (!response.ok) {
-      throw new Error(await readErrorBody(response));
+      throw new Error(await readErrorBody(response))
     }
 
-    const assistantTextByItem = new Map<string, string>();
+    const assistantTextByItem = new Map<string, string>()
+    const toolNameByItem = new Map<string, string>()
+    const toolCallIdByItem = new Map<string, string>()
 
     await consumeSSE(
       response,
       (event) => {
-        const type = asString(event.type) ?? "";
+        const type = asString(event.type) ?? ''
         if (!type) {
-          return;
+          return
         }
         switch (type) {
-          case "response.created": {
-            const responseRecord = asRecord(event.response);
-            const responseId = asString(responseRecord.id);
+          case 'response.created': {
+            const responseRecord = asRecord(event.response)
+            const responseId = asString(responseRecord.id)
             if (responseId) {
-              options.thread.previousResponseId = responseId;
-              options.thread.updatedAt = new Date().toISOString();
+              options.thread.previousResponseId = responseId
+              options.thread.updatedAt = new Date().toISOString()
               options.emit({
-                type: "aisre.chatkit.state",
+                type: 'aisre.chatkit.state',
                 item: {
                   state: {
                     threadId: options.thread.id,
                     previousResponseId: responseId,
                   },
                 },
-              });
+              })
             }
-            return;
+            return
           }
-          case "response.output_item.added": {
-            const item = asRecord(event.item);
-            if (asString(item.type) !== "message") {
-              return;
+          case 'response.output_item.added': {
+            const item = asRecord(event.item)
+            if (asString(item.type) === 'function_call') {
+              const itemId = asString(item.id)
+              const toolName = asString(item.name)
+              const toolCallId = asString(item.call_id)
+              if (itemId && toolName) {
+                toolNameByItem.set(itemId, toolName)
+              }
+              if (itemId && toolCallId) {
+                toolCallIdByItem.set(itemId, toolCallId)
+              }
+              return
             }
-            const itemId = asString(item.id) ?? randomId("assistant");
-            assistantTextByItem.set(itemId, "");
+            if (asString(item.type) !== 'message') {
+              return
+            }
+            const itemId = asString(item.id) ?? randomId('assistant')
+            assistantTextByItem.set(itemId, '')
             options.emit({
-              type: "thread.item.added",
+              type: 'thread.item.added',
               item: {
                 id: itemId,
-                type: "assistant_message",
+                type: 'assistant_message',
                 thread_id: options.thread.id,
                 created_at: new Date().toISOString(),
-                status: "in_progress",
+                status: 'in_progress',
                 content: [],
               },
-            });
+            })
             options.emit({
-              type: "thread.item.updated",
+              type: 'thread.item.updated',
               item_id: itemId,
               update: {
-                type: "assistant_message.content_part.added",
+                type: 'assistant_message.content_part.added',
                 content_index: 0,
                 content: {
-                  type: "output_text",
-                  text: "",
+                  type: 'output_text',
+                  text: '',
                   annotations: [],
                 },
               },
-            });
-            return;
+            })
+            return
           }
-          case "response.output_text.delta": {
-            const itemId = asString(event.item_id);
-            const delta = asString(event.delta) ?? "";
+          case 'response.output_text.delta': {
+            const itemId = asString(event.item_id)
+            const delta = asString(event.delta) ?? ''
             if (!itemId || !delta) {
-              return;
+              return
             }
-            assistantTextByItem.set(itemId, `${assistantTextByItem.get(itemId) ?? ""}${delta}`);
+            assistantTextByItem.set(
+              itemId,
+              `${assistantTextByItem.get(itemId) ?? ''}${delta}`
+            )
             options.emit({
-              type: "thread.item.updated",
+              type: 'thread.item.updated',
               item_id: itemId,
               update: {
-                type: "assistant_message.content_part.text_delta",
+                type: 'assistant_message.content_part.text_delta',
                 content_index: 0,
                 delta,
               },
-            });
-            return;
+            })
+            return
           }
-          case "response.output_item.done": {
-            const item = asRecord(event.item);
-            if (asString(item.type) !== "message") {
-              return;
+          case 'response.output_item.done': {
+            const item = asRecord(event.item)
+            if (asString(item.type) !== 'message') {
+              return
             }
-            const itemId = asString(item.id) ?? randomId("assistant");
-            const parts = Array.isArray(item.content) ? item.content : [];
+            const itemId = asString(item.id) ?? randomId('assistant')
+            const parts = Array.isArray(item.content) ? item.content : []
             const textFromDone = parts
-              .map((part) => asString(asRecord(part).text) ?? "")
-              .join("");
-            const finalText = textFromDone || assistantTextByItem.get(itemId) || "";
+              .map((part) => asString(asRecord(part).text) ?? '')
+              .join('')
+            const finalText =
+              textFromDone || assistantTextByItem.get(itemId) || ''
             options.emit({
-              type: "thread.item.updated",
+              type: 'thread.item.updated',
               item_id: itemId,
               update: {
-                type: "assistant_message.content_part.done",
+                type: 'assistant_message.content_part.done',
                 content_index: 0,
                 content: {
-                  type: "output_text",
+                  type: 'output_text',
                   text: finalText,
                   annotations: [],
                 },
               },
-            });
+            })
             const assistantItem: JsonRecord = {
               id: itemId,
-              type: "assistant_message",
+              type: 'assistant_message',
               thread_id: options.thread.id,
               created_at: new Date().toISOString(),
-              status: "completed",
+              status: 'completed',
               content: [
                 {
-                  type: "output_text",
+                  type: 'output_text',
                   text: finalText,
                   annotations: [],
                 },
               ],
-            };
-            options.thread.items.push(assistantItem);
-            options.thread.updatedAt = new Date().toISOString();
-            return;
+            }
+            options.thread.items.push(assistantItem)
+            options.thread.updatedAt = new Date().toISOString()
+            return
           }
-          case "response.function_call_arguments.done": {
-            const callId = asString(event.call_id) ?? "";
-            const itemId = asString(event.item_id) ?? randomId("tool");
-            const name = asString(event.name) ?? "unknown_tool";
-            let argumentsObject: unknown = {};
-            const argumentsRaw = asString(event.arguments) ?? "{}";
+          case 'response.function_call_arguments.done': {
+            const itemId = asString(event.item_id) ?? randomId('tool')
+            const fallbackCallId = itemId.startsWith('call_')
+              ? itemId
+              : undefined
+            const callId =
+              asString(event.call_id) ??
+              asString(asRecord(event.item).call_id) ??
+              toolCallIdByItem.get(itemId) ??
+              fallbackCallId
+            const name =
+              asString(event.name) ??
+              toolNameByItem.get(itemId) ??
+              asString(asRecord(event.item).name) ??
+              EXECUTE_CODE_TOOL_NAME
+            let argumentsObject: JsonRecord = {}
+            const argumentsRaw = asString(event.arguments) ?? '{}'
             try {
-              argumentsObject = JSON.parse(argumentsRaw);
+              argumentsObject = asRecord(JSON.parse(argumentsRaw))
             } catch {
-              argumentsObject = {};
+              argumentsObject = {}
+            }
+            argumentsObject.call_id = callId
+            if (options.thread.previousResponseId) {
+              argumentsObject.previous_response_id =
+                options.thread.previousResponseId
             }
             const toolItem: JsonRecord = {
               id: itemId,
-              type: "client_tool_call",
+              type: 'client_tool_call',
               thread_id: options.thread.id,
               created_at: new Date().toISOString(),
-              status: "pending",
+              status: 'pending',
               call_id: callId,
               name,
               arguments: argumentsObject,
-            };
-            options.thread.items.push(toolItem);
-            options.thread.updatedAt = new Date().toISOString();
+            }
+            options.thread.items.push(toolItem)
+            options.thread.updatedAt = new Date().toISOString()
             options.emit({
-              type: "thread.item.done",
+              type: 'thread.item.done',
               item: toolItem,
-            });
-            return;
+            })
+            return
           }
-          case "response.completed": {
+          case 'response.completed': {
             const endOfTurn: JsonRecord = {
-              id: randomId("end"),
-              type: "end_of_turn",
+              id: randomId('end'),
+              type: 'end_of_turn',
               thread_id: options.thread.id,
               created_at: new Date().toISOString(),
-            };
-            options.thread.items.push(endOfTurn);
-            options.thread.updatedAt = new Date().toISOString();
+            }
+            options.thread.items.push(endOfTurn)
+            options.thread.updatedAt = new Date().toISOString()
             options.emit({
-              type: "thread.item.done",
+              type: 'thread.item.done',
               item: endOfTurn,
-            });
-            return;
+            })
+            return
           }
           default:
-            return;
+            return
         }
       },
-      options.signal,
-    );
-  };
+      options.signal
+    )
+  }
 
   return async (input: RequestInfo | URL, init?: RequestInit) => {
-    const { json } = await readBody(input, init);
-    const requestType = asString(getPayloadRecord(json).type) ?? asString(json.type) ?? "";
-    appLogger.info("Direct Responses ChatKit fetch request", {
+    const { json } = await readBody(input, init)
+    const requestType =
+      asString(getPayloadRecord(json).type) ?? asString(json.type) ?? ''
+    appLogger.info('Direct Responses ChatKit fetch request', {
       attrs: {
-        scope: "chatkit.responses_direct",
+        scope: 'chatkit.responses_direct',
         responsesApiUrl,
         requestType,
         payload: json,
       },
-    });
+    })
 
-    if (requestType === "threads.list") {
+    if (requestType === 'threads.list') {
       const payload = {
         data: [...threads.values()].map((thread) => ({
           id: thread.id,
@@ -725,55 +830,60 @@ export function createResponsesDirectChatkitFetch(options?: {
           updated_at: thread.updatedAt,
         })),
         has_more: false,
-      };
-      return jsonResponse(payload);
+      }
+      return jsonResponse(payload)
     }
 
-    if (requestType === "threads.get_by_id" || requestType === "threads.get") {
-      const threadId = readThreadId(json);
-      const thread = threadId ? threads.get(threadId) : undefined;
+    if (requestType === 'threads.get_by_id' || requestType === 'threads.get') {
+      const threadId = readThreadId(json)
+      const thread = threadId ? threads.get(threadId) : undefined
       if (!thread) {
-        return new Response(JSON.stringify({ error: "thread_not_found" }), {
+        return new Response(JSON.stringify({ error: 'thread_not_found' }), {
           status: 404,
-          headers: { "content-type": "application/json" },
-        });
+          headers: { 'content-type': 'application/json' },
+        })
       }
-      return jsonResponse(buildThreadDetail(thread));
+      return jsonResponse(buildThreadDetail(thread))
     }
 
-    if (requestType === "items.list" || requestType === "messages.list") {
-      const threadId = readThreadId(json);
-      const thread = threadId ? threads.get(threadId) : undefined;
+    if (requestType === 'items.list' || requestType === 'messages.list') {
+      const threadId = readThreadId(json)
+      const thread = threadId ? threads.get(threadId) : undefined
       return jsonResponse({
         data: thread?.items ?? [],
         has_more: false,
-      });
+      })
     }
 
-    if (requestType === "threads.create" || requestType === "threads.add_user_message") {
-      const payload = getPayloadRecord(json);
-      const { text, model } = extractInput(json);
+    if (
+      requestType === 'threads.create' ||
+      requestType === 'threads.add_user_message'
+    ) {
+      const payload = getPayloadRecord(json)
+      const { text, model } = extractInput(json)
       if (!text) {
         return jsonResponse({
           data: null,
-          error: "missing_user_input",
-        });
+          error: 'missing_user_input',
+        })
       }
 
       const requestedThreadId =
-        requestType === "threads.add_user_message" ? readThreadId(json) : undefined;
-      const thread = ensureThread(requestedThreadId);
-      thread.model = model || thread.model || DEFAULT_MODEL;
-      withUpdatedThreadTitle(thread, text);
+        requestType === 'threads.add_user_message'
+          ? readThreadId(json)
+          : undefined
+      const thread = ensureThread(requestedThreadId)
+      thread.model = model || thread.model || DEFAULT_MODEL
+      withUpdatedThreadTitle(thread, text)
 
       const userItem: JsonRecord = {
-        id: randomId("msg"),
-        type: "user_message",
+        id: randomId('msg'),
+        type: 'user_message',
         thread_id: thread.id,
         created_at: new Date().toISOString(),
         content: [
           {
-            type: "input_text",
+            type: 'input_text',
             text,
           },
         ],
@@ -782,69 +892,71 @@ export function createResponsesDirectChatkitFetch(options?: {
         inference_options: {
           model: model || DEFAULT_MODEL,
         },
-      };
-      thread.items.push(userItem);
-      thread.updatedAt = new Date().toISOString();
+      }
+      thread.items.push(userItem)
+      thread.updatedAt = new Date().toISOString()
 
-      const vectorStores = responsesDirectConfigManager.getSnapshot().vectorStores;
+      const vectorStores =
+        responsesDirectConfigManager.getSnapshot().vectorStores
       const requestPayload = buildOpenAIResponsesRequestForInput({
         text,
         model: model || thread.model || DEFAULT_MODEL,
         previousResponseId: thread.previousResponseId,
         vectorStores,
-      });
+      })
 
       return buildStreamResponse(
         async (sink) => {
-          if (requestType === "threads.create") {
+          if (requestType === 'threads.create') {
             sink.emit({
-              type: "thread.created",
+              type: 'thread.created',
               thread: {
                 id: thread.id,
                 title: thread.title,
                 created_at: thread.createdAt,
               },
-            });
+            })
           }
           sink.emit({
-            type: "thread.item.added",
+            type: 'thread.item.added',
             item: userItem,
-          });
+          })
           sink.emit({
-            type: "thread.item.done",
+            type: 'thread.item.done',
             item: userItem,
-          });
+          })
           await streamOpenAI({
             thread,
             responsesApiUrl,
             requestPayload,
             emit: sink.emit,
             signal: init?.signal ?? null,
-          });
+          })
         },
-        { signal: init?.signal ?? null },
-      );
+        { signal: init?.signal ?? null }
+      )
     }
 
-    if (requestType === "threads.add_client_tool_output") {
-      const threadId = readThreadId(json);
+    if (requestType === 'threads.add_client_tool_output') {
+      const threadId = readThreadId(json)
       if (!threadId) {
-        return jsonResponse({ data: null, error: "thread_id_required" });
+        return jsonResponse({ data: null, error: 'thread_id_required' })
       }
-      const thread = ensureThread(threadId);
-      const { callId, previousResponseId, output } = extractToolOutput(json);
+      const thread = ensureThread(threadId)
+      const { callId, previousResponseId, output } = extractToolOutput(json)
       if (!callId) {
-        return jsonResponse({ data: null, error: "call_id_required" });
+        return jsonResponse({ data: null, error: 'call_id_required' })
       }
 
-      const vectorStores = responsesDirectConfigManager.getSnapshot().vectorStores;
+      const vectorStores =
+        responsesDirectConfigManager.getSnapshot().vectorStores
       const requestPayload = buildOpenAIResponsesRequestForToolOutput({
         callId,
         output,
         model: thread.model || DEFAULT_MODEL,
         previousResponseId: previousResponseId || thread.previousResponseId,
         vectorStores,
-      });
+      })
 
       return buildStreamResponse(
         async (sink) => {
@@ -854,17 +966,17 @@ export function createResponsesDirectChatkitFetch(options?: {
             requestPayload,
             emit: sink.emit,
             signal: init?.signal ?? null,
-          });
+          })
         },
-        { signal: init?.signal ?? null },
-      );
+        { signal: init?.signal ?? null }
+      )
     }
 
     return jsonResponse({
       data: null,
       error: requestType
         ? `unsupported_responses_direct_request:${requestType}`
-        : "unsupported_responses_direct_request:missing_type",
-    });
-  };
+        : 'unsupported_responses_direct_request:missing_type',
+    })
+  }
 }
diff --git a/app/src/lib/runtime/runmeConsole.test.ts b/app/src/lib/runtime/runmeConsole.test.ts
index f7293ba2..3a119010 100644
--- a/app/src/lib/runtime/runmeConsole.test.ts
+++ b/app/src/lib/runtime/runmeConsole.test.ts
@@ -11,7 +11,7 @@ import {
 } from "./runmeConsole";
 
 type FakeCellRunner = {
-  run: () => void;
+  run: () => void | Promise<void>;
   getRunID: () => string;
   calls: number;
 };
@@ -359,6 +359,28 @@ describe("createNotebooksApi", () => {
     expect(document.notebook.cells[0]?.refId).toBe("cell-a");
   });
 
+  it("uses the current notebook when notebooks.get omits target", async () => {
+    const notebook = create(parser_pb.NotebookSchema, {
+      cells: [codeCell("cell-a", "echo a")],
+    });
+    const current = new FakeNotebookData("local://current", "Current", notebook);
+    const resolveNotebook = vi.fn((target?: unknown) => {
+      if (target === undefined) {
+        return current;
+      }
+      return null;
+    });
+    const api = createNotebooksApi({
+      resolveNotebook,
+      listNotebooks: () => [current],
+    });
+
+    const document = await api.get();
+
+    expect(document.handle.uri).toBe("local://current");
+    expect(resolveNotebook).toHaveBeenCalledWith();
+  });
+
   it("lists notebooks with query filters", async () => {
     const a = new FakeNotebookData(
       "local://one",
@@ -391,6 +413,7 @@ describe("createNotebooksApi", () => {
     });
 
     const updated = await api.update({
+      target: { uri: "local://one" },
       operations: [
         {
           op: "insert",
@@ -406,6 +429,7 @@ describe("createNotebooksApi", () => {
         ?.refId ?? "";
 
     const afterPatch = await api.update({
+      target: { handle: updated.handle },
       operations: [
         {
           op: "update",
@@ -418,8 +442,135 @@ describe("createNotebooksApi", () => {
     expect(inserted?.value).toContain("updated");
 
     const afterRemove = await api.update({
+      target: { handle: afterPatch.handle },
       operations: [{ op: "remove", refIds: [insertedRefId] }],
     });
     expect(afterRemove.notebook.cells.find((cell) => cell.refId === insertedRefId)).toBeUndefined();
   });
+
+  it("rejects notebooks.update without an explicit target", async () => {
+    const notebook = create(parser_pb.NotebookSchema, {
+      cells: [codeCell("cell-a", "echo a")],
+    });
+    const model = new FakeNotebookData("local://one", "One", notebook);
+    const api = createNotebooksApi({
+      resolveNotebook: () => model,
+      listNotebooks: () => [model],
+    });
+
+    await expect(
+      api.update({
+        operations: [{ op: "remove", refIds: ["cell-a"] }],
+      }),
+    ).rejects.toThrow(
+      "notebooks.update requires an explicit target notebook.",
+    );
+  });
+
+  it("rejects string notebook targets with an actionable error", async () => {
+    const notebook = create(parser_pb.NotebookSchema, {
+      cells: [codeCell("cell-a", "echo a")],
+    });
+    const model = new FakeNotebookData("local://one", "One", notebook);
+    const api = createNotebooksApi({
+      resolveNotebook: () => model,
+      listNotebooks: () => [model],
+    });
+
+    await expect(api.get("local://one" as any)).rejects.toThrow(
+      'Use target: { uri: "local://..." } or target: { handle: { uri: "local://...", revision: "..." } }.',
+    );
+  });
+
+  it("rejects unsupported notebooks.update operations with a concrete insert example", async () => {
+    const notebook = create(parser_pb.NotebookSchema, {
+      cells: [codeCell("cell-a", "echo a")],
+    });
+    const model = new FakeNotebookData("local://one", "One", notebook);
+    const api = createNotebooksApi({
+      resolveNotebook: () => model,
+      listNotebooks: () => [model],
+    });
+
+    await expect(
+      api.update({
+        target: { uri: "local://one" },
+        operations: [{ op: "add", path: "/cells/-", value: {} } as any],
+      }),
+    ).rejects.toThrow(
+      'Supported ops are "insert", "update", and "remove". To append a cell, use operations: [{ op: "insert", at: { index: -1 }, cells: [{ kind: "code", languageId: "python", value: "print(\\"hello\\")" }] }].',
+    );
+  });
+
+  it("rejects non-array notebooks.update operations", async () => {
+    const notebook = create(parser_pb.NotebookSchema, {
+      cells: [codeCell("cell-a", "echo a")],
+    });
+    const model = new FakeNotebookData("local://one", "One", notebook);
+    const api = createNotebooksApi({
+      resolveNotebook: () => model,
+      listNotebooks: () => [model],
+    });
+
+    await expect(
+      api.update({
+        target: { uri: "local://one" },
+        operations: { op: "insert" } as any,
+      }),
+    ).rejects.toThrow(
+      'Invalid notebooks.update operations: expected an array of notebook mutations',
+    );
+  });
+
+  it("awaits asynchronous cell execution in notebooks.execute", async () => {
+    const notebook = create(parser_pb.NotebookSchema, {
+      cells: [codeCell("cell-a", "echo a")],
+    });
+    const model = new FakeNotebookData("local://one", "One", notebook);
+    const runner = model.getCell("cell-a");
+    if (!runner) {
+      throw new Error("expected runner for cell-a");
+    }
+
+    let completed = false;
+    runner.run = async () => {
+      runner.calls += 1;
+      await new Promise<void>((resolve) => {
+        setTimeout(resolve, 10);
+      });
+      completed = true;
+    };
+
+    const api = createNotebooksApi({
+      resolveNotebook: () => model,
+      listNotebooks: () => [model],
+    });
+
+    await api.execute({
+      target: { uri: "local://one" },
+      refIds: ["cell-a"],
+    });
+
+    expect(completed).toBe(true);
+    expect(runner.calls).toBe(1);
+  });
+
+  it("rejects notebooks.execute without an explicit target", async () => {
+    const notebook = create(parser_pb.NotebookSchema, {
+      cells: [codeCell("cell-a", "echo a")],
+    });
+    const model = new FakeNotebookData("local://one", "One", notebook);
+    const api = createNotebooksApi({
+      resolveNotebook: () => model,
+      listNotebooks: () => [model],
+    });
+
+    await expect(
+      api.execute({
+        refIds: ["cell-a"],
+      }),
+    ).rejects.toThrow(
+      "notebooks.execute requires an explicit target notebook.",
+    );
+  });
 });
diff --git a/app/src/lib/runtime/runmeConsole.ts b/app/src/lib/runtime/runmeConsole.ts
index cb90c1be..8ac2a155 100644
--- a/app/src/lib/runtime/runmeConsole.ts
+++ b/app/src/lib/runtime/runmeConsole.ts
@@ -4,7 +4,7 @@ import md5 from "md5";
 import { RunmeMetadataKey, parser_pb } from "../../runme/client";
 
 type CellRunnerLike = {
-  run: () => void;
+  run: () => void | Promise<void>;
   getRunID: () => string;
 };
 
@@ -121,6 +121,15 @@ export type RunmeConsoleApi = {
   help: () => string;
 };
 
+function isPromiseLike(value: unknown): value is PromiseLike<unknown> {
+  return (
+    typeof value === "object" &&
+    value !== null &&
+    "then" in value &&
+    typeof (value as { then?: unknown }).then === "function"
+  );
+}
+
 function inferNotebookSource(uri: string): NotebookSummary["source"] {
   const normalized = (uri ?? "").toLowerCase();
   if (normalized.startsWith("local://")) {
@@ -165,9 +174,15 @@ function makeDocument(notebook: NotebookDataLike): NotebookDocument {
 }
 
 function resolveTargetUri(target?: NotebookTarget): string | null {
-  if (!target || typeof target !== "object") {
+  if (target === undefined) {
     return null;
   }
+  if (!target || typeof target !== "object") {
+    throw new Error(
+      `Invalid notebook target ${JSON.stringify(target)}. ` +
+        `Use target: { uri: "local://..." } or target: { handle: { uri: "local://...", revision: "..." } }.`,
+    );
+  }
   if ("uri" in target && typeof target.uri === "string" && target.uri.trim() !== "") {
     return target.uri.trim();
   }
@@ -175,11 +190,35 @@ function resolveTargetUri(target?: NotebookTarget): string | null {
     "handle" in target &&
     target.handle &&
     typeof target.handle.uri === "string" &&
-    target.handle.uri.trim() !== ""
+      target.handle.uri.trim() !== ""
   ) {
     return target.handle.uri.trim();
   }
-  return null;
+  throw new Error(
+    `Invalid notebook target ${JSON.stringify(target)}. ` +
+      `Use target: { uri: "local://..." } or target: { handle: { uri: "local://...", revision: "..." } }.`,
+  );
+}
+
+function formatMissingTargetError(method: "update" | "delete" | "execute"): string {
+  if (method === "update") {
+    return (
+      "notebooks.update requires an explicit target notebook. " +
+      "Pass target: { handle: doc.handle } after const doc = await notebooks.get(), " +
+      'or target: { uri: "local://..." }.'
+    );
+  }
+  if (method === "execute") {
+    return (
+      "notebooks.execute requires an explicit target notebook. " +
+      "Pass target: { handle: doc.handle } after const doc = await notebooks.get(), " +
+      'or target: { uri: "local://..." }.'
+    );
+  }
+  return (
+    "notebooks.delete requires an explicit target notebook. " +
+    'Pass target: { uri: "local://..." } or target: { handle: { uri: "local://...", revision: "..." } }.'
+  );
 }
 
 function resolveInsertIndex(
@@ -314,6 +353,19 @@ function updateCellPatch(
   notebook.updateCell(updated);
 }
 
+function formatNotebookMutationError(index: number, operation: unknown): string {
+  const op =
+    operation && typeof operation === "object" && "op" in operation
+      ? JSON.stringify((operation as { op?: unknown }).op)
+      : JSON.stringify(operation);
+  return (
+    `Unsupported notebooks.update operation at operations[${index}]: ${op}. ` +
+    `Supported ops are "insert", "update", and "remove". ` +
+    `To append a cell, use ` +
+    `operations: [{ op: "insert", at: { index: -1 }, cells: [{ kind: "code", languageId: "python", value: "print(\\"hello\\")" }] }].`
+  );
+}
+
 export function createNotebooksApi({
   resolveNotebook,
   listNotebooks,
@@ -330,6 +382,16 @@ export function createNotebooksApi({
     return resolved;
   };
 
+  const resolveNotebookByRequiredTarget = (
+    method: "update" | "delete" | "execute",
+    target?: NotebookTarget,
+  ): NotebookDataLike => {
+    if (target === undefined) {
+      throw new Error(formatMissingTargetError(method));
+    }
+    return resolveNotebookByTarget(target);
+  };
+
   const listKnownNotebooks = (): NotebookDataLike[] => {
     const listed = listNotebooks?.() ?? [];
     if (listed.length > 0) {
@@ -344,24 +406,24 @@ export function createNotebooksApi({
       return "notebooks.list(query?: { openOnly?: boolean; uriPrefix?: string; nameContains?: string; limit?: number }): Promise<NotebookSummary[]>";
     }
     if (topic === "get") {
-      return "notebooks.get(target?: { uri } | { handle: { uri, revision } }): Promise<NotebookDocument>";
+      return "notebooks.get(target?: { uri } | { handle: { uri, revision } }): Promise<NotebookDocument>. When target is omitted, returns the current notebook selected in the UI.";
     }
     if (topic === "update") {
-      return "notebooks.update({ target?, expectedRevision?, operations: NotebookMutation[] }): Promise<NotebookDocument>";
+      return "notebooks.update({ target, expectedRevision?, operations: NotebookMutation[] }): Promise<NotebookDocument>. target is required.";
     }
     if (topic === "delete") {
-      return "notebooks.delete(target): Promise<void>";
+      return "notebooks.delete(target): Promise<void>. target is required.";
     }
     if (topic === "execute") {
-      return "notebooks.execute({ target?, refIds: string[] }): Promise<{ handle, cells }>";
+      return "notebooks.execute({ target, refIds: string[] }): Promise<{ handle, cells }>. target is required.";
     }
     return [
       "Notebook SDK methods:",
       "- notebooks.list(query?)",
-      "- notebooks.get(target?)",
-      "- notebooks.update({ target?, expectedRevision?, operations })",
+      "- notebooks.get(target?)              # omitted target = current UI notebook",
+      "- notebooks.update({ target, expectedRevision?, operations })",
       "- notebooks.delete(target)",
-      "- notebooks.execute({ target?, refIds })",
+      "- notebooks.execute({ target, refIds })",
       "- notebooks.help(topic?)",
     ].join("\n");
   };
@@ -393,7 +455,7 @@ export function createNotebooksApi({
       return makeDocument(notebook);
     },
     update: async (args) => {
-      const notebook = resolveNotebookByTarget(args.target);
+      const notebook = resolveNotebookByRequiredTarget("update", args.target);
       const beforeHandle = makeHandle(notebook);
       if (
         args.expectedRevision &&
@@ -405,7 +467,16 @@ export function createNotebooksApi({
         );
       }
 
-      for (const operation of args.operations ?? []) {
+      const operations = args.operations ?? [];
+      if (!Array.isArray(operations)) {
+        throw new Error(
+          `Invalid notebooks.update operations: expected an array of notebook mutations, got ${JSON.stringify(
+            operations,
+          )}.`,
+        );
+      }
+
+      for (const [index, operation] of operations.entries()) {
         if (operation.op === "insert") {
           insertCells(notebook, operation.at, operation.cells);
           continue;
@@ -421,23 +492,29 @@ export function createNotebooksApi({
           for (const refId of operation.refIds ?? []) {
             notebook.removeCell(refId);
           }
+          continue;
         }
+        throw new Error(formatNotebookMutationError(index, operation));
       }
 
       return makeDocument(notebook);
     },
     delete: async (_target: NotebookTarget) => {
+      resolveNotebookByRequiredTarget("delete", _target);
       throw new Error("notebooks.delete is not supported in v0 runtime.");
     },
     execute: async (args) => {
-      const notebook = resolveNotebookByTarget(args.target);
+      const notebook = resolveNotebookByRequiredTarget("execute", args.target);
       const executedCells: parser_pb.Cell[] = [];
       for (const refId of args.refIds ?? []) {
         const cellRunner = notebook.getCell(refId);
         if (!cellRunner) {
           throw new Error(`Cell not found: ${refId}`);
         }
-        cellRunner.run();
+        const runResult = cellRunner.run();
+        if (isPromiseLike(runResult)) {
+          await runResult;
+        }
         const cell = notebook.getNotebook().cells.find((candidate) => candidate.refId === refId);
         if (cell) {
           executedCells.push(cell);
diff --git a/app/src/lib/runtime/sandboxJsKernel.ts b/app/src/lib/runtime/sandboxJsKernel.ts
index 523ff873..8e622415 100644
--- a/app/src/lib/runtime/sandboxJsKernel.ts
+++ b/app/src/lib/runtime/sandboxJsKernel.ts
@@ -1,4 +1,5 @@
 import { appLogger } from "../logging/runtime";
+import { SANDBOX_NOTEBOOKS_API_METHODS } from "./notebooksApiBridge";
 
 type KernelHooks = {
   onStdout?: (data: string) => void;
@@ -89,14 +90,16 @@ const SANDBOX_SRC_DOC = `<!doctype html>
           help: () => hostCall("runme.help", []),
         };
 
-        const notebooks = {
-          help: (topic) => hostCall("notebooks.help", [topic]),
-          list: (query) => hostCall("notebooks.list", [query]),
-          get: (target) => hostCall("notebooks.get", [target]),
-          update: (args) => hostCall("notebooks.update", [args]),
-          delete: (target) => hostCall("notebooks.delete", [target]),
-          execute: (args) => hostCall("notebooks.execute", [args]),
-        };
+        const createSandboxNotebooksApiClient = (callHost) => ({
+          help: (topic) => callHost("notebooks.help", [topic]),
+          list: (query) => callHost("notebooks.list", [query]),
+          get: (target) => callHost("notebooks.get", [target]),
+          update: (args) => callHost("notebooks.update", [args]),
+          delete: (target) => callHost("notebooks.delete", [target]),
+          execute: (args) => callHost("notebooks.execute", [args]),
+        });
+
+        const notebooks = createSandboxNotebooksApiClient(hostCall);
 
         const help = () => {
           consoleProxy.log("Sandbox JS helpers:");
@@ -108,9 +111,9 @@ const SANDBOX_SRC_DOC = `<!doctype html>
           consoleProxy.log("- runme.help()");
           consoleProxy.log("- notebooks.help([topic])");
           consoleProxy.log("- notebooks.list([query])");
-          consoleProxy.log("- notebooks.get([target])");
-          consoleProxy.log("- notebooks.update({ target?, expectedRevision?, operations })");
-          consoleProxy.log("- notebooks.execute({ target?, refIds })");
+          consoleProxy.log("- notebooks.get([target]) # omitted target = current UI notebook");
+          consoleProxy.log("- notebooks.update({ target, expectedRevision?, operations })");
+          consoleProxy.log("- notebooks.execute({ target, refIds })");
           consoleProxy.log("- help()");
         };
 
@@ -199,12 +202,7 @@ export class SandboxJSKernel {
       "runme.rerun",
       "runme.getCurrentNotebook",
       "runme.help",
-      "notebooks.help",
-      "notebooks.list",
-      "notebooks.get",
-      "notebooks.update",
-      "notebooks.delete",
-      "notebooks.execute",
+      ...SANDBOX_NOTEBOOKS_API_METHODS,
     ],
   }: {
     bridge: SandboxBridge;
diff --git a/docs-dev/chatkit-and-cdp.md b/docs-dev/chatkit-and-cdp.md
new file mode 100644
index 00000000..be1a5df2
--- /dev/null
+++ b/docs-dev/chatkit-and-cdp.md
@@ -0,0 +1,201 @@
+# ChatKit and CDP
+
+## Goal
+
+This note documents how to drive the embedded ChatKit UI in local Chrome
+through Chrome DevTools Protocol (CDP), including:
+
+- how to enter and submit a prompt,
+- how to wait for and read the assistant response.
+
+This is useful when validating notebook-editing behavior end to end against the
+`responses-direct` ChatKit harness.
+
+## Attach to the Correct ChatKit Frame
+
+The ChatKit UI is rendered in a cross-origin iframe inside the
+`<openai-chatkit>` custom element, so parent-page DOM access is blocked.
+
+Use CDP target discovery instead:
+
+1. Read `http://127.0.0.1:9222/json/list` and select the active Runme page
+   target, e.g. the first target with `type === "page"` and
+   `url === "http://localhost:5173/"`.
+2. Connect to the browser websocket from `http://127.0.0.1:9222/json/version`.
+3. Call `Target.getTargets` and select the `iframe` target whose
+   `parentFrameId` matches the active page target id and whose URL contains
+   `cdn.platform.openai.com/deployments/chatkit`.
+4. Connect to that iframe target's `webSocketDebuggerUrl`.
+5. Call `Runtime.enable` on the iframe target connection.
+
+This matters when there are multiple `localhost:5173` tabs open. If you attach
+to the wrong ChatKit iframe target, notebook state can appear empty or stale.
+
+## Send a Prompt
+
+Send prompts by evaluating JavaScript inside the ChatKit iframe target with
+`Runtime.evaluate`.
+
+To enter the prompt text and submit it:
+
+- focus the composer element,
+- set its value through the native textarea/input setter,
+- dispatch bubbling `input` and `change` events,
+- click the enabled button whose `aria-label` contains `Send`.
+
+Composer selector:
+
+```js
+const composer = document.querySelector(
+  '#chatkit-composer-input, textarea, [contenteditable="true"], [role="textbox"]'
+)
+```
+
+Prompt submission snippet:
+
+```js
+(() => {
+  const prompt = 'What is runme?'
+  const composer = document.querySelector(
+    '#chatkit-composer-input, textarea, [contenteditable="true"], [role="textbox"]'
+  )
+  if (!composer) {
+    throw new Error('Composer textbox not found')
+  }
+
+  composer.focus()
+
+  if (composer.tagName === 'TEXTAREA' || composer.tagName === 'INPUT') {
+    const proto =
+      composer.tagName === 'TEXTAREA' ? HTMLTextAreaElement : HTMLInputElement
+    const setter =
+      Object.getOwnPropertyDescriptor(proto.prototype, 'value')?.set
+    if (setter) {
+      setter.call(composer, prompt)
+    } else {
+      composer.value = prompt
+    }
+    composer.dispatchEvent(new Event('input', { bubbles: true }))
+    composer.dispatchEvent(new Event('change', { bubbles: true }))
+  } else {
+    composer.textContent = prompt
+    composer.dispatchEvent(
+      new InputEvent('input', {
+        bubbles: true,
+        inputType: 'insertText',
+        data: prompt,
+      })
+    )
+  }
+
+  const button = Array.from(document.querySelectorAll('button')).find((el) => {
+    const label = (el.getAttribute('aria-label') || '').toLowerCase()
+    const disabled =
+      el.disabled === true || el.getAttribute('aria-disabled') === 'true'
+    return !disabled && label.includes('send')
+  })
+  if (!button) {
+    throw new Error('Send message button not found')
+  }
+  button.click()
+  return true
+})()
+```
+
+Run that script through CDP with:
+
+```js
+await client.send('Runtime.evaluate', {
+  expression,
+  returnByValue: true,
+  awaitPromise: true,
+})
+```
+
+## Start a Fresh Thread
+
+To start from a clean conversation, click the button with
+`aria-label="New chat"`, then poll until `document.querySelectorAll('article')`
+is empty.
+
+```js
+(() => {
+  const button = Array.from(document.querySelectorAll('button')).find(
+    (el) => el.getAttribute('aria-label') === 'New chat'
+  )
+  if (!button) {
+    throw new Error('New chat button not found')
+  }
+  button.click()
+  return true
+})()
+```
+
+## Wait for the Assistant Response
+
+Poll iframe DOM state with `Runtime.evaluate` until:
+
+- there are at least two `article` elements,
+- the newest assistant article has non-empty text,
+- no button with `aria-label` containing `Stop` is present,
+- the tuple `(article count, last assistant text, composer value, stop-button state)`
+  is unchanged for two consecutive polls.
+
+Polling once per second and requiring two consecutive identical snapshots
+worked reliably in local testing.
+
+State probe:
+
+```js
+(() => {
+  const articles = Array.from(document.querySelectorAll('article')).map(
+    (article, index) => ({
+      index,
+      text: (article.innerText || '').trim(),
+    })
+  )
+  const composer = document.querySelector(
+    '#chatkit-composer-input, textarea, [contenteditable="true"], [role="textbox"]'
+  )
+  const stopButton = Array.from(document.querySelectorAll('button')).find((el) =>
+    (el.getAttribute('aria-label') || '').toLowerCase().includes('stop')
+  )
+  return {
+    articles,
+    composerValue: composer
+      ? String(composer.value || composer.textContent || '')
+      : null,
+    hasStopButton: !!stopButton,
+  }
+})()
+```
+
+## Read the AI Response
+
+Chat turns are represented as `article` elements in the iframe DOM.
+
+Extract transcript text with:
+
+```js
+(() => {
+  return Array.from(document.querySelectorAll('article')).map(
+    (article, index) => ({
+      index,
+      text: (article.innerText || '').trim(),
+    })
+  )
+})()
+```
+
+The assistant message is the last article whose text starts with
+`"The assistant said:"`. Strip that prefix if you only need the response body.
+
+## Notes
+
+- Prefer `Runtime.evaluate` against the ChatKit iframe target, not parent-page
+  DOM queries, because the iframe is cross-origin.
+- If multiple Runme tabs are open, always bind the iframe target by
+  `parentFrameId`.
+- If the assistant response looks unrelated to the visible notebook state,
+  suspect that the CDP client is attached to the wrong ChatKit iframe target
+  before debugging application code.
diff --git a/docs-dev/design/0310_appkernel_sandbox.md b/docs-dev/design/0310_appkernel_sandbox.md
index 87629009..eb40c775 100644
--- a/docs-dev/design/0310_appkernel_sandbox.md
+++ b/docs-dev/design/0310_appkernel_sandbox.md
@@ -191,8 +191,11 @@ Design notes:
   (`beforeRefId`/`afterRefId`).
 - SDK owns enum/string normalization so model code does not manually encode
   protobuf details like `CellKind`.
-- When `target` is omitted, calls resolve to a session-pinned default notebook
-  selected at run start (not the live currently selected UI tab).
+- When `target` is omitted on `notebooks.get()`, the call resolves to the
+  notebook that is currently active in the UI at execution time. This is
+  syntactic sugar for the common "inspect the active notebook" case.
+- `notebooks.update(...)`, `notebooks.execute(...)`, and `notebooks.delete(...)`
+  require an explicit `target` so writes and execution remain unambiguous.
 
 ### Layer 1b: Example Snippets
 
@@ -203,7 +206,7 @@ console.log(await notebooks.help("update")); // update(...) signature + examples
 ```
 
 ```ts
-// 1) Get the current (session-pinned) notebook and its contents
+// 1) Get the current notebook selected in the UI and its contents
 const current = await notebooks.get();
 console.log(current.handle.uri, current.handle.revision);
 console.log(current.notebook);
@@ -387,7 +390,8 @@ To fix "wrong notebook updated":
 - Host rejects writes to notebooks outside the session allowlist.
 - Host rejects stale writes when revision no longer matches.
 
-No write operation can implicitly target "current notebook".
+No write or execute operation can implicitly target "current notebook"; only
+`notebooks.get()` supports the omitted-target shorthand.
 
 ## Storage and IndexedDB
 
@@ -572,7 +576,8 @@ Current:
 Planned:
 
 - `HostNotebooksApi.list/get` target resolution uses `NotebookContext` data (or an
-  extracted registry service) so targeting is explicit and tab-safe.
+  extracted registry service). `get()` without a target returns the active UI
+  notebook; all write/execute methods require an explicit target handle or URI.
 - This directly addresses wrong-notebook writes from implicit "current tab"
   behavior.
 
diff --git a/docs-dev/design/20260331_code_mode.md b/docs-dev/design/20260331_code_mode.md
new file mode 100644
index 00000000..9a9ba64b
--- /dev/null
+++ b/docs-dev/design/20260331_code_mode.md
@@ -0,0 +1,263 @@
+# 20260331 Code Mode (Single ExecuteCode Tool)
+
+## Status
+
+Draft proposal.
+
+## Objective
+
+Define a single tool call that allows AI (ChatKit and Codex) to execute
+JavaScript via AppKernel.
+
+Tool name: `ExecuteCode`
+
+- Input: one string argument containing the JS program.
+- Output: one string containing merged stdout/stderr from execution.
+
+## Motivation
+
+Today, notebook manipulation often requires multiple tool calls
+(`list/get/update/execute`-style composition). That is brittle for agent flows
+and increases the contract surface area.
+
+For code mode, we want one primitive:
+
+- model writes JS program,
+- host executes program in AppKernel,
+- model receives one output string.
+
+This aligns with Codex-style workflows where code is the control plane.
+
+## Scope (v0)
+
+- One tool for both ChatKit and Codex: `ExecuteCode`.
+- JavaScript only.
+- Execution environment provides AppKernel helpers (`runme`, `notebooks`).
+- Returned output is a single merged text stream.
+- ChatKit Responses integration is browser-direct only (`responses-direct` harness).
+
+## Non-Goals (v0)
+
+- Multi-language execution.
+- Structured output channels (separate stdout/stderr fields).
+- Replacing all existing notebook tools immediately.
+- Supporting `ExecuteCode` through the Runme `/chatkit` responses proxy path.
+
+## API Contract
+
+### Request
+
+```ts
+type ExecuteCodeRequest = {
+  code: string;
+};
+```
+
+### Response
+
+```ts
+type ExecuteCodeResponse = {
+  output: string; // merged stdout+stderr, execution order preserved
+};
+```
+
+### Error signaling
+
+- Tool invocation failure should use the existing tool failure status/channel.
+- `output` should still contain any emitted text produced before failure.
+
+## Execution Semantics
+
+- Runtime executes the `code` string as AppKernel JavaScript.
+- Stdout and stderr are merged into one ordered stream.
+- Ordering is based on event arrival in host runtime.
+- Output is returned as a single string after execution completes.
+- Recommended hard limits:
+  - execution timeout (for example, 15s in v0),
+  - max output bytes (for example, 256KB in v0) with a truncation suffix.
+
+## Runtime Architecture
+
+Shared executor with two transports:
+
+1. Caller receives `ExecuteCode`.
+2. Caller invokes the shared host `CodeModeExecutor.execute({ code, source })`.
+3. Executor runs AppKernel (sandbox mode by default).
+4. Executor captures merged output stream.
+5. Executor returns `{ output }`.
+
+Transports:
+
+- ChatKit: browser-local Responses tool handling (`responses-direct`).
+- Codex: existing Runme MCP path (`/mcp/notebooks`) and existing codex bridge
+  (`/codex/ws`).
+
+Proposed interface:
+
+```ts
+interface CodeModeExecutor {
+  execute(args: { code: string; source: "chatkit" | "codex" }): Promise<{ output: string }>;
+}
+```
+
+## Tool Definition Distribution (Responses vs Codex)
+
+`ExecuteCode` must be exposed through two integration surfaces:
+
+1. Responses API function tools (ChatKit / responses-direct path).
+2. MCP tools (Codex path).
+
+Proposed delivery model:
+
+- `responses-direct`:
+  - web app sends `ExecuteCode` as a function tool in `responses.create`
+    `tools[]` payload (JSON schema).
+  - web app handles function call locally by invoking `CodeModeExecutor`.
+- `codex`:
+  - Runme keeps the existing MCP endpoint/toolset and adds `ExecuteCode` to it.
+  - Codex discovers it via MCP tool listing and calls it through Runme MCP.
+
+Implication: same logical tool, two wire representations (Responses function
+tool schema and MCP tool schema), generated from one source of truth.
+
+## Runme Server Changes Required
+
+Short answer: yes for Codex, no for responses-direct.
+
+- Codex:
+  - keep existing transport path (`/mcp/notebooks`, `/codex/ws`,
+    `/codex/app-server/ws`).
+  - update proto contract to add `ExecuteCode`.
+  - regenerate MCP tool descriptors from proto.
+  - register the new MCP tool handler in the existing server.
+  - no new endpoint is required.
+- ChatKit `responses-direct`:
+  - no mandatory server change for tool execution, because tool schema and
+    execution can be browser-local.
+- ChatKit `responses` via `/chatkit`:
+  - explicitly out of scope for `ExecuteCode` in this design.
+  - this code-mode routing path should be removed/retired.
+
+## Source of Truth for Tool Specs
+
+Recommendation: use protobuf service definitions as canonical source of truth.
+
+Why:
+
+- existing notebook tool flow already uses generated proto schemas,
+- supports consistent generation for TS, Go, and MCP metadata,
+- reduces schema drift between ChatKit and Codex integrations.
+
+Proposed pattern:
+
+1. Define `ExecuteCode` RPC/message contract in proto.
+2. Generate:
+   - TS message/schema types for web app,
+   - Go types for Runme server bridge payloads,
+   - MCP tool metadata (existing plugin path / generated descriptors),
+   - Responses function JSON schema artifact for browser `responses-direct`.
+3. Wire the generated MCP descriptor into the existing MCP server registration
+   and implement handler dispatch in the existing bridge/service code.
+4. Add a contract test that asserts generated Responses JSON schema and MCP
+   schema remain semantically aligned.
+
+If we do not use proto:
+
+- keep canonical JSON schema files in-repo and generate TS/Go bindings from
+  those; this is viable but higher drift risk and duplicates parts of current
+  proto-based pipeline.
+
+## Environment Exposed to Code
+
+The executed JS program can use:
+
+- `runme` helpers,
+- `notebooks` helpers,
+- other explicitly approved AppKernel globals.
+
+No additional capability should be exposed implicitly by transport.
+
+## Sandboxing and Policy
+
+Default policy for `ExecuteCode` should be sandboxed AppKernel execution.
+
+Host-side policy checks before execution:
+
+- method allowlist (`ExecuteCode` only for code mode),
+- payload size limit on `code`,
+- timeout and output budget,
+- optional per-session/user approval gate.
+
+## Integration Plan
+
+1. Add `ExecuteCode` to canonical tool contract source (proto recommended).
+2. Generate MCP + Responses function schemas from canonical contract.
+3. Register `ExecuteCode` in the existing Runme MCP server on
+   `/mcp/notebooks` (no new endpoint).
+4. Implement shared `CodeModeExecutor` in app runtime.
+5. Bridge Codex `ExecuteCode` tool calls over the existing `/codex/ws` path to
+   browser execution.
+6. Wire ChatKit tool handler to `CodeModeExecutor` (`responses-direct` only).
+7. Keep legacy notebook CRUD tools for compatibility during migration.
+8. Migrate prompts/tool-choice logic to prefer `ExecuteCode`.
+9. Remove/retire code-mode tool routing through Runme `/chatkit` path.
+
+## Implementation Decision
+
+For v0, we will not introduce a new MCP transport or endpoint.
+
+- Reuse current Codex wiring:
+  - tool exposure through existing `/mcp/notebooks`,
+  - tool execution bridge through existing `/codex/ws`.
+- Evolve the proto tool contract to match the JS-facing API:
+  - `ExecuteCodeRequest { code: string }`
+  - `ExecuteCodeResponse { output: string }`
+- Add `ExecuteCode` to MCP tool registration on the existing server.
+
+## Migration Strategy
+
+Phase 0:
+
+- Add `ExecuteCode` without removing existing notebook tools.
+- Use feature flag to enable in ChatKit/Codex sessions.
+
+Phase 1:
+
+- Default model/tooling to `ExecuteCode` for notebook workflows.
+- Keep legacy tools as fallback.
+
+Phase 2:
+
+- Remove or significantly reduce legacy notebook tool surface.
+
+## Test Plan
+
+Unit:
+
+- merges stdout/stderr into one output string in event order,
+- truncates output at configured limit with marker,
+- handles thrown errors and returns partial output.
+
+Integration:
+
+- ChatKit `ExecuteCode` executes and returns output,
+- Codex `ExecuteCode` executes and returns output,
+- executed code can call `notebooks.get()` and read notebook content,
+- sandbox mode denies disallowed host calls.
+
+E2E CUJ:
+
+- prompt -> single `ExecuteCode` call -> notebook mutation/result visible,
+- no multi-tool choreography required.
+
+## Open Questions
+
+- Should the response include `exitCode` in v0, or rely only on tool status + output?
+- Should the tool enforce sandbox-only, or allow an explicit `browser` mode override?
+- Do we need deterministic output tagging (for example `[stderr]`) while still using one string?
+
+## References
+
+- [Issue #154: Improve Codex Manipulation Of Notebooks](https://github.com/runmedev/web/issues/154)
+- [0310 AppKernel Sandbox Design](./0310_appkernel_sandbox.md)
+- [PR #158](https://github.com/runmedev/web/pull/158)