Skip to content

Commit 90ef0e3

Browse files
committed
sync(bfmono): chore(workloops): require chief draft requests to use assistant action decks (+19 more) (bfmono@662ef26c4)
This PR is an automated gambitmono sync of bfmono Gambit packages.

- Source: `packages/gambit/`
- Core: `packages/gambit/packages/gambit-core/`
- bfmono rev: 662ef26c4

Changes:

- 662ef26c4 chore(workloops): require chief draft requests to use assistant action decks
- 4c93fdf01 fix(gambit): route codex action tools through mcp app-server
- ed0a77510 fix(gambit): send codex sandbox policy over app-server
- 0b8f12707 fix(gambit): route codex system prompts through developer instructions
- f25e00a04 fix(bfdesktop): keep chief runtime on container-local codex
- fcc13f6e7 fix(gambit): fail fast when codex app-server exits early
- f11655105 feat(gambit): add codex app-server transport
- 148c26943 test(gambit-core): relax worker async-start cancellation timing
- 510d1dd75 fix(gambit): allow headless repl --message without a tty
- e64dcbacd fix(gambit-core): honor timeoutMs=0 in worker sandbox
- 8a5a19912 fix(gambit-simulator): use publish-safe gambit imports
- 6b73128ca chore(gambit-simulator): align generated artifacts with extracted ui path
- 5ef26bf23 refactor(gambit): extract simulator into sibling package
- d13a6c8df refactor(gambit-core): remove built-in deck and card shims
- e42baa6d3 refactor(bfdesktop): replace codex repair flow with runtime auth state
- f674c499a fix(bfdesktop): allow codex responses websocket destination
- 10d6293ff fix(codex): skip git repo check in auth probe
- b84d951a1 feat(gambit): declare provider auth import sources and sidecar repair state
- 5853271d4 fix(bfdesktop): embed provider manifests for bundled startup
- d49c1802a chore(gambit): finalize provider-local manifest cutover verification

Do not edit this repo directly; make changes in bfmono and re-run the sync.
1 parent 523ed16 commit 90ef0e3

8 files changed

Lines changed: 1213 additions & 914 deletions

File tree

packages/gambit-core/src/runtime.test.ts

Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3333,6 +3333,81 @@ Deno.test("responses mode stores response items and calls responses()", async ()
33333333
assert((updatedState?.messages?.length ?? 0) > 0);
33343334
});
33353335

3336+
// Runs a root deck whose model is "codex-cli/default" and which declares one
// action deck, in responses mode, then checks that the provider was handed no
// `tools` value and that the "model.call" trace event reports zero tools.
Deno.test("responses mode omits plain request.tools for codex-cli decks", async () => {
3337+
const dir = await Deno.makeTempDir();
3338+
const modHref = modImportPath();
3339+
3340+
// Action deck referenced by the root deck below; its run() returns { ok: true }.
const childPath = await writeTempDeck(
3341+
dir,
3342+
"codex_action_child.deck.ts",
3343+
`
3344+
import { defineDeck } from "${modHref}";
3345+
import { z } from "zod";
3346+
export default defineDeck({
3347+
contextSchema: z.object({ label: z.string() }),
3348+
responseSchema: z.object({ ok: z.boolean() }),
3349+
run: () => ({ ok: true }),
3350+
});
3351+
`,
3352+
);
3353+
3354+
// Root deck: codex-cli model plus a single action deck named
// "draft_assistant_task" that points at the child deck written above.
const deckPath = await writeTempDeck(
3355+
dir,
3356+
"codex_actions_root.deck.ts",
3357+
`
3358+
import { defineDeck } from "${modHref}";
3359+
import { z } from "zod";
3360+
export default defineDeck({
3361+
contextSchema: z.string(),
3362+
responseSchema: z.string(),
3363+
modelParams: { model: "codex-cli/default" },
3364+
actionDecks: [{ name: "draft_assistant_task", path: "${childPath}" }],
3365+
});
3366+
`,
3367+
);
3368+
3369+
// Holds whatever `request.tools` the stub provider receives.
let capturedTools: unknown;
3370+
const traces: Array<TraceEvent> = [];
3371+
// Stub provider: records request.tools and replies with a single assistant
// message whose text is "ok"; chat() throws so a chat-mode call fails the test.
const provider: ModelProvider = {
3372+
responses({ request }) {
3373+
capturedTools = request.tools;
3374+
return Promise.resolve({
3375+
id: "resp_codex_tools",
3376+
object: "response",
3377+
output: [{
3378+
type: "message",
3379+
role: "assistant",
3380+
content: [{ type: "output_text", text: "ok" }],
3381+
}],
3382+
});
3383+
},
3384+
chat() {
3385+
throw new Error("chat should not be called in responses mode");
3386+
},
3387+
};
3388+
3389+
const result = await runDeck({
3390+
path: deckPath,
3391+
input: undefined,
3392+
inputProvided: false,
3393+
initialUserMessage: "draft something",
3394+
modelProvider: provider,
3395+
isRoot: true,
3396+
responsesMode: true,
3397+
trace: (event) => traces.push(event),
3398+
});
3399+
3400+
assertEquals(result, "ok");
3401+
// The provider must have seen `tools` as undefined, not an array.
assertEquals(capturedTools, undefined);
3402+
// The recorded model.call trace event must also report an empty tool list.
const modelCall = traces.find((event) => event.type === "model.call") as
3403+
| Extract<TraceEvent, { type: "model.call" }>
3404+
| undefined;
3405+
assert(modelCall);
3406+
assertEquals(modelCall.model, "codex-cli/default");
3407+
assertEquals(modelCall.toolCount, 0);
3408+
assertEquals(modelCall.tools, []);
3409+
});
3410+
33363411
Deno.test(
33373412
"responses mode validates and deterministically canonicalizes declared extension items",
33383413
async () => {

packages/gambit-core/src/runtime.ts

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1064,6 +1064,12 @@ async function resolveModelChoice(args: {
10641064
return { model: args.model, params: args.params };
10651065
}
10661066

1067+
/**
 * Decides whether a model should be handed the plain provider tool array.
 *
 * The model id is trimmed and lower-cased before comparison.
 *
 * @param model - Model identifier, e.g. `"codex-cli/default"`.
 * @returns `false` when the normalized id is exactly `codex-cli` or begins
 *   with `codex-cli/`; `true` for every other id, including an empty or
 *   whitespace-only one.
 */
function shouldExposeProviderToolArray(model: string): boolean {
  const modelId = model.trim().toLowerCase();
  // An empty id never names the codex-cli provider.
  if (modelId.length === 0) return true;
  const isCodexCli = modelId === "codex-cli" ||
    modelId.startsWith("codex-cli/");
  return !isCodexCli;
}
1072+
10671073
function resolveContextSchema(deck: LoadedDeck) {
10681074
return deck.contextSchema ?? deck.inputSchema;
10691075
}
@@ -4530,6 +4536,10 @@ async function runLlmDeck(
45304536
});
45314537
const model = resolved.model;
45324538
const providerParams = resolved.params;
4539+
const providerTools = shouldExposeProviderToolArray(model) ? tools : [];
4540+
const providerResponseTools = providerTools.length > 0
4541+
? providerTools as Array<ResponseToolDefinition>
4542+
: undefined;
45334543

45344544
const stateMessages = ctx.state?.messages?.length;
45354545
ctx.trace?.({
@@ -4540,9 +4550,9 @@ async function runLlmDeck(
45404550
model,
45414551
stream: ctx.stream,
45424552
messageCount: messages.length,
4543-
toolCount: tools.length,
4553+
toolCount: providerTools.length,
45444554
messages: messages.map(sanitizeMessage),
4545-
tools,
4555+
tools: providerTools,
45464556
stateMessages,
45474557
mode: useResponses ? "responses" : "chat",
45484558
responseItems: useResponses
@@ -4566,7 +4576,7 @@ async function runLlmDeck(
45664576
request: {
45674577
model,
45684578
input: responseItems,
4569-
tools: tools as Array<ResponseToolDefinition>,
4579+
tools: providerResponseTools,
45704580
text: responseTextConfig,
45714581
stream: ctx.stream,
45724582
params: providerParams,

src/cli.codex_smoke.test.ts

Lines changed: 68 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -93,23 +93,54 @@ ${body}
9393
async function writeMockCodexBin(dir: string): Promise<{
9494
binPath: string;
9595
argsLogPath: string;
96+
requestLogPath: string;
9697
}> {
9798
const binPath = path.join(dir, "mock-codex.sh");
9899
const argsLogPath = path.join(dir, "codex-args.log");
100+
const requestLogPath = path.join(dir, "codex-requests.log");
99101
const script = `#!/usr/bin/env bash
100102
set -euo pipefail
101103
if [ -z "\${CODEX_ARGS_LOG:-}" ]; then
102104
echo "missing CODEX_ARGS_LOG" >&2
103105
exit 1
104106
fi
107+
if [ -z "\${CODEX_REQUESTS_LOG:-}" ]; then
108+
echo "missing CODEX_REQUESTS_LOG" >&2
109+
exit 1
110+
fi
105111
printf '%s\n' "$@" > "$CODEX_ARGS_LOG"
106-
echo '{"type":"thread.started","thread_id":"thread-smoke"}'
107-
echo '{"type":"item.completed","item":{"id":"msg_1","type":"agent_message","text":"ok"}}'
108-
echo '{"type":"turn.completed","usage":{"input_tokens":1,"output_tokens":1,"total_tokens":2}}'
112+
extract_id() {
113+
printf '%s\\n' "$1" | sed -n 's/.*"id":\\("[^"]*"\\|[0-9][0-9]*\\).*/\\1/p'
114+
}
115+
while IFS= read -r line; do
116+
printf '%s\\n' "$line" >> "$CODEX_REQUESTS_LOG"
117+
case "$line" in
118+
*'"method":"initialize"'*)
119+
id="$(extract_id "$line")"
120+
printf '{"id":%s,"result":{"capabilities":{"experimentalApi":true}}}\\n' "$id"
121+
;;
122+
*'"method":"initialized"'*)
123+
;;
124+
*'"method":"thread/start"'*)
125+
id="$(extract_id "$line")"
126+
printf '{"id":%s,"result":{"thread":{"id":"thread-smoke"}}}\\n' "$id"
127+
;;
128+
*'"method":"thread/resume"'*)
129+
id="$(extract_id "$line")"
130+
printf '{"id":%s,"result":{"thread":{"id":"thread-smoke"}}}\\n' "$id"
131+
;;
132+
*'"method":"turn/start"'*)
133+
id="$(extract_id "$line")"
134+
printf '{"id":%s,"result":{"turn":{"id":"turn-smoke","status":"inProgress","items":[],"error":null}}}\\n' "$id"
135+
printf '{"method":"item/completed","params":{"threadId":"thread-smoke","turnId":"turn-smoke","item":{"type":"agentMessage","id":"msg_1","text":"ok","phase":null,"memoryCitation":null}}}\\n'
136+
printf '{"method":"turn/completed","params":{"threadId":"thread-smoke","turn":{"id":"turn-smoke","status":"completed","items":[],"error":null,"startedAt":0,"completedAt":0,"durationMs":1}}}\\n'
137+
;;
138+
esac
139+
done
109140
`;
110141
await Deno.writeTextFile(binPath, script);
111142
await Deno.chmod(binPath, 0o755);
112-
return { binPath, argsLogPath };
143+
return { binPath, argsLogPath, requestLogPath };
113144
}
114145

115146
async function runCheck(
@@ -187,6 +218,7 @@ async function runDeck(input: {
187218
deckPath: string;
188219
codexBinPath: string;
189220
argsLogPath: string;
221+
requestLogPath: string;
190222
cwd?: string;
191223
command?: "run" | "repl";
192224
extraArgs?: Array<string>;
@@ -195,6 +227,7 @@ async function runDeck(input: {
195227
stdout: string;
196228
stderr: string;
197229
argsLog: string;
230+
requestLog: string;
198231
}> {
199232
const args = await denoRunArgs([
200233
input.command ?? "run",
@@ -210,22 +243,30 @@ async function runDeck(input: {
210243
GAMBIT_CODEX_BIN: input.codexBinPath,
211244
GAMBIT_CODEX_DISABLE_MCP: "1",
212245
CODEX_ARGS_LOG: input.argsLogPath,
246+
CODEX_REQUESTS_LOG: input.requestLogPath,
213247
},
214248
stdout: "piped",
215249
stderr: "piped",
216250
});
217251
const out = await command.output();
218252
let argsLog = "";
253+
let requestLog = "";
219254
try {
220255
argsLog = await Deno.readTextFile(input.argsLogPath);
221256
} catch {
222257
// no-op for failure assertions
223258
}
259+
try {
260+
requestLog = await Deno.readTextFile(input.requestLogPath);
261+
} catch {
262+
// no-op for failure assertions
263+
}
224264
return {
225265
code: out.code,
226266
stdout: new TextDecoder().decode(out.stdout),
227267
stderr: new TextDecoder().decode(out.stderr),
228268
argsLog,
269+
requestLog,
229270
};
230271
}
231272

@@ -330,22 +371,33 @@ Deno.test({
330371
deckPath: defaultDeck,
331372
codexBinPath: mock.binPath,
332373
argsLogPath: mock.argsLogPath,
374+
requestLogPath: mock.requestLogPath,
333375
cwd: dir,
334376
});
335377
assertEquals(
336378
defaultRun.code,
337379
0,
338380
formatCommandDiagnostics("run codex-cli/default", defaultRun),
339381
);
340-
assertEquals(defaultRun.argsLog.includes("\n-m\n"), false);
382+
assertEquals(defaultRun.argsLog.endsWith("\napp-server\n"), true);
341383
assertEquals(defaultRun.argsLog.includes('model_verbosity="high"'), true);
342384
assertEquals(defaultRun.argsLog.includes("project_doc_max_bytes="), false);
343385
assertEquals(
344-
defaultRun.argsLog.includes('instructions="Smoke deck."'),
386+
defaultRun.argsLog.includes('developer_instructions="Smoke deck."'),
345387
true,
346388
);
347389
assertEquals(defaultRun.argsLog.includes("SYSTEM:\n"), false);
348-
assertEquals(defaultRun.argsLog.endsWith("\nhi\n"), true);
390+
assertEquals(
391+
defaultRun.requestLog.includes('"method":"thread/start"'),
392+
true,
393+
);
394+
assertEquals(defaultRun.requestLog.includes('"model":null'), true);
395+
assertEquals(
396+
defaultRun.requestLog.includes(
397+
'"input":[{"type":"text","text":"hi"}]',
398+
),
399+
true,
400+
);
349401

350402
const passthroughDeck = await writeDeck(
351403
dir,
@@ -356,6 +408,7 @@ Deno.test({
356408
deckPath: passthroughDeck,
357409
codexBinPath: mock.binPath,
358410
argsLogPath: mock.argsLogPath,
411+
requestLogPath: mock.requestLogPath,
359412
cwd: dir,
360413
});
361414
assertEquals(
@@ -364,11 +417,15 @@ Deno.test({
364417
formatCommandDiagnostics("run codex-cli/gpt-5.2-codex", passthroughRun),
365418
);
366419
assertEquals(
367-
passthroughRun.argsLog.includes("\n-m\ngpt-5.2-codex\n"),
420+
passthroughRun.argsLog.includes('model_verbosity="high"'),
368421
true,
369422
);
370423
assertEquals(
371-
passthroughRun.argsLog.includes('model_verbosity="high"'),
424+
passthroughRun.requestLog.includes('"method":"thread/start"'),
425+
true,
426+
);
427+
assertEquals(
428+
passthroughRun.requestLog.includes('"model":"gpt-5.2-codex"'),
372429
true,
373430
);
374431

@@ -383,6 +440,7 @@ Deno.test({
383440
deckPath: projectDocDeck,
384441
codexBinPath: mock.binPath,
385442
argsLogPath: mock.argsLogPath,
443+
requestLogPath: mock.requestLogPath,
386444
cwd: dir,
387445
});
388446
assertEquals(
@@ -422,6 +480,7 @@ Deno.test({
422480
deckPath,
423481
codexBinPath: mock.binPath,
424482
argsLogPath: mock.argsLogPath,
483+
requestLogPath: mock.requestLogPath,
425484
cwd: dir,
426485
});
427486
assertEquals(

src/mcp_server.test.ts

Lines changed: 35 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
import { assertEquals } from "@std/assert";
1+
import { assert, assertEquals } from "@std/assert";
22
import * as path from "@std/path";
33
import { handleMcpRequest } from "./mcp_server.ts";
44

@@ -89,6 +89,26 @@ async function withMcpEnvLock<T>(fn: () => Promise<T>): Promise<T> {
8989
}
9090
}
9191

92+
// Sends an `initialize` request declaring protocolVersion "2025-06-18" to
// handleMcpRequest and asserts the response result reports that same version.
leakTolerantTest(
93+
"mcp server negotiates initialize protocol version",
94+
async () => {
95+
const response = await handleMcpRequest({
96+
jsonrpc: "2.0",
97+
id: 0,
98+
method: "initialize",
99+
params: {
100+
protocolVersion: "2025-06-18",
101+
},
102+
});
103+
// View the response through a minimal shape exposing only the field under test.
const result = (response as {
104+
result?: {
105+
protocolVersion?: string;
106+
};
107+
}).result;
108+
assertEquals(result?.protocolVersion, "2025-06-18");
109+
},
110+
);
111+
92112
leakTolerantTest(
93113
"mcp server errors tools/list when root deck env is missing",
94114
async () => {
@@ -132,11 +152,24 @@ leakTolerantTest(
132152
});
133153
const payload = (response as {
134154
result?: {
135-
tools?: Array<{ name: string }>;
155+
tools?: Array<{
156+
name: string;
157+
inputSchema?: {
158+
type?: string;
159+
properties?: Record<string, { type?: string }>;
160+
};
161+
}>;
136162
};
137163
}).result;
138164
const names = (payload?.tools ?? []).map((tool) => tool.name).sort();
139165
assertEquals(names, ["external_only", "lookup"]);
166+
const lookup = (payload?.tools ?? []).find((tool) =>
167+
tool.name === "lookup"
168+
);
169+
assert(lookup);
170+
assert(lookup.inputSchema);
171+
assertEquals(lookup.inputSchema.type, "object");
172+
assertEquals(lookup.inputSchema.properties?.query?.type, "string");
140173
} finally {
141174
if (previous === undefined) {
142175
Deno.env.delete("GAMBIT_MCP_ROOT_DECK_PATH");

0 commit comments

Comments
 (0)