Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions plugins/codex/prompts/adversarial-review.md
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ Actively try to disprove the change.
Look for violated invariants, missing guards, unhandled failure paths, and assumptions that stop being true under stress.
Trace how bad inputs, retries, concurrent actions, or partially completed operations move through the code.
If the user supplied a focus area, weight it heavily, but still report any other material issue you can defend.
{{REVIEW_COLLECTION_GUIDANCE}}
</review_method>

<finding_bar>
Expand Down
1 change: 1 addition & 0 deletions plugins/codex/scripts/codex-companion.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -241,6 +241,7 @@ function buildAdversarialReviewPrompt(context, focusText) {
REVIEW_KIND: "Adversarial Review",
TARGET_LABEL: context.target.label,
USER_FOCUS: focusText || "No extra focus provided.",
REVIEW_COLLECTION_GUIDANCE: context.collectionGuidance,
REVIEW_INPUT: context.content
});
}
Expand Down
186 changes: 155 additions & 31 deletions plugins/codex/scripts/lib/git.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,11 @@ import fs from "node:fs";
import path from "node:path";

import { isProbablyText } from "./fs.mjs";
import { runCommand, runCommandChecked } from "./process.mjs";
import { formatCommandFailure, runCommand, runCommandChecked } from "./process.mjs";

const MAX_UNTRACKED_BYTES = 24 * 1024;
const DEFAULT_INLINE_DIFF_MAX_FILES = 2;
const DEFAULT_INLINE_DIFF_MAX_BYTES = 256 * 1024;

function git(cwd, args, options = {}) {
return runCommand("git", args, { cwd, ...options });
Expand All @@ -14,6 +16,64 @@ function gitChecked(cwd, args, options = {}) {
return runCommandChecked("git", args, { cwd, ...options });
}

function listUniqueFiles(...groups) {
return [...new Set(groups.flat().filter(Boolean))].sort();
}

function normalizeMaxInlineFiles(value) {
const parsed = Number(value);
if (!Number.isFinite(parsed) || parsed < 0) {
return DEFAULT_INLINE_DIFF_MAX_FILES;
}
return Math.floor(parsed);
}

function normalizeMaxInlineDiffBytes(value) {
const parsed = Number(value);
if (!Number.isFinite(parsed) || parsed < 0) {
return DEFAULT_INLINE_DIFF_MAX_BYTES;
}
return Math.floor(parsed);
}

function measureGitOutputBytes(cwd, args, maxBytes) {
const result = git(cwd, args, { maxBuffer: maxBytes + 1 });
if (result.error && /** @type {NodeJS.ErrnoException} */ (result.error).code === "ENOBUFS") {
return maxBytes + 1;
}
if (result.error) {
throw result.error;
}
if (result.status !== 0) {
throw new Error(formatCommandFailure(result));
}
return Buffer.byteLength(result.stdout, "utf8");
}

function measureCombinedGitOutputBytes(cwd, argSets, maxBytes) {
let totalBytes = 0;
for (const args of argSets) {
const remainingBytes = maxBytes - totalBytes;
if (remainingBytes < 0) {
return maxBytes + 1;
}
totalBytes += measureGitOutputBytes(cwd, args, remainingBytes);
if (totalBytes > maxBytes) {
return totalBytes;
}
}
return totalBytes;
}

function buildBranchComparison(cwd, baseRef) {
const mergeBase = gitChecked(cwd, ["merge-base", "HEAD", baseRef]).stdout.trim();
return {
mergeBase,
commitRange: `${mergeBase}..HEAD`,
reviewRange: `${baseRef}...HEAD`
};
}

export function ensureGitRepository(cwd) {
const result = git(cwd, ["rev-parse", "--show-toplevel"]);
const errorCode = result.error && "code" in result.error ? result.error.code : null;
Expand Down Expand Up @@ -161,62 +221,126 @@ function formatUntrackedFile(cwd, relativePath) {
return [`### ${relativePath}`, "```", buffer.toString("utf8").trimEnd(), "```"].join("\n");
}

function collectWorkingTreeContext(cwd, state) {
const status = gitChecked(cwd, ["status", "--short"]).stdout.trim();
const stagedDiff = gitChecked(cwd, ["diff", "--cached", "--binary", "--no-ext-diff", "--submodule=diff"]).stdout;
const unstagedDiff = gitChecked(cwd, ["diff", "--binary", "--no-ext-diff", "--submodule=diff"]).stdout;
const untrackedBody = state.untracked.map((file) => formatUntrackedFile(cwd, file)).join("\n\n");

const parts = [
formatSection("Git Status", status),
formatSection("Staged Diff", stagedDiff),
formatSection("Unstaged Diff", unstagedDiff),
formatSection("Untracked Files", untrackedBody)
];
function collectWorkingTreeContext(cwd, state, options = {}) {
const includeDiff = options.includeDiff !== false;
const status = gitChecked(cwd, ["status", "--short", "--untracked-files=all"]).stdout.trim();
const changedFiles = listUniqueFiles(state.staged, state.unstaged, state.untracked);

let parts;
if (includeDiff) {
const stagedDiff = gitChecked(cwd, ["diff", "--cached", "--binary", "--no-ext-diff", "--submodule=diff"]).stdout;
const unstagedDiff = gitChecked(cwd, ["diff", "--binary", "--no-ext-diff", "--submodule=diff"]).stdout;
const untrackedBody = state.untracked.map((file) => formatUntrackedFile(cwd, file)).join("\n\n");
parts = [
formatSection("Git Status", status),
formatSection("Staged Diff", stagedDiff),
formatSection("Unstaged Diff", unstagedDiff),
formatSection("Untracked Files", untrackedBody)
];
} else {
const stagedStat = gitChecked(cwd, ["diff", "--shortstat", "--cached"]).stdout.trim();
const unstagedStat = gitChecked(cwd, ["diff", "--shortstat"]).stdout.trim();
const untrackedBody = state.untracked.map((file) => formatUntrackedFile(cwd, file)).join("\n\n");
parts = [
formatSection("Git Status", status),
formatSection("Staged Diff Stat", stagedStat),
formatSection("Unstaged Diff Stat", unstagedStat),
formatSection("Changed Files", changedFiles.join("\n")),
formatSection("Untracked Files", untrackedBody)
];
}

return {
mode: "working-tree",
summary: `Reviewing ${state.staged.length} staged, ${state.unstaged.length} unstaged, and ${state.untracked.length} untracked file(s).`,
content: parts.join("\n")
content: parts.join("\n"),
changedFiles
};
}

function collectBranchContext(cwd, baseRef) {
const mergeBase = gitChecked(cwd, ["merge-base", "HEAD", baseRef]).stdout.trim();
const commitRange = `${mergeBase}..HEAD`;
function collectBranchContext(cwd, baseRef, options = {}) {
const includeDiff = options.includeDiff !== false;
const comparison = options.comparison ?? buildBranchComparison(cwd, baseRef);
const currentBranch = getCurrentBranch(cwd);
const logOutput = gitChecked(cwd, ["log", "--oneline", "--decorate", commitRange]).stdout.trim();
const diffStat = gitChecked(cwd, ["diff", "--stat", commitRange]).stdout.trim();
const diff = gitChecked(cwd, ["diff", "--binary", "--no-ext-diff", "--submodule=diff", commitRange]).stdout;
const changedFiles = gitChecked(cwd, ["diff", "--name-only", comparison.commitRange]).stdout.trim().split("\n").filter(Boolean);
const logOutput = gitChecked(cwd, ["log", "--oneline", "--decorate", comparison.commitRange]).stdout.trim();
const diffStat = gitChecked(cwd, ["diff", "--stat", comparison.commitRange]).stdout.trim();

return {
mode: "branch",
summary: `Reviewing branch ${currentBranch} against ${baseRef} from merge-base ${mergeBase}.`,
content: [
formatSection("Commit Log", logOutput),
formatSection("Diff Stat", diffStat),
formatSection("Branch Diff", diff)
].join("\n")
summary: `Reviewing branch ${currentBranch} against ${baseRef} from merge-base ${comparison.mergeBase}.`,
content: includeDiff
? [
formatSection("Commit Log", logOutput),
formatSection("Diff Stat", diffStat),
formatSection(
"Branch Diff",
gitChecked(cwd, ["diff", "--binary", "--no-ext-diff", "--submodule=diff", comparison.commitRange]).stdout
)
].join("\n")
: [
formatSection("Commit Log", logOutput),
formatSection("Diff Stat", diffStat),
formatSection("Changed Files", changedFiles.join("\n"))
].join("\n"),
changedFiles,
comparison
};
}

export function collectReviewContext(cwd, target) {
function buildAdversarialCollectionGuidance(options = {}) {
if (options.includeDiff !== false) {
return "Use the repository context below as primary evidence.";
}

return "The repository context below is a lightweight summary. Inspect the target diff yourself with read-only git commands before finalizing findings.";
}

export function collectReviewContext(cwd, target, options = {}) {
const repoRoot = getRepoRoot(cwd);
const state = getWorkingTreeState(cwd);
const currentBranch = getCurrentBranch(cwd);
const currentBranch = getCurrentBranch(repoRoot);
const maxInlineFiles = normalizeMaxInlineFiles(options.maxInlineFiles);
const maxInlineDiffBytes = normalizeMaxInlineDiffBytes(options.maxInlineDiffBytes);
let details;
let includeDiff;
let diffBytes;

if (target.mode === "working-tree") {
details = collectWorkingTreeContext(repoRoot, state);
const state = getWorkingTreeState(repoRoot);
diffBytes = measureCombinedGitOutputBytes(
repoRoot,
[
["diff", "--cached", "--binary", "--no-ext-diff", "--submodule=diff"],
["diff", "--binary", "--no-ext-diff", "--submodule=diff"]
],
maxInlineDiffBytes
);
includeDiff =
options.includeDiff ??
(listUniqueFiles(state.staged, state.unstaged, state.untracked).length <= maxInlineFiles &&
diffBytes <= maxInlineDiffBytes);
details = collectWorkingTreeContext(repoRoot, state, { includeDiff });
} else {
details = collectBranchContext(repoRoot, target.baseRef);
const comparison = buildBranchComparison(repoRoot, target.baseRef);
const fileCount = gitChecked(repoRoot, ["diff", "--name-only", comparison.commitRange]).stdout.trim().split("\n").filter(Boolean).length;
diffBytes = measureGitOutputBytes(
repoRoot,
["diff", "--binary", "--no-ext-diff", "--submodule=diff", comparison.commitRange],
maxInlineDiffBytes
);
includeDiff = options.includeDiff ?? (fileCount <= maxInlineFiles && diffBytes <= maxInlineDiffBytes);
details = collectBranchContext(repoRoot, target.baseRef, { includeDiff, comparison });
}

return {
cwd: repoRoot,
repoRoot,
branch: currentBranch,
target,
fileCount: details.changedFiles.length,
diffBytes,
inputMode: includeDiff ? "inline-diff" : "self-collect",
collectionGuidance: buildAdversarialCollectionGuidance({ includeDiff }),
...details
};
}
1 change: 1 addition & 0 deletions plugins/codex/scripts/lib/process.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ export function runCommand(command, args = [], options = {}) {
env: options.env,
encoding: "utf8",
input: options.input,
maxBuffer: options.maxBuffer,
stdio: options.stdio ?? "pipe",
shell: process.platform === "win32" ? (process.env.SHELL || true) : false,
windowsHide: true
Expand Down
80 changes: 80 additions & 0 deletions tests/git.test.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,23 @@ test("resolveReviewTarget requires an explicit base when no default branch can b
);
});

test("collectReviewContext keeps inline diffs for tiny adversarial reviews", () => {
const cwd = makeTempDir();
initGitRepo(cwd);
fs.writeFileSync(path.join(cwd, "app.js"), "console.log('v1');\n");
run("git", ["add", "app.js"], { cwd });
run("git", ["commit", "-m", "init"], { cwd });
fs.writeFileSync(path.join(cwd, "app.js"), "console.log('INLINE_MARKER');\n");

const target = resolveReviewTarget(cwd, {});
const context = collectReviewContext(cwd, target);

assert.equal(context.inputMode, "inline-diff");
assert.equal(context.fileCount, 1);
assert.match(context.collectionGuidance, /primary evidence/i);
assert.match(context.content, /INLINE_MARKER/);
});

test("collectReviewContext skips untracked directories in working tree review", () => {
const cwd = makeTempDir();
initGitRepo(cwd);
Expand Down Expand Up @@ -101,3 +118,66 @@ test("collectReviewContext skips broken untracked symlinks instead of crashing",
assert.match(context.content, /### broken-link/);
assert.match(context.content, /skipped: broken symlink or unreadable file/i);
});

test("collectReviewContext falls back to lightweight context for larger adversarial reviews", () => {
const cwd = makeTempDir();
initGitRepo(cwd);
for (const name of ["a.js", "b.js", "c.js"]) {
fs.writeFileSync(path.join(cwd, name), `export const value = "${name}-v1";\n`);
}
run("git", ["add", "a.js", "b.js", "c.js"], { cwd });
run("git", ["commit", "-m", "init"], { cwd });
fs.writeFileSync(path.join(cwd, "a.js"), 'export const value = "SELF_COLLECT_MARKER_A";\n');
fs.writeFileSync(path.join(cwd, "b.js"), 'export const value = "SELF_COLLECT_MARKER_B";\n');
fs.writeFileSync(path.join(cwd, "c.js"), 'export const value = "SELF_COLLECT_MARKER_C";\n');

const target = resolveReviewTarget(cwd, {});
const context = collectReviewContext(cwd, target);

assert.equal(context.inputMode, "self-collect");
assert.equal(context.fileCount, 3);
assert.match(context.collectionGuidance, /lightweight summary/i);
assert.match(context.collectionGuidance, /read-only git commands/i);
assert.doesNotMatch(context.content, /SELF_COLLECT_MARKER_[ABC]/);
assert.match(context.content, /## Changed Files/);
});

test("collectReviewContext falls back to lightweight context for oversized single-file diffs", () => {
const cwd = makeTempDir();
initGitRepo(cwd);
fs.writeFileSync(path.join(cwd, "app.js"), "export const value = 'v1';\n");
run("git", ["add", "app.js"], { cwd });
run("git", ["commit", "-m", "init"], { cwd });
fs.writeFileSync(path.join(cwd, "app.js"), `export const value = '${"x".repeat(512)}';\n`);

const target = resolveReviewTarget(cwd, {});
const context = collectReviewContext(cwd, target, { maxInlineDiffBytes: 128 });

assert.equal(context.fileCount, 1);
assert.equal(context.inputMode, "self-collect");
assert.ok(context.diffBytes > 128);
assert.doesNotMatch(context.content, /xxx/);
assert.match(context.content, /## Changed Files/);
});

test("collectReviewContext keeps untracked file content in lightweight working tree context", () => {
const cwd = makeTempDir();
initGitRepo(cwd);
for (const name of ["a.js", "b.js"]) {
fs.writeFileSync(path.join(cwd, name), `export const value = "${name}-v1";\n`);
}
run("git", ["add", "a.js", "b.js"], { cwd });
run("git", ["commit", "-m", "init"], { cwd });
fs.writeFileSync(path.join(cwd, "a.js"), 'export const value = "TRACKED_MARKER_A";\n');
fs.writeFileSync(path.join(cwd, "b.js"), 'export const value = "TRACKED_MARKER_B";\n');
fs.writeFileSync(path.join(cwd, "new-risk.js"), 'export const value = "UNTRACKED_RISK_MARKER";\n');

const target = resolveReviewTarget(cwd, {});
const context = collectReviewContext(cwd, target);

assert.equal(context.inputMode, "self-collect");
assert.equal(context.fileCount, 3);
assert.doesNotMatch(context.content, /TRACKED_MARKER_[AB]/);
assert.match(context.content, /## Untracked Files/);
assert.match(context.content, /UNTRACKED_RISK_MARKER/);
});
27 changes: 27 additions & 0 deletions tests/runtime.test.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -148,6 +148,33 @@ test("adversarial review accepts the same base-branch targeting as review", () =
assert.match(result.stdout, /Missing empty-state guard/);
});

test("adversarial review asks Codex to inspect larger diffs itself", () => {
const repo = makeTempDir();
const binDir = makeTempDir();
installFakeCodex(binDir);
initGitRepo(repo);
fs.mkdirSync(path.join(repo, "src"));
for (const name of ["a.js", "b.js", "c.js"]) {
fs.writeFileSync(path.join(repo, "src", name), `export const value = "${name}-v1";\n`);
}
run("git", ["add", "src/a.js", "src/b.js", "src/c.js"], { cwd: repo });
run("git", ["commit", "-m", "init"], { cwd: repo });
fs.writeFileSync(path.join(repo, "src", "a.js"), 'export const value = "PROMPT_SELF_COLLECT_A";\n');
fs.writeFileSync(path.join(repo, "src", "b.js"), 'export const value = "PROMPT_SELF_COLLECT_B";\n');
fs.writeFileSync(path.join(repo, "src", "c.js"), 'export const value = "PROMPT_SELF_COLLECT_C";\n');

const result = run("node", [SCRIPT, "adversarial-review"], {
cwd: repo,
env: buildEnv(binDir)
});

assert.equal(result.status, 0, result.stderr);
const state = JSON.parse(fs.readFileSync(path.join(binDir, "fake-codex-state.json"), "utf8"));
assert.match(state.lastTurnStart.prompt, /lightweight summary/i);
assert.match(state.lastTurnStart.prompt, /read-only git commands/i);
assert.doesNotMatch(state.lastTurnStart.prompt, /PROMPT_SELF_COLLECT_[ABC]/);
});

test("review includes reasoning output when the app server returns it", () => {
const repo = makeTempDir();
const binDir = makeTempDir();
Expand Down