diff --git a/plugins/codex/prompts/adversarial-review.md b/plugins/codex/prompts/adversarial-review.md index c8f8123..78668af 100644 --- a/plugins/codex/prompts/adversarial-review.md +++ b/plugins/codex/prompts/adversarial-review.md @@ -32,6 +32,7 @@ Actively try to disprove the change. Look for violated invariants, missing guards, unhandled failure paths, and assumptions that stop being true under stress. Trace how bad inputs, retries, concurrent actions, or partially completed operations move through the code. If the user supplied a focus area, weight it heavily, but still report any other material issue you can defend. +{{REVIEW_COLLECTION_GUIDANCE}} diff --git a/plugins/codex/scripts/codex-companion.mjs b/plugins/codex/scripts/codex-companion.mjs index 201d1c7..f005a8c 100644 --- a/plugins/codex/scripts/codex-companion.mjs +++ b/plugins/codex/scripts/codex-companion.mjs @@ -241,6 +241,7 @@ function buildAdversarialReviewPrompt(context, focusText) { REVIEW_KIND: "Adversarial Review", TARGET_LABEL: context.target.label, USER_FOCUS: focusText || "No extra focus provided.", + REVIEW_COLLECTION_GUIDANCE: context.collectionGuidance, REVIEW_INPUT: context.content }); } diff --git a/plugins/codex/scripts/lib/git.mjs b/plugins/codex/scripts/lib/git.mjs index 6fca213..1749cfc 100644 --- a/plugins/codex/scripts/lib/git.mjs +++ b/plugins/codex/scripts/lib/git.mjs @@ -2,9 +2,11 @@ import fs from "node:fs"; import path from "node:path"; import { isProbablyText } from "./fs.mjs"; -import { runCommand, runCommandChecked } from "./process.mjs"; +import { formatCommandFailure, runCommand, runCommandChecked } from "./process.mjs"; const MAX_UNTRACKED_BYTES = 24 * 1024; +const DEFAULT_INLINE_DIFF_MAX_FILES = 2; +const DEFAULT_INLINE_DIFF_MAX_BYTES = 256 * 1024; function git(cwd, args, options = {}) { return runCommand("git", args, { cwd, ...options }); @@ -14,6 +16,64 @@ function gitChecked(cwd, args, options = {}) { return runCommandChecked("git", args, { cwd, ...options }); } +function listUniqueFiles(...groups) { + return [...new Set(groups.flat().filter(Boolean))].sort(); +} + +function normalizeMaxInlineFiles(value) { + const parsed = Number(value); + if (!Number.isFinite(parsed) || parsed < 0) { + return DEFAULT_INLINE_DIFF_MAX_FILES; + } + return Math.floor(parsed); +} + +function normalizeMaxInlineDiffBytes(value) { + const parsed = Number(value); + if (!Number.isFinite(parsed) || parsed < 0) { + return DEFAULT_INLINE_DIFF_MAX_BYTES; + } + return Math.floor(parsed); +} + +function measureGitOutputBytes(cwd, args, maxBytes) { + const result = git(cwd, args, { maxBuffer: maxBytes + 1 }); + if (result.error && /** @type {NodeJS.ErrnoException} */ (result.error).code === "ENOBUFS") { + return maxBytes + 1; + } + if (result.error) { + throw result.error; + } + if (result.status !== 0) { + throw new Error(formatCommandFailure(result)); + } + return Buffer.byteLength(result.stdout, "utf8"); +} + +function measureCombinedGitOutputBytes(cwd, argSets, maxBytes) { + let totalBytes = 0; + for (const args of argSets) { + const remainingBytes = maxBytes - totalBytes; + if (remainingBytes < 0) { + return maxBytes + 1; + } + totalBytes += measureGitOutputBytes(cwd, args, remainingBytes); + if (totalBytes > maxBytes) { + return totalBytes; + } + } + return totalBytes; +} + +function buildBranchComparison(cwd, baseRef) { + const mergeBase = gitChecked(cwd, ["merge-base", "HEAD", baseRef]).stdout.trim(); + return { + mergeBase, + commitRange: `${mergeBase}..HEAD`, + reviewRange: `${baseRef}...HEAD` + }; +} + export function ensureGitRepository(cwd) { const result = git(cwd, ["rev-parse", "--show-toplevel"]); const errorCode = result.error && "code" in result.error ? result.error.code : null; @@ -161,55 +221,115 @@ function formatUntrackedFile(cwd, relativePath) { return [`### ${relativePath}`, "```", buffer.toString("utf8").trimEnd(), "```"].join("\n"); } -function collectWorkingTreeContext(cwd, state) { - const status = gitChecked(cwd, ["status", "--short"]).stdout.trim(); - const stagedDiff = gitChecked(cwd, ["diff", "--cached", "--binary", "--no-ext-diff", "--submodule=diff"]).stdout; - const unstagedDiff = gitChecked(cwd, ["diff", "--binary", "--no-ext-diff", "--submodule=diff"]).stdout; - const untrackedBody = state.untracked.map((file) => formatUntrackedFile(cwd, file)).join("\n\n"); - - const parts = [ - formatSection("Git Status", status), - formatSection("Staged Diff", stagedDiff), - formatSection("Unstaged Diff", unstagedDiff), - formatSection("Untracked Files", untrackedBody) - ]; +function collectWorkingTreeContext(cwd, state, options = {}) { + const includeDiff = options.includeDiff !== false; + const status = gitChecked(cwd, ["status", "--short", "--untracked-files=all"]).stdout.trim(); + const changedFiles = listUniqueFiles(state.staged, state.unstaged, state.untracked); + + let parts; + if (includeDiff) { + const stagedDiff = gitChecked(cwd, ["diff", "--cached", "--binary", "--no-ext-diff", "--submodule=diff"]).stdout; + const unstagedDiff = gitChecked(cwd, ["diff", "--binary", "--no-ext-diff", "--submodule=diff"]).stdout; + const untrackedBody = state.untracked.map((file) => formatUntrackedFile(cwd, file)).join("\n\n"); + parts = [ + formatSection("Git Status", status), + formatSection("Staged Diff", stagedDiff), + formatSection("Unstaged Diff", unstagedDiff), + formatSection("Untracked Files", untrackedBody) + ]; + } else { + const stagedStat = gitChecked(cwd, ["diff", "--shortstat", "--cached"]).stdout.trim(); + const unstagedStat = gitChecked(cwd, ["diff", "--shortstat"]).stdout.trim(); + const untrackedBody = state.untracked.map((file) => formatUntrackedFile(cwd, file)).join("\n\n"); + parts = [ + formatSection("Git Status", status), + formatSection("Staged Diff Stat", stagedStat), + formatSection("Unstaged Diff Stat", unstagedStat), + formatSection("Changed Files", changedFiles.join("\n")), + formatSection("Untracked Files", untrackedBody) + ]; + } return { mode: "working-tree", summary: `Reviewing ${state.staged.length} staged, ${state.unstaged.length} unstaged, and ${state.untracked.length} untracked file(s).`, - content: parts.join("\n") + content: parts.join("\n"), + changedFiles }; } -function collectBranchContext(cwd, baseRef) { - const mergeBase = gitChecked(cwd, ["merge-base", "HEAD", baseRef]).stdout.trim(); - const commitRange = `${mergeBase}..HEAD`; +function collectBranchContext(cwd, baseRef, options = {}) { + const includeDiff = options.includeDiff !== false; + const comparison = options.comparison ?? buildBranchComparison(cwd, baseRef); const currentBranch = getCurrentBranch(cwd); - const logOutput = gitChecked(cwd, ["log", "--oneline", "--decorate", commitRange]).stdout.trim(); - const diffStat = gitChecked(cwd, ["diff", "--stat", commitRange]).stdout.trim(); - const diff = gitChecked(cwd, ["diff", "--binary", "--no-ext-diff", "--submodule=diff", commitRange]).stdout; + const changedFiles = gitChecked(cwd, ["diff", "--name-only", comparison.commitRange]).stdout.trim().split("\n").filter(Boolean); + const logOutput = gitChecked(cwd, ["log", "--oneline", "--decorate", comparison.commitRange]).stdout.trim(); + const diffStat = gitChecked(cwd, ["diff", "--stat", comparison.commitRange]).stdout.trim(); return { mode: "branch", - summary: `Reviewing branch ${currentBranch} against ${baseRef} from merge-base ${mergeBase}.`, - content: [ - formatSection("Commit Log", logOutput), - formatSection("Diff Stat", diffStat), - formatSection("Branch Diff", diff) - ].join("\n") + summary: `Reviewing branch ${currentBranch} against ${baseRef} from merge-base ${comparison.mergeBase}.`, + content: includeDiff + ? [ + formatSection("Commit Log", logOutput), + formatSection("Diff Stat", diffStat), + formatSection( + "Branch Diff", + gitChecked(cwd, ["diff", "--binary", "--no-ext-diff", "--submodule=diff", comparison.commitRange]).stdout + ) + ].join("\n") + : [ + formatSection("Commit Log", logOutput), + formatSection("Diff Stat", diffStat), + formatSection("Changed Files", changedFiles.join("\n")) + ].join("\n"), + changedFiles, + comparison }; } -export function collectReviewContext(cwd, target) { +function buildAdversarialCollectionGuidance(options = {}) { + if (options.includeDiff !== false) { + return "Use the repository context below as primary evidence."; + } + + return "The repository context below is a lightweight summary. Inspect the target diff yourself with read-only git commands before finalizing findings."; +} + +export function collectReviewContext(cwd, target, options = {}) { const repoRoot = getRepoRoot(cwd); - const state = getWorkingTreeState(cwd); - const currentBranch = getCurrentBranch(cwd); + const currentBranch = getCurrentBranch(repoRoot); + const maxInlineFiles = normalizeMaxInlineFiles(options.maxInlineFiles); + const maxInlineDiffBytes = normalizeMaxInlineDiffBytes(options.maxInlineDiffBytes); let details; + let includeDiff; + let diffBytes; if (target.mode === "working-tree") { - details = collectWorkingTreeContext(repoRoot, state); + const state = getWorkingTreeState(repoRoot); + diffBytes = measureCombinedGitOutputBytes( + repoRoot, + [ + ["diff", "--cached", "--binary", "--no-ext-diff", "--submodule=diff"], + ["diff", "--binary", "--no-ext-diff", "--submodule=diff"] + ], + maxInlineDiffBytes + ); + includeDiff = + options.includeDiff ?? + (listUniqueFiles(state.staged, state.unstaged, state.untracked).length <= maxInlineFiles && + diffBytes <= maxInlineDiffBytes); + details = collectWorkingTreeContext(repoRoot, state, { includeDiff }); } else { - details = collectBranchContext(repoRoot, target.baseRef); + const comparison = buildBranchComparison(repoRoot, target.baseRef); + const fileCount = gitChecked(repoRoot, ["diff", "--name-only", comparison.commitRange]).stdout.trim().split("\n").filter(Boolean).length; + diffBytes = measureGitOutputBytes( + repoRoot, + ["diff", "--binary", "--no-ext-diff", "--submodule=diff", comparison.commitRange], + maxInlineDiffBytes + ); + includeDiff = options.includeDiff ?? (fileCount <= maxInlineFiles && diffBytes <= maxInlineDiffBytes); + details = collectBranchContext(repoRoot, target.baseRef, { includeDiff, comparison }); } return { @@ -217,6 +337,10 @@ export function collectReviewContext(cwd, target) { repoRoot, branch: currentBranch, target, + fileCount: details.changedFiles.length, + diffBytes, + inputMode: includeDiff ? "inline-diff" : "self-collect", + collectionGuidance: buildAdversarialCollectionGuidance({ includeDiff }), ...details }; } diff --git a/plugins/codex/scripts/lib/process.mjs b/plugins/codex/scripts/lib/process.mjs index 58cedf6..af28d1c 100644 --- a/plugins/codex/scripts/lib/process.mjs +++ b/plugins/codex/scripts/lib/process.mjs @@ -7,6 +7,7 @@ export function runCommand(command, args = [], options = {}) { env: options.env, encoding: "utf8", input: options.input, + maxBuffer: options.maxBuffer, stdio: options.stdio ?? "pipe", shell: process.platform === "win32" ? (process.env.SHELL || true) : false, windowsHide: true diff --git a/tests/git.test.mjs b/tests/git.test.mjs index dd6a7a4..14ff257 100644 --- a/tests/git.test.mjs +++ b/tests/git.test.mjs @@ -69,6 +69,23 @@ test("resolveReviewTarget requires an explicit base when no default branch can b ); }); +test("collectReviewContext keeps inline diffs for tiny adversarial reviews", () => { + const cwd = makeTempDir(); + initGitRepo(cwd); + fs.writeFileSync(path.join(cwd, "app.js"), "console.log('v1');\n"); + run("git", ["add", "app.js"], { cwd }); + run("git", ["commit", "-m", "init"], { cwd }); + fs.writeFileSync(path.join(cwd, "app.js"), "console.log('INLINE_MARKER');\n"); + + const target = resolveReviewTarget(cwd, {}); + const context = collectReviewContext(cwd, target); + + assert.equal(context.inputMode, "inline-diff"); + assert.equal(context.fileCount, 1); + assert.match(context.collectionGuidance, /primary evidence/i); + assert.match(context.content, /INLINE_MARKER/); +}); + test("collectReviewContext skips untracked directories in working tree review", () => { const cwd = makeTempDir(); initGitRepo(cwd); @@ -101,3 +118,66 @@ test("collectReviewContext skips broken untracked symlinks instead of crashing", assert.match(context.content, /### broken-link/); assert.match(context.content, /skipped: broken symlink or unreadable file/i); }); + +test("collectReviewContext falls back to lightweight context for larger adversarial reviews", () => { + const cwd = makeTempDir(); + initGitRepo(cwd); + for (const name of ["a.js", "b.js", "c.js"]) { + fs.writeFileSync(path.join(cwd, name), `export const value = "${name}-v1";\n`); + } + run("git", ["add", "a.js", "b.js", "c.js"], { cwd }); + run("git", ["commit", "-m", "init"], { cwd }); + fs.writeFileSync(path.join(cwd, "a.js"), 'export const value = "SELF_COLLECT_MARKER_A";\n'); + fs.writeFileSync(path.join(cwd, "b.js"), 'export const value = "SELF_COLLECT_MARKER_B";\n'); + fs.writeFileSync(path.join(cwd, "c.js"), 'export const value = "SELF_COLLECT_MARKER_C";\n'); + + const target = resolveReviewTarget(cwd, {}); + const context = collectReviewContext(cwd, target); + + assert.equal(context.inputMode, "self-collect"); + assert.equal(context.fileCount, 3); + assert.match(context.collectionGuidance, /lightweight summary/i); + assert.match(context.collectionGuidance, /read-only git commands/i); + assert.doesNotMatch(context.content, /SELF_COLLECT_MARKER_[ABC]/); + assert.match(context.content, /## Changed Files/); +}); + +test("collectReviewContext falls back to lightweight context for oversized single-file diffs", () => { + const cwd = makeTempDir(); + initGitRepo(cwd); + fs.writeFileSync(path.join(cwd, "app.js"), "export const value = 'v1';\n"); + run("git", ["add", "app.js"], { cwd }); + run("git", ["commit", "-m", "init"], { cwd }); + fs.writeFileSync(path.join(cwd, "app.js"), `export const value = '${"x".repeat(512)}';\n`); + + const target = resolveReviewTarget(cwd, {}); + const context = collectReviewContext(cwd, target, { maxInlineDiffBytes: 128 }); + + assert.equal(context.fileCount, 1); + assert.equal(context.inputMode, "self-collect"); + assert.ok(context.diffBytes > 128); + assert.doesNotMatch(context.content, /xxx/); + assert.match(context.content, /## Changed Files/); +}); + +test("collectReviewContext keeps untracked file content in lightweight working tree context", () => { + const cwd = makeTempDir(); + initGitRepo(cwd); + for (const name of ["a.js", "b.js"]) { + fs.writeFileSync(path.join(cwd, name), `export const value = "${name}-v1";\n`); + } + run("git", ["add", "a.js", "b.js"], { cwd }); + run("git", ["commit", "-m", "init"], { cwd }); + fs.writeFileSync(path.join(cwd, "a.js"), 'export const value = "TRACKED_MARKER_A";\n'); + fs.writeFileSync(path.join(cwd, "b.js"), 'export const value = "TRACKED_MARKER_B";\n'); + fs.writeFileSync(path.join(cwd, "new-risk.js"), 'export const value = "UNTRACKED_RISK_MARKER";\n'); + + const target = resolveReviewTarget(cwd, {}); + const context = collectReviewContext(cwd, target); + + assert.equal(context.inputMode, "self-collect"); + assert.equal(context.fileCount, 3); + assert.doesNotMatch(context.content, /TRACKED_MARKER_[AB]/); + assert.match(context.content, /## Untracked Files/); + assert.match(context.content, /UNTRACKED_RISK_MARKER/); +}); diff --git a/tests/runtime.test.mjs b/tests/runtime.test.mjs index 6000c89..9a8a1c8 100644 --- a/tests/runtime.test.mjs +++ b/tests/runtime.test.mjs @@ -148,6 +148,33 @@ test("adversarial review accepts the same base-branch targeting as review", () = assert.match(result.stdout, /Missing empty-state guard/); }); +test("adversarial review asks Codex to inspect larger diffs itself", () => { + const repo = makeTempDir(); + const binDir = makeTempDir(); + installFakeCodex(binDir); + initGitRepo(repo); + fs.mkdirSync(path.join(repo, "src")); + for (const name of ["a.js", "b.js", "c.js"]) { + fs.writeFileSync(path.join(repo, "src", name), `export const value = "${name}-v1";\n`); + } + run("git", ["add", "src/a.js", "src/b.js", "src/c.js"], { cwd: repo }); + run("git", ["commit", "-m", "init"], { cwd: repo }); + fs.writeFileSync(path.join(repo, "src", "a.js"), 'export const value = "PROMPT_SELF_COLLECT_A";\n'); + fs.writeFileSync(path.join(repo, "src", "b.js"), 'export const value = "PROMPT_SELF_COLLECT_B";\n'); + fs.writeFileSync(path.join(repo, "src", "c.js"), 'export const value = "PROMPT_SELF_COLLECT_C";\n'); + + const result = run("node", [SCRIPT, "adversarial-review"], { + cwd: repo, + env: buildEnv(binDir) + }); + + assert.equal(result.status, 0, result.stderr); + const state = JSON.parse(fs.readFileSync(path.join(binDir, "fake-codex-state.json"), "utf8")); + assert.match(state.lastTurnStart.prompt, /lightweight summary/i); + assert.match(state.lastTurnStart.prompt, /read-only git commands/i); + assert.doesNotMatch(state.lastTurnStart.prompt, /PROMPT_SELF_COLLECT_[ABC]/); +}); + test("review includes reasoning output when the app server returns it", () => { const repo = makeTempDir(); const binDir = makeTempDir();