diff --git a/prompts/review-item.md b/prompts/review-item.md index acc6ecba40..6a7bb9f013 100644 --- a/prompts/review-item.md +++ b/prompts/review-item.md @@ -163,6 +163,13 @@ For PRs, include a dedicated security review pass in addition to the functional For PRs, include a dedicated `realBehaviorProof` assessment before any pass, automerge, or repair verdict. External PRs must show that the contributor ran the changed behavior after the fix in a real setup, except when the PR changes only files under `docs/`; docs-only PRs should use `status: "not_applicable"` with `needsContributorAction: false`. Unit tests, mocks, snapshots, lint, typechecks, and CI are supplemental only; they are not real behavior proof by themselves. Treat screenshots, recordings, terminal screenshots, console output, copied live output, linked artifacts, and redacted runtime logs as valid proof, including for non-visual CLI, console, text, or error-message changes. Prefer asking for screenshots or videos when they can show the behavior, including terminal screenshots for text or console changes, while keeping logs and live output acceptable. Remind contributors to redact private information like IP addresses, API keys, phone numbers, non-public endpoints, and other private details before posting evidence. A plain app screenshot is sufficient only for behavior it directly shows. Do not mark screenshot-only proof sufficient for browser runtime, CSP, CORS, `connect-src`, auth callback, network, or security changes when the proof only says no console error, warning, or violation is visible; require console output, a network trace, terminal/live output, logs, a recording with diagnostics, or a linked artifact that actually shows the runtime path. Use your tools and best judgement: inspect the PR body, comments, links, screenshots, videos, logs, terminal output, and changed behavior context; you may download/open GitHub attachment links, generate stills or contact sheets from videos, inspect terminal screenshots and logs, and compare the proof against the PR diff. Use the provided scratch directory for downloaded artifacts and keep the target checkout read-only. Use `status: "sufficient"` only when the evidence convincingly shows after-fix real behavior and an observed improved result. Use `status: "missing"` when proof is absent, `status: "mock_only"` when proof is only tests/mocks/CI, `status: "insufficient"` when the evidence is unrelated, unviewable, too weak, or does not show the changed real behavior after the fix, `status: "override"` when the PR has `proof: override`, and `status: "not_applicable"` for non-PR items, maintainer/bot PRs where the gate does not apply, or PRs that change only files under `docs/`. When proof is missing, mock-only, or insufficient, set `needsContributorAction: true`, make the PR a human-only merge blocker, and do not request ClawSweeper repair markers because automation cannot prove the contributor's setup for them. +Missing, mock-only, or insufficient real behavior proof is not a substitute for +the diff review. Even when proof blocks merge, still finish the code/docs +correctness pass and populate `reviewFindings` for high-confidence source +defects. In particular, do not stop after asking for proof when the diff changes +documented contracts, duplicate/idempotency keys, task lifecycle state, fallback +delivery behavior, request scoping, or persisted settings. + For PRs, always fill `telegramVisibleProof`. Use `status: "needed"` only when the PR touches Telegram behavior and the user-visible change can be easily demonstrated by the `telegram-crabbox-e2e-proof` skill, such as message formatting, slash-command output, reply text, attachments, reactions, threading, mentions, or other visible Telegram chat behavior. Use `status: "not_needed"` for non-Telegram PRs and for Telegram changes that are internal-only, test-only, docs-only, logging-only, retry/network reliability only, auth/secret plumbing only, or otherwise not meaningfully visible in a short Telegram Desktop recording. For PRs, also emit Codex `/review`-style findings in `reviewFindings`. @@ -203,6 +210,14 @@ documented defaults. A new default must not change an existing user's stored value during upgrade unless the PR includes an explicit, narrow, tested migration and the behavior is clearly intentional. +For task, media, tool-call, retry, duplicate-guard, idempotency, or fallback +delivery changes, explicitly compare the new request-key and active-task scoping +against current source, docs, and tests. Check whether the PR changes +same-request dedupe into all-request serialization, allows a new duplicate send, +removes a documented fallback, or adds a fallback that docs still deny. If the +code and docs now disagree, add a review finding rather than hiding it as a +generic risk. + Treat provider fallback removal, fail-closed routing, missing-harness behavior, startup/install checks, and strict config validation as upgrade-sensitive even when they fix a real bug. If current users may only discover the change because diff --git a/src/repair/git-publish.ts b/src/repair/git-publish.ts index cc0aace4ec..9d5dd734eb 100644 --- a/src/repair/git-publish.ts +++ b/src/repair/git-publish.ts @@ -1,5 +1,13 @@ import { spawnSync } from "node:child_process"; -import { cpSync, existsSync, mkdirSync, readdirSync, rmSync, statSync } from "node:fs"; +import { + cpSync, + existsSync, + mkdirSync, + readFileSync, + readdirSync, + rmSync, + statSync, +} from "node:fs"; import { mkdtempSync } from "node:fs"; import { tmpdir } from "node:os"; import { dirname, join, relative, resolve } from "node:path"; @@ -32,6 +40,16 @@ export type GitRunOptions = { export type PublishResult = "committed" | "unchanged"; +type PreservedFiles = { + root: string; + files: PreservedFile[]; +}; + +type PreservedFile = { + sourceRel: string; + targetRel: string; +}; + const GENERATED_PUBLISH_PATHS = [ "apply-report.json", "repair-apply-report.json", @@ -213,16 +231,19 @@ function syncStatePublishPaths(paths: readonly string[], stateRoot: string): voi if (!destination.startsWith(`${stateRoot}/`) && destination !== stateRoot) { throw new Error(`Refusing to publish outside state root: ${path}`); } - const preserved = preserveStateOnlyFiles({ path, source, destination }); + const preservedStateOnly = preserveStateOnlyFiles({ path, source, destination }); + const preservedRecords = preserveNewerStateReviewRecords({ path, source, destination }); try { rmSync(destination, { force: true, recursive: true }); if (existsSync(source)) { mkdirSync(dirname(destination), { recursive: true }); cpSync(source, destination, { recursive: true }); } - restorePreservedFiles(preserved, destination); + restorePreservedFiles(preservedStateOnly, destination); + restorePreservedFiles(preservedRecords, destination, { overwrite: true }); } finally { - rmSync(preserved.root, { force: true, recursive: true }); + rmSync(preservedStateOnly.root, { force: true, recursive: true }); + rmSync(preservedRecords.root, { force: true, recursive: true }); } } } @@ -235,11 +256,11 @@ function preserveStateOnlyFiles({ path: string; source: string; destination: string; -}): { root: string; files: string[] } { +}): PreservedFiles { const root = mkdtempSync(join(tmpdir(), "clawsweeper-state-preserve-")); if (!existsSync(destination)) return { root, files: [] }; - const files: string[] = []; + const files: PreservedFile[] = []; for (const file of listFiles(destination)) { const rel = relative(destination, file); if (!shouldPreserveStateOnlyFile(path, rel)) continue; @@ -247,7 +268,7 @@ function preserveStateOnlyFiles({ const target = resolve(root, rel); mkdirSync(dirname(target), { recursive: true }); cpSync(file, target); - files.push(rel); + files.push({ sourceRel: rel, targetRel: rel }); } return { root, files }; } @@ -266,12 +287,12 @@ function preserveStateOnlyCommitFiles({ }: { path: string; sourceCommit: string; -}): { root: string; files: string[] } { +}): PreservedFiles { const root = mkdtempSync(join(tmpdir(), "clawsweeper-state-preserve-")); const source = resolve(path); if (!existsSync(source)) return { root, files: [] }; - const files: string[] = []; + const files: PreservedFile[] = []; const commitPathPrefix = path.replace(/\/+$/, ""); for (const file of listFiles(source)) { const rel = relative(source, file); @@ -280,16 +301,94 @@ function preserveStateOnlyCommitFiles({ const target = resolve(root, rel); mkdirSync(dirname(target), { recursive: true }); cpSync(file, target); - files.push(rel); + files.push({ sourceRel: rel, targetRel: rel }); + } + return { root, files }; +} + +function preserveNewerStateReviewRecords({ + path, + source, + destination, +}: { + path: string; + source: string; + destination: string; +}): PreservedFiles { + const root = mkdtempSync(join(tmpdir(), "clawsweeper-state-preserve-")); + if (!isRecordPublishPath(path) || !existsSync(destination) || !existsSync(source)) { + return { root, files: [] }; + } + + if (statSync(destination).isFile() || statSync(source).isFile()) { + if (!statSync(destination).isFile() || !statSync(source).isFile()) { + return { root, files: [] }; + } + if (!/^records\/[^/]+\/(?:items|closed)\/[^/]+\.md$/.test(normalizePath(path))) { + return { root, files: [] }; + } + if (!stateRecordIsNewer(destination, source)) return { root, files: [] }; + const sourceRel = "record.md"; + cpSync(destination, resolve(root, sourceRel)); + return { root, files: [{ sourceRel, targetRel: "" }] }; + } + + const files: PreservedFile[] = []; + for (const file of listFiles(destination)) { + const rel = relative(destination, file); + const stateRel = normalizePath(join(path, rel)); + if (!/^records\/[^/]+\/(?:items|closed)\/[^/]+\.md$/.test(stateRel)) continue; + const sourceFile = resolve(source, rel); + if (!existsSync(sourceFile)) continue; + if (!stateRecordIsNewer(file, sourceFile)) continue; + const target = resolve(root, rel); + mkdirSync(dirname(target), { recursive: true }); + cpSync(file, target); + files.push({ sourceRel: rel, targetRel: rel }); } return { root, files }; } -function restorePreservedFiles(preserved: { root: string; files: string[] }, destination: string) { - for (const rel of preserved.files) { - const source = resolve(preserved.root, rel); - const target = resolve(destination, rel); - if (existsSync(target)) continue; +function isRecordPublishPath(path: string): boolean { + return path === "records" || path.startsWith("records/"); +} + +function normalizePath(path: string): string { + return path.replaceAll("\\", "/"); +} + +function stateRecordIsNewer(stateFile: string, sourceFile: string): boolean { + const state = reviewRecordTimestamps(readFileSync(stateFile, "utf8")); + const source = reviewRecordTimestamps(readFileSync(sourceFile, "utf8")); + if (state.reviewedAt !== source.reviewedAt) return state.reviewedAt > source.reviewedAt; + return state.syncedAt > source.syncedAt; +} + +function reviewRecordTimestamps(markdown: string): { reviewedAt: number; syncedAt: number } { + return { + reviewedAt: frontMatterTime(markdown, "reviewed_at"), + syncedAt: frontMatterTime(markdown, "review_comment_synced_at"), + }; +} + +function frontMatterTime(markdown: string, key: string): number { + const match = markdown.match(new RegExp(`^${key}:\\s*(.+)$`, "m")); + if (!match) return 0; + const value = match[1]; + if (!value) return 0; + const parsed = Date.parse(value.trim()); + return Number.isFinite(parsed) ? parsed : 0; +} + +function restorePreservedFiles( + preserved: PreservedFiles, + destination: string, + options: { overwrite?: boolean } = {}, +) { + for (const file of preserved.files) { + const source = resolve(preserved.root, file.sourceRel); + const target = file.targetRel ? resolve(destination, file.targetRel) : destination; + if (!options.overwrite && existsSync(target)) continue; mkdirSync(dirname(target), { recursive: true }); cpSync(source, target); } diff --git a/test/clawsweeper.test.ts b/test/clawsweeper.test.ts index d8966a1558..8f69d16041 100644 --- a/test/clawsweeper.test.ts +++ b/test/clawsweeper.test.ts @@ -7017,6 +7017,9 @@ test("review prompt requires upgrade and preference overwrite checks", () => { ); assert.match(prompt, /Call out upgrade and settings breakage directly in `reviewFindings`/); assert.match(prompt, /existing config\/preferences can be overwritten/); + assert.match(prompt, /task, media, tool-call, retry, duplicate-guard/); + assert.match(prompt, /same-request dedupe into all-request serialization/); + assert.match(prompt, /code and docs now disagree/); assert.match(prompt, /preserving the existing\s+behavior as the default/); assert.match(prompt, /explicit strict config option/); assert.match(prompt, /default compatibility mode and the\s+opt-in strict mode/); @@ -7042,6 +7045,8 @@ test("review prompt requires real behavior proof for PR reviews", () => { prompt, /Unit tests, mocks, snapshots, lint, typechecks, and CI are supplemental only/, ); + assert.match(prompt, /not a substitute for\s+the diff review/); + assert.match(prompt, /still finish the code\/docs\s+correctness pass/); assert.match(prompt, /do not request ClawSweeper repair markers/); }); diff --git a/test/repair/git-publish.test.ts b/test/repair/git-publish.test.ts index c9444413a6..4d7ec81f0c 100644 --- a/test/repair/git-publish.test.ts +++ b/test/repair/git-publish.test.ts @@ -286,6 +286,98 @@ test("publishMainCommit publishes generated paths to state branch when state roo assert.throws(() => run("git", ["--git-dir", origin, "show", "main:results/ledger.txt"], root)); }); +test("publishMainCommit preserves newer state review records during broad publishes", () => { + const root = fs.mkdtempSync(path.join(os.tmpdir(), "clawsweeper-publish-")); + const origin = path.join(root, "origin.git"); + const work = path.join(root, "work"); + const state = path.join(root, "state"); + run("git", ["init", "--bare", origin], root); + run("git", ["clone", origin, state], root); + configureUser(state); + write( + path.join(state, "records/openclaw-openclaw/items/1.md"), + "---\nreviewed_at: 2026-05-20T02:00:00.000Z\nreview_comment_synced_at: 2026-05-20T02:01:00.000Z\n---\nnewer report\n", + ); + run("git", ["add", "."], state); + run("git", ["commit", "-m", "initial state"], state); + run("git", ["push", "origin", "HEAD:state"], state); + run("git", ["--git-dir", origin, "symbolic-ref", "HEAD", "refs/heads/state"], root); + run("git", ["checkout", "-B", "state", "origin/state"], state); + + fs.mkdirSync(work); + write( + path.join(work, "records/openclaw-openclaw/items/1.md"), + "---\nreviewed_at: 2026-05-20T01:00:00.000Z\nreview_comment_synced_at: 2026-05-20T01:01:00.000Z\n---\nstale report\n", + ); + write( + path.join(work, "records/openclaw-openclaw/items/2.md"), + "---\nreviewed_at: 2026-05-20T01:30:00.000Z\n---\nnew local report\n", + ); + + const result = withEnv({ CLAWSWEEPER_STATE_DIR: state }, () => + withCwd(work, () => + publishMainCommit({ + message: "chore: sync sweep review comments checkpoint 1", + paths: ["records"], + maxAttempts: 1, + pushAttempts: 1, + }), + ), + ); + + assert.equal(result, "committed"); + assert.equal( + run("git", ["--git-dir", origin, "show", "state:records/openclaw-openclaw/items/1.md"], root), + "---\nreviewed_at: 2026-05-20T02:00:00.000Z\nreview_comment_synced_at: 2026-05-20T02:01:00.000Z\n---\nnewer report\n", + ); + assert.equal( + run("git", ["--git-dir", origin, "show", "state:records/openclaw-openclaw/items/2.md"], root), + "---\nreviewed_at: 2026-05-20T01:30:00.000Z\n---\nnew local report\n", + ); +}); + +test("publishMainCommit preserves newer state review records during exact file publishes", () => { + const root = fs.mkdtempSync(path.join(os.tmpdir(), "clawsweeper-publish-")); + const origin = path.join(root, "origin.git"); + const work = path.join(root, "work"); + const state = path.join(root, "state"); + run("git", ["init", "--bare", origin], root); + run("git", ["clone", origin, state], root); + configureUser(state); + write( + path.join(state, "records/openclaw-openclaw/items/1.md"), + "---\nreviewed_at: 2026-05-20T02:00:00.000Z\n---\nnewer exact report\n", + ); + run("git", ["add", "."], state); + run("git", ["commit", "-m", "initial state"], state); + run("git", ["push", "origin", "HEAD:state"], state); + run("git", ["--git-dir", origin, "symbolic-ref", "HEAD", "refs/heads/state"], root); + run("git", ["checkout", "-B", "state", "origin/state"], state); + + fs.mkdirSync(work); + write( + path.join(work, "records/openclaw-openclaw/items/1.md"), + "---\nreviewed_at: 2026-05-20T01:00:00.000Z\n---\nstale exact report\n", + ); + + const result = withEnv({ CLAWSWEEPER_STATE_DIR: state }, () => + withCwd(work, () => + publishMainCommit({ + message: "chore: apply event sweep result for openclaw-openclaw#1", + paths: ["records/openclaw-openclaw/items/1.md"], + maxAttempts: 1, + pushAttempts: 1, + }), + ), + ); + + assert.equal(result, "unchanged"); + assert.equal( + run("git", ["--git-dir", origin, "show", "state:records/openclaw-openclaw/items/1.md"], root), + "---\nreviewed_at: 2026-05-20T02:00:00.000Z\n---\nnewer exact report\n", + ); +}); + test("publishMainCommit preserves state-only automerge jobs on broad jobs publishes", () => { const root = fs.mkdtempSync(path.join(os.tmpdir(), "clawsweeper-publish-")); const origin = path.join(root, "origin.git");