From 15f45eecadf3543b06958b6f5e47c94c2b6590c2 Mon Sep 17 00:00:00 2001 From: cafitac Date: Fri, 8 May 2026 01:28:19 +0900 Subject: [PATCH] feat: harden query preview cleanup restore artifact integrity --- .../g4-readiness-and-first-mutation-plan.md | 10 +- .../stage-g-cautious-automation.md | 8 +- .dev/status/current-handoff.md | 58 +++++--- src/agent_memory/api/cli.py | 103 ++++++++++++-- tests/test_cli.py | 132 ++++++++++++++++++ 5 files changed, 276 insertions(+), 35 deletions(-) diff --git a/.dev/roadmap/memory-consolidation/g4-readiness-and-first-mutation-plan.md b/.dev/roadmap/memory-consolidation/g4-readiness-and-first-mutation-plan.md index e79877f..f8feefa 100644 --- a/.dev/roadmap/memory-consolidation/g4-readiness-and-first-mutation-plan.md +++ b/.dev/roadmap/memory-consolidation/g4-readiness-and-first-mutation-plan.md @@ -179,6 +179,7 @@ Required RED tests before implementation: 13. Apply runs first against a disposable DB copy and proceeds only if count/hash/rollback checks pass. 14. A restore dry-run command validates rollback artifacts without mutating the DB or printing raw query previews; live restore remains blocked until a separate explicit policy slice. 15. Rollback artifacts are source-bound with a hashed DB fingerprint; restore dry-run fails closed on source/target DB mismatch. +16. Restore dry-run fails closed on artifact integrity problems such as wrong policy, invalid operation, declared row-count mismatch, duplicate row ids, or missing source fingerprint; the failure output remains aggregate/hash-only and read-only. Required operator safety before live DB apply: @@ -186,7 +187,7 @@ Required operator safety before live DB apply: - Export or back up the DB before mutation. - Run apply only with explicit policy/actor/reason and disposable-copy preflight. - Re-run storage-health and query-preview cleanup preview after mutation. -- Run restore dry-run against the private rollback artifact before considering any future live restore design; source/target DB fingerprint mismatches must remain blocking read-only errors. +- Run restore dry-run against the private rollback artifact before considering any future live restore design; source/target DB fingerprint mismatches and artifact integrity failures must remain blocking read-only errors. - Verify non-empty `query_preview` count becomes 0 or the remaining rows are explicitly explained. - Keep backup and rollback artifact paths out of git; rollback artifacts may contain private local query-preview values. @@ -235,9 +236,14 @@ Completed since the original draft: - `ordinary trace metadata default cleanup` became the second narrow explicit mutation in G4b. It normalized only already-metadata-only ordinary `turn` traces by filling conservative metadata defaults. - H1-H4 hardening and retrieval-eval expansion continued through `v0.1.99`; latest runtime QA passed at `/Users/reddit/.agent-memory/reports/v0.1.99-runtime-qa-20260507T074118`. -The next G4 slice is not live broad mutation. The docs/RED-test-only broader background consolidation apply-mode contract landed in PR #200 and was runtime-verified through v0.1.99. The next safe move is one disposable-DB-backed explicit policy/action slice. That contract must keep the original hard blocks: no ordinary conversation auto-approval, no raw transcript/prompt/query/query-preview persistence, no default retrieval ranking change, no broad LLM extraction from ordinary turns, and no apply mode without explicit named policy, actor, reason, audit, and restore guidance. The first hardening step required the named query-preview cleanup policy on the existing G4a cleanup apply path and shipped in v0.1.100. The next hardening step is rollback-manifest output: before clearing eligible legacy values, apply writes a private local rollback artifact and emits only path/hash/count metadata in stdout/audit. +The next G4 slice is not live broad mutation. The docs/RED-test-only broader background consolidation apply-mode contract landed in PR #200 and was runtime-verified through v0.1.99. The next safe move is one disposable-DB-backed explicit policy/action slice. That contract must keep the original hard blocks: no ordinary conversation auto-approval, no raw transcript/prompt/query/query-preview persistence, no default retrieval ranking change, no broad LLM extraction from ordinary turns, and no apply mode without explicit named policy, actor, reason, audit, and restore guidance. The first hardening step required the named query-preview cleanup policy on the existing G4a cleanup apply path and shipped in v0.1.100. The v0.1.104 hardening line adds source DB binding to rollback artifacts and restore dry-run. The next hardening step is artifact-integrity fail-closed behavior: restore dry-run should reject wrong-policy, invalid-operation, row-count-mismatched, duplicate-id, or missing-fingerprint artifacts as read-only structured errors before any future live restore is designed. ## Current G4a safety hardening: disposable-copy apply check `dogfood query-preview-cleanup --apply` remains the only narrow mutation being hardened. After the v0.1.101 named-policy and rollback-manifest release, the current slice requires the command to copy the target SQLite DB to a private local disposable artifact, run the same cleanup on that copy, and compare expected eligible/cleared/remaining counts plus rollback-manifest metadata before mutating the target DB. The disposable copy can contain private query-preview data; stdout/audit metadata must stay hash/count/path only and broad G4 apply mode remains blocked. + + +## Current G4a safety hardening: restore artifact-integrity check + +`dogfood query-preview-cleanup --apply` remains the only narrow mutation being hardened. After the v0.1.104 named-policy, rollback-manifest, disposable-copy preflight, restore dry-run, and source-binding release, the current slice tightens `dogfood query-preview-cleanup-restore --dry-run` so malformed or tampered artifacts fail closed with structured JSON. The dry-run remains read-only and aggregate/hash-only, reports blocked reasons such as `artifact_policy_invalid`, `artifact_operation_invalid`, `artifact_row_count_mismatch`, `duplicate_artifact_row_ids`, and `source_database_fingerprint_missing`, and keeps live restore unavailable. Broad G4 apply mode remains blocked. diff --git a/.dev/roadmap/memory-consolidation/stage-g-cautious-automation.md b/.dev/roadmap/memory-consolidation/stage-g-cautious-automation.md index 535e933..cac296c 100644 --- a/.dev/roadmap/memory-consolidation/stage-g-cautious-automation.md +++ b/.dev/roadmap/memory-consolidation/stage-g-cautious-automation.md @@ -166,7 +166,7 @@ Keep collecting scheduled dry-run artifacts while making the next four-step sequ ## PR G4-plan: Draft background apply-mode contract before implementation -Status: Complete for first narrow cleanup mutations. The query-preview cleanup path now has a named policy gate, rollback-manifest hardening, disposable-copy preflight hardening, restore dry-run validation, and source-database fingerprint hardening in progress; broader consolidation apply mode still requires a separate contract before mutating code. +Status: Complete for first narrow cleanup mutations. The query-preview cleanup path now has a named policy gate, rollback-manifest hardening, disposable-copy preflight hardening, restore dry-run validation, and source-database fingerprint hardening complete and artifact-integrity hardening in progress; broader consolidation apply mode still requires a separate contract before mutating code. ### Objective @@ -182,7 +182,7 @@ Define exactly what future apply mode may mutate, what it must audit, and what r ## PR G4a: Add first narrow mutation for legacy query-preview cleanup -Status: Implemented in PR #142, released in `v0.1.77` via PR #143, applied once to the live DB, and hardened through `v0.1.103` with a named policy gate, rollback manifest, disposable-copy preflight before target DB mutation, and read-only restore dry-run validation. Current follow-up source-binds rollback artifacts with a DB fingerprint and makes restore dry-run fail closed on artifact/target mismatch; live restore and broader G4 consolidation apply mode remain blocked by explicit policy/readiness work. +Status: Implemented in PR #142, released in `v0.1.77` via PR #143, applied once to the live DB, and hardened through `v0.1.104` with a named policy gate, rollback manifest, disposable-copy preflight before target DB mutation, read-only restore dry-run validation, and source DB binding. Current follow-up makes restore dry-run fail closed on artifact integrity problems such as wrong policy, invalid operation, row-count mismatch, duplicate row ids, or missing source fingerprint; live restore and broader G4 consolidation apply mode remain blocked by explicit policy/readiness work. ### Objective @@ -196,7 +196,7 @@ Clear legacy `retrieval_observations.query_preview` values from old versions wit - Raw query preview values are never printed. - The command writes audit-safe operation metadata, including rollback manifest path/hash/count without raw values in stdout/audit. - The command preflights apply on a private disposable DB copy before target DB mutation. -- A restore dry-run validates rollback artifacts and target-row compatibility without mutating or printing raw query previews; source/target DB fingerprint mismatch is blocking; live restore remains unavailable. +- A restore dry-run validates rollback artifacts and target-row compatibility without mutating or printing raw query previews; source/target DB fingerprint mismatch and artifact integrity failures are blocking; live restore remains unavailable. - Storage-health and cleanup preview can verify the result afterward. - Retrieval/Hermes behavior is unchanged. @@ -275,4 +275,4 @@ Allow controlled application only after dry-run output is trusted and the broade ## Current G4a safety hardening: restore dry-run check -`dogfood query-preview-cleanup --apply` remains the only narrow mutation being hardened. After the v0.1.102 named-policy, rollback-manifest, and disposable-copy preflight release, the current slice adds read-only `dogfood query-preview-cleanup-restore --dry-run`. It validates the private rollback artifact kind/policy/row shape/hash and reports target rows found, restorable rows, already-populated rows, and missing rows without mutating the DB or printing raw query-preview values. Live restore remains unavailable and broad G4 apply mode remains blocked. +`dogfood query-preview-cleanup --apply` remains the only narrow mutation being hardened. After the v0.1.104 named-policy, rollback-manifest, disposable-copy preflight, restore dry-run, and source-binding release, the current slice hardens read-only `dogfood query-preview-cleanup-restore --dry-run` against malformed or tampered artifacts. It rejects wrong-policy, invalid-operation, declared row-count mismatch, duplicate row id, and missing/mismatched source fingerprint cases as structured read-only errors before reporting any restorable rows. Live restore remains unavailable and broad G4 apply mode remains blocked. diff --git a/.dev/status/current-handoff.md b/.dev/status/current-handoff.md index 3bc1db9..3b70b3a 100644 --- a/.dev/status/current-handoff.md +++ b/.dev/status/current-handoff.md @@ -1,7 +1,7 @@ # agent-memory current handoff Status: AI-authored draft. Not yet human-approved. -Last updated: 2026-05-07 23:06 KST +Last updated: 2026-05-08 01:19 KST ## Trigger for the next session @@ -16,15 +16,15 @@ read this file first. Do not ask the user to restate context. Verify repo state, ## Ready-to-say answer -agent-memory is currently verified through `v0.1.103`: PR #206 tightened the first narrow G4a cleanup mutation so `dogfood query-preview-cleanup --apply` requires the named policy `legacy-query-preview-cleanup-v1`; PR #209 added rollback-manifest/private-artifact output; PR #212 added a disposable-copy preflight gate before target DB mutation; PR #214 added read-only restore dry-run validation for rollback artifacts; release-sync PR #215 published `v0.1.103`. GitHub Release, npm, and PyPI all report `v0.1.103`. The live Hermes `default`/`personal-oss` plus `earlypay` hook runtimes were upgraded to `/Users/reddit/.agent-memory/runtime/v0.1.103/.venv/bin/agent-memory`; installed-runtime QA passed with report `/Users/reddit/.agent-memory/reports/v0.1.103-runtime-qa-20260507T134856`. Checked-in retrieval-eval coverage remains 21 tasks. +agent-memory is currently verified through `v0.1.104`: PR #206 tightened the first narrow G4a cleanup mutation so `dogfood query-preview-cleanup --apply` requires the named policy `legacy-query-preview-cleanup-v1`; PR #209 added rollback-manifest/private-artifact output; PR #212 added a disposable-copy preflight gate before target DB mutation; PR #214 added read-only restore dry-run validation for rollback artifacts; PR #216 source-bound rollback artifacts with a hashed DB fingerprint; PR #218 stabilized the Linux/SQLite retrieval-eval comparator advisory; release-sync PR #217 published `v0.1.104`. GitHub Release, npm, and PyPI all report `v0.1.104`. The live Hermes `default`/`personal-oss` plus `earlypay` hook runtimes were upgraded to `/Users/reddit/.agent-memory/runtime/v0.1.104/.venv/bin/agent-memory`; installed-runtime QA passed with report `/Users/reddit/.agent-memory/reports/v0.1.104-runtime-qa-20260507T160731`. Checked-in retrieval-eval coverage remains 21 tasks. Storage/privacy cleanup remains clean: legacy `retrieval_observations.query_preview` rows are expected to stay at 0, ordinary metadata-only violations are normalized, graph exports stay local/read-only/redacted by default, and broad G4 consolidation apply mode remains blocked. The latest installed-runtime dogfood snapshot reports `storage-health` read-only/non-mutating; scheduled dry-run remains read-only and recommends continuing dogfood evidence before broad G4 mutation rather than enabling broad apply mode. ## Current next slice -Current slice: v0.1.103 release/runtime QA is complete and the first narrow cleanup mutation now has explicit named-policy, rollback-manifest, disposable-copy preflight, and restore dry-run gates. The next safety hardening slice is still not broad apply or live restore: bind rollback artifacts to a source database fingerprint and have restore dry-run fail closed on artifact/target DB mismatch without printing raw query previews or mutating either DB. +Current slice: v0.1.104 release/runtime QA is complete and the first narrow cleanup mutation now has explicit named-policy, rollback-manifest, disposable-copy preflight, restore dry-run, and source DB binding gates. The next safety hardening slice is still not broad apply or live restore: make restore dry-run fail closed on malformed/tampered rollback artifacts such as wrong policy, invalid operation, declared row-count mismatch, duplicate row ids, or missing source fingerprint, without printing raw query previews or mutating any DB. -Why this is the best next move: v0.1.103 leaves packaging, runtime QA, the 21-task retrieval-eval harness, named policy, rollback manifest, private rollback artifact, disposable preflight, and restore dry-run healthy. The remaining rollback risk is using a valid artifact against the wrong DB: before any future restore apply exists, artifacts should carry a source DB fingerprint and restore dry-run should report mismatch as a blocking read-only error. Broader consolidation apply mode remains blocked until explicit policy/action paths prove preview, disposable preflight, audit, rollback, dry-run restore, source binding, and privacy behavior on narrow/disposable evidence. +Why this is the best next move: v0.1.104 leaves packaging, runtime QA, the 21-task retrieval-eval harness, named policy, rollback manifest, private rollback artifact, disposable preflight, restore dry-run, and source binding healthy. The remaining rollback risk is accepting a syntactically valid but semantically tampered artifact: before any future restore apply exists, restore dry-run should report artifact integrity failures as blocking read-only errors. Broader consolidation apply mode remains blocked until explicit policy/action paths prove preview, disposable preflight, audit, rollback, dry-run restore, source binding, artifact integrity, and privacy behavior on narrow/disposable evidence. Recommended local backup commands: @@ -54,9 +54,9 @@ Current branch expectation: - Root checkout should normally be on `main` unless a docs/feature branch is active. - Latest merged retrieval-quality PR: #210 `test: stabilize soft regression advisory assertion`. -- Latest merged G4a hardening PR: #214 `feat: add query preview cleanup restore dry run`. -- Latest merged release-sync PR: #215 `chore: release v0.1.103 [skip release]`. -- Latest completed release: `v0.1.103`. +- Latest merged G4a hardening PR: #216 `feat: source-bind query preview cleanup restore dry run`. +- Latest merged release-sync PR: #217 `chore: release v0.1.104 [skip release]`. +- Latest completed release: `v0.1.104`. Expected GitHub identity: @@ -67,26 +67,26 @@ Expected GitHub identity: Latest completed release: -- `v0.1.103` -- GitHub release: `https://github.com/cafitac/agent-memory/releases/tag/v0.1.103` -- npm package: `@cafitac/agent-memory@0.1.103` -- PyPI package: `cafitac-agent-memory==0.1.103` +- `v0.1.104` +- GitHub release: `https://github.com/cafitac/agent-memory/releases/tag/v0.1.104` +- npm package: `@cafitac/agent-memory@0.1.104` +- PyPI package: `cafitac-agent-memory==0.1.104` -Latest verified source checkout snapshot, checked 2026-05-07 23:06 KST: +Latest verified source checkout snapshot, checked 2026-05-08 01:19 KST: - branch: `main`, synced with `origin/main` before this restore-dry-run branch -- latest release-sync commit: `chore: release v0.1.103 [skip release]` via PR #215 -- latest G4a hardening merges: PR #206 `feat: require policy for query preview cleanup apply`, PR #209 `feat: add query preview cleanup rollback manifest`, PR #212 `feat: preflight query preview cleanup apply on disposable copy`, PR #214 `feat: add query preview cleanup restore dry run` -- latest stabilization merge: PR #210 `test: stabilize soft regression advisory assertion` +- latest release-sync commit: `chore: release v0.1.104 [skip release]` via PR #217 +- latest G4a hardening merges: PR #206 `feat: require policy for query preview cleanup apply`, PR #209 `feat: add query preview cleanup rollback manifest`, PR #212 `feat: preflight query preview cleanup apply on disposable copy`, PR #214 `feat: add query preview cleanup restore dry run`, PR #216 `feat: source-bind query preview cleanup restore dry run` +- latest stabilization merges: PR #210 `test: stabilize soft regression advisory assertion`, PR #218 `test: stabilize comparator avoid delta assertion` - previous G4 contract merge: PR #200 `docs: checkpoint broad g4 apply contract` - previous G4 contract stabilization merge: PR #202 `test: stabilize retrieval avoid delta assertion` - previous retrieval-quality merge commit: PR #195 `test: add procedure prompt budget fixture` - open PRs: none observed before this restore-dry-run branch -- GitHub Release, npm, and PyPI all report `v0.1.103` -- published-install QA passed from fresh PyPI venv and npm smoke; `agent_memory.__version__ == "0.1.103"` -- live Hermes `default`, `personal-oss`, and `earlypay` configs use the pinned v0.1.103 runtime +- GitHub Release, npm, and PyPI all report `v0.1.104` +- published-install QA passed from fresh PyPI venv and npm smoke; `agent_memory.__version__ == "0.1.104"` +- live Hermes `default`, `personal-oss`, and `earlypay` configs use the pinned v0.1.104 runtime - checked-in retrieval-eval fixtures remain at 21 tasks; local full tests passed after PR #210 -- installed runtime dogfood storage-health, scheduled dry-run, query-preview disposable-gated apply smoke, restore dry-run smoke, and hook smoke passed; broad G4 remains blocked +- installed runtime dogfood storage-health, scheduled dry-run, query-preview disposable-gated apply smoke, restore dry-run/source-binding smoke, and hook smoke passed; broad G4 remains blocked Expected local untracked artifacts to preserve in the root checkout: @@ -146,6 +146,26 @@ Still forbidden after this slice: - default retrieval/ranking behavior changes; - live restore/apply without a separate RED-tested policy/actor/reason/audit/disposable path. +## In-progress G4a restore artifact-integrity hardening slice + +Current branch: `g4/query-preview-cleanup-restore-artifact-integrity`. + +Scope: + +- Keep `dogfood query-preview-cleanup-restore --dry-run` read-only and non-mutating. +- Add operator-facing integrity checks for wrong policy, invalid operation, declared row-count mismatch, duplicate row ids, and missing/mismatched source fingerprint. +- Fail closed with structured JSON `status=error`, aggregate counts, and blocked reasons rather than accepting tampered artifacts. +- Do not print raw `query_preview`, token, API-key-like strings, or raw reason text. +- Keep live restore unavailable (`restore_apply_available=false`, `live_restore_not_implemented`) until a separate explicit policy/apply slice exists. + +Still forbidden after this slice: + +- broad G4 apply mode; +- ordinary conversation auto-approval; +- raw transcript or raw query text in stdout/audit metadata; +- default retrieval/ranking behavior changes; +- live restore/apply without a separate RED-tested policy/actor/reason/audit/disposable path. + ## v0.1.100 policy hardening release and runtime QA completed PR #206 `feat: require policy for query preview cleanup apply`, release-sync PR #207, and stabilization PR #208 merged. diff --git a/src/agent_memory/api/cli.py b/src/agent_memory/api/cli.py index b07a4c6..3736cb5 100644 --- a/src/agent_memory/api/cli.py +++ b/src/agent_memory/api/cli.py @@ -4380,14 +4380,73 @@ def _dogfood_query_preview_cleanup_restore_dry_run_payload(args: argparse.Namesp artifact_payload = json.loads(artifact_text) except json.JSONDecodeError as exc: raise ValueError("query-preview-cleanup restore artifact must be valid JSON") from exc - if artifact_payload.get("kind") != "query_preview_cleanup_rollback_artifact": - raise ValueError("query-preview-cleanup restore requires a query_preview_cleanup_rollback_artifact") + artifact_kind = artifact_payload.get("kind") + if artifact_kind != "query_preview_cleanup_rollback_artifact": + return { + "kind": kind, + "read_only": True, + "mutated": False, + "status": "error", + "database": {"path": str(db_path), "exists": True}, + "artifact": { + "kind": artifact_kind, + "path": str(artifact_path), + "exists": True, + "artifact_sha256": artifact_sha256, + }, + "restore_preview": { + "operation": "restore_stored_query_excerpts", + "dry_run": True, + "restore_apply_available": False, + "candidate_restore_count": 0, + "target_rows_found_count": 0, + "restorable_count": 0, + "already_has_query_preview_count": 0, + "missing_row_count": 0, + "skipped_count": 0, + }, + "privacy": { + "raw_query_preview_included": False, + "sample_values_included": False, + "artifact_contains_private_query_preview": False, + }, + "blocked_reasons": ["artifact_kind_invalid", "live_restore_not_implemented"], + "warnings": ["artifact_kind_invalid", "live_restore_not_implemented"], + } policy = artifact_payload.get("policy") if policy != QUERY_PREVIEW_CLEANUP_POLICY: - raise ValueError( - "query-preview-cleanup restore requires rollback artifact policy " - f"{QUERY_PREVIEW_CLEANUP_POLICY}" - ) + return { + "kind": kind, + "read_only": True, + "mutated": False, + "status": "error", + "database": {"path": str(db_path), "exists": True}, + "artifact": { + "kind": artifact_kind, + "path": str(artifact_path), + "exists": True, + "policy": policy, + "artifact_sha256": artifact_sha256, + }, + "restore_preview": { + "operation": "restore_stored_query_excerpts", + "dry_run": True, + "restore_apply_available": False, + "candidate_restore_count": 0, + "target_rows_found_count": 0, + "restorable_count": 0, + "already_has_query_preview_count": 0, + "missing_row_count": 0, + "skipped_count": 0, + }, + "privacy": { + "raw_query_preview_included": False, + "sample_values_included": False, + "artifact_contains_private_query_preview": False, + }, + "blocked_reasons": ["artifact_policy_invalid", "live_restore_not_implemented"], + "warnings": ["artifact_policy_invalid", "live_restore_not_implemented"], + } rows = artifact_payload.get("rows") if not isinstance(rows, list): raise ValueError("query-preview-cleanup restore artifact rows must be a list") @@ -4401,6 +4460,11 @@ def _dogfood_query_preview_cleanup_restore_dry_run_payload(args: argparse.Namesp created_at = row.get("created_at") candidate_rows.append({"id": int(row["id"]), "query_preview": query_preview, "created_at": created_at}) candidate_ids = [row["id"] for row in candidate_rows] + duplicate_id_count = len(candidate_ids) - len(set(candidate_ids)) + declared_row_count = artifact_payload.get("row_count") + declared_row_count_matches = declared_row_count == len(candidate_rows) + operation = artifact_payload.get("operation") + operation_valid = operation == "restore_stored_query_excerpts" eligible_ids_sha256 = _query_preview_cleanup_ids_sha256(candidate_ids) artifact_source_database = artifact_payload.get("source_database") artifact_source_fingerprint = ( @@ -4412,7 +4476,8 @@ def _dogfood_query_preview_cleanup_restore_dry_run_payload(args: argparse.Namesp restorable_count = 0 already_has_query_preview_count = 0 missing_row_count = 0 - if source_database_matched: + artifact_integrity_passed = duplicate_id_count == 0 and declared_row_count_matches and operation_valid + if source_database_matched and artifact_integrity_passed: with _open_readonly_sqlite(db_path) as connection: if not _table_exists(connection, "retrieval_observations"): missing_row_count = len(candidate_rows) @@ -4433,6 +4498,8 @@ def _dogfood_query_preview_cleanup_restore_dry_run_payload(args: argparse.Namesp skipped_count = already_has_query_preview_count + missing_row_count if not source_database_matched: skipped_count = len(candidate_rows) + if source_database_matched and not artifact_integrity_passed: + skipped_count = len(candidate_rows) warnings = ["live_restore_not_implemented"] blocked_reasons = ["live_restore_not_implemented"] if artifact_source_fingerprint is None: @@ -4441,9 +4508,18 @@ def _dogfood_query_preview_cleanup_restore_dry_run_payload(args: argparse.Namesp elif not source_database_matched: warnings.append("source_database_mismatch") blocked_reasons.append("source_database_mismatch") - if skipped_count and source_database_matched: + if not operation_valid: + warnings.append("artifact_operation_invalid") + blocked_reasons.append("artifact_operation_invalid") + if not declared_row_count_matches: + warnings.append("artifact_row_count_mismatch") + blocked_reasons.append("artifact_row_count_mismatch") + if duplicate_id_count: + warnings.append("duplicate_artifact_row_ids") + blocked_reasons.append("duplicate_artifact_row_ids") + if skipped_count and source_database_matched and artifact_integrity_passed: warnings.append("some_artifact_rows_are_not_currently_restorable") - status = "error" if not source_database_matched else "warning" + status = "error" if not source_database_matched or not artifact_integrity_passed else "warning" return { "kind": kind, "read_only": True, @@ -4455,13 +4531,20 @@ def _dogfood_query_preview_cleanup_restore_dry_run_payload(args: argparse.Namesp "path": str(artifact_path), "exists": True, "policy": policy, - "operation": artifact_payload.get("operation"), + "operation": operation, "parameters": artifact_payload.get("parameters", {}), "row_count": len(candidate_rows), + "declared_row_count": declared_row_count, "artifact_sha256": artifact_sha256, "eligible_ids_sha256": eligible_ids_sha256, "source_database": artifact_source_database if isinstance(artifact_source_database, dict) else None, }, + "artifact_integrity": { + "passed": artifact_integrity_passed, + "operation_valid": operation_valid, + "declared_row_count_matches": declared_row_count_matches, + "duplicate_id_count": duplicate_id_count, + }, "source_database_match": { "matched": source_database_matched, "artifact_fingerprint_sha256": artifact_source_fingerprint, diff --git a/tests/test_cli.py b/tests/test_cli.py index 7248eae..c953bfa 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -1,4 +1,5 @@ import fcntl +import hashlib import json import os import sqlite3 @@ -1740,6 +1741,137 @@ def test_python_module_cli_dogfood_query_preview_cleanup_restore_dry_run_blocks_ assert other_rows == [] +def test_python_module_cli_dogfood_query_preview_cleanup_restore_dry_run_reports_wrong_policy_as_read_only_error( + tmp_path: Path, +) -> None: + db_path = tmp_path / "query-preview-cleanup-wrong-policy.db" + initialize_database(db_path) + artifact_path = tmp_path / "wrong-policy-rollback-artifact.json" + artifact_path.write_text( + json.dumps( + { + "kind": "query_preview_cleanup_rollback_artifact", + "policy": "legacy-query-preview-cleanup-v0", + "operation": "restore_stored_query_excerpts", + "row_count": 0, + "rows": [], + } + ) + ) + + result = subprocess.run( + [ + sys.executable, + "-m", + "agent_memory.api.cli", + "dogfood", + "query-preview-cleanup-restore", + str(db_path), + str(artifact_path), + "--dry-run", + ], + cwd=Path(__file__).resolve().parents[1], + env={**os.environ, "PYTHONPATH": "src"}, + capture_output=True, + text=True, + ) + + assert result.returncode == 0, result.stderr + payload = json.loads(result.stdout) + assert payload["kind"] == "dogfood_query_preview_cleanup_restore_dry_run" + assert payload["read_only"] is True + assert payload["mutated"] is False + assert payload["status"] == "error" + assert payload["artifact"]["exists"] is True + assert payload["artifact"]["policy"] == "legacy-query-preview-cleanup-v0" + assert "artifact_policy_invalid" in payload["blocked_reasons"] + assert payload["restore_preview"]["restorable_count"] == 0 + assert "SHOULD_NOT_LEAK" not in result.stdout + assert "token=" not in result.stdout + + +def test_python_module_cli_dogfood_query_preview_cleanup_restore_dry_run_blocks_artifact_integrity_mismatch( + tmp_path: Path, +) -> None: + db_path = tmp_path / "query-preview-cleanup-integrity.db" + initialize_database(db_path) + with sqlite3.connect(db_path) as connection: + connection.execute( + """ + INSERT INTO retrieval_observations(surface, query_sha256, query_preview, preferred_scope, limit_value, created_at) + VALUES (?, ?, ?, ?, ?, ?) + """, + ("cli-test", "c" * 64, None, "project:integrity", 5, "2026-01-01 00:00:00"), + ) + + resolved_path = db_path.expanduser().resolve(strict=False) + source_database = { + "fingerprint_sha256": hashlib.sha256( + f"query-preview-cleanup-source-db-v1\0{resolved_path}".encode() + ).hexdigest(), + "fingerprint_version": "query-preview-cleanup-source-db-v1", + "path_sha256": hashlib.sha256(str(resolved_path).encode()).hexdigest(), + "path_basename": resolved_path.name, + } + artifact_path = tmp_path / "tampered-rollback-artifact.json" + artifact_path.write_text( + json.dumps( + { + "kind": "query_preview_cleanup_rollback_artifact", + "policy": "legacy-query-preview-cleanup-v1", + "operation": "restore_stored_query_excerpts", + "parameters": {"older_than": "2026-01-02T00:00:00"}, + "source_database": source_database, + "row_count": 3, + "rows": [ + {"id": 1, "query_preview": "token=SHOULD_NOT_LEAK", "created_at": "2026-01-01 00:00:00"}, + {"id": 1, "query_preview": "token=SHOULD_NOT_LEAK", "created_at": "2026-01-01 00:00:00"}, + ], + "privacy": {"artifact_contains_private_query_preview": True, "do_not_commit": True}, + } + ) + ) + + result = subprocess.run( + [ + sys.executable, + "-m", + "agent_memory.api.cli", + "dogfood", + "query-preview-cleanup-restore", + str(db_path), + str(artifact_path), + "--dry-run", + ], + cwd=Path(__file__).resolve().parents[1], + env={**os.environ, "PYTHONPATH": "src"}, + capture_output=True, + text=True, + ) + + assert result.returncode == 0, result.stderr + payload = json.loads(result.stdout) + assert payload["kind"] == "dogfood_query_preview_cleanup_restore_dry_run" + assert payload["read_only"] is True + assert payload["mutated"] is False + assert payload["status"] == "error" + assert payload["artifact"]["row_count"] == 2 + assert payload["artifact"]["declared_row_count"] == 3 + assert payload["artifact_integrity"]["passed"] is False + assert payload["artifact_integrity"]["duplicate_id_count"] == 1 + assert payload["artifact_integrity"]["declared_row_count_matches"] is False + assert "artifact_row_count_mismatch" in payload["blocked_reasons"] + assert "duplicate_artifact_row_ids" in payload["blocked_reasons"] + assert payload["restore_preview"]["restorable_count"] == 0 + assert payload["restore_preview"]["skipped_count"] == 2 + assert "SHOULD_NOT_LEAK" not in result.stdout + assert "token=" not in result.stdout + + with sqlite3.connect(db_path) as connection: + rows = connection.execute("SELECT id, query_preview FROM retrieval_observations ORDER BY id").fetchall() + assert rows == [(1, None)] + + def test_python_module_cli_dogfood_ordinary_trace_metadata_cleanup_apply_requires_actor_reason_and_fills_safe_defaults_without_leaks( tmp_path: Path, ) -> None: