diff --git a/repository-sensitive-artifact-guard/README.md b/repository-sensitive-artifact-guard/README.md new file mode 100644 index 00000000..f82957b0 --- /dev/null +++ b/repository-sensitive-artifact-guard/README.md @@ -0,0 +1,51 @@ +# Repository Sensitive Artifact Guard + +This module is a dependency-free, synthetic-data-only guard for SCIBASE Project +Repository & Version Control. It evaluates commit, tag, merge request, and +export bundle metadata before sensitive artifacts become durable in scientific +repository history. + +## What It Checks + +- Synthetic credential and private-key indicators. +- PHI-like or raw participant identifier signals. +- Restricted datasets accidentally routed to public export or DOI snapshot + surfaces. +- Notebook output leakage before reproducibility artifacts are tagged. +- Sensitive path names such as `.env`, credentials files, or key material. +- Large datasets, model weights, and binary results that should be routed + through Git LFS. +- Deterministic rewrite, remediation, LFS routing, and rollback packets. + +## Local Commands + +```bash +npm run check +npm test +npm run demo +``` + +The demo writes reviewer artifacts under `reports/`: + +- `sensitive-artifact-packet.json` +- `sensitive-artifact-report.md` +- `summary.svg` +- `demo.mp4` + +## Requirements Map + +| Issue #10 requirement | Coverage in this slice | +| --- | --- | +| Repository structure and components | Evaluates manuscript, data, code, notebook, results, protocols, and metadata paths. | +| File and metadata versioning | Produces commit-level decisions, audit digests, and rollback packets before merge/tag/export. | +| Git LFS support | Flags large datasets, model weights, and binary results that are not LFS pointers. | +| Collaboration and merge requests | Blocks merge/tag/export surfaces until steward remediation is complete. | +| Export bundles and DOI snapshots | Prevents restricted or sensitive synthetic artifacts from entering public release surfaces. 
| + +## Safety Boundaries + +- Uses only synthetic fixtures in `sample-data.js`. +- Does not scan real repositories, real secrets, patient data, private projects, + credentials, Git providers, or external services. +- Does not include real secret values, participant identifiers, or institutional + data. diff --git a/repository-sensitive-artifact-guard/demo.js b/repository-sensitive-artifact-guard/demo.js new file mode 100644 index 00000000..14453848 --- /dev/null +++ b/repository-sensitive-artifact-guard/demo.js @@ -0,0 +1,136 @@ +'use strict'; + +const fs = require('fs'); +const path = require('path'); +const { spawnSync } = require('child_process'); +const sampleBundles = require('./sample-data'); +const { evaluateRepository } = require('./index'); + +const reportDir = path.join(__dirname, 'reports'); +const asOfDate = '2026-05-22'; + +fs.mkdirSync(reportDir, { recursive: true }); + +const portfolio = evaluateRepository(sampleBundles, { asOfDate }); +const jsonPath = path.join(reportDir, 'sensitive-artifact-packet.json'); +const markdownPath = path.join(reportDir, 'sensitive-artifact-report.md'); +const svgPath = path.join(reportDir, 'summary.svg'); +const mp4Path = path.join(reportDir, 'demo.mp4'); + +fs.writeFileSync(jsonPath, `${JSON.stringify(portfolio, null, 2)}\n`); +fs.writeFileSync(markdownPath, renderMarkdown(portfolio)); +fs.writeFileSync(svgPath, renderSvg(portfolio)); +renderVideo(portfolio, mp4Path); + +console.log(`Wrote ${path.relative(process.cwd(), jsonPath)}`); +console.log(`Wrote ${path.relative(process.cwd(), markdownPath)}`); +console.log(`Wrote ${path.relative(process.cwd(), svgPath)}`); +console.log(`Wrote ${path.relative(process.cwd(), mp4Path)}`); + +function renderMarkdown(portfolio) { + return [ + '# Repository Sensitive Artifact Commit Guard Report', + '', + `As of: ${portfolio.asOfDate}`, + `Repository digest: \`${portfolio.auditDigest}\``, + '', + '## Summary', + '', + `- Commit bundles reviewed: ${portfolio.bundleCount}`, + `- 
Findings: ${portfolio.findingCount}`, + `- Held commits: ${portfolio.heldCommits.join(', ') || 'none'}`, + `- Actions: ${Object.entries(portfolio.byAction).map(([action, count]) => `${action}=${count}`).join(', ')}`, + '', + '## Bundle Decisions', + '', + '| Commit | Branch | Surface | Action | Severity | Findings | Held paths |', + '| --- | --- | --- | --- | --- | ---: | --- |', + ...portfolio.packets.map((packet) => `| ${packet.commitId} | ${packet.branch} | ${packet.targetSurface} | ${packet.action} | ${packet.severity} | ${packet.findingCount} | ${packet.heldPaths.join('
') || 'none'} |`), + '', + '## Guardrails', + '', + '- Uses synthetic commit, file, signal, and export metadata only.', + '- Does not scan real repositories, real secrets, patient data, private projects, or external services.', + '- Blocks release surfaces when synthetic secret, restricted-data, or patient-identifier indicators appear.', + '- Emits deterministic rewrite, LFS routing, remediation, and rollback packets for reviewers.', + '' + ].join('\n'); +} + +function renderSvg(portfolio) { + const actions = Object.entries(portfolio.byAction) + .map(([action, count]) => `${escapeXml(action)} (${count})`) + .join(' / '); + + return ` + + Repository sensitive artifact guard summary + Synthetic commit bundle decisions for sensitive artifact gating. + + + Repository sensitive artifact guard + Commit, tag, and export checks before sensitive artifacts become durable + + ${portfolio.bundleCount} bundles + commit/tag/export + + ${portfolio.heldCommits.length} held + rewrite or review + + ${portfolio.findingCount} findings + deterministic packets + Actions: ${actions} + Digest: ${portfolio.auditDigest.slice(0, 32)}... 
+ +`; +} + +function renderVideo(portfolio, outputPath) { + const font = '/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf'; + const filters = [ + drawText(font, 'SCIBASE #10 Project Repository & Version Control', 64, 72, 37, 'white'), + drawText(font, 'Repository sensitive artifact commit guard', 64, 136, 35, 'white'), + drawText(font, `Reviewed ${portfolio.bundleCount} synthetic commit bundles`, 84, 238, 33, '0xdbeafe'), + drawText(font, `${portfolio.heldCommits.length} commits held before merge tag or export`, 84, 304, 32, '0xfecaca'), + drawText(font, `${portfolio.findingCount} findings with rollback and remediation packets`, 84, 370, 30, '0xdcfce7'), + drawText(font, 'Checks synthetic credentials PHI markers restricted data notebook outputs and LFS gaps', 84, 462, 25, '0xe0f2fe'), + drawText(font, 'No real repository scans patient data secrets credentials or external services', 84, 522, 25, '0xfef3c7'), + drawText(font, `Audit digest ${portfolio.auditDigest.slice(0, 24)}`, 84, 596, 24, '0xcbd5e1') + ].join(','); + + const result = spawnSync('ffmpeg', [ + '-y', + '-f', 'lavfi', + '-i', 'color=c=0x1f2933:s=1280x720:d=4:r=25', + '-vf', filters, + '-c:v', 'libx264', + '-pix_fmt', 'yuv420p', + outputPath + ], { encoding: 'utf8' }); + + if (result.status !== 0) { + const message = [result.stdout, result.stderr].filter(Boolean).join('\n'); + throw new Error(`ffmpeg failed to render demo.mp4:\n${message}`); + } +} + +function drawText(font, text, x, y, size, color) { + return `drawtext=fontfile='${font}':text='${escapeDrawText(text)}':x=${x}:y=${y}:fontsize=${size}:fontcolor=${color}`; +} + +function escapeDrawText(value) { + return String(value) + .replace(/\\/g, '\\\\') + .replace(/:/g, '\\:') + .replace(/'/g, "\\'") + .replace(/,/g, '\\,') + .replace(/&/g, '\\&'); +} + +function escapeXml(value) { + return String(value) + .replace(/&/g, '&') + .replace(//g, '>') + .replace(/"/g, '"'); +} diff --git a/repository-sensitive-artifact-guard/index.js 
b/repository-sensitive-artifact-guard/index.js new file mode 100644 index 00000000..c3e5b011 --- /dev/null +++ b/repository-sensitive-artifact-guard/index.js @@ -0,0 +1,335 @@ +'use strict'; + +const crypto = require('crypto'); + +const DEFAULT_POLICY = Object.freeze({ + asOfDate: '2026-05-22', + largeFileBytes: 25 * 1024 * 1024, + publicExposureVisibilities: ['public', 'preprint_export', 'doi_snapshot'], + criticalSignals: ['credential_marker', 'private_key_marker', 'raw_patient_identifier', 'restricted_raw_dataset'], + highSignals: ['notebook_output_leak', 'consent_form_marker', 'human_subjects_column', 'license_restricted_data'], + lfsRequiredKinds: ['dataset', 'model_weights', 'binary_result'], + releaseSurfaces: ['tag', 'merge_request', 'export_bundle', 'doi_snapshot'] +}); + +function stableStringify(value) { + if (Array.isArray(value)) return `[${value.map(stableStringify).join(',')}]`; + if (value && typeof value === 'object') { + return `{${Object.keys(value).sort().map((key) => `${JSON.stringify(key)}:${stableStringify(value[key])}`).join(',')}}`; + } + return JSON.stringify(value); +} + +function digest(value) { + return crypto.createHash('sha256').update(stableStringify(value)).digest('hex'); +} + +function normalizeToken(value) { + return String(value || '').trim().toLowerCase().replace(/[^a-z0-9]+/g, '_').replace(/^_+|_+$/g, ''); +} + +function normalizePath(path) { + return String(path || '').replace(/\\/g, '/').replace(/^\/+/, ''); +} + +function inferComponent(path) { + const normalized = normalizePath(path); + const first = normalized.split('/')[0] || 'root'; + if (['manuscript', 'data', 'code', 'notebooks', 'results', 'protocols'].includes(first)) return first; + if (normalized === 'metadata.json') return 'metadata'; + return first; +} + +function isPublicExposure(artifact, policy) { + const visibility = normalizeToken(artifact.visibility || artifact.exportVisibility || 'private'); + return policy.publicExposureVisibilities.includes(visibility); 
+} + +function artifactSignals(artifact) { + return new Set((artifact.signals || []).map(normalizeToken).filter(Boolean)); +} + +function addFinding(findings, code, severity, message, remediation) { + findings.push({ code, severity, message, remediation }); +} + +function evaluateArtifact(artifact, options = {}) { + const policy = Object.assign({}, DEFAULT_POLICY, options.policy || {}); + const path = normalizePath(artifact.path); + const component = artifact.component || inferComponent(path); + const kind = normalizeToken(artifact.kind || component); + const signals = artifactSignals(artifact); + const findings = []; + const publicExposure = isPublicExposure(artifact, policy); + const sizeBytes = Number(artifact.sizeBytes || 0); + const lfsPointer = Boolean(artifact.lfsPointer); + + for (const signal of signals) { + if (policy.criticalSignals.includes(signal)) { + addFinding( + findings, + signal, + publicExposure ? 'critical' : 'high', + `${path} contains ${signal.replace(/_/g, ' ')} evidence`, + publicExposure + ? 'remove artifact from public/tag/export surfaces and rewrite the commit before merge' + : 'route artifact to restricted storage and require steward approval before merge' + ); + } else if (policy.highSignals.includes(signal)) { + addFinding( + findings, + signal, + publicExposure ? 'high' : 'medium', + `${path} contains ${signal.replace(/_/g, ' ')} evidence`, + publicExposure + ? 'redact or replace the artifact before creating a public snapshot' + : 'attach restricted-data review evidence before merge' + ); + } + } + + if (/(\.env|\.pem|id_rsa|credentials|secrets?)($|[./_-])/i.test(path)) { + addFinding( + findings, + 'sensitive_path_name', + publicExposure ? 
'critical' : 'high', + `${path} matches a sensitive path naming rule`, + 'move the file out of version control and add a deny rule before rewriting the commit' + ); + } + + if (sizeBytes > policy.largeFileBytes && !lfsPointer && policy.lfsRequiredKinds.includes(kind)) { + addFinding( + findings, + 'missing_lfs_pointer', + 'medium', + `${path} is ${sizeBytes} bytes and is not routed through Git LFS`, + 'replace inline blob with an LFS pointer and preserve the content hash in the manifest' + ); + } + + if (publicExposure && normalizeToken(artifact.dataClass) === 'restricted') { + addFinding( + findings, + 'restricted_data_public_export', + 'critical', + `${path} is restricted but is included in a public release surface`, + 'block public export and create a restricted-access manifest entry' + ); + } + + const selected = selectArtifactAction(findings); + const packet = { + path, + component, + kind, + visibility: normalizeToken(artifact.visibility || artifact.exportVisibility || 'private'), + dataClass: normalizeToken(artifact.dataClass || 'unspecified'), + sizeBytes, + lfsPointer, + publicExposure, + action: selected.action, + severity: selected.severity, + reason: selected.reason, + findingCount: findings.length, + findings + }; + + packet.auditDigest = digest(Object.assign({}, packet, { auditDigest: undefined })); + return packet; +} + +function selectArtifactAction(findings) { + if (findings.some((finding) => finding.severity === 'critical')) { + return { + action: 'reject_commit_and_quarantine_export', + severity: 'critical', + reason: 'critical sensitive artifact exposure is present in a release surface' + }; + } + + if (findings.some((finding) => finding.severity === 'high')) { + return { + action: 'hold_tag_for_steward_review', + severity: 'high', + reason: 'sensitive artifact evidence needs steward review before merge or tag' + }; + } + + if (findings.some((finding) => finding.severity === 'medium')) { + return { + action: 
'require_lfs_or_redaction_before_merge', + severity: 'medium', + reason: 'repository hygiene remediation is required before merge' + }; + } + + return { + action: 'allow_commit_tag_export', + severity: 'low', + reason: 'artifact satisfies sensitive-data and LFS policy' + }; +} + +function evaluateCommitBundle(bundle, options = {}) { + const policy = Object.assign({}, DEFAULT_POLICY, options.policy || {}, { + asOfDate: options.asOfDate || (options.policy && options.policy.asOfDate) || DEFAULT_POLICY.asOfDate + }); + + const artifacts = (bundle.artifacts || []).map((artifact) => evaluateArtifact(artifact, { policy })); + const selected = selectBundleAction(artifacts); + const rollbackPacket = buildRollbackPacket(bundle, artifacts, selected); + const summary = artifacts.reduce((acc, artifact) => { + acc.byAction[artifact.action] = (acc.byAction[artifact.action] || 0) + 1; + acc.bySeverity[artifact.severity] = (acc.bySeverity[artifact.severity] || 0) + 1; + if (artifact.severity === 'critical' || artifact.severity === 'high') { + acc.heldPaths.push(artifact.path); + } + acc.findingCount += artifact.findingCount; + return acc; + }, { + byAction: {}, + bySeverity: {}, + heldPaths: [], + findingCount: 0 + }); + + const packet = { + asOfDate: policy.asOfDate, + repositoryId: bundle.repositoryId, + branch: bundle.branch, + targetSurface: normalizeToken(bundle.targetSurface || 'merge_request'), + commitId: bundle.commitId, + action: selected.action, + severity: selected.severity, + reason: selected.reason, + artifactCount: artifacts.length, + findingCount: summary.findingCount, + heldPaths: summary.heldPaths, + byAction: summary.byAction, + bySeverity: summary.bySeverity, + artifacts, + rollbackPacket, + policySnapshot: { + largeFileBytes: policy.largeFileBytes, + releaseSurfaces: policy.releaseSurfaces.slice(), + publicExposureVisibilities: policy.publicExposureVisibilities.slice() + } + }; + + packet.auditDigest = digest(Object.assign({}, packet, { auditDigest: undefined 
})); + return packet; +} + +function selectBundleAction(artifacts) { + if (artifacts.some((artifact) => artifact.severity === 'critical')) { + return { + action: 'reject_bundle_and_require_history_rewrite', + severity: 'critical', + reason: 'commit bundle contains critical sensitive artifact exposure' + }; + } + + if (artifacts.some((artifact) => artifact.severity === 'high')) { + return { + action: 'hold_release_for_steward_review', + severity: 'high', + reason: 'commit bundle contains sensitive artifacts that need steward approval' + }; + } + + if (artifacts.some((artifact) => artifact.severity === 'medium')) { + return { + action: 'require_repository_hygiene_fix', + severity: 'medium', + reason: 'commit bundle needs LFS routing or redaction before merge' + }; + } + + return { + action: 'allow_merge_tag_export', + severity: 'low', + reason: 'commit bundle satisfies sensitive-artifact policy' + }; +} + +function buildRollbackPacket(bundle, artifacts, selected) { + const rejectedArtifacts = artifacts.filter((artifact) => artifact.severity === 'critical' || artifact.severity === 'high'); + const hygieneArtifacts = artifacts.filter((artifact) => artifact.severity === 'medium'); + const remediationActions = []; + + for (const artifact of rejectedArtifacts) { + for (const finding of artifact.findings) { + remediationActions.push({ + path: artifact.path, + code: finding.code, + action: finding.remediation + }); + } + } + + for (const artifact of hygieneArtifacts) { + remediationActions.push({ + path: artifact.path, + code: 'repository_hygiene', + action: artifact.findings.map((finding) => finding.remediation).join('; ') + }); + } + + return { + commitId: bundle.commitId, + branch: bundle.branch, + decision: selected.action, + rewriteRequired: selected.severity === 'critical', + releaseHoldRequired: selected.severity === 'critical' || selected.severity === 'high', + heldPaths: rejectedArtifacts.map((artifact) => artifact.path), + lfsPaths: 
hygieneArtifacts.filter((artifact) => artifact.findings.some((finding) => finding.code === 'missing_lfs_pointer')).map((artifact) => artifact.path), + remediationActions, + rollbackDigest: digest({ + commitId: bundle.commitId, + rejected: rejectedArtifacts.map((artifact) => artifact.auditDigest), + hygiene: hygieneArtifacts.map((artifact) => artifact.auditDigest) + }) + }; +} + +function evaluateRepository(bundles, options = {}) { + const packets = bundles.map((bundle) => evaluateCommitBundle(bundle, options)); + const summary = packets.reduce((acc, packet) => { + acc.byAction[packet.action] = (acc.byAction[packet.action] || 0) + 1; + acc.bySeverity[packet.severity] = (acc.bySeverity[packet.severity] || 0) + 1; + if (packet.severity === 'critical' || packet.severity === 'high') { + acc.heldCommits.push(packet.commitId); + } + acc.findingCount += packet.findingCount; + return acc; + }, { + byAction: {}, + bySeverity: {}, + heldCommits: [], + findingCount: 0 + }); + + const portfolio = { + asOfDate: options.asOfDate || DEFAULT_POLICY.asOfDate, + bundleCount: packets.length, + findingCount: summary.findingCount, + heldCommits: summary.heldCommits, + byAction: summary.byAction, + bySeverity: summary.bySeverity, + packets + }; + + portfolio.auditDigest = digest(portfolio); + return portfolio; +} + +module.exports = { + DEFAULT_POLICY, + digest, + evaluateArtifact, + evaluateCommitBundle, + evaluateRepository, + inferComponent, + normalizePath +}; diff --git a/repository-sensitive-artifact-guard/package.json b/repository-sensitive-artifact-guard/package.json new file mode 100644 index 00000000..f0d4b0cb --- /dev/null +++ b/repository-sensitive-artifact-guard/package.json @@ -0,0 +1,13 @@ +{ + "name": "repository-sensitive-artifact-guard", + "version": "1.0.0", + "private": true, + "description": "Synthetic sensitive artifact commit guard for SCIBASE project repository version control.", + "main": "index.js", + "scripts": { + "check": "node --check index.js && node 
--check sample-data.js && node --check test.js && node --check demo.js", + "test": "node test.js", + "demo": "node demo.js" + }, + "license": "MIT" +} diff --git a/repository-sensitive-artifact-guard/reports/demo.mp4 b/repository-sensitive-artifact-guard/reports/demo.mp4 new file mode 100644 index 00000000..a5d3c0ce Binary files /dev/null and b/repository-sensitive-artifact-guard/reports/demo.mp4 differ diff --git a/repository-sensitive-artifact-guard/reports/sensitive-artifact-packet.json b/repository-sensitive-artifact-guard/reports/sensitive-artifact-packet.json new file mode 100644 index 00000000..55c470fa --- /dev/null +++ b/repository-sensitive-artifact-guard/reports/sensitive-artifact-packet.json @@ -0,0 +1,467 @@ +{ + "asOfDate": "2026-05-22", + "bundleCount": 4, + "findingCount": 7, + "heldCommits": [ + "syn-commit-a1" + ], + "byAction": { + "reject_bundle_and_require_history_rewrite": 1, + "require_repository_hygiene_fix": 2, + "allow_merge_tag_export": 1 + }, + "bySeverity": { + "critical": 1, + "medium": 2, + "low": 1 + }, + "packets": [ + { + "asOfDate": "2026-05-22", + "repositoryId": "repo-immunology-preprint", + "branch": "release/preprint-v2", + "targetSurface": "doi_snapshot", + "commitId": "syn-commit-a1", + "action": "reject_bundle_and_require_history_rewrite", + "severity": "critical", + "reason": "commit bundle contains critical sensitive artifact exposure", + "artifactCount": 3, + "findingCount": 5, + "heldPaths": [ + "notebooks/immune-response-analysis.ipynb", + "data/raw-participant-table.csv" + ], + "byAction": { + "reject_commit_and_quarantine_export": 2, + "allow_commit_tag_export": 1 + }, + "bySeverity": { + "critical": 2, + "low": 1 + }, + "artifacts": [ + { + "path": "notebooks/immune-response-analysis.ipynb", + "component": "notebooks", + "kind": "notebook", + "visibility": "doi_snapshot", + "dataClass": "controlled", + "sizeBytes": 420000, + "lfsPointer": false, + "publicExposure": true, + "action": 
"reject_commit_and_quarantine_export", + "severity": "critical", + "reason": "critical sensitive artifact exposure is present in a release surface", + "findingCount": 2, + "findings": [ + { + "code": "notebook_output_leak", + "severity": "high", + "message": "notebooks/immune-response-analysis.ipynb contains notebook output leak evidence", + "remediation": "redact or replace the artifact before creating a public snapshot" + }, + { + "code": "credential_marker", + "severity": "critical", + "message": "notebooks/immune-response-analysis.ipynb contains credential marker evidence", + "remediation": "remove artifact from public/tag/export surfaces and rewrite the commit before merge" + } + ], + "auditDigest": "220204068bbc84a2b447c1c16cbd18a963d47a6fa01e08ac06add1e211d32ac4" + }, + { + "path": "data/raw-participant-table.csv", + "component": "data", + "kind": "dataset", + "visibility": "doi_snapshot", + "dataClass": "restricted", + "sizeBytes": 3100000, + "lfsPointer": true, + "publicExposure": true, + "action": "reject_commit_and_quarantine_export", + "severity": "critical", + "reason": "critical sensitive artifact exposure is present in a release surface", + "findingCount": 3, + "findings": [ + { + "code": "raw_patient_identifier", + "severity": "critical", + "message": "data/raw-participant-table.csv contains raw patient identifier evidence", + "remediation": "remove artifact from public/tag/export surfaces and rewrite the commit before merge" + }, + { + "code": "human_subjects_column", + "severity": "high", + "message": "data/raw-participant-table.csv contains human subjects column evidence", + "remediation": "redact or replace the artifact before creating a public snapshot" + }, + { + "code": "restricted_data_public_export", + "severity": "critical", + "message": "data/raw-participant-table.csv is restricted but is included in a public release surface", + "remediation": "block public export and create a restricted-access manifest entry" + } + ], + "auditDigest": 
"3313b25881330eb53b3353c89819ca949859cd917342fe1e9eec11393914a5ed" + }, + { + "path": "metadata.json", + "component": "metadata", + "kind": "metadata", + "visibility": "doi_snapshot", + "dataClass": "public", + "sizeBytes": 9000, + "lfsPointer": false, + "publicExposure": true, + "action": "allow_commit_tag_export", + "severity": "low", + "reason": "artifact satisfies sensitive-data and LFS policy", + "findingCount": 0, + "findings": [], + "auditDigest": "02cdf3814edd4569cadf8bef8e934a4146358c5916f868f2783c70fd2bc594a7" + } + ], + "rollbackPacket": { + "commitId": "syn-commit-a1", + "branch": "release/preprint-v2", + "decision": "reject_bundle_and_require_history_rewrite", + "rewriteRequired": true, + "releaseHoldRequired": true, + "heldPaths": [ + "notebooks/immune-response-analysis.ipynb", + "data/raw-participant-table.csv" + ], + "lfsPaths": [], + "remediationActions": [ + { + "path": "notebooks/immune-response-analysis.ipynb", + "code": "notebook_output_leak", + "action": "redact or replace the artifact before creating a public snapshot" + }, + { + "path": "notebooks/immune-response-analysis.ipynb", + "code": "credential_marker", + "action": "remove artifact from public/tag/export surfaces and rewrite the commit before merge" + }, + { + "path": "data/raw-participant-table.csv", + "code": "raw_patient_identifier", + "action": "remove artifact from public/tag/export surfaces and rewrite the commit before merge" + }, + { + "path": "data/raw-participant-table.csv", + "code": "human_subjects_column", + "action": "redact or replace the artifact before creating a public snapshot" + }, + { + "path": "data/raw-participant-table.csv", + "code": "restricted_data_public_export", + "action": "block public export and create a restricted-access manifest entry" + } + ], + "rollbackDigest": "2d361cb08289483349152f81801f8a51b8781c397b909d7ee79beca05affcc61" + }, + "policySnapshot": { + "largeFileBytes": 26214400, + "releaseSurfaces": [ + "tag", + "merge_request", + 
"export_bundle", + "doi_snapshot" + ], + "publicExposureVisibilities": [ + "public", + "preprint_export", + "doi_snapshot" + ] + }, + "auditDigest": "424383e3c6a982552096818218e1e28e5c698d40382e2a7f5d7a0fd61b16eb33" + }, + { + "asOfDate": "2026-05-22", + "repositoryId": "repo-vision-model", + "branch": "experiment/model-card", + "targetSurface": "merge_request", + "commitId": "syn-commit-b2", + "action": "require_repository_hygiene_fix", + "severity": "medium", + "reason": "commit bundle needs LFS routing or redaction before merge", + "artifactCount": 2, + "findingCount": 1, + "heldPaths": [], + "byAction": { + "require_lfs_or_redaction_before_merge": 1, + "allow_commit_tag_export": 1 + }, + "bySeverity": { + "medium": 1, + "low": 1 + }, + "artifacts": [ + { + "path": "results/model-weights.bin", + "component": "results", + "kind": "model_weights", + "visibility": "private", + "dataClass": "internal", + "sizeBytes": 88000000, + "lfsPointer": false, + "publicExposure": false, + "action": "require_lfs_or_redaction_before_merge", + "severity": "medium", + "reason": "repository hygiene remediation is required before merge", + "findingCount": 1, + "findings": [ + { + "code": "missing_lfs_pointer", + "severity": "medium", + "message": "results/model-weights.bin is 88000000 bytes and is not routed through Git LFS", + "remediation": "replace inline blob with an LFS pointer and preserve the content hash in the manifest" + } + ], + "auditDigest": "7ebcf0dd67a4fed01b63399c3b80fc9e65e6d6a8d49fd798a847e7e1dda66646" + }, + { + "path": "code/train.py", + "component": "code", + "kind": "code", + "visibility": "private", + "dataClass": "internal", + "sizeBytes": 18000, + "lfsPointer": false, + "publicExposure": false, + "action": "allow_commit_tag_export", + "severity": "low", + "reason": "artifact satisfies sensitive-data and LFS policy", + "findingCount": 0, + "findings": [], + "auditDigest": "faae729dacbc638fafbfae0891042f66917d0fe999de874d48270ea1f9350e6b" + } + ], + 
"rollbackPacket": { + "commitId": "syn-commit-b2", + "branch": "experiment/model-card", + "decision": "require_repository_hygiene_fix", + "rewriteRequired": false, + "releaseHoldRequired": false, + "heldPaths": [], + "lfsPaths": [ + "results/model-weights.bin" + ], + "remediationActions": [ + { + "path": "results/model-weights.bin", + "code": "repository_hygiene", + "action": "replace inline blob with an LFS pointer and preserve the content hash in the manifest" + } + ], + "rollbackDigest": "d009487d0a8e8725e85f33627f61ae977a6790a5dc2fc2ffb5efb9f676e14d7c" + }, + "policySnapshot": { + "largeFileBytes": 26214400, + "releaseSurfaces": [ + "tag", + "merge_request", + "export_bundle", + "doi_snapshot" + ], + "publicExposureVisibilities": [ + "public", + "preprint_export", + "doi_snapshot" + ] + }, + "auditDigest": "b60678148e4c1bfc725fd083eb877c64197715d12c56c569b7efb27becf8781d" + }, + { + "asOfDate": "2026-05-22", + "repositoryId": "repo-clinical-protocol", + "branch": "protocol/steward-review", + "targetSurface": "tag", + "commitId": "syn-commit-c3", + "action": "require_repository_hygiene_fix", + "severity": "medium", + "reason": "commit bundle needs LFS routing or redaction before merge", + "artifactCount": 2, + "findingCount": 1, + "heldPaths": [], + "byAction": { + "require_lfs_or_redaction_before_merge": 1, + "allow_commit_tag_export": 1 + }, + "bySeverity": { + "medium": 1, + "low": 1 + }, + "artifacts": [ + { + "path": "protocols/consent-appendix.md", + "component": "protocols", + "kind": "protocol", + "visibility": "private", + "dataClass": "restricted", + "sizeBytes": 25000, + "lfsPointer": false, + "publicExposure": false, + "action": "require_lfs_or_redaction_before_merge", + "severity": "medium", + "reason": "repository hygiene remediation is required before merge", + "findingCount": 1, + "findings": [ + { + "code": "consent_form_marker", + "severity": "medium", + "message": "protocols/consent-appendix.md contains consent form marker evidence", + 
"remediation": "attach restricted-data review evidence before merge" + } + ], + "auditDigest": "3cd1a0e4d71d7975d585a1abaf2a2420e5cdecd5b2be3cec6cbc0630e28759b5" + }, + { + "path": "data/deidentified-summary.csv", + "component": "data", + "kind": "dataset", + "visibility": "private", + "dataClass": "controlled", + "sizeBytes": 1400000, + "lfsPointer": true, + "publicExposure": false, + "action": "allow_commit_tag_export", + "severity": "low", + "reason": "artifact satisfies sensitive-data and LFS policy", + "findingCount": 0, + "findings": [], + "auditDigest": "22875de84ec3a319f7385d6ea559446f06b4ef2305562b5a08e0ae43bde59447" + } + ], + "rollbackPacket": { + "commitId": "syn-commit-c3", + "branch": "protocol/steward-review", + "decision": "require_repository_hygiene_fix", + "rewriteRequired": false, + "releaseHoldRequired": false, + "heldPaths": [], + "lfsPaths": [], + "remediationActions": [ + { + "path": "protocols/consent-appendix.md", + "code": "repository_hygiene", + "action": "attach restricted-data review evidence before merge" + } + ], + "rollbackDigest": "df4de51ae2c78e04b47af6142bd9c7503fdd1fe53b37ab34647a9e381a9cb76e" + }, + "policySnapshot": { + "largeFileBytes": 26214400, + "releaseSurfaces": [ + "tag", + "merge_request", + "export_bundle", + "doi_snapshot" + ], + "publicExposureVisibilities": [ + "public", + "preprint_export", + "doi_snapshot" + ] + }, + "auditDigest": "8050d881c9e55d35d18586d56c62bd43ead20f3ce5d403bddd0c70a08781622a" + }, + { + "asOfDate": "2026-05-22", + "repositoryId": "repo-open-methods", + "branch": "main", + "targetSurface": "export_bundle", + "commitId": "syn-commit-d4", + "action": "allow_merge_tag_export", + "severity": "low", + "reason": "commit bundle satisfies sensitive-artifact policy", + "artifactCount": 3, + "findingCount": 0, + "heldPaths": [], + "byAction": { + "allow_commit_tag_export": 3 + }, + "bySeverity": { + "low": 3 + }, + "artifacts": [ + { + "path": "manuscript/paper.md", + "component": "manuscript", + 
"kind": "manuscript", + "visibility": "preprint_export", + "dataClass": "public", + "sizeBytes": 35000, + "lfsPointer": false, + "publicExposure": true, + "action": "allow_commit_tag_export", + "severity": "low", + "reason": "artifact satisfies sensitive-data and LFS policy", + "findingCount": 0, + "findings": [], + "auditDigest": "df93c69e65ee4841da2e653425a550955496e2ede5992eadb10c7e9c4293137b" + }, + { + "path": "code/analysis.py", + "component": "code", + "kind": "code", + "visibility": "preprint_export", + "dataClass": "public", + "sizeBytes": 22000, + "lfsPointer": false, + "publicExposure": true, + "action": "allow_commit_tag_export", + "severity": "low", + "reason": "artifact satisfies sensitive-data and LFS policy", + "findingCount": 0, + "findings": [], + "auditDigest": "a5954543617d7530c6145088cf4d6ebb351f217d527b36abb4c2daa0c0e53c87" + }, + { + "path": "data/aggregate-results.csv", + "component": "data", + "kind": "dataset", + "visibility": "preprint_export", + "dataClass": "public", + "sizeBytes": 1800000, + "lfsPointer": true, + "publicExposure": true, + "action": "allow_commit_tag_export", + "severity": "low", + "reason": "artifact satisfies sensitive-data and LFS policy", + "findingCount": 0, + "findings": [], + "auditDigest": "86e5ceef48c7725fd22421b41f55777bdbc44d6226fee32d28a179ba8b2287aa" + } + ], + "rollbackPacket": { + "commitId": "syn-commit-d4", + "branch": "main", + "decision": "allow_merge_tag_export", + "rewriteRequired": false, + "releaseHoldRequired": false, + "heldPaths": [], + "lfsPaths": [], + "remediationActions": [], + "rollbackDigest": "b714fd9e51e1f1470afa48f675b7612d0c6c308243f9dfbca00dc93f40f058de" + }, + "policySnapshot": { + "largeFileBytes": 26214400, + "releaseSurfaces": [ + "tag", + "merge_request", + "export_bundle", + "doi_snapshot" + ], + "publicExposureVisibilities": [ + "public", + "preprint_export", + "doi_snapshot" + ] + }, + "auditDigest": "08e00cfb14a099c30e7fc771c6044cd3afebebcff0e0b94a646f5f4e8eecf327" + } + ], 
+ "auditDigest": "e1296212638d85509a535fc84efed4e2d03a9ec40e22e217970b4dd876304373" +} diff --git a/repository-sensitive-artifact-guard/reports/sensitive-artifact-report.md b/repository-sensitive-artifact-guard/reports/sensitive-artifact-report.md new file mode 100644 index 00000000..f6013bdb --- /dev/null +++ b/repository-sensitive-artifact-guard/reports/sensitive-artifact-report.md @@ -0,0 +1,27 @@ +# Repository Sensitive Artifact Commit Guard Report + +As of: 2026-05-22 +Repository digest: `e1296212638d85509a535fc84efed4e2d03a9ec40e22e217970b4dd876304373` + +## Summary + +- Commit bundles reviewed: 4 +- Findings: 7 +- Held commits: syn-commit-a1 +- Actions: reject_bundle_and_require_history_rewrite=1, require_repository_hygiene_fix=2, allow_merge_tag_export=1 + +## Bundle Decisions + +| Commit | Branch | Surface | Action | Severity | Findings | Held paths | +| --- | --- | --- | --- | --- | ---: | --- | +| syn-commit-a1 | release/preprint-v2 | doi_snapshot | reject_bundle_and_require_history_rewrite | critical | 5 | notebooks/immune-response-analysis.ipynb<br>data/raw-participant-table.csv | +| syn-commit-b2 | experiment/model-card | merge_request | require_repository_hygiene_fix | medium | 1 | none | +| syn-commit-c3 | protocol/steward-review | tag | require_repository_hygiene_fix | medium | 1 | none | +| syn-commit-d4 | main | export_bundle | allow_merge_tag_export | low | 0 | none | + +## Guardrails + +- Uses synthetic commit, file, signal, and export metadata only. +- Does not scan real repositories, real secrets, patient data, private projects, or external services. +- Blocks release surfaces when synthetic secret, restricted-data, or patient-identifier indicators appear. +- Emits deterministic rewrite, LFS routing, remediation, and rollback packets for reviewers. diff --git a/repository-sensitive-artifact-guard/reports/summary.svg b/repository-sensitive-artifact-guard/reports/summary.svg new file mode 100644 index 00000000..703f0e5d --- /dev/null +++ b/repository-sensitive-artifact-guard/reports/summary.svg @@ -0,0 +1,20 @@ + + + Repository sensitive artifact guard summary + Synthetic commit bundle decisions for sensitive artifact gating. + + + Repository sensitive artifact guard + Commit, tag, and export checks before sensitive artifacts become durable + + 4 bundles + commit/tag/export + + 1 held + rewrite or review + + 7 findings + deterministic packets + Actions: reject_bundle_and_require_history_rewrite (1) / require_repository_hygiene_fix (2) / allow_merge_tag_export (1) + Digest: e1296212638d85509a535fc84efed4e2...
+ diff --git a/repository-sensitive-artifact-guard/sample-data.js b/repository-sensitive-artifact-guard/sample-data.js new file mode 100644 index 00000000..d5da11a2 --- /dev/null +++ b/repository-sensitive-artifact-guard/sample-data.js @@ -0,0 +1,126 @@ +'use strict'; +/** Synthetic commit-bundle fixtures required by demo.js and test.js. Each bundle carries repositoryId, branch, targetSurface, commitId and an artifacts list; each artifact carries path, kind, visibility, dataClass, sizeBytes, lfsPointer and signals. */ +module.exports = [ + { + repositoryId: 'repo-immunology-preprint', + branch: 'release/preprint-v2', + targetSurface: 'doi_snapshot', + commitId: 'syn-commit-a1', + artifacts: [ + { + path: 'notebooks/immune-response-analysis.ipynb', + kind: 'notebook', + visibility: 'doi_snapshot', + dataClass: 'controlled', + sizeBytes: 420000, + lfsPointer: false, + signals: ['notebook_output_leak', 'credential_marker'] + }, + { + path: 'data/raw-participant-table.csv', + kind: 'dataset', + visibility: 'doi_snapshot', + dataClass: 'restricted', + sizeBytes: 3100000, + lfsPointer: true, + signals: ['raw_patient_identifier', 'human_subjects_column'] + }, + { + path: 'metadata.json', + kind: 'metadata', + visibility: 'doi_snapshot', + dataClass: 'public', + sizeBytes: 9000, + lfsPointer: false, + signals: [] + } + ] + }, + { + repositoryId: 'repo-vision-model', + branch: 'experiment/model-card', + targetSurface: 'merge_request', + commitId: 'syn-commit-b2', + artifacts: [ + { + path: 'results/model-weights.bin', + kind: 'model_weights', + visibility: 'private', + dataClass: 'internal', + sizeBytes: 88000000, + lfsPointer: false, + signals: [] + }, + { + path: 'code/train.py', + kind: 'code', + visibility: 'private', + dataClass: 'internal', + sizeBytes: 18000, + lfsPointer: false, + signals: [] + } + ] + }, + { + repositoryId: 'repo-clinical-protocol', + branch: 'protocol/steward-review', + targetSurface: 'tag', + commitId: 'syn-commit-c3', + artifacts: [ + { + path: 'protocols/consent-appendix.md', + kind: 'protocol', + visibility: 'private', + dataClass: 'restricted', + sizeBytes: 25000, + lfsPointer: false, + signals: ['consent_form_marker'] + }, + { + path: 'data/deidentified-summary.csv', + kind: 'dataset', + 
visibility: 'private', + dataClass: 'controlled', + sizeBytes: 1400000, + lfsPointer: true, + signals: [] + } + ] + }, + { + repositoryId: 'repo-open-methods', + branch: 'main', + targetSurface: 'export_bundle', + commitId: 'syn-commit-d4', + artifacts: [ + { + path: 'manuscript/paper.md', + kind: 'manuscript', + visibility: 'preprint_export', + dataClass: 'public', + sizeBytes: 35000, + lfsPointer: false, + signals: [] + }, + { + path: 'code/analysis.py', + kind: 'code', + visibility: 'preprint_export', + dataClass: 'public', + sizeBytes: 22000, + lfsPointer: false, + signals: [] + }, + { + path: 'data/aggregate-results.csv', + kind: 'dataset', + visibility: 'preprint_export', + dataClass: 'public', + sizeBytes: 1800000, + lfsPointer: true, + signals: [] + } + ] + } +]; diff --git a/repository-sensitive-artifact-guard/test.js b/repository-sensitive-artifact-guard/test.js new file mode 100644 index 00000000..bc623abe --- /dev/null +++ b/repository-sensitive-artifact-guard/test.js @@ -0,0 +1,129 @@ +'use strict'; + +const assert = require('assert'); +const sampleBundles = require('./sample-data'); +const { + evaluateArtifact, + evaluateCommitBundle, + evaluateRepository, + inferComponent, + normalizePath +} = require('./index'); + +const asOfDate = '2026-05-22'; + +function testPathNormalizationAndComponentInference() { + assert.equal(normalizePath('/data/raw.csv'), 'data/raw.csv'); + assert.equal(inferComponent('notebooks/run.ipynb'), 'notebooks'); + assert.equal(inferComponent('metadata.json'), 'metadata'); +} + +function testPublicCredentialExposureRejectsArtifact() { + const packet = evaluateArtifact({ + path: 'notebooks/example.ipynb', + kind: 'notebook', + visibility: 'doi_snapshot', + dataClass: 'controlled', + sizeBytes: 12000, + lfsPointer: false, + signals: ['credential_marker'] + }); + + assert.equal(packet.action, 'reject_commit_and_quarantine_export'); + assert.equal(packet.severity, 'critical'); + assert.ok(packet.findings.some((finding) => 
finding.code === 'credential_marker')); +} + +function testSensitivePathNameIsHeld() { + const packet = evaluateArtifact({ + path: 'code/.env', + kind: 'code', + visibility: 'private', + dataClass: 'internal', + sizeBytes: 300, + lfsPointer: false, + signals: [] + }); + + assert.equal(packet.action, 'hold_tag_for_steward_review'); + assert.ok(packet.findings.some((finding) => finding.code === 'sensitive_path_name')); +} + +function testMissingLfsPointerRequiresHygieneFix() { + const packet = evaluateArtifact({ + path: 'results/model-weights.bin', + kind: 'model_weights', + visibility: 'private', + dataClass: 'internal', + sizeBytes: 88000000, + lfsPointer: false, + signals: [] + }); + + assert.equal(packet.action, 'require_lfs_or_redaction_before_merge'); + assert.equal(packet.severity, 'medium'); + assert.ok(packet.findings.some((finding) => finding.code === 'missing_lfs_pointer')); +} + +function testCleanArtifactAllowed() { + const packet = evaluateArtifact({ + path: 'code/analysis.py', + kind: 'code', + visibility: 'preprint_export', + dataClass: 'public', + sizeBytes: 22000, + lfsPointer: false, + signals: [] + }); + + assert.equal(packet.action, 'allow_commit_tag_export'); + assert.equal(packet.findingCount, 0); +} + +function testCriticalBundleBuildsRewritePacket() { + const packet = evaluateCommitBundle(sampleBundles[0], { asOfDate }); + + assert.equal(packet.action, 'reject_bundle_and_require_history_rewrite'); + assert.equal(packet.severity, 'critical'); + assert.equal(packet.rollbackPacket.rewriteRequired, true); + assert.ok(packet.heldPaths.includes('notebooks/immune-response-analysis.ipynb')); + assert.ok(packet.heldPaths.includes('data/raw-participant-table.csv')); + assert.equal(packet.auditDigest.length, 64); +} + +function testPrivateConsentFormHoldsReleaseWithoutRewrite() { + const packet = evaluateCommitBundle(sampleBundles[2], { asOfDate }); + + assert.equal(packet.action, 'require_repository_hygiene_fix'); + assert.equal(packet.severity, 
'medium'); + assert.equal(packet.rollbackPacket.rewriteRequired, false); + assert.ok(packet.rollbackPacket.remediationActions.some((action) => action.path === 'protocols/consent-appendix.md')); +} + +function testRepositorySummary() { + const portfolio = evaluateRepository(sampleBundles, { asOfDate }); + + assert.equal(portfolio.bundleCount, 4); + assert.equal(portfolio.byAction.reject_bundle_and_require_history_rewrite, 1); + assert.equal(portfolio.byAction.require_repository_hygiene_fix, 2); + assert.equal(portfolio.byAction.allow_merge_tag_export, 1); + assert.ok(portfolio.heldCommits.includes('syn-commit-a1')); + assert.equal(portfolio.auditDigest.length, 64); +} + +const tests = [ + testPathNormalizationAndComponentInference, + testPublicCredentialExposureRejectsArtifact, + testSensitivePathNameIsHeld, + testMissingLfsPointerRequiresHygieneFix, + testCleanArtifactAllowed, + testCriticalBundleBuildsRewritePacket, + testPrivateConsentFormHoldsReleaseWithoutRewrite, + testRepositorySummary +]; + +for (const test of tests) { + test(); +} + +console.log(`${tests.length} repository sensitive artifact guard tests passed`);