From 26322c03f5c4377bb8c0b0c6bca6a9c4ab9283da Mon Sep 17 00:00:00 2001 From: taherd <183945978+taherdhanera@users.noreply.github.com> Date: Fri, 22 May 2026 22:35:26 +0530 Subject: [PATCH] Add data availability statement guard --- .../README.md | 35 ++ .../demo-video.js | 173 ++++++++++ .../demo.js | 18 + .../index.js | 320 ++++++++++++++++++ .../package.json | 14 + .../reports/demo.webm | Bin 0 -> 29909 bytes .../reports/reviewer-packet.md | 51 +++ .../reports/summary.json | 188 ++++++++++ .../reports/summary.svg | 16 + .../requirements-map.md | 18 + .../sample-data.js | 106 ++++++ .../test.js | 70 ++++ 12 files changed, 1009 insertions(+) create mode 100644 collaborative-data-availability-statement-guard/README.md create mode 100644 collaborative-data-availability-statement-guard/demo-video.js create mode 100644 collaborative-data-availability-statement-guard/demo.js create mode 100644 collaborative-data-availability-statement-guard/index.js create mode 100644 collaborative-data-availability-statement-guard/package.json create mode 100644 collaborative-data-availability-statement-guard/reports/demo.webm create mode 100644 collaborative-data-availability-statement-guard/reports/reviewer-packet.md create mode 100644 collaborative-data-availability-statement-guard/reports/summary.json create mode 100644 collaborative-data-availability-statement-guard/reports/summary.svg create mode 100644 collaborative-data-availability-statement-guard/requirements-map.md create mode 100644 collaborative-data-availability-statement-guard/sample-data.js create mode 100644 collaborative-data-availability-statement-guard/test.js diff --git a/collaborative-data-availability-statement-guard/README.md b/collaborative-data-availability-statement-guard/README.md new file mode 100644 index 00000000..f7990e73 --- /dev/null +++ b/collaborative-data-availability-statement-guard/README.md @@ -0,0 +1,35 @@ +# Collaborative Data Availability Statement Guard + +Self-contained Real-time Collaborative Research Editor slice for +`SCIBASE-AI/SCIBASE.AI#12`. + +The guard evaluates whether a collaborative manuscript can be exported with +complete data and code availability statements. It checks required availability +sections, repository accessions, statement citations, dataset/code licenses, +reviewer-only access windows, de-identification evidence for human-derived +material, role-based approvals, blocking comments, and unmerged editor changes. + +This is intentionally separate from reference-library merging, notification +visibility, accessibility, presence privacy, evidence binding, and general +embargo-release workflows. Its job is to gate the final manuscript export when +the availability statement and linked artifact evidence are not review-ready. + +## Run + +```bash +npm run check +npm test +npm run demo +npm run demo:video +``` + +## Outputs + +- `reports/summary.json` +- `reports/reviewer-packet.md` +- `reports/summary.svg` +- `reports/demo.webm` + +All data is synthetic. The module does not call repository hosts, journal +systems, identity services, storage APIs, email systems, or live manuscript +export services. diff --git a/collaborative-data-availability-statement-guard/demo-video.js b/collaborative-data-availability-statement-guard/demo-video.js new file mode 100644 index 00000000..3298eb2c --- /dev/null +++ b/collaborative-data-availability-statement-guard/demo-video.js @@ -0,0 +1,173 @@ +const fs = require("fs"); +const os = require("os"); +const path = require("path"); +const { execFileSync } = require("child_process"); + +const reportDir = path.join(__dirname, "reports"); +const outputPath = path.join(reportDir, "demo.webm"); + +const chromeCandidates = [ + process.env.CHROME_PATH, + "C:\\Program Files\\Google\\Chrome\\Application\\chrome.exe", + "C:\\Program Files (x86)\\Google\\Chrome\\Application\\chrome.exe", + "C:\\Program Files\\Microsoft\\Edge\\Application\\msedge.exe", + "C:\\Program Files (x86)\\Microsoft\\Edge\\Application\\msedge.exe" +].filter(Boolean); + +function findBrowser() { + const found = chromeCandidates.find((candidate) => fs.existsSync(candidate)); + if (!found) { + throw new Error("Chrome or Edge was not found. Set CHROME_PATH to generate reports/demo.webm."); + } + return found; +} + +function fileUrl(filePath) { + return `file:///${filePath.replace(/\\/g, "/")}`; +} + +const html = String.raw` + +
+ +recording+ + +`; + +fs.mkdirSync(reportDir, { recursive: true }); + +const tempDir = fs.mkdtempSync(path.join(os.tmpdir(), "data-availability-demo-")); +const htmlPath = path.join(tempDir, "demo.html"); +const profileDir = path.join(tempDir, "profile"); +fs.writeFileSync(htmlPath, html, "utf8"); + +const stdout = execFileSync( + findBrowser(), + [ + "--headless=new", + "--disable-gpu", + "--disable-dev-shm-usage", + "--autoplay-policy=no-user-gesture-required", + "--run-all-compositor-stages-before-draw", + "--virtual-time-budget=7500", + `--user-data-dir=${profileDir}`, + "--dump-dom", + fileUrl(htmlPath) + ], + { encoding: "utf8", maxBuffer: 30 * 1024 * 1024 } +); + +const match = stdout.match(/data:video\/webm;base64,([A-Za-z0-9+/=]+)/); +if (!match) { + throw new Error(`Demo video generation failed. Browser output ended with: ${stdout.slice(-600)}`); +} + +fs.writeFileSync(outputPath, Buffer.from(match[1], "base64")); +console.log(`Generated ${path.relative(process.cwd(), outputPath)}`); diff --git a/collaborative-data-availability-statement-guard/demo.js b/collaborative-data-availability-statement-guard/demo.js new file mode 100644 index 00000000..97c3730c --- /dev/null +++ b/collaborative-data-availability-statement-guard/demo.js @@ -0,0 +1,18 @@ +const fs = require("fs"); +const path = require("path"); +const { project } = require("./sample-data"); +const { buildReviewPacket, renderMarkdownReport, renderSvgSummary } = require("./index"); + +const reportDir = path.join(__dirname, "reports"); +fs.mkdirSync(reportDir, { recursive: true }); + +const packet = buildReviewPacket(project); + +fs.writeFileSync(path.join(reportDir, "summary.json"), `${JSON.stringify(packet, null, 2)}\n`, "utf8"); +fs.writeFileSync(path.join(reportDir, "reviewer-packet.md"), renderMarkdownReport(packet), "utf8"); +fs.writeFileSync(path.join(reportDir, "summary.svg"), renderSvgSummary(packet), "utf8"); + +console.log(`Generated reports for ${packet.guard}`); +console.log(`Decision: ${packet.decision}`); +console.log(`Score: ${packet.score}`); +console.log(`Findings: ${packet.findings.length}`); diff --git a/collaborative-data-availability-statement-guard/index.js b/collaborative-data-availability-statement-guard/index.js new file mode 100644 index 00000000..be1a95c9 --- /dev/null +++ b/collaborative-data-availability-statement-guard/index.js @@ -0,0 +1,320 @@ +const SEVERITY_WEIGHTS = { + critical: 36, + high: 22, + medium: 11, + low: 4 +}; + +function daysBetween(a, b) { + const left = new Date(a).getTime(); + const right = new Date(b).getTime(); + return Math.floor((right - left) / (24 * 60 * 60 * 1000)); +} + +function normalize(value) { + return String(value || "").trim().toLowerCase(); +} + +function addFinding(findings, severity, rule, message, action, refs = []) { + findings.push({ severity, rule, message, action, refs }); +} + +function sectionById(manuscript) { + return new Map(manuscript.sections.map((section) => [section.id, section])); +} + +function accessionPattern(kind) { + if (kind === "code") { + return /^(GH|GL|DOI)-[A-Za-z0-9._/-]+$/; + } + return /^(ZEN|DRYAD|FIGSHARE|OSF|DOI)-[A-Za-z0-9._/-]+$/; +} + +function referencedAccessions(manuscript) { + return new Set(manuscript.citations.map((citation) => normalize(citation.accessionId)).filter(Boolean)); +} + +function evaluateStatementReadiness(project) { + const findings = []; + const sections = sectionById(project.manuscript); + const citationAccessions = referencedAccessions(project.manuscript); + + for (const sectionId of project.policy.requiredSections) { + const section = sections.get(sectionId); + if (!section || normalize(section.text).length < 40) { + addFinding( + findings, + "critical", + "missing-required-availability-section", + `Required section ${sectionId} is missing or too thin for export.`, + "Block manuscript export until the collaborative editor contains a complete availability statement.", + [sectionId] + ); + } + } + + for (const citation of project.manuscript.citations) { + if (!sections.has(citation.sectionId)) { + addFinding( + findings, + "high", + "citation-anchor-missing", + `Citation ${citation.id} points at missing section ${citation.sectionId}.`, + "Repair the citation anchor before generating the final manuscript package.", + [citation.id, citation.sectionId] + ); + } + } + + for (const repository of project.repositories) { + const accession = normalize(repository.accessionId); + if (!accession) { + addFinding( + findings, + "critical", + "repository-accession-missing", + `${repository.label} has no stable accession or repository identifier.`, + "Attach a stable repository accession or mark the material as non-distributable with reviewer evidence.", + [repository.id] + ); + } else if (!accessionPattern(repository.kind).test(repository.accessionId)) { + addFinding( + findings, + "high", + "repository-accession-format-invalid", + `${repository.label} has accession ${repository.accessionId}, which does not match accepted export formats.`, + "Normalize the repository identifier before export so readers can resolve the artifact.", + [repository.id, repository.accessionId] + ); + } else if (!citationAccessions.has(accession)) { + addFinding( + findings, + "medium", + "repository-not-mentioned-in-statement", + `${repository.label} is registered but not cited in the availability sections.`, + "Add the repository accession to the data or code availability statement.", + [repository.id, repository.accessionId] + ); + } + + if (repository.kind === "dataset" && !project.policy.acceptedDatasetLicenses.includes(repository.license)) { + addFinding( + findings, + "high", + "dataset-license-missing-or-unaccepted", + `${repository.label} has dataset license ${repository.license || "none"}.`, + "Add an accepted dataset license or document why restricted access is required.", + [repository.id] + ); + } + + if (repository.kind === "code" && !project.policy.acceptedCodeLicenses.includes(repository.license)) { + addFinding( + findings, + "high", + "code-license-missing-or-unaccepted", + `${repository.label} has code license ${repository.license || "none"}.`, + "Add an accepted code license before exposing reproducibility controls.", + [repository.id] + ); + } + + if (repository.containsHumanDerivedData && !repository.deidentificationEvidenceId) { + addFinding( + findings, + "critical", + "human-derived-data-without-deidentification-evidence", + `${repository.label} contains human-derived material without de-identification evidence.`, + "Block export until the data steward links de-identification or restriction evidence.", + [repository.id] + ); + } + + if (repository.access === "embargoed" && repository.releaseDate) { + const releaseLag = daysBetween(project.manuscript.exportDeadline, repository.releaseDate); + if (releaseLag > project.policy.publicReleaseGraceDays) { + addFinding( + findings, + "medium", + "embargo-release-lags-export", + `${repository.label} releases ${releaseLag} days after the manuscript export deadline.`, + "Confirm the target journal accepts this availability timing before final export.", + [repository.id, repository.releaseDate] + ); + } + } + + if (repository.access !== "public" && repository.reviewerLinkExpiresAt) { + const reviewerDays = daysBetween(project.asOfDate, repository.reviewerLinkExpiresAt); + if (reviewerDays < project.policy.reviewerLinkMinimumDays) { + addFinding( + findings, + "high", + "reviewer-link-expires-before-review-window", + `${repository.label} reviewer access expires in ${reviewerDays} days.`, + "Refresh reviewer-only links before export so peer reviewers can inspect restricted artifacts.", + [repository.id, repository.reviewerLinkExpiresAt] + ); + } + } + } + + for (const role of project.policy.requiredApproverRoles) { + const approver = project.collaborators.find((collaborator) => collaborator.role === role); + if (!approver || approver.approval !== "approved") { + addFinding( + findings, + "high", + "required-availability-approver-missing", + `Required ${role} approval is not complete.`, + "Hold export until all role-based collaborators approve the availability statement.", + [role] + ); + } + } + + for (const comment of project.editorState.unresolvedComments) { + if (comment.severity === "blocking") { + addFinding( + findings, + "high", + "blocking-availability-comment-open", + `Blocking comment ${comment.id} remains open on ${comment.sectionId}.`, + "Resolve blocking availability comments before final manuscript export.", + [comment.id, comment.sectionId] + ); + } + } + + for (const change of project.editorState.pendingChanges) { + if (change.status !== "merged") { + addFinding( + findings, + "medium", + "availability-change-unmerged", + `Pending change ${change.id} in ${change.sectionId} has not been merged.`, + "Merge, reject, or explicitly defer the collaborative availability edit before export.", + [change.id, change.sectionId] + ); + } + } + + const severitySummary = findings.reduce( + (summary, finding) => { + summary[finding.severity] += 1; + return summary; + }, + { critical: 0, high: 0, medium: 0, low: 0 } + ); + const score = Math.max(0, 100 - findings.reduce((sum, finding) => sum + SEVERITY_WEIGHTS[finding.severity], 0)); + + return { findings, severitySummary, score }; +} + +function decisionFromEvaluation(evaluation) { + if (evaluation.severitySummary.critical > 0) { + return "block-export-until-availability-evidence-is-clean"; + } + if (evaluation.score < 75 || evaluation.severitySummary.high > 0) { + return "hold-export-for-availability-review"; + } + if (evaluation.score < 90) { + return "manual-review-before-export"; + } + return "availability-statement-ready"; +} + +function buildReviewerActions(findings) { + return findings.map((finding) => ({ + priority: finding.severity === "critical" || finding.severity === "high" ? "blocking" : "review", + rule: finding.rule, + action: finding.action, + refs: finding.refs + })); +} + +function buildReviewPacket(project) { + const evaluation = evaluateStatementReadiness(project); + return { + guard: "collaborative-data-availability-statement-guard", + issue: "SCIBASE-AI/SCIBASE.AI#12", + manuscriptId: project.manuscript.id, + title: project.manuscript.title, + targetJournal: project.manuscript.targetJournal, + asOfDate: project.asOfDate, + decision: decisionFromEvaluation(evaluation), + score: evaluation.score, + severitySummary: evaluation.severitySummary, + findings: evaluation.findings, + reviewerActions: buildReviewerActions(evaluation.findings), + safety: [ + "Synthetic manuscript, repository, collaborator, and review data only", + "No GitHub, Zenodo, journal, identity, storage, or email network calls", + "No private manuscript content, human-subject records, credentials, or live export mutations" + ] + }; +} + +function renderMarkdownReport(packet) { + const lines = [ + "# Collaborative Data Availability Statement Guard", + "", + `Manuscript: ${packet.title}`, + `Issue: ${packet.issue}`, + `Decision: ${packet.decision}`, + `Score: ${packet.score}`, + "", + "## Severity Summary", + "", + "| Severity | Count |", + "| --- | ---: |" + ]; + + for (const severity of ["critical", "high", "medium", "low"]) { + lines.push(`| ${severity} | ${packet.severitySummary[severity]} |`); + } + + lines.push("", "## Findings", ""); + for (const finding of packet.findings) { + lines.push(`- **${finding.severity} / ${finding.rule}**: ${finding.message}`); + lines.push(` - Action: ${finding.action}`); + lines.push(` - Refs: ${finding.refs.join(", ") || "none"}`); + } + + lines.push("", "## Safety", ""); + for (const item of packet.safety) { + lines.push(`- ${item}`); + } + + return `${lines.join("\n")}\n`; +} + +function renderSvgSummary(packet) { + const barWidth = Math.max(44, Math.min(760, packet.score * 7.6)); + const totalFindings = packet.findings.length; + return ` +`; +} + +module.exports = { + buildReviewPacket, + decisionFromEvaluation, + evaluateStatementReadiness, + renderMarkdownReport, + renderSvgSummary +}; diff --git a/collaborative-data-availability-statement-guard/package.json b/collaborative-data-availability-statement-guard/package.json new file mode 100644 index 00000000..bc525426 --- /dev/null +++ b/collaborative-data-availability-statement-guard/package.json @@ -0,0 +1,14 @@ +{ + "name": "collaborative-data-availability-statement-guard", + "version": "1.0.0", + "description": "Deterministic export guard for collaborative manuscript data and code availability statements.", + "main": "index.js", + "private": true, + "type": "commonjs", + "scripts": { + "check": "node --check index.js && node --check sample-data.js && node --check demo.js && node --check demo-video.js && node --check test.js", + "test": "node test.js", + "demo": "node demo.js", + "demo:video": "node demo-video.js" + } +} diff --git a/collaborative-data-availability-statement-guard/reports/demo.webm b/collaborative-data-availability-statement-guard/reports/demo.webm new file mode 100644 index 0000000000000000000000000000000000000000..7b86aeca6579ec8e37f9650e4d9658559c74af10 GIT binary patch literal 29909 zcmeFXW0+-4vmm<5wr$()vTb$QW|z8b+qUg4+qT_h+co`uIA^{y_s*F+cjn(&&)&Hr zGGa$YM#hSWl{>LTw+nKFg8hJmp8kAq{Xm88{2+z=16@rFZG=Mpph14Xn99CpK!Cq4 z=OQH84Ci06?GefqfoPItrplFmwtvUaRjRH2U
ZP53Qcd_w}25s@Oucg6>N&Kg2)mLmwSFQbPT64>AGmlU8+?pKdZZDKcp(`{6yeX
zXyzdaGK~gr)HW>!ycBM5kZD=pw0>L^X7??%V@ogQexS}#9v_t&I}2Gl!gq*p4< AsH%zOwG!(LNn%7)4Utg?&3#+NV;u+rLrQzgau+`2
z#)wo~HMd@2(gdv=@xIprga;i_vSpP1aQ5zJqmWIJ$(MvX;uDm+P|=jJLE@wxC(`(!
z5pvERq}LA >3hi7Q8EuC
zu~w7vX2<$Ba9+t3u~n1I?ahBhBMVel&lZWHNs;?oQbQA$|DpL;VC{o{mm|(nO)!a=
zhL@zVOVBV?MWB10>wq%SjTp1^StyhQZ4C