diff --git a/external-validity-transfer-assistant/README.md b/external-validity-transfer-assistant/README.md new file mode 100644 index 00000000..fd799619 --- /dev/null +++ b/external-validity-transfer-assistant/README.md @@ -0,0 +1,38 @@ +# External Validity Transfer Assistant + +This module is a focused slice for SCIBASE issue #16, AI-Powered Research Assistant Suite. + +It adds a deterministic research-assistant review gate for external validity and population-transfer risk before AI-generated peer-review packets are shown to authors, reviewers, funders, or lab leads. + +## What It Checks + +- Whether broad manuscript claims are backed by evidence from the asserted populations. +- Whether claimed deployment settings are covered by linked study artifacts. +- Whether assay or instrument contexts match the manuscript language. +- Whether runtime environments have reproducible rerun evidence. +- Whether strong claims are missing required subgroup coverage. +- Whether a broad transfer claim lacks external validation. + +## Outputs + +The demo creates: + +- `reports/summary.json`: structured review packet. +- `reports/reviewer-packet.md`: reviewer-facing findings, actions, and research gaps. +- `reports/summary.svg`: visual transfer-risk summary. + +## Why This Is Distinct + +This is not another broad AI assistant, preregistration checker, retraction sentinel, prompt-safety guard, statistical review, benchmark-leakage auditor, figure/table checker, supplement-readiness module, funding/COI checker, or evidence-trace assistant. + +It focuses specifically on whether a manuscript's claims transfer beyond the exact population, setting, assay, and runtime contexts represented by the linked evidence. + +## Local Validation + +```bash +npm run check +npm test +npm run demo +``` + +The module uses synthetic data only. It makes no network calls and uses no credentials, private manuscripts, protected health information, payment data, or external APIs. diff --git a/external-validity-transfer-assistant/demo-video.js b/external-validity-transfer-assistant/demo-video.js new file mode 100644 index 00000000..38293ac6 --- /dev/null +++ b/external-validity-transfer-assistant/demo-video.js @@ -0,0 +1,174 @@ +const fs = require("fs"); +const os = require("os"); +const path = require("path"); +const { execFileSync } = require("child_process"); + +const reportDir = path.join(__dirname, "reports"); +const outputPath = path.join(reportDir, "demo.webm"); + +const chromeCandidates = [ + process.env.CHROME_PATH, + "C:\\Program Files (x86)\\Google\\Chrome\\Application\\chrome.exe", + "C:\\Program Files\\Google\\Chrome\\Application\\chrome.exe", + "C:\\Program Files (x86)\\Microsoft\\Edge\\Application\\msedge.exe", + "C:\\Program Files\\Microsoft\\Edge\\Application\\msedge.exe" +].filter(Boolean); + +function findBrowser() { + const found = chromeCandidates.find((candidate) => fs.existsSync(candidate)); + if (!found) { + throw new Error("Chrome or Edge was not found. Set CHROME_PATH to generate reports/demo.webm."); + } + return found; +} + +function fileUrl(filePath) { + return `file:///${filePath.replace(/\\/g, "/")}`; +} + +const html = String.raw` + + + + External validity transfer assistant demo + + + + +
recording
+ + +`; + +fs.mkdirSync(reportDir, { recursive: true }); + +const tempDir = fs.mkdtempSync(path.join(os.tmpdir(), "external-validity-demo-")); +const htmlPath = path.join(tempDir, "demo.html"); +const profileDir = path.join(tempDir, "profile"); +fs.writeFileSync(htmlPath, html, "utf8"); + +const browserPath = findBrowser(); +const stdout = execFileSync( + browserPath, + [ + "--headless=new", + "--disable-gpu", + "--disable-dev-shm-usage", + "--autoplay-policy=no-user-gesture-required", + "--run-all-compositor-stages-before-draw", + "--virtual-time-budget=7000", + `--user-data-dir=${profileDir}`, + "--dump-dom", + fileUrl(htmlPath) + ], + { encoding: "utf8", maxBuffer: 30 * 1024 * 1024 } +); + +const match = stdout.match(/data:video\/webm;base64,([A-Za-z0-9+/=]+)/); +if (!match) { + throw new Error(`Demo video generation failed. Browser output ended with: ${stdout.slice(-600)}`); +} + +fs.writeFileSync(outputPath, Buffer.from(match[1], "base64")); +console.log(`Generated ${path.relative(process.cwd(), outputPath)}`); diff --git a/external-validity-transfer-assistant/demo.js b/external-validity-transfer-assistant/demo.js new file mode 100644 index 00000000..b1c7c2d5 --- /dev/null +++ b/external-validity-transfer-assistant/demo.js @@ -0,0 +1,22 @@ +const fs = require("fs"); +const path = require("path"); +const { project } = require("./sample-data"); +const { buildReviewPacket, renderMarkdownReport, renderSvgSummary } = require("./index"); + +const reportDir = path.join(__dirname, "reports"); +fs.mkdirSync(reportDir, { recursive: true }); + +const packet = buildReviewPacket(project); + +fs.writeFileSync( + path.join(reportDir, "summary.json"), + `${JSON.stringify(packet, null, 2)}\n`, + "utf8" +); +fs.writeFileSync(path.join(reportDir, "reviewer-packet.md"), renderMarkdownReport(packet), "utf8"); +fs.writeFileSync(path.join(reportDir, "summary.svg"), renderSvgSummary(packet), "utf8"); + +console.log(`Generated reports for ${packet.assistant}`); +console.log(`Decision: ${packet.decision}`); +console.log(`Average score: ${packet.averageScore}`); +console.log(`Findings: ${packet.peerReviewSuggestions.length}`); diff --git a/external-validity-transfer-assistant/index.js b/external-validity-transfer-assistant/index.js new file mode 100644 index 00000000..aba2a708 --- /dev/null +++ b/external-validity-transfer-assistant/index.js @@ -0,0 +1,360 @@ +const DEFAULT_WEIGHTS = { + critical: 35, + high: 22, + medium: 12, + low: 6 +}; + +function unique(values) { + return [...new Set((values || []).filter(Boolean))]; +} + +function toScopeSet(records, key) { + return new Set(records.map((record) => record[key]).filter(Boolean)); +} + +function missingFromScope(expected, observed) { + const observedSet = observed instanceof Set ? observed : new Set(observed); + return unique(expected).filter((value) => !observedSet.has(value)); +} + +function hasExternalValidation(records) { + return records.some((record) => record.externalValidation || record.type === "external-validation"); +} + +function hasRunnableEvidence(records) { + return records.some((record) => record.reproducible && record.environment); +} + +function severityForMissing(count, expectedCount) { + if (count === 0) { + return null; + } + if (expectedCount >= 3 && count >= 2) { + return "high"; + } + if (count === expectedCount) { + return "high"; + } + return "medium"; +} + +function addFinding(findings, severity, rule, message, action) { + findings.push({ severity, rule, message, action }); +} + +function evaluateClaim(claim, evidence, manuscript) { + const linkedEvidence = evidence.filter((record) => claim.evidenceIds.includes(record.id)); + const findings = []; + const scope = claim.assertedScope || {}; + + if (linkedEvidence.length === 0) { + addFinding( + findings, + "critical", + "missing-evidence", + `Claim ${claim.id} has no linked evidence artifacts.`, + "Link at least one dataset, runbook, protocol, or validation artifact before review." + ); + } + + const observedPopulations = toScopeSet(linkedEvidence, "population"); + const observedSettings = toScopeSet(linkedEvidence, "setting"); + const observedInstruments = toScopeSet(linkedEvidence, "instrument"); + const observedEnvironments = toScopeSet(linkedEvidence, "environment"); + + const missingPopulations = missingFromScope(scope.populations, observedPopulations); + const missingSettings = missingFromScope(scope.settings, observedSettings); + const missingInstruments = missingFromScope(scope.instruments, observedInstruments); + const missingEnvironments = missingFromScope(scope.environments, observedEnvironments); + + const populationSeverity = severityForMissing(missingPopulations.length, (scope.populations || []).length); + if (populationSeverity) { + addFinding( + findings, + populationSeverity, + "population-transfer-gap", + `Claim ${claim.id} asserts populations not represented in linked evidence: ${missingPopulations.join(", ")}.`, + "Narrow the claim wording or add external validation for each missing population." + ); + } + + const settingSeverity = severityForMissing(missingSettings.length, (scope.settings || []).length); + if (settingSeverity) { + addFinding( + findings, + settingSeverity, + "setting-transfer-gap", + `Claim ${claim.id} asserts settings not represented in linked evidence: ${missingSettings.join(", ")}.`, + "Add site-level validation evidence or mark the setting as a future research gap." + ); + } + + if (missingInstruments.length > 0) { + addFinding( + findings, + "medium", + "assay-transfer-gap", + `Claim ${claim.id} references unsupported assay or instrument contexts: ${missingInstruments.join(", ")}.`, + "Separate assay-specific claims and document conversion limits before reviewer release." + ); + } + + if (missingEnvironments.length > 0) { + addFinding( + findings, + "medium", + "runtime-transfer-gap", + `Claim ${claim.id} references runtime environments not covered by rerun evidence: ${missingEnvironments.join(", ")}.`, + "Run the pipeline in each deployment-like environment or downgrade deployment readiness language." + ); + } + + const broadScope = (scope.populations || []).length > 1 || (scope.settings || []).length > 1; + if (broadScope && !hasExternalValidation(linkedEvidence)) { + addFinding( + findings, + "high", + "no-external-validation", + `Claim ${claim.id} has broad transfer language without external validation evidence.`, + "Hold broad generalizability language until at least one independent validation artifact is linked." + ); + } + + if (!hasRunnableEvidence(linkedEvidence)) { + addFinding( + findings, + "high", + "no-runnable-transfer-evidence", + `Claim ${claim.id} lacks reproducible runtime evidence for the asserted context.`, + "Add a deterministic runbook, manifest, or notebook rerun before the assistant marks the claim reproducible." + ); + } + + const missingRequiredSubgroups = missingFromScope(manuscript.requiredSubgroups || [], observedPopulations); + if (claim.confidence === "strong" && missingRequiredSubgroups.length > 0) { + addFinding( + findings, + "medium", + "strong-claim-subgroup-undercoverage", + `Strong claim ${claim.id} does not cover required subgroup evidence: ${missingRequiredSubgroups.join(", ")}.`, + "Convert the claim to qualified language or create a subgroup-specific validation plan." + ); + } + + const score = Math.max( + 0, + 100 - findings.reduce((total, finding) => total + DEFAULT_WEIGHTS[finding.severity], 0) + ); + + return { + id: claim.id, + text: claim.text, + score, + decision: decisionFromScore(score), + evidenceCount: linkedEvidence.length, + observedScope: { + populations: [...observedPopulations], + settings: [...observedSettings], + instruments: [...observedInstruments], + environments: [...observedEnvironments], + externalValidation: hasExternalValidation(linkedEvidence), + runnableEvidence: hasRunnableEvidence(linkedEvidence) + }, + findings + }; +} + +function decisionFromScore(score) { + if (score >= 82) { + return "review-ready"; + } + if (score >= 62) { + return "revise-before-release"; + } + if (score >= 42) { + return "hold-for-transfer-evidence"; + } + return "quarantine-from-review-packet"; +} + +function summarizeSeverity(claimReviews) { + const summary = { critical: 0, high: 0, medium: 0, low: 0 }; + for (const review of claimReviews) { + for (const finding of review.findings) { + summary[finding.severity] += 1; + } + } + return summary; +} + +function createResearchGaps(project, claimReviews) { + const missingTerms = new Set(); + for (const review of claimReviews) { + for (const finding of review.findings) { + if (finding.rule.includes("population") || finding.rule.includes("subgroup")) { + for (const value of review.text.match(/pediatric|underrepresented ancestry|adult/g) || []) { + missingTerms.add(value); + } + } + if (finding.rule.includes("setting") || finding.rule.includes("runtime")) { + for (const value of review.text.match(/low-resource clinic|community hospital|international site|cpu-only/g) || []) { + missingTerms.add(value); + } + } + } + } + + const generated = project.corpusSignals + .filter((signal) => signal.labFit.some((capability) => project.labCapabilities.includes(capability))) + .map((signal) => ({ + id: signal.id, + topic: signal.topic, + reason: signal.reason, + priority: missingTerms.size > 0 ? "high" : "medium", + suggestedNextStep: `Use ${signal.topic} as a targeted validation or grant-planning workstream.` + })); + + return generated.slice(0, 5); +} + +function createReproducibilityActions(claimReviews) { + const actions = []; + for (const review of claimReviews) { + for (const finding of review.findings) { + if (finding.rule.includes("runtime") || finding.rule.includes("runnable") || finding.rule.includes("external")) { + actions.push({ + claimId: review.id, + priority: finding.severity === "high" || finding.severity === "critical" ? "blocking" : "recommended", + action: finding.action + }); + } + } + } + return actions; +} + +function buildReviewPacket(project) { + const claimReviews = project.manuscript.claims.map((claim) => + evaluateClaim(claim, project.evidence, project.manuscript) + ); + const severitySummary = summarizeSeverity(claimReviews); + const averageScore = Math.round( + claimReviews.reduce((total, review) => total + review.score, 0) / claimReviews.length + ); + + return { + projectId: project.id, + title: project.title, + assistant: "external-validity-transfer-assistant", + issue: "SCIBASE-AI/SCIBASE.AI#16", + averageScore, + decision: decisionFromScore(averageScore), + severitySummary, + claimReviews, + peerReviewSuggestions: claimReviews.flatMap((review) => + review.findings.map((finding) => ({ + claimId: review.id, + severity: finding.severity, + suggestion: finding.message, + action: finding.action + })) + ), + reproducibilityActions: createReproducibilityActions(claimReviews), + researchGaps: createResearchGaps(project, claimReviews), + safetyNotes: [ + "Synthetic data only.", + "No external APIs, credentials, private manuscripts, or live clinical data are used.", + "The assistant produces deterministic review packets suitable for pre-submission review." + ] + }; +} + +function renderMarkdownReport(packet) { + const lines = [ + `# ${packet.title}`, + "", + `Assistant: ${packet.assistant}`, + `Overall decision: ${packet.decision}`, + `Average transfer score: ${packet.averageScore}`, + "", + "## Severity Summary", + "", + `- Critical: ${packet.severitySummary.critical}`, + `- High: ${packet.severitySummary.high}`, + `- Medium: ${packet.severitySummary.medium}`, + `- Low: ${packet.severitySummary.low}`, + "", + "## Claim Reviews", + "" + ]; + + for (const review of packet.claimReviews) { + lines.push(`### ${review.id}`); + lines.push(""); + lines.push(`Decision: ${review.decision}`); + lines.push(`Score: ${review.score}`); + lines.push(`Evidence artifacts: ${review.evidenceCount}`); + if (review.findings.length === 0) { + lines.push("- No transfer-risk findings."); + } else { + for (const finding of review.findings) { + lines.push(`- ${finding.severity.toUpperCase()} ${finding.rule}: ${finding.message}`); + lines.push(` Action: ${finding.action}`); + } + } + lines.push(""); + } + + lines.push("## Reproducibility Actions"); + lines.push(""); + for (const action of packet.reproducibilityActions) { + lines.push(`- ${action.priority}: ${action.claimId} - ${action.action}`); + } + + lines.push(""); + lines.push("## Research Gap Prompts"); + lines.push(""); + for (const gap of packet.researchGaps) { + lines.push(`- ${gap.priority}: ${gap.topic} - ${gap.reason}`); + } + + return `${lines.join("\n")}\n`; +} + +function renderSvgSummary(packet) { + const barWidth = Math.max(10, packet.averageScore * 4); + const statusColor = packet.averageScore >= 62 ? "#2563eb" : "#b91c1c"; + const rows = packet.claimReviews + .map((review, index) => { + const y = 130 + index * 52; + const width = Math.max(10, review.score * 4); + return [ + `${review.id}`, + ``, + ``, + `${review.score} - ${review.decision}` + ].join("\n"); + }) + .join("\n"); + + return [ + ``, + ``, + `External Validity Transfer Assistant`, + `Average transfer score`, + ``, + ``, + `${packet.averageScore} - ${packet.decision}`, + rows, + `Synthetic deterministic demo. No credentials, private manuscripts, or external APIs.`, + `` + ].join("\n"); +} + +module.exports = { + buildReviewPacket, + evaluateClaim, + renderMarkdownReport, + renderSvgSummary +}; diff --git a/external-validity-transfer-assistant/package.json b/external-validity-transfer-assistant/package.json new file mode 100644 index 00000000..13d41e83 --- /dev/null +++ b/external-validity-transfer-assistant/package.json @@ -0,0 +1,14 @@ +{ + "name": "external-validity-transfer-assistant", + "version": "1.0.0", + "description": "Deterministic AI research assistant slice for external validity and population transfer review.", + "main": "index.js", + "private": true, + "type": "commonjs", + "scripts": { + "check": "node --check index.js && node --check sample-data.js && node --check demo.js && node --check demo-video.js && node --check test.js", + "test": "node test.js", + "demo": "node demo.js", + "demo:video": "node demo-video.js" + } +} diff --git a/external-validity-transfer-assistant/reports/demo.webm b/external-validity-transfer-assistant/reports/demo.webm new file mode 100644 index 00000000..1dee4557 Binary files /dev/null and b/external-validity-transfer-assistant/reports/demo.webm differ diff --git a/external-validity-transfer-assistant/reports/reviewer-packet.md b/external-validity-transfer-assistant/reports/reviewer-packet.md new file mode 100644 index 00000000..595d6676 --- /dev/null +++ b/external-validity-transfer-assistant/reports/reviewer-packet.md @@ -0,0 +1,66 @@ +# Portable biomarker triage assistant for multi-site oncology cohorts + +Assistant: external-validity-transfer-assistant +Overall decision: hold-for-transfer-evidence +Average transfer score: 44 + +## Severity Summary + +- Critical: 0 +- High: 5 +- Medium: 5 +- Low: 0 + +## Claim Reviews + +### claim-generalizable-oncology + +Decision: quarantine-from-review-packet +Score: 0 +Evidence artifacts: 2 +- HIGH population-transfer-gap: Claim claim-generalizable-oncology asserts populations not represented in linked evidence: pediatric, underrepresented ancestry. + Action: Narrow the claim wording or add external validation for each missing population. +- HIGH setting-transfer-gap: Claim claim-generalizable-oncology asserts settings not represented in linked evidence: community hospital, international site. + Action: Add site-level validation evidence or mark the setting as a future research gap. +- MEDIUM assay-transfer-gap: Claim claim-generalizable-oncology references unsupported assay or instrument contexts: single-cell-rna-seq. + Action: Separate assay-specific claims and document conversion limits before reviewer release. +- MEDIUM runtime-transfer-gap: Claim claim-generalizable-oncology references runtime environments not covered by rerun evidence: cuda, cpu-only. + Action: Run the pipeline in each deployment-like environment or downgrade deployment readiness language. +- HIGH no-external-validation: Claim claim-generalizable-oncology has broad transfer language without external validation evidence. + Action: Hold broad generalizability language until at least one independent validation artifact is linked. +- MEDIUM strong-claim-subgroup-undercoverage: Strong claim claim-generalizable-oncology does not cover required subgroup evidence: pediatric, underrepresented ancestry. + Action: Convert the claim to qualified language or create a subgroup-specific validation plan. + +### claim-reproducible-pipeline + +Decision: review-ready +Score: 100 +Evidence artifacts: 3 +- No transfer-risk findings. + +### claim-deployment-ready + +Decision: quarantine-from-review-packet +Score: 32 +Evidence artifacts: 1 +- MEDIUM setting-transfer-gap: Claim claim-deployment-ready asserts settings not represented in linked evidence: low-resource clinic. + Action: Add site-level validation evidence or mark the setting as a future research gap. +- HIGH no-external-validation: Claim claim-deployment-ready has broad transfer language without external validation evidence. + Action: Hold broad generalizability language until at least one independent validation artifact is linked. +- HIGH no-runnable-transfer-evidence: Claim claim-deployment-ready lacks reproducible runtime evidence for the asserted context. + Action: Add a deterministic runbook, manifest, or notebook rerun before the assistant marks the claim reproducible. +- MEDIUM strong-claim-subgroup-undercoverage: Strong claim claim-deployment-ready does not cover required subgroup evidence: pediatric, underrepresented ancestry. + Action: Convert the claim to qualified language or create a subgroup-specific validation plan. + +## Reproducibility Actions + +- recommended: claim-generalizable-oncology - Run the pipeline in each deployment-like environment or downgrade deployment readiness language. +- blocking: claim-generalizable-oncology - Hold broad generalizability language until at least one independent validation artifact is linked. +- blocking: claim-deployment-ready - Hold broad generalizability language until at least one independent validation artifact is linked. +- blocking: claim-deployment-ready - Add a deterministic runbook, manifest, or notebook rerun before the assistant marks the claim reproducible. + +## Research Gap Prompts + +- high: pediatric oncology RNA-seq validation - frequently cited limitation with low replication coverage +- high: CPU-only low-resource clinic reproducibility run - deployment claim depends on clinic-like runtime evidence +- high: ancestry-balanced external validation - underrepresented ancestry is asserted but not evidenced diff --git a/external-validity-transfer-assistant/reports/summary.json b/external-validity-transfer-assistant/reports/summary.json new file mode 100644 index 00000000..ccc8f2c5 --- /dev/null +++ b/external-validity-transfer-assistant/reports/summary.json @@ -0,0 +1,263 @@ +{ + "projectId": "project-transfer-001", + "title": "Portable biomarker triage assistant for multi-site oncology cohorts", + "assistant": "external-validity-transfer-assistant", + "issue": "SCIBASE-AI/SCIBASE.AI#16", + "averageScore": 44, + "decision": "hold-for-transfer-evidence", + "severitySummary": { + "critical": 0, + "high": 5, + "medium": 5, + "low": 0 + }, + "claimReviews": [ + { + "id": "claim-generalizable-oncology", + "text": "The model generalizes across oncology patient populations and hospital settings.", + "score": 0, + "decision": "quarantine-from-review-packet", + "evidenceCount": 2, + "observedScope": { + "populations": [ + "adult" + ], + "settings": [ + "academic hospital" + ], + "instruments": [ + "bulk-rna-seq" + ], + "environments": [ + "python-3.11" + ], + "externalValidation": false, + "runnableEvidence": true + }, + "findings": [ + { + "severity": "high", + "rule": "population-transfer-gap", + "message": "Claim claim-generalizable-oncology asserts populations not represented in linked evidence: pediatric, underrepresented ancestry.", + "action": "Narrow the claim wording or add external validation for each missing population." + }, + { + "severity": "high", + "rule": "setting-transfer-gap", + "message": "Claim claim-generalizable-oncology asserts settings not represented in linked evidence: community hospital, international site.", + "action": "Add site-level validation evidence or mark the setting as a future research gap." + }, + { + "severity": "medium", + "rule": "assay-transfer-gap", + "message": "Claim claim-generalizable-oncology references unsupported assay or instrument contexts: single-cell-rna-seq.", + "action": "Separate assay-specific claims and document conversion limits before reviewer release." + }, + { + "severity": "medium", + "rule": "runtime-transfer-gap", + "message": "Claim claim-generalizable-oncology references runtime environments not covered by rerun evidence: cuda, cpu-only.", + "action": "Run the pipeline in each deployment-like environment or downgrade deployment readiness language." + }, + { + "severity": "high", + "rule": "no-external-validation", + "message": "Claim claim-generalizable-oncology has broad transfer language without external validation evidence.", + "action": "Hold broad generalizability language until at least one independent validation artifact is linked." + }, + { + "severity": "medium", + "rule": "strong-claim-subgroup-undercoverage", + "message": "Strong claim claim-generalizable-oncology does not cover required subgroup evidence: pediatric, underrepresented ancestry.", + "action": "Convert the claim to qualified language or create a subgroup-specific validation plan." + } + ] + }, + { + "id": "claim-reproducible-pipeline", + "text": "The manuscript includes enough artifacts for an independent lab to rerun the triage pipeline.", + "score": 100, + "decision": "review-ready", + "evidenceCount": 3, + "observedScope": { + "populations": [ + "adult" + ], + "settings": [ + "academic hospital", + "community hospital" + ], + "instruments": [ + "bulk-rna-seq" + ], + "environments": [ + "python-3.11" + ], + "externalValidation": true, + "runnableEvidence": true + }, + "findings": [] + }, + { + "id": "claim-deployment-ready", + "text": "The assistant is ready for deployment guidance in low-resource clinics.", + "score": 32, + "decision": "quarantine-from-review-packet", + "evidenceCount": 1, + "observedScope": { + "populations": [ + "adult" + ], + "settings": [ + "community hospital" + ], + "instruments": [ + "bulk-rna-seq" + ], + "environments": [ + "cpu-only" + ], + "externalValidation": false, + "runnableEvidence": false + }, + "findings": [ + { + "severity": "medium", + "rule": "setting-transfer-gap", + "message": "Claim claim-deployment-ready asserts settings not represented in linked evidence: low-resource clinic.", + "action": "Add site-level validation evidence or mark the setting as a future research gap." + }, + { + "severity": "high", + "rule": "no-external-validation", + "message": "Claim claim-deployment-ready has broad transfer language without external validation evidence.", + "action": "Hold broad generalizability language until at least one independent validation artifact is linked." + }, + { + "severity": "high", + "rule": "no-runnable-transfer-evidence", + "message": "Claim claim-deployment-ready lacks reproducible runtime evidence for the asserted context.", + "action": "Add a deterministic runbook, manifest, or notebook rerun before the assistant marks the claim reproducible." + }, + { + "severity": "medium", + "rule": "strong-claim-subgroup-undercoverage", + "message": "Strong claim claim-deployment-ready does not cover required subgroup evidence: pediatric, underrepresented ancestry.", + "action": "Convert the claim to qualified language or create a subgroup-specific validation plan." + } + ] + } + ], + "peerReviewSuggestions": [ + { + "claimId": "claim-generalizable-oncology", + "severity": "high", + "suggestion": "Claim claim-generalizable-oncology asserts populations not represented in linked evidence: pediatric, underrepresented ancestry.", + "action": "Narrow the claim wording or add external validation for each missing population." + }, + { + "claimId": "claim-generalizable-oncology", + "severity": "high", + "suggestion": "Claim claim-generalizable-oncology asserts settings not represented in linked evidence: community hospital, international site.", + "action": "Add site-level validation evidence or mark the setting as a future research gap." + }, + { + "claimId": "claim-generalizable-oncology", + "severity": "medium", + "suggestion": "Claim claim-generalizable-oncology references unsupported assay or instrument contexts: single-cell-rna-seq.", + "action": "Separate assay-specific claims and document conversion limits before reviewer release." + }, + { + "claimId": "claim-generalizable-oncology", + "severity": "medium", + "suggestion": "Claim claim-generalizable-oncology references runtime environments not covered by rerun evidence: cuda, cpu-only.", + "action": "Run the pipeline in each deployment-like environment or downgrade deployment readiness language." + }, + { + "claimId": "claim-generalizable-oncology", + "severity": "high", + "suggestion": "Claim claim-generalizable-oncology has broad transfer language without external validation evidence.", + "action": "Hold broad generalizability language until at least one independent validation artifact is linked." + }, + { + "claimId": "claim-generalizable-oncology", + "severity": "medium", + "suggestion": "Strong claim claim-generalizable-oncology does not cover required subgroup evidence: pediatric, underrepresented ancestry.", + "action": "Convert the claim to qualified language or create a subgroup-specific validation plan." + }, + { + "claimId": "claim-deployment-ready", + "severity": "medium", + "suggestion": "Claim claim-deployment-ready asserts settings not represented in linked evidence: low-resource clinic.", + "action": "Add site-level validation evidence or mark the setting as a future research gap." + }, + { + "claimId": "claim-deployment-ready", + "severity": "high", + "suggestion": "Claim claim-deployment-ready has broad transfer language without external validation evidence.", + "action": "Hold broad generalizability language until at least one independent validation artifact is linked." + }, + { + "claimId": "claim-deployment-ready", + "severity": "high", + "suggestion": "Claim claim-deployment-ready lacks reproducible runtime evidence for the asserted context.", + "action": "Add a deterministic runbook, manifest, or notebook rerun before the assistant marks the claim reproducible." + }, + { + "claimId": "claim-deployment-ready", + "severity": "medium", + "suggestion": "Strong claim claim-deployment-ready does not cover required subgroup evidence: pediatric, underrepresented ancestry.", + "action": "Convert the claim to qualified language or create a subgroup-specific validation plan." + } + ], + "reproducibilityActions": [ + { + "claimId": "claim-generalizable-oncology", + "priority": "recommended", + "action": "Run the pipeline in each deployment-like environment or downgrade deployment readiness language." + }, + { + "claimId": "claim-generalizable-oncology", + "priority": "blocking", + "action": "Hold broad generalizability language until at least one independent validation artifact is linked." + }, + { + "claimId": "claim-deployment-ready", + "priority": "blocking", + "action": "Hold broad generalizability language until at least one independent validation artifact is linked." + }, + { + "claimId": "claim-deployment-ready", + "priority": "blocking", + "action": "Add a deterministic runbook, manifest, or notebook rerun before the assistant marks the claim reproducible." + } + ], + "researchGaps": [ + { + "id": "gap-pediatric-oncology-rnaseq", + "topic": "pediatric oncology RNA-seq validation", + "reason": "frequently cited limitation with low replication coverage", + "priority": "high", + "suggestedNextStep": "Use pediatric oncology RNA-seq validation as a targeted validation or grant-planning workstream." + }, + { + "id": "gap-cpu-only-clinic-run", + "topic": "CPU-only low-resource clinic reproducibility run", + "reason": "deployment claim depends on clinic-like runtime evidence", + "priority": "high", + "suggestedNextStep": "Use CPU-only low-resource clinic reproducibility run as a targeted validation or grant-planning workstream." + }, + { + "id": "gap-ancestry-balanced-evaluation", + "topic": "ancestry-balanced external validation", + "reason": "underrepresented ancestry is asserted but not evidenced", + "priority": "high", + "suggestedNextStep": "Use ancestry-balanced external validation as a targeted validation or grant-planning workstream." + } + ], + "safetyNotes": [ + "Synthetic data only.", + "No external APIs, credentials, private manuscripts, or live clinical data are used.", + "The assistant produces deterministic review packets suitable for pre-submission review." + ] +} diff --git a/external-validity-transfer-assistant/reports/summary.svg b/external-validity-transfer-assistant/reports/summary.svg new file mode 100644 index 00000000..f07fd95f --- /dev/null +++ b/external-validity-transfer-assistant/reports/summary.svg @@ -0,0 +1,21 @@ + + +External Validity Transfer Assistant +Average transfer score + + +44 - hold-for-transfer-evidence +claim-generalizable-oncology + + +0 - quarantine-from-review-packet +claim-reproducible-pipeline + + +100 - review-ready +claim-deployment-ready + + +32 - quarantine-from-review-packet +Synthetic deterministic demo. No credentials, private manuscripts, or external APIs. + \ No newline at end of file diff --git a/external-validity-transfer-assistant/requirements-map.md b/external-validity-transfer-assistant/requirements-map.md new file mode 100644 index 00000000..8650ed6f --- /dev/null +++ b/external-validity-transfer-assistant/requirements-map.md @@ -0,0 +1,29 @@ +# Requirements Map + +## SCIBASE #16 Capability Mapping + +### Auto Peer Review Reports + +- Produces structured claim-level peer-review findings. +- Flags clarity and scope mismatch where claims overstate population, setting, assay, or runtime support. +- Emits reviewer-ready actions for claim wording, validation evidence, and transfer-risk holds. + +### Reproducibility Checker + +- Checks whether linked evidence includes reproducible runtime artifacts. +- Flags claims that depend on deployment environments without matching rerun evidence. +- Produces blocking or recommended reproducibility actions for reviewer packets. + +### Research Gap Finder + +- Converts missing transfer contexts into research-gap prompts. +- Ranks gaps that fit the lab's declared capabilities. +- Highlights pediatric validation, ancestry-balanced evaluation, and CPU-only clinic rerun gaps from synthetic corpus signals. + +## Acceptance Notes + +- Dependency-free CommonJS module. +- Deterministic synthetic sample data. +- Local tests for broad-claim holds, missing evidence quarantine, external validation handling, report rendering, and research-gap output. +- Generated JSON, Markdown, and SVG artifacts via `npm run demo`. +- No external API calls, credentials, private research data, or live clinical records. diff --git a/external-validity-transfer-assistant/sample-data.js b/external-validity-transfer-assistant/sample-data.js new file mode 100644 index 00000000..23f571e2 --- /dev/null +++ b/external-validity-transfer-assistant/sample-data.js @@ -0,0 +1,149 @@ +const project = { + id: "project-transfer-001", + title: "Portable biomarker triage assistant for multi-site oncology cohorts", + domain: "clinical-ai", + labCapabilities: ["rna-seq", "clinical-metadata", "containerized-python", "external-cohort-review"], + manuscript: { + title: "A generalizable biomarker triage model for oncology screening", + targetUse: "pre-submission peer review", + claims: [ + { + id: "claim-generalizable-oncology", + text: "The model generalizes across oncology patient populations and hospital settings.", + assertedScope: { + populations: ["adult", "pediatric", "underrepresented ancestry"], + settings: ["academic hospital", "community hospital", "international site"], + instruments: ["bulk-rna-seq", "single-cell-rna-seq"], + environments: ["python-3.11", "cuda", "cpu-only"] + }, + evidenceIds: ["internal-adult-cohort", "academic-hospital-runbook"], + confidence: "strong" + }, + { + id: "claim-reproducible-pipeline", + text: "The manuscript includes enough artifacts for an independent lab to rerun the triage pipeline.", + assertedScope: { + populations: ["adult"], + settings: ["academic hospital"], + instruments: ["bulk-rna-seq"], + environments: ["python-3.11"] + }, + evidenceIds: ["container-runbook", "clean-data-manifest", "external-adult-validation"], + confidence: "moderate" + }, + { + id: "claim-deployment-ready", + text: "The assistant is ready for deployment guidance in low-resource clinics.", + assertedScope: { + populations: ["adult"], + settings: ["low-resource clinic", "community hospital"], + instruments: ["bulk-rna-seq"], + environments: ["cpu-only"] + }, + evidenceIds: ["community-hospital-protocol"], + confidence: "strong" + } + ], + requiredSubgroups: ["adult", "pediatric", "underrepresented ancestry"], + declaredLimitations: [ + "The pilot cohort is single-country.", + "No pediatric external cohort is currently available." + ] + }, + evidence: [ + { + id: "internal-adult-cohort", + type: "internal-cohort", + population: "adult", + setting: "academic hospital", + instrument: "bulk-rna-seq", + environment: "python-3.11", + sampleSize: 820, + reproducible: true, + externalValidation: false, + limitations: ["single-country", "academic-site-only"] + }, + { + id: "academic-hospital-runbook", + type: "runbook", + population: "adult", + setting: "academic hospital", + instrument: "bulk-rna-seq", + environment: "python-3.11", + sampleSize: 0, + reproducible: true, + externalValidation: false, + limitations: ["no-community-site-smoke-test"] + }, + { + id: "container-runbook", + type: "runtime-evidence", + population: "adult", + setting: "academic hospital", + instrument: "bulk-rna-seq", + environment: "python-3.11", + sampleSize: 0, + reproducible: true, + externalValidation: false, + limitations: [] + }, + { + id: "clean-data-manifest", + type: "artifact-manifest", + population: "adult", + setting: "academic hospital", + instrument: "bulk-rna-seq", + environment: "python-3.11", + sampleSize: 820, + reproducible: true, + externalValidation: false, + limitations: [] + }, + { + id: "external-adult-validation", + type: "external-validation", + population: "adult", + setting: "community hospital", + instrument: "bulk-rna-seq", + environment: "python-3.11", + sampleSize: 290, + reproducible: true, + externalValidation: true, + limitations: ["adult-only"] + }, + { + id: "community-hospital-protocol", + type: "protocol", + population: "adult", + setting: "community hospital", + instrument: "bulk-rna-seq", + environment: "cpu-only", + sampleSize: 0, + reproducible: false, + externalValidation: false, + limitations: ["protocol-only", "no-completed-run"] + } + ], + corpusSignals: [ + { + id: "gap-pediatric-oncology-rnaseq", + topic: "pediatric oncology RNA-seq validation", + reason: "frequently cited limitation with low replication coverage", + labFit: ["rna-seq", "external-cohort-review"] + }, + { + id: "gap-cpu-only-clinic-run", + topic: "CPU-only low-resource clinic reproducibility run", + reason: "deployment claim depends on clinic-like runtime evidence", + labFit: ["containerized-python"] + }, + { + id: "gap-ancestry-balanced-evaluation", + topic: "ancestry-balanced external validation", + reason: "underrepresented ancestry is asserted but not evidenced", + labFit: ["clinical-metadata", "external-cohort-review"] + } + ] +}; + +module.exports = { project }; diff --git a/external-validity-transfer-assistant/test.js b/external-validity-transfer-assistant/test.js new file mode 100644 index 00000000..265d7471 --- /dev/null +++ b/external-validity-transfer-assistant/test.js @@ -0,0 +1,57 @@ +const assert = require("assert"); +const { project } = require("./sample-data"); +const { buildReviewPacket, evaluateClaim, renderMarkdownReport, renderSvgSummary } = require("./index"); + +const packet = buildReviewPacket(project); + +assert.strictEqual(packet.assistant, "external-validity-transfer-assistant"); +assert.strictEqual(packet.issue, "SCIBASE-AI/SCIBASE.AI#16"); +assert.ok(packet.peerReviewSuggestions.length >= 6, "expected transfer-risk peer review suggestions"); +assert.ok(packet.reproducibilityActions.length >= 3, "expected reproducibility actions"); +assert.ok(packet.researchGaps.length >= 3, "expected research-gap prompts"); + +const broadClaim = packet.claimReviews.find((review) => review.id === "claim-generalizable-oncology"); +assert.ok(broadClaim, "expected broad generalizability claim review"); +assert.ok( + ["hold-for-transfer-evidence", "quarantine-from-review-packet"].includes(broadClaim.decision), + "broad claim should be held or quarantined until transfer evidence exists" +); +assert.ok( + broadClaim.findings.some((finding) => finding.rule === "population-transfer-gap"), + "broad claim should flag missing population transfer evidence" +); +assert.ok( + broadClaim.findings.some((finding) => finding.rule === "no-external-validation"), + "broad claim should require external validation" +); + +const reproducibleClaim = packet.claimReviews.find((review) => review.id === "claim-reproducible-pipeline"); +assert.ok(reproducibleClaim.score > broadClaim.score, "reproducible claim should score better than broad claim"); +assert.strictEqual(reproducibleClaim.observedScope.externalValidation, true); +assert.strictEqual(reproducibleClaim.observedScope.runnableEvidence, true); + +const emptyClaim = { + id: "claim-empty", + text: "The model works for every lab.", + assertedScope: { + populations: ["adult"], + settings: ["international site"], + instruments: ["bulk-rna-seq"], + environments: ["python-3.11"] + }, + evidenceIds: [], + confidence: "strong" +}; +const emptyReview = evaluateClaim(emptyClaim, project.evidence, project.manuscript); +assert.strictEqual(emptyReview.decision, "quarantine-from-review-packet"); +assert.ok(emptyReview.findings.some((finding) => finding.rule === "missing-evidence")); + +const markdown = renderMarkdownReport(packet); +assert.ok(markdown.includes("## Claim Reviews")); +assert.ok(markdown.includes("## Research Gap Prompts")); + +const svg = renderSvgSummary(packet); +assert.ok(svg.includes("