diff --git a/manuscript-unit-consistency-assistant/README.md b/manuscript-unit-consistency-assistant/README.md new file mode 100644 index 00000000..fac050a5 --- /dev/null +++ b/manuscript-unit-consistency-assistant/README.md @@ -0,0 +1,52 @@ +# Manuscript Unit Consistency Assistant + +Dependency-free AI peer-review aid slice for SCIBASE issue #13. + +This module audits draft manuscript packets for unit hygiene risks that slow down scientific peer review: + +- numeric values that appear without adjacent units +- the same reagent or outcome reported across incompatible unit families +- table header units that drift from cell-level units +- implausible wet-lab temperature or percentage values +- reviewer-ready actions and deterministic audit digests + +The implementation uses synthetic sample data only. It is designed as a small, auditable MVP component that can sit behind an AI-assisted pre-submission review workflow. + +## Run + +```bash +npm --prefix manuscript-unit-consistency-assistant run check +npm --prefix manuscript-unit-consistency-assistant test +npm --prefix manuscript-unit-consistency-assistant run demo + +node manuscript-unit-consistency-assistant/test.js +node manuscript-unit-consistency-assistant/demo.js +node --check manuscript-unit-consistency-assistant/index.js +node --check manuscript-unit-consistency-assistant/sample-data.js +node --check manuscript-unit-consistency-assistant/test.js +node --check manuscript-unit-consistency-assistant/demo.js +``` + +The demo writes: + +- `reports/unit-consistency-report.json` +- `reports/unit-consistency-report.md` +- `reports/unit-consistency-summary.svg` +- `reports/demo-script.txt` +- `reports/demo.mp4` + +## API + +```js +const { analyzeManuscript } = require("./index"); + +const result = analyzeManuscript({ + manuscriptId: "draft-001", + title: "Draft title", + trackedTerms: [{ name: "IL-6", aliases: ["IL-6", "IL6"] }], + sections: [{ id: "methods", title: "Methods", text: "IL-6 was measured at 180 pg/mL." 
}], + tables: [], +}); +``` + +`result` contains a readiness summary, findings, reviewer actions, measurement inventory, and a stable audit digest. diff --git a/manuscript-unit-consistency-assistant/acceptance-notes.md b/manuscript-unit-consistency-assistant/acceptance-notes.md new file mode 100644 index 00000000..112c52da --- /dev/null +++ b/manuscript-unit-consistency-assistant/acceptance-notes.md @@ -0,0 +1,31 @@ +# Acceptance Notes + +## What Changed + +- Added a self-contained manuscript unit consistency assistant. +- Added synthetic sample data that demonstrates missing units, molar-vs-mass concentration conflicts, table unit drift, and implausible temperature detection. +- Added deterministic tests and demo artifacts for local review. + +## Verification Targets + +- The module runs with Node.js and no third-party dependencies. +- Findings include stable IDs, evidence snippets, recommendations, and severity. +- Reports are deterministic across repeated runs. +- Clean sample manuscripts return a 100 readiness score with no findings. +- The included demo video is H.264, 1280x720, and 4 seconds long. 
+ +## Local Validation + +```bash +npm --prefix manuscript-unit-consistency-assistant run check +npm --prefix manuscript-unit-consistency-assistant test +npm --prefix manuscript-unit-consistency-assistant run demo +node manuscript-unit-consistency-assistant/test.js +node manuscript-unit-consistency-assistant/demo.js +node --check manuscript-unit-consistency-assistant/index.js +node --check manuscript-unit-consistency-assistant/sample-data.js +node --check manuscript-unit-consistency-assistant/test.js +node --check manuscript-unit-consistency-assistant/demo.js +git diff --check +mdls -name kMDItemDurationSeconds -name kMDItemCodecs -name kMDItemPixelHeight -name kMDItemPixelWidth manuscript-unit-consistency-assistant/reports/demo.mp4 +``` diff --git a/manuscript-unit-consistency-assistant/demo.js b/manuscript-unit-consistency-assistant/demo.js new file mode 100644 index 00000000..2b3f5187 --- /dev/null +++ b/manuscript-unit-consistency-assistant/demo.js @@ -0,0 +1,123 @@ +"use strict"; + +const fs = require("node:fs"); +const path = require("node:path"); +const { analyzeManuscript } = require("./index"); +const { sampleManuscript } = require("./sample-data"); + +const outputDir = path.join(__dirname, "reports"); +const report = analyzeManuscript(sampleManuscript); + +fs.mkdirSync(outputDir, { recursive: true }); +fs.writeFileSync( + path.join(outputDir, "unit-consistency-report.json"), + `${JSON.stringify(report, null, 2)}\n` +); +fs.writeFileSync( + path.join(outputDir, "unit-consistency-report.md"), + renderMarkdown(report) +); +fs.writeFileSync( + path.join(outputDir, "unit-consistency-summary.svg"), + renderSvg(report) +); +fs.writeFileSync( + path.join(outputDir, "demo-script.txt"), + renderDemoScript(report) +); + +console.log(`Wrote reports to ${outputDir}`); + +function renderMarkdown(result) { + const lines = [ + "# Manuscript Unit Consistency Report", + "", + `Manuscript: ${result.summary.title}`, + `Audit digest: ${result.auditDigest}`, + `Readiness: 
${result.summary.readiness} (${result.summary.readinessScore}/100)`, + "", + "## Findings", + "", + ]; + + if (result.findings.length === 0) { + lines.push("No unit consistency findings were detected."); + } else { + result.findings.forEach((finding) => { + lines.push(`- ${finding.severity.toUpperCase()} ${finding.type}: ${finding.message}`); + lines.push(` Evidence: ${finding.evidence}`); + lines.push(` Action: ${finding.recommendation}`); + }); + } + + lines.push("", "## Reviewer Actions", ""); + result.reviewerActions.forEach((action) => { + lines.push(`- P${action.priority} ${action.action} (${action.sourceId})`); + }); + + lines.push("", "## Measurement Inventory", ""); + result.measurementInventory.forEach((measurement) => { + lines.push(`- ${measurement.sourceId}: ${measurement.raw} -> ${measurement.unitFamily} (${measurement.subject || "untracked"})`); + }); + + return `${lines.join("\n")}\n`; +} + +function renderDemoScript(result) { + return [ + "Demo storyboard for manuscript-unit-consistency-assistant", + "", + "1. Load a draft manuscript packet with sections, tables, and tracked terms.", + `2. Extract ${result.summary.measurementsReviewed} measurements from methods, results, and table cells.`, + `3. Surface ${result.findings.length} reviewer findings, prioritizing high-severity blockers first.`, + "4. 
Export JSON, Markdown, and SVG artifacts for an AI peer-review aid workflow.", + "", + `Readiness result: ${result.summary.readiness} (${result.summary.readinessScore}/100).`, + `Audit digest: ${result.auditDigest}.`, + "", + ].join("\n"); +} + +function renderSvg(result) { + const high = result.summary.findingsBySeverity.high || 0; + const medium = result.summary.findingsBySeverity.medium || 0; + const low = result.summary.findingsBySeverity.low || 0; + const score = result.summary.readinessScore; + const bars = [ + { label: "High", value: high, color: "#d92d20", y: 150 }, + { label: "Medium", value: medium, color: "#f79009", y: 205 }, + { label: "Low", value: low, color: "#475467", y: 260 }, + ]; + const max = Math.max(1, high, medium, low); + const barSvg = bars.map((bar) => { + const width = 360 * (bar.value / max); + return [ + `${bar.label}: ${bar.value}`, + ``, + ].join("\n"); + }).join("\n"); + + return [ + '', + '', + '', + 'Manuscript Unit Consistency Assistant', + `${escapeXml(result.summary.title)}`, + `Audit digest ${result.auditDigest}`, + barSvg, + ``, + `${score}`, + 'readiness score', + `${escapeXml(result.summary.readiness)}`, + '', + "", + ].join("\n"); +} + +function escapeXml(value) { + return String(value) + .replace(/&/g, "&") + .replace(//g, ">") + .replace(/"/g, """); +} diff --git a/manuscript-unit-consistency-assistant/index.js b/manuscript-unit-consistency-assistant/index.js new file mode 100644 index 00000000..9e3ead71 --- /dev/null +++ b/manuscript-unit-consistency-assistant/index.js @@ -0,0 +1,507 @@ +"use strict"; + +const crypto = require("node:crypto"); + +const UNIT_DEFINITIONS = [ + { pattern: "cells/well", canonical: "cells/well", family: "count_density", scale: 1 }, + { pattern: "pg/mL", canonical: "pg/mL", family: "mass_concentration", scale: 1e-9 }, + { pattern: "ng/mL", canonical: "ng/mL", family: "mass_concentration", scale: 1e-6 }, + { pattern: "ug/mL", canonical: "ug/mL", family: "mass_concentration", scale: 1e-3 }, + { 
pattern: "microg/mL", canonical: "ug/mL", family: "mass_concentration", scale: 1e-3 }, + { pattern: "mg/mL", canonical: "mg/mL", family: "mass_concentration", scale: 1 }, + { pattern: "g/L", canonical: "mg/mL", family: "mass_concentration", scale: 1 }, + { pattern: "nM", canonical: "nM", family: "molar_concentration", scale: 1e-9 }, + { pattern: "uM", canonical: "uM", family: "molar_concentration", scale: 1e-6 }, + { pattern: "microM", canonical: "uM", family: "molar_concentration", scale: 1e-6 }, + { pattern: "mM", canonical: "mM", family: "molar_concentration", scale: 1e-3 }, + { pattern: "M", canonical: "M", family: "molar_concentration", scale: 1 }, + { pattern: "kg", canonical: "kg", family: "mass", scale: 1000 }, + { pattern: "mg", canonical: "mg", family: "mass", scale: 1e-3 }, + { pattern: "ug", canonical: "ug", family: "mass", scale: 1e-6 }, + { pattern: "microg", canonical: "ug", family: "mass", scale: 1e-6 }, + { pattern: "g", canonical: "g", family: "mass", scale: 1 }, + { pattern: "uL", canonical: "uL", family: "volume", scale: 1e-6 }, + { pattern: "microL", canonical: "uL", family: "volume", scale: 1e-6 }, + { pattern: "mL", canonical: "mL", family: "volume", scale: 1e-3 }, + { pattern: "L", canonical: "L", family: "volume", scale: 1 }, + { pattern: "hrs", canonical: "h", family: "time", scale: 3600 }, + { pattern: "hr", canonical: "h", family: "time", scale: 3600 }, + { pattern: "h", canonical: "h", family: "time", scale: 3600 }, + { pattern: "min", canonical: "min", family: "time", scale: 60 }, + { pattern: "sec", canonical: "s", family: "time", scale: 1 }, + { pattern: "s", canonical: "s", family: "time", scale: 1 }, + { pattern: "degC", canonical: "C", family: "temperature", scale: 1 }, + { pattern: "C", canonical: "C", family: "temperature", scale: 1 }, + { pattern: "rpm", canonical: "rpm", family: "rotation", scale: 1 }, + { pattern: "rcf", canonical: "rcf", family: "rotation", scale: 1 }, + { pattern: "xg", canonical: "xg", family: "rotation", 
scale: 1 }, + { pattern: "%", canonical: "%", family: "percent", scale: 1 }, +]; + +const UNIT_BY_PATTERN = new Map( + UNIT_DEFINITIONS.map((definition) => [definition.pattern.toLowerCase(), definition]) +); + +const UNIT_PATTERN = UNIT_DEFINITIONS.map((definition) => escapeRegExp(definition.pattern)) + .sort((a, b) => b.length - a.length) + .join("|") + .replace(/xg/g, "x\\s*g"); + +const MEASUREMENT_RE = new RegExp( + String.raw`(-?\d+(?:,\d{3})*(?:\.\d+)?)\s*(${UNIT_PATTERN})(?![A-Za-z/])`, + "gi" +); + +const MISSING_UNIT_RE = /\b(?:at|to|by|from|around|about|approximately|approx\.?|reported|centrifuged at|heated to|increased by|decreased by)\s+(-?\d+(?:,\d{3})*(?:\.\d+)?)(?![\d,.])(?!\s*(?:%|[A-Za-z]+\/[A-Za-z]+|pg\/mL|ng\/mL|ug\/mL|microg\/mL|mg\/mL|g\/L|nM|uM|microM|mM|M|kg|mg|ug|microg|g|uL|microL|mL|L|hrs?|h|min|sec|s|degC|C|rpm|rcf|x\s*g|cells\/well|fold|times|participants|subjects|samples|replicates|wells|plates|groups|days|weeks|months|years)\b)/gi; + +const STOPWORDS = new Set([ + "a", + "an", + "and", + "as", + "at", + "by", + "for", + "from", + "in", + "into", + "is", + "of", + "on", + "or", + "the", + "then", + "to", + "was", + "were", + "with", +]); + +function analyzeManuscript(packet) { + const safePacket = packet || {}; + const trackedTerms = (safePacket.trackedTerms || []).map((term) => normalizeTrackedTerm(term)); + const sections = (safePacket.sections || []).map((section, sectionIndex) => ({ + id: section.id || `section-${sectionIndex + 1}`, + title: section.title || `Section ${sectionIndex + 1}`, + text: section.text || "", + })); + const tables = safePacket.tables || []; + + const measurements = [ + ...extractSectionMeasurements(sections, trackedTerms), + ...extractTableMeasurements(tables, trackedTerms), + ]; + + const findings = [ + ...detectMissingUnits(sections), + ...detectRangeOutliers(measurements), + ...detectTrackedTermUnitDrift(measurements, trackedTerms), + ...detectTableUnitDrift(tables, trackedTerms), + ]; + + const 
dedupedFindings = dedupeFindings(findings); + const reviewerActions = buildReviewerActions(dedupedFindings); + const score = calculateReadinessScore(dedupedFindings); + const summary = { + manuscriptId: safePacket.manuscriptId || "unknown-manuscript", + title: safePacket.title || "Untitled manuscript", + sectionsReviewed: sections.length, + tablesReviewed: tables.length, + measurementsReviewed: measurements.length, + findingsBySeverity: countBy(dedupedFindings, "severity"), + readinessScore: score, + readiness: score >= 85 ? "ready-with-minor-edits" : score >= 65 ? "needs-targeted-revision" : "block-before-submission", + }; + + const result = { + tool: "manuscript-unit-consistency-assistant", + version: "1.0.0", + summary, + findings: dedupedFindings, + reviewerActions, + measurementInventory: measurements, + }; + + return { + ...result, + auditDigest: digestFor(result), + }; +} + +function extractSectionMeasurements(sections, trackedTerms) { + return sections.flatMap((section) => { + const text = section.text || ""; + const matches = []; + for (const match of text.matchAll(MEASUREMENT_RE)) { + const unit = normalizeUnit(match[2]); + if (!unit) { + continue; + } + const rawValue = Number.parseFloat(match[1].replace(/,/g, "")); + const windowText = text.slice(Math.max(0, match.index - 90), Math.min(text.length, match.index + 90)); + const sentenceText = compactWhitespace(sentenceAround(text, match.index).text); + matches.push({ + id: stableId("measurement", section.id, match.index, match[0]), + sourceType: "section", + sourceId: section.id, + sourceTitle: section.title, + raw: match[0], + value: rawValue, + unit: unit.canonical, + unitFamily: unit.family, + normalizedValue: normalizeValue(rawValue, unit), + subject: inferSectionSubject(text, match.index, trackedTerms), + context: sentenceText || compactWhitespace(windowText), + }); + } + return matches; + }); +} + +function extractTableMeasurements(tables, trackedTerms) { + const measurements = []; + 
tables.forEach((table, tableIndex) => { + const tableId = table.id || `table-${tableIndex + 1}`; + const columns = table.columns || []; + const rows = table.rows || []; + rows.forEach((row, rowIndex) => { + columns.forEach((column, columnIndex) => { + const cellValue = String(row[columnIndex] ?? ""); + for (const match of cellValue.matchAll(MEASUREMENT_RE)) { + const unit = normalizeUnit(match[2]); + if (!unit) { + continue; + } + const rawValue = Number.parseFloat(match[1].replace(/,/g, "")); + const context = `${table.title || tableId}: ${column.name || column} = ${cellValue}`; + measurements.push({ + id: stableId("table-measurement", tableId, rowIndex, columnIndex, match[0]), + sourceType: "table", + sourceId: tableId, + sourceTitle: table.title || tableId, + row: rowIndex + 1, + column: column.name || String(column), + raw: match[0], + value: rawValue, + unit: unit.canonical, + unitFamily: unit.family, + normalizedValue: normalizeValue(rawValue, unit), + subject: inferSubject(`${column.name || column} ${cellValue}`, trackedTerms), + context, + }); + } + }); + }); + }); + return measurements; +} + +function detectMissingUnits(sections) { + const findings = []; + sections.forEach((section) => { + const text = section.text || ""; + for (const match of text.matchAll(MISSING_UNIT_RE)) { + const value = Number.parseFloat(match[1].replace(/,/g, "")); + if (Number.isNaN(value)) { + continue; + } + const context = compactWhitespace(sentenceAround(text, match.index).text); + if (looksLikeCitationOrStatistic(context)) { + continue; + } + findings.push({ + id: stableId("missing-unit", section.id, match.index, match[0]), + type: "missing-unit", + severity: "medium", + sourceId: section.id, + message: `Numeric value "${match[1]}" appears without an adjacent unit in ${section.title}.`, + evidence: context, + recommendation: "Add the intended unit or rewrite the sentence so the quantity is unambiguous.", + }); + } + }); + return findings; +} + +function 
detectRangeOutliers(measurements) { + return measurements.flatMap((measurement) => { + if (measurement.unitFamily === "temperature" && (measurement.value < -90 || measurement.value > 120)) { + return [{ + id: stableId("temperature-outlier", measurement.id), + type: "range-outlier", + severity: "high", + sourceId: measurement.sourceId, + message: `${measurement.raw} is outside a plausible wet-lab manuscript temperature range.`, + evidence: measurement.context, + recommendation: "Confirm whether this is a typo, a specialized protocol condition, or a missing decimal point.", + }]; + } + if (measurement.unitFamily === "percent" && (measurement.value < 0 || measurement.value > 100)) { + return [{ + id: stableId("percent-outlier", measurement.id), + type: "range-outlier", + severity: "medium", + sourceId: measurement.sourceId, + message: `${measurement.raw} is outside a standard 0-100 percent range.`, + evidence: measurement.context, + recommendation: "Clarify whether the value is a percentage, fold change, or relative difference.", + }]; + } + return []; + }); +} + +function detectTrackedTermUnitDrift(measurements, trackedTerms) { + const findings = []; + trackedTerms.forEach((term) => { + const related = measurements.filter((measurement) => { + return measurement.subject === term.name && + (measurement.unitFamily === "molar_concentration" || measurement.unitFamily === "mass_concentration"); + }); + if (related.length < 2) { + return; + } + const families = new Set(related.map((measurement) => measurement.unitFamily)); + const concentrationFamilies = new Set( + related + .map((measurement) => measurement.unitFamily) + .filter((family) => family === "molar_concentration" || family === "mass_concentration") + ); + if (concentrationFamilies.size > 1) { + findings.push({ + id: stableId("unit-family-conflict", term.name, related.map((item) => item.raw).join("|")), + type: "unit-family-conflict", + severity: "high", + sourceId: related.map((item) => item.sourceId).join(","), 
+ message: `${term.name} uses both molar and mass concentration units.`, + evidence: related.map((item) => `${item.sourceId}: ${item.raw}`).join("; "), + recommendation: "Add a molecular-weight conversion note or standardize the manuscript to one concentration basis.", + }); + return; + } + if (families.size === 1) { + const units = new Set(related.map((measurement) => measurement.unit)); + if (units.size > 1 && term.requireSingleUnit !== false) { + findings.push({ + id: stableId("unit-scale-drift", term.name, [...units].join("|")), + type: "unit-scale-drift", + severity: "low", + sourceId: related.map((item) => item.sourceId).join(","), + message: `${term.name} is reported in multiple unit scales: ${[...units].join(", ")}.`, + evidence: related.map((item) => `${item.sourceId}: ${item.raw}`).join("; "), + recommendation: "Keep the source units if scientifically useful, but include a normalized value for reviewer comparison.", + }); + } + } + }); + return findings; +} + +function detectTableUnitDrift(tables, trackedTerms) { + const findings = []; + tables.forEach((table, tableIndex) => { + const tableId = table.id || `table-${tableIndex + 1}`; + const columns = table.columns || []; + const rows = table.rows || []; + columns.forEach((column, columnIndex) => { + const headerText = column.name || String(column); + const headerUnit = extractUnitFromHeader(headerText); + if (!headerUnit) { + return; + } + rows.forEach((row, rowIndex) => { + const cellValue = String(row[columnIndex] ?? 
""); + for (const match of cellValue.matchAll(MEASUREMENT_RE)) { + const cellUnit = normalizeUnit(match[2]); + if (!cellUnit) { + continue; + } + if (cellUnit.family !== headerUnit.family || cellUnit.canonical !== headerUnit.canonical) { + const subject = inferSubject(`${headerText} ${cellValue}`, trackedTerms) || headerText; + findings.push({ + id: stableId("table-unit-drift", tableId, rowIndex, columnIndex, cellUnit.canonical, headerUnit.canonical), + type: "table-unit-drift", + severity: cellUnit.family === headerUnit.family ? "medium" : "high", + sourceId: tableId, + message: `${subject} cell unit ${cellUnit.canonical} does not match header unit ${headerUnit.canonical}.`, + evidence: `${table.title || tableId}, row ${rowIndex + 1}, column "${headerText}": ${cellValue}`, + recommendation: "Convert the cell value to the header unit or split the column by unit.", + }); + } + } + }); + }); + }); + return findings; +} + +function buildReviewerActions(findings) { + const actions = findings.map((finding, index) => ({ + id: stableId("review-action", finding.id), + priority: finding.severity === "high" ? 1 : finding.severity === "medium" ? 2 : 3, + label: `Action ${index + 1}`, + action: finding.recommendation, + sourceId: finding.sourceId, + blocksSubmission: finding.severity === "high", + })); + return actions.sort((a, b) => a.priority - b.priority || a.label.localeCompare(b.label)); +} + +function calculateReadinessScore(findings) { + const penalty = findings.reduce((total, finding) => { + if (finding.severity === "high") { + return total + 22; + } + if (finding.severity === "medium") { + return total + 11; + } + return total + 5; + }, 0); + return Math.max(0, 100 - penalty); +} + +function normalizeTrackedTerm(term) { + if (typeof term === "string") { + return { name: term, aliases: [term], requireSingleUnit: true }; + } + return { + name: term.name, + aliases: term.aliases && term.aliases.length ? 
term.aliases : [term.name], + requireSingleUnit: term.requireSingleUnit !== false, + }; +} + +function inferSubject(text, trackedTerms) { + const lower = text.toLowerCase(); + for (const term of trackedTerms) { + if (term.aliases.some((alias) => lower.includes(alias.toLowerCase()))) { + return term.name; + } + } + const words = compactWhitespace(text) + .split(/\s+/) + .map((word) => word.replace(/^[^A-Za-z0-9]+|[^A-Za-z0-9-]+$/g, "")) + .filter(Boolean); + const candidates = words.filter((word) => !STOPWORDS.has(word.toLowerCase()) && /[A-Za-z]/.test(word)); + return candidates.slice(-3, -1).join(" ") || null; +} + +function inferSectionSubject(text, measurementIndex, trackedTerms) { + const sentence = sentenceAround(text, measurementIndex); + const lowerSentence = sentence.text.toLowerCase(); + let best = null; + trackedTerms.forEach((term) => { + term.aliases.forEach((alias) => { + const aliasIndex = lowerSentence.indexOf(alias.toLowerCase()); + if (aliasIndex === -1) { + return; + } + const distance = Math.abs(aliasIndex - (measurementIndex - sentence.start)); + if (!best || distance < best.distance) { + best = { name: term.name, distance }; + } + }); + }); + return best ? best.name : inferSubject(sentence.text, trackedTerms); +} + +function sentenceAround(text, index) { + const before = text.slice(0, index); + const after = text.slice(index); + const previousBoundary = Math.max(before.lastIndexOf("."), before.lastIndexOf(";"), before.lastIndexOf("\n")); + const nextBoundaryCandidates = [after.indexOf("."), after.indexOf(";"), after.indexOf("\n")] + .filter((candidate) => candidate !== -1); + const start = previousBoundary === -1 ? 0 : previousBoundary + 1; + const end = nextBoundaryCandidates.length === 0 + ? 
text.length + : index + Math.min(...nextBoundaryCandidates) + 1; + return { + start, + text: text.slice(start, end), + }; +} + +function normalizeUnit(rawUnit) { + if (!rawUnit) { + return null; + } + const normalized = rawUnit + .replace(/\s+/g, "") + .replace(/^micro/i, "micro") + .replace(/^µ/i, "u") + .replace(/^°C$/i, "degC") + .replace(/^xg$/i, "xg"); + const definition = UNIT_BY_PATTERN.get(normalized.toLowerCase()); + if (definition) { + return definition; + } + return UNIT_BY_PATTERN.get(rawUnit.toLowerCase()) || null; +} + +function normalizeValue(value, unit) { + if (unit.family === "temperature") { + return value; + } + return Number((value * unit.scale).toPrecision(8)); +} + +function extractUnitFromHeader(headerText) { + const match = String(headerText).match(/\(([^)]+)\)/); + if (!match) { + return null; + } + return normalizeUnit(match[1].trim()); +} + +function dedupeFindings(findings) { + const seen = new Set(); + const deduped = []; + for (const finding of findings) { + const key = `${finding.type}|${finding.sourceId}|${finding.message}`; + if (seen.has(key)) { + continue; + } + seen.add(key); + deduped.push(finding); + } + return deduped.sort((a, b) => severityRank(a.severity) - severityRank(b.severity) || a.id.localeCompare(b.id)); +} + +function severityRank(severity) { + return { high: 1, medium: 2, low: 3 }[severity] || 4; +} + +function countBy(items, field) { + return items.reduce((counts, item) => { + const key = item[field] || "unknown"; + counts[key] = (counts[key] || 0) + 1; + return counts; + }, {}); +} + +function looksLikeCitationOrStatistic(context) { + return /\b(?:p\s*[<=>]|n\s*=|fig(?:ure)?\.?\s*\d|table\s*\d|ref(?:erence)?\s*\d)\b/i.test(context); +} + +function digestFor(value) { + return crypto.createHash("sha256").update(JSON.stringify(value)).digest("hex").slice(0, 16); +} + +function stableId(...parts) { + return `muca_${digestFor(parts)}`; +} + +function compactWhitespace(value) { + return String(value).replace(/\s+/g, " 
").trim(); +} + +function escapeRegExp(value) { + return value.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); +} + +module.exports = { + analyzeManuscript, + extractSectionMeasurements, + extractTableMeasurements, + normalizeUnit, +}; diff --git a/manuscript-unit-consistency-assistant/package.json b/manuscript-unit-consistency-assistant/package.json new file mode 100644 index 00000000..288b8509 --- /dev/null +++ b/manuscript-unit-consistency-assistant/package.json @@ -0,0 +1,11 @@ +{ + "name": "manuscript-unit-consistency-assistant", + "version": "1.0.0", + "private": true, + "type": "commonjs", + "scripts": { + "check": "node --check index.js && node --check sample-data.js && node --check test.js && node --check demo.js", + "test": "node test.js", + "demo": "node demo.js" + } +} diff --git a/manuscript-unit-consistency-assistant/reports/demo-script.txt b/manuscript-unit-consistency-assistant/reports/demo-script.txt new file mode 100644 index 00000000..0eca1709 --- /dev/null +++ b/manuscript-unit-consistency-assistant/reports/demo-script.txt @@ -0,0 +1,9 @@ +Demo storyboard for manuscript-unit-consistency-assistant + +1. Load a draft manuscript packet with sections, tables, and tracked terms. +2. Extract 15 measurements from methods, results, and table cells. +3. Surface 8 reviewer findings, prioritizing high-severity blockers first. +4. Export JSON, Markdown, and SVG artifacts for an AI peer-review aid workflow. + +Readiness result: block-before-submission (2/100). +Audit digest: 66b689128bf18282. 
diff --git a/manuscript-unit-consistency-assistant/reports/demo.mp4 b/manuscript-unit-consistency-assistant/reports/demo.mp4 new file mode 100644 index 00000000..1f9b576c Binary files /dev/null and b/manuscript-unit-consistency-assistant/reports/demo.mp4 differ diff --git a/manuscript-unit-consistency-assistant/reports/unit-consistency-report.json b/manuscript-unit-consistency-assistant/reports/unit-consistency-report.json new file mode 100644 index 00000000..60995dc6 --- /dev/null +++ b/manuscript-unit-consistency-assistant/reports/unit-consistency-report.json @@ -0,0 +1,364 @@ +{ + "tool": "manuscript-unit-consistency-assistant", + "version": "1.0.0", + "summary": { + "manuscriptId": "SCIBASE-MUCA-001", + "title": "Metabolic rescue assay draft with unit hygiene risks", + "sectionsReviewed": 2, + "tablesReviewed": 1, + "measurementsReviewed": 15, + "findingsBySeverity": { + "high": 2, + "medium": 4, + "low": 2 + }, + "readinessScore": 2, + "readiness": "block-before-submission" + }, + "findings": [ + { + "id": "muca_36cf703a9346e39d", + "type": "range-outlier", + "severity": "high", + "sourceId": "results-cytokines", + "message": "370 C is outside a plausible wet-lab manuscript temperature range.", + "evidence": "A draft assay note says the plate was heated to 370 C for 15 min.", + "recommendation": "Confirm whether this is a typo, a specialized protocol condition, or a missing decimal point." + }, + { + "id": "muca_95dbbfcb578262ba", + "type": "unit-family-conflict", + "severity": "high", + "sourceId": "methods-cell-culture,methods-cell-culture", + "message": "metformin uses both molar and mass concentration units.", + "evidence": "methods-cell-culture: 5 mM; methods-cell-culture: 2 mg/mL", + "recommendation": "Add a molecular-weight conversion note or standardize the manuscript to one concentration basis." 
+ }, + { + "id": "muca_3de684898a98c8d2", + "type": "missing-unit", + "severity": "medium", + "sourceId": "methods-cell-culture", + "message": "Numeric value \"12,000\" appears without an adjacent unit in Methods: cell culture and dosing.", + "evidence": "Pellets were centrifuged at 12,000 for 10 min before lysis.", + "recommendation": "Add the intended unit or rewrite the sentence so the quantity is unambiguous." + }, + { + "id": "muca_590da22e9ab0ee41", + "type": "table-unit-drift", + "severity": "medium", + "sourceId": "table-cytokines", + "message": "TNF-alpha cell unit ng/mL does not match header unit pg/mL.", + "evidence": "Table 1. Inflammation marker summary, row 1, column \"TNF-alpha (pg/mL)\": 0.09 ng/mL", + "recommendation": "Convert the cell value to the header unit or split the column by unit." + }, + { + "id": "muca_95dc9c8a07b00576", + "type": "table-unit-drift", + "severity": "medium", + "sourceId": "table-cytokines", + "message": "IL-6 cell unit ng/mL does not match header unit pg/mL.", + "evidence": "Table 1. Inflammation marker summary, row 2, column \"IL-6 (pg/mL)\": 0.21 ng/mL", + "recommendation": "Convert the cell value to the header unit or split the column by unit." + }, + { + "id": "muca_de3ee2dfeed34533", + "type": "missing-unit", + "severity": "medium", + "sourceId": "results-cytokines", + "message": "Numeric value \"35\" appears without an adjacent unit in Results: cytokine response.", + "evidence": "ATP increased by 35 compared with vehicle control.", + "recommendation": "Add the intended unit or rewrite the sentence so the quantity is unambiguous." 
+ }, + { + "id": "muca_9c165b65e67621cb", + "type": "unit-scale-drift", + "severity": "low", + "sourceId": "results-cytokines,results-cytokines,table-cytokines,table-cytokines", + "message": "IL-6 is reported in multiple unit scales: pg/mL, ng/mL.", + "evidence": "results-cytokines: 180 pg/mL; results-cytokines: 0.22 ng/mL; table-cytokines: 180 pg/mL; table-cytokines: 0.21 ng/mL", + "recommendation": "Keep the source units if scientifically useful, but include a normalized value for reviewer comparison." + }, + { + "id": "muca_fcdbfc1cf13921d3", + "type": "unit-scale-drift", + "severity": "low", + "sourceId": "table-cytokines,table-cytokines", + "message": "TNF-alpha is reported in multiple unit scales: ng/mL, pg/mL.", + "evidence": "table-cytokines: 0.09 ng/mL; table-cytokines: 70 pg/mL", + "recommendation": "Keep the source units if scientifically useful, but include a normalized value for reviewer comparison." + } + ], + "reviewerActions": [ + { + "id": "muca_67af617354794f83", + "priority": 1, + "label": "Action 1", + "action": "Confirm whether this is a typo, a specialized protocol condition, or a missing decimal point.", + "sourceId": "results-cytokines", + "blocksSubmission": true + }, + { + "id": "muca_529ad118a3551d22", + "priority": 1, + "label": "Action 2", + "action": "Add a molecular-weight conversion note or standardize the manuscript to one concentration basis.", + "sourceId": "methods-cell-culture,methods-cell-culture", + "blocksSubmission": true + }, + { + "id": "muca_e8ecfa112c107bd2", + "priority": 2, + "label": "Action 3", + "action": "Add the intended unit or rewrite the sentence so the quantity is unambiguous.", + "sourceId": "methods-cell-culture", + "blocksSubmission": false + }, + { + "id": "muca_a9d04f4c8bb5dfff", + "priority": 2, + "label": "Action 4", + "action": "Convert the cell value to the header unit or split the column by unit.", + "sourceId": "table-cytokines", + "blocksSubmission": false + }, + { + "id": "muca_02a1bc61ed8e1db8", 
+ "priority": 2, + "label": "Action 5", + "action": "Convert the cell value to the header unit or split the column by unit.", + "sourceId": "table-cytokines", + "blocksSubmission": false + }, + { + "id": "muca_649892c2608f0908", + "priority": 2, + "label": "Action 6", + "action": "Add the intended unit or rewrite the sentence so the quantity is unambiguous.", + "sourceId": "results-cytokines", + "blocksSubmission": false + }, + { + "id": "muca_1754df244fe5958e", + "priority": 3, + "label": "Action 7", + "action": "Keep the source units if scientifically useful, but include a normalized value for reviewer comparison.", + "sourceId": "results-cytokines,results-cytokines,table-cytokines,table-cytokines", + "blocksSubmission": false + }, + { + "id": "muca_19a1cbf17ba36129", + "priority": 3, + "label": "Action 8", + "action": "Keep the source units if scientifically useful, but include a normalized value for reviewer comparison.", + "sourceId": "table-cytokines,table-cytokines", + "blocksSubmission": false + } + ], + "measurementInventory": [ + { + "id": "muca_f1ceb42ef6a4fdcd", + "sourceType": "section", + "sourceId": "methods-cell-culture", + "sourceTitle": "Methods: cell culture and dosing", + "raw": "20,000 cells/well", + "value": 20000, + "unit": "cells/well", + "unitFamily": "count_density", + "normalizedValue": 20000, + "subject": "cells/well 96-well", + "context": "Cells were seeded at 20,000 cells/well in 96-well plates." + }, + { + "id": "muca_83deff953b23b592", + "sourceType": "section", + "sourceId": "methods-cell-culture", + "sourceTitle": "Methods: cell culture and dosing", + "raw": "5 mM", + "value": 5, + "unit": "mM", + "unitFamily": "molar_concentration", + "normalizedValue": 0.005, + "subject": "metformin", + "context": "Metformin was added at 5 mM for 24 h, while a later paragraph lists metformin at 2 mg/mL without a conversion note." 
+ }, + { + "id": "muca_65f1949766dc7ed5", + "sourceType": "section", + "sourceId": "methods-cell-culture", + "sourceTitle": "Methods: cell culture and dosing", + "raw": "24 h", + "value": 24, + "unit": "h", + "unitFamily": "time", + "normalizedValue": 86400, + "subject": "metformin", + "context": "Metformin was added at 5 mM for 24 h, while a later paragraph lists metformin at 2 mg/mL without a conversion note." + }, + { + "id": "muca_5cd9802ef78a680d", + "sourceType": "section", + "sourceId": "methods-cell-culture", + "sourceTitle": "Methods: cell culture and dosing", + "raw": "2 mg/mL", + "value": 2, + "unit": "mg/mL", + "unitFamily": "mass_concentration", + "normalizedValue": 2, + "subject": "metformin", + "context": "Metformin was added at 5 mM for 24 h, while a later paragraph lists metformin at 2 mg/mL without a conversion note." + }, + { + "id": "muca_be3f9e06763987aa", + "sourceType": "section", + "sourceId": "methods-cell-culture", + "sourceTitle": "Methods: cell culture and dosing", + "raw": "5 mg/mL", + "value": 5, + "unit": "mg/mL", + "unitFamily": "mass_concentration", + "normalizedValue": 5, + "subject": "glucose", + "context": "The glucose rescue arm used 5 mg/mL glucose for 6 h." + }, + { + "id": "muca_6beb8d48a5431704", + "sourceType": "section", + "sourceId": "methods-cell-culture", + "sourceTitle": "Methods: cell culture and dosing", + "raw": "6 h", + "value": 6, + "unit": "h", + "unitFamily": "time", + "normalizedValue": 21600, + "subject": "glucose", + "context": "The glucose rescue arm used 5 mg/mL glucose for 6 h." + }, + { + "id": "muca_a10aee8c7490683f", + "sourceType": "section", + "sourceId": "methods-cell-culture", + "sourceTitle": "Methods: cell culture and dosing", + "raw": "10 min", + "value": 10, + "unit": "min", + "unitFamily": "time", + "normalizedValue": 600, + "subject": "min before", + "context": "Pellets were centrifuged at 12,000 for 10 min before lysis." 
+ }, + { + "id": "muca_efb584083c3872b7", + "sourceType": "section", + "sourceId": "results-cytokines", + "sourceTitle": "Results: cytokine response", + "raw": "180 pg/mL", + "value": 180, + "unit": "pg/mL", + "unitFamily": "mass_concentration", + "normalizedValue": 1.8e-7, + "subject": "IL-6", + "context": "IL-6 fell from 180 pg/mL to 0." + }, + { + "id": "muca_1eb0057f96d8c3bb", + "sourceType": "section", + "sourceId": "results-cytokines", + "sourceTitle": "Results: cytokine response", + "raw": "0.22 ng/mL", + "value": 0.22, + "unit": "ng/mL", + "unitFamily": "mass_concentration", + "normalizedValue": 2.2e-7, + "subject": "IL-6", + "context": "IL-6 fell from 180 pg/mL to 0." + }, + { + "id": "muca_641e34b0de754de6", + "sourceType": "section", + "sourceId": "results-cytokines", + "sourceTitle": "Results: cytokine response", + "raw": "370 C", + "value": 370, + "unit": "C", + "unitFamily": "temperature", + "normalizedValue": 370, + "subject": "heated C", + "context": "A draft assay note says the plate was heated to 370 C for 15 min." + }, + { + "id": "muca_45c1f8badcdb8e47", + "sourceType": "section", + "sourceId": "results-cytokines", + "sourceTitle": "Results: cytokine response", + "raw": "15 min", + "value": 15, + "unit": "min", + "unitFamily": "time", + "normalizedValue": 900, + "subject": "heated C", + "context": "A draft assay note says the plate was heated to 370 C for 15 min." + }, + { + "id": "muca_08c2f7c0f74b65bd", + "sourceType": "table", + "sourceId": "table-cytokines", + "sourceTitle": "Table 1. Inflammation marker summary", + "row": 1, + "column": "IL-6 (pg/mL)", + "raw": "180 pg/mL", + "value": 180, + "unit": "pg/mL", + "unitFamily": "mass_concentration", + "normalizedValue": 1.8e-7, + "subject": "IL-6", + "context": "Table 1. Inflammation marker summary: IL-6 (pg/mL) = 180 pg/mL" + }, + { + "id": "muca_47badfa8f8f83d5d", + "sourceType": "table", + "sourceId": "table-cytokines", + "sourceTitle": "Table 1. 
Inflammation marker summary", + "row": 1, + "column": "TNF-alpha (pg/mL)", + "raw": "0.09 ng/mL", + "value": 0.09, + "unit": "ng/mL", + "unitFamily": "mass_concentration", + "normalizedValue": 9e-8, + "subject": "TNF-alpha", + "context": "Table 1. Inflammation marker summary: TNF-alpha (pg/mL) = 0.09 ng/mL" + }, + { + "id": "muca_8364cd567101eae3", + "sourceType": "table", + "sourceId": "table-cytokines", + "sourceTitle": "Table 1. Inflammation marker summary", + "row": 2, + "column": "IL-6 (pg/mL)", + "raw": "0.21 ng/mL", + "value": 0.21, + "unit": "ng/mL", + "unitFamily": "mass_concentration", + "normalizedValue": 2.1e-7, + "subject": "IL-6", + "context": "Table 1. Inflammation marker summary: IL-6 (pg/mL) = 0.21 ng/mL" + }, + { + "id": "muca_4484e92c7a32c632", + "sourceType": "table", + "sourceId": "table-cytokines", + "sourceTitle": "Table 1. Inflammation marker summary", + "row": 2, + "column": "TNF-alpha (pg/mL)", + "raw": "70 pg/mL", + "value": 70, + "unit": "pg/mL", + "unitFamily": "mass_concentration", + "normalizedValue": 7e-8, + "subject": "TNF-alpha", + "context": "Table 1. Inflammation marker summary: TNF-alpha (pg/mL) = 70 pg/mL" + } + ], + "auditDigest": "66b689128bf18282" +} diff --git a/manuscript-unit-consistency-assistant/reports/unit-consistency-report.md b/manuscript-unit-consistency-assistant/reports/unit-consistency-report.md new file mode 100644 index 00000000..b4fc01b9 --- /dev/null +++ b/manuscript-unit-consistency-assistant/reports/unit-consistency-report.md @@ -0,0 +1,61 @@ +# Manuscript Unit Consistency Report + +Manuscript: Metabolic rescue assay draft with unit hygiene risks +Audit digest: 66b689128bf18282 +Readiness: block-before-submission (2/100) + +## Findings + +- HIGH range-outlier: 370 C is outside a plausible wet-lab manuscript temperature range. + Evidence: A draft assay note says the plate was heated to 370 C for 15 min. + Action: Confirm whether this is a typo, a specialized protocol condition, or a missing decimal point. 
+- HIGH unit-family-conflict: metformin uses both molar and mass concentration units. + Evidence: methods-cell-culture: 5 mM; methods-cell-culture: 2 mg/mL + Action: Add a molecular-weight conversion note or standardize the manuscript to one concentration basis. +- MEDIUM missing-unit: Numeric value "12,000" appears without an adjacent unit in Methods: cell culture and dosing. + Evidence: Pellets were centrifuged at 12,000 for 10 min before lysis. + Action: Add the intended unit or rewrite the sentence so the quantity is unambiguous. +- MEDIUM table-unit-drift: TNF-alpha cell unit ng/mL does not match header unit pg/mL. + Evidence: Table 1. Inflammation marker summary, row 1, column "TNF-alpha (pg/mL)": 0.09 ng/mL + Action: Convert the cell value to the header unit or split the column by unit. +- MEDIUM table-unit-drift: IL-6 cell unit ng/mL does not match header unit pg/mL. + Evidence: Table 1. Inflammation marker summary, row 2, column "IL-6 (pg/mL)": 0.21 ng/mL + Action: Convert the cell value to the header unit or split the column by unit. +- MEDIUM missing-unit: Numeric value "35" appears without an adjacent unit in Results: cytokine response. + Evidence: ATP increased by 35 compared with vehicle control. + Action: Add the intended unit or rewrite the sentence so the quantity is unambiguous. +- LOW unit-scale-drift: IL-6 is reported in multiple unit scales: pg/mL, ng/mL. + Evidence: results-cytokines: 180 pg/mL; results-cytokines: 0.22 ng/mL; table-cytokines: 180 pg/mL; table-cytokines: 0.21 ng/mL + Action: Keep the source units if scientifically useful, but include a normalized value for reviewer comparison. +- LOW unit-scale-drift: TNF-alpha is reported in multiple unit scales: ng/mL, pg/mL. + Evidence: table-cytokines: 0.09 ng/mL; table-cytokines: 70 pg/mL + Action: Keep the source units if scientifically useful, but include a normalized value for reviewer comparison. 
+ +## Reviewer Actions + +- P1 Confirm whether this is a typo, a specialized protocol condition, or a missing decimal point. (results-cytokines) +- P1 Add a molecular-weight conversion note or standardize the manuscript to one concentration basis. (methods-cell-culture,methods-cell-culture) +- P2 Add the intended unit or rewrite the sentence so the quantity is unambiguous. (methods-cell-culture) +- P2 Convert the cell value to the header unit or split the column by unit. (table-cytokines) +- P2 Convert the cell value to the header unit or split the column by unit. (table-cytokines) +- P2 Add the intended unit or rewrite the sentence so the quantity is unambiguous. (results-cytokines) +- P3 Keep the source units if scientifically useful, but include a normalized value for reviewer comparison. (results-cytokines,results-cytokines,table-cytokines,table-cytokines) +- P3 Keep the source units if scientifically useful, but include a normalized value for reviewer comparison. (table-cytokines,table-cytokines) + +## Measurement Inventory + +- methods-cell-culture: 20,000 cells/well -> count_density (cells/well 96-well) +- methods-cell-culture: 5 mM -> molar_concentration (metformin) +- methods-cell-culture: 24 h -> time (metformin) +- methods-cell-culture: 2 mg/mL -> mass_concentration (metformin) +- methods-cell-culture: 5 mg/mL -> mass_concentration (glucose) +- methods-cell-culture: 6 h -> time (glucose) +- methods-cell-culture: 10 min -> time (min before) +- results-cytokines: 180 pg/mL -> mass_concentration (IL-6) +- results-cytokines: 0.22 ng/mL -> mass_concentration (IL-6) +- results-cytokines: 370 C -> temperature (heated C) +- results-cytokines: 15 min -> time (heated C) +- table-cytokines: 180 pg/mL -> mass_concentration (IL-6) +- table-cytokines: 0.09 ng/mL -> mass_concentration (TNF-alpha) +- table-cytokines: 0.21 ng/mL -> mass_concentration (IL-6) +- table-cytokines: 70 pg/mL -> mass_concentration (TNF-alpha) diff --git 
a/manuscript-unit-consistency-assistant/reports/unit-consistency-summary.svg b/manuscript-unit-consistency-assistant/reports/unit-consistency-summary.svg new file mode 100644 index 00000000..3e282c63 --- /dev/null +++ b/manuscript-unit-consistency-assistant/reports/unit-consistency-summary.svg @@ -0,0 +1,17 @@ + + + +Manuscript Unit Consistency Assistant +Metabolic rescue assay draft with unit hygiene risks +Audit digest 66b689128bf18282 +High: 2 + +Medium: 4 + +Low: 2 + + +2 +readiness score +block-before-submission + diff --git a/manuscript-unit-consistency-assistant/requirements-map.md b/manuscript-unit-consistency-assistant/requirements-map.md new file mode 100644 index 00000000..2add4218 --- /dev/null +++ b/manuscript-unit-consistency-assistant/requirements-map.md @@ -0,0 +1,22 @@ +# Requirements Map + +Issue #13 asks for AI-assisted research tools at MVP level. This slice maps to the AI Peer Review Aid requirement. + +## AI Peer Review Aid + +- Instant diagnostic report on draft manuscripts: `analyzeManuscript` emits findings, severity, evidence snippets, and reviewer actions. +- Technical issue detection: the module checks unit omissions, incompatible unit families, table/header drift, and implausible lab values. +- Domain-specific review templates: `trackedTerms` lets a workflow define manuscript-specific reagents, outcomes, or assay names. +- Pre-review quality check: readiness score and blocking actions identify issues to fix before submission. + +## AI Paper Summarizer Support + +- The report includes a compact measurement inventory and audit digest that can be included in a paper summary or collaborator handoff. + +## AI Citation Tool Support + +- Conversion-note recommendations identify places where authors may need to cite a protocol, molecular-weight conversion source, or assay documentation. 
+ +## Non-overlap + +This is not a knowledge graph measurement harmonization guard, analysis variable provenance audit, citation metadata checker, citation style normalizer, similarity detector, ethics/data availability audit, uncertainty calibration check, or broad manuscript grammar pass. It focuses only on manuscript unit consistency and reviewer action generation. diff --git a/manuscript-unit-consistency-assistant/sample-data.js b/manuscript-unit-consistency-assistant/sample-data.js new file mode 100644 index 00000000..d7f7fea4 --- /dev/null +++ b/manuscript-unit-consistency-assistant/sample-data.js @@ -0,0 +1,85 @@ +"use strict";/* Synthetic fixtures for the unit consistency assistant. sampleManuscript is seeded with deliberate unit problems; cleanManuscript is the baseline that test.js expects to produce zero findings and a readiness score of 100. */ + +const sampleManuscript = {/* Risky draft: metformin is given in both mM and mg/mL, the values 12,000 and 35 carry no unit, 370 C is the temperature outlier, and two table cells use ng/mL under pg/mL column headers. NOTE(review): these strings feed the committed report and audit digest, so any wording change here presumably requires regenerating the reports - confirm against demo.js. */ + manuscriptId: "SCIBASE-MUCA-001", + title: "Metabolic rescue assay draft with unit hygiene risks", + trackedTerms: [ + { name: "metformin", aliases: ["metformin"], requireSingleUnit: true }, + { name: "glucose", aliases: ["glucose"], requireSingleUnit: true }, + { name: "IL-6", aliases: ["IL-6", "IL6"], requireSingleUnit: true }, + { name: "TNF-alpha", aliases: ["TNF-alpha", "TNF"], requireSingleUnit: true }, + ], + sections: [ + { + id: "methods-cell-culture", + title: "Methods: cell culture and dosing", + text: [ + "Cells were seeded at 20,000 cells/well in 96-well plates.", + "Metformin was added at 5 mM for 24 h, while a later paragraph lists metformin at 2 mg/mL without a conversion note.", + "The glucose rescue arm used 5 mg/mL glucose for 6 h.", + "Pellets were centrifuged at 12,000 for 10 min before lysis.", + ].join(" "), + }, + { + id: "results-cytokines", + title: "Results: cytokine response", + text: [ + "IL-6 fell from 180 pg/mL to 0.22 ng/mL after treatment.", + "ATP increased by 35 compared with vehicle control.", + "A draft assay note says the plate was heated to 370 C for 15 min.", + ].join(" "), + }, + ], + tables: [ + { + id: "table-cytokines", + title: "Table 1.
Inflammation marker summary", + columns: [ + { name: "Group" }, + { name: "IL-6 (pg/mL)" }, + { name: "TNF-alpha (pg/mL)" }, + ], + rows: [ + ["Vehicle", "180 pg/mL", "0.09 ng/mL"], + ["Metformin", "0.21 ng/mL", "70 pg/mL"], + ], + }, + ], +}; + +const cleanManuscript = { + manuscriptId: "SCIBASE-MUCA-CLEAN", + title: "Clean cytokine assay draft", + trackedTerms: [ + { name: "metformin", aliases: ["metformin"], requireSingleUnit: true }, + { name: "IL-6", aliases: ["IL-6"], requireSingleUnit: true }, + ], + sections: [ + { + id: "methods", + title: "Methods", + text: "Cells were seeded at 20,000 cells/well. Metformin was added at 5 mM for 24 h and plates were held at 37 C.", + }, + { + id: "results", + title: "Results", + text: "IL-6 changed from 180 pg/mL to 120 pg/mL after treatment.", + }, + ], + tables: [ + { + id: "table-clean", + title: "Table 1. Cytokine results", + columns: [{ name: "Group" }, { name: "IL-6 (pg/mL)" }], + rows: [ + ["Vehicle", "180 pg/mL"], + ["Metformin", "120 pg/mL"], + ], + }, + ], +}; + +module.exports = { + sampleManuscript, + cleanManuscript, +}; diff --git a/manuscript-unit-consistency-assistant/test.js b/manuscript-unit-consistency-assistant/test.js new file mode 100644 index 00000000..0eb51092 --- /dev/null +++ b/manuscript-unit-consistency-assistant/test.js @@ -0,0 +1,38 @@ +"use strict"; + +const assert = require("node:assert/strict"); +const { + analyzeManuscript, + extractSectionMeasurements, + normalizeUnit, +} = require("./index"); +const { cleanManuscript, sampleManuscript } = require("./sample-data"); + +function runTests() { + assert.equal(normalizeUnit("mM").family, "molar_concentration"); + assert.equal(normalizeUnit("ng/mL").family, "mass_concentration"); + assert.equal(normalizeUnit("x g").canonical, "xg"); + + const measurements = extractSectionMeasurements(sampleManuscript.sections, sampleManuscript.trackedTerms); + assert.ok(measurements.some((measurement) => measurement.subject === "metformin" && measurement.unit
=== "mM")); + assert.ok(measurements.some((measurement) => measurement.subject === "IL-6" && measurement.unit === "ng/mL")); + + const result = analyzeManuscript(sampleManuscript);/* Full run over the risky fixture. The checks below require the manuscript id, at least 10 reviewed measurements, one finding of each of four types (unit-family-conflict, missing-unit, range-outlier, table-unit-drift), at least one blocking reviewer action, and an audit digest that is identical on a second run. NOTE(review): unit-scale-drift findings and the shape of the sourceId and context fields are not asserted; the committed report shows repeated ids such as methods-cell-culture,methods-cell-culture and a context cut at the decimal point of 0.22 ng/mL, which may deserve dedicated tests. */ + assert.equal(result.summary.manuscriptId, "SCIBASE-MUCA-001"); + assert.ok(result.summary.measurementsReviewed >= 10); + assert.ok(result.findings.some((finding) => finding.type === "unit-family-conflict")); + assert.ok(result.findings.some((finding) => finding.type === "missing-unit")); + assert.ok(result.findings.some((finding) => finding.type === "range-outlier")); + assert.ok(result.findings.some((finding) => finding.type === "table-unit-drift")); + assert.ok(result.reviewerActions.some((action) => action.blocksSubmission)); + assert.equal(result.auditDigest, analyzeManuscript(sampleManuscript).auditDigest); + + const clean = analyzeManuscript(cleanManuscript);/* Baseline fixture: must yield zero findings and a readiness score of 100. NOTE(review): the label expected at score 100 is ready-with-minor-edits; confirm that index.js intends no higher tier for a finding-free draft. */ + assert.equal(clean.findings.length, 0); + assert.equal(clean.summary.readinessScore, 100); + assert.equal(clean.summary.readiness, "ready-with-minor-edits"); + + console.log("manuscript-unit-consistency-assistant tests passed"); +} + +runTests();/* Runs the suite on load, so executing the file with node is the whole test command; any failed assertion throws before the success message is logged. */