Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 31 additions & 0 deletions resumable-upload-checkpoint-guard/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
# Resumable Upload Checkpoint Guard

Self-contained Scientific/Engineering Data & Code Hosting slice for issue #14. It validates multipart upload checkpoint evidence before scientific datasets, notebooks, and supplements become durable hosted artifacts.

## What It Checks

- Contiguous chunk coverage from index `0` through the expected final chunk.
- Per-chunk declared checksum versus observed checksum evidence.
- Final artifact manifest hash before commit.
- DataCite/schema.org metadata schema readiness.
- Expired checkpoint state that must be restarted instead of resumed.

## Outputs

- `reports/upload-checkpoint-packet.json`: structured reviewer decisions and findings.
- `reports/checkpoint-report.md`: readable report for each synthetic upload scenario.
- `reports/summary.svg`: visual summary of commit, hold, and abort decisions.
- `reports/demo.mp4`: short demo artifact for Algora review (not generated by `demo.js`, which writes only the three files above).

## Local Verification

```bash
node resumable-upload-checkpoint-guard/test.js
node resumable-upload-checkpoint-guard/demo.js
node --check resumable-upload-checkpoint-guard/index.js
node --check resumable-upload-checkpoint-guard/test.js
node --check resumable-upload-checkpoint-guard/demo.js
node --check resumable-upload-checkpoint-guard/sample-data.js
```

The module is dependency-free, uses synthetic data only, and makes no network calls.
59 changes: 59 additions & 0 deletions resumable-upload-checkpoint-guard/demo.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
const fs = require('node:fs');
const path = require('node:path');

const {evaluateUploadCheckpoint, buildCheckpointReport} = require('./index');
const {scenarios} = require('./sample-data');

// Demo driver: evaluates every synthetic scenario and writes the JSON packet,
// the Markdown reviewer report, and the SVG summary into ./reports.

// Make sure the output directory exists before anything is written to it.
const reportsDir = path.join(__dirname, 'reports');
fs.mkdirSync(reportsDir, {recursive: true});

// One evaluation per scenario, tagged with the scenario name as its first key.
const evaluations = scenarios.map((scenario) => {
  const {name} = scenario;
  const verdict = evaluateUploadCheckpoint(scenario);
  return {scenario: name, ...verdict};
});

const packetJson = JSON.stringify(evaluations, null, 2);
const reviewerReport = evaluations
  .map((evaluation) => buildCheckpointReport(evaluation))
  .join('\n---\n');

// Tally how many evaluations ended in a given decision.
const countDecision = (decision) =>
  evaluations.reduce((total, item) => (item.decision === decision ? total + 1 : total), 0);

const commit = countDecision('commit-artifact');
const metadata = countDecision('hold-metadata');
const resume = countDecision('hold-resume');
const abort = countDecision('abort-and-reupload');

// Total number of findings across all scenarios.
let findings = 0;
for (const item of evaluations) {
  findings += item.findings.length;
}

// Static SVG dashboard; only the four decision counters and the findings total are interpolated.
const svg = `<svg xmlns="http://www.w3.org/2000/svg" width="960" height="540" viewBox="0 0 960 540">
<rect width="960" height="540" fill="#101820"/>
<text x="48" y="72" fill="#f8fafc" font-family="Arial, sans-serif" font-size="34" font-weight="700">Resumable Upload Checkpoint Guard</text>
<text x="48" y="112" fill="#bae6fd" font-family="Arial, sans-serif" font-size="18">Synthetic multipart upload safety packet for scientific artifacts</text>
<g transform="translate(48 158)">
<rect width="190" height="154" rx="10" fill="#065f46"/>
<text x="22" y="48" fill="#d1fae5" font-family="Arial, sans-serif" font-size="18" font-weight="700">Commit</text>
<text x="22" y="108" fill="#ecfdf5" font-family="Arial, sans-serif" font-size="54" font-weight="700">${commit}</text>
</g>
<g transform="translate(270 158)">
<rect width="190" height="154" rx="10" fill="#7c2d12"/>
<text x="22" y="48" fill="#fed7aa" font-family="Arial, sans-serif" font-size="18" font-weight="700">Metadata</text>
<text x="22" y="108" fill="#fff7ed" font-family="Arial, sans-serif" font-size="54" font-weight="700">${metadata}</text>
</g>
<g transform="translate(492 158)">
<rect width="190" height="154" rx="10" fill="#854d0e"/>
<text x="22" y="48" fill="#fef3c7" font-family="Arial, sans-serif" font-size="18" font-weight="700">Resume Hold</text>
<text x="22" y="108" fill="#fffbeb" font-family="Arial, sans-serif" font-size="54" font-weight="700">${resume}</text>
</g>
<g transform="translate(714 158)">
<rect width="190" height="154" rx="10" fill="#991b1b"/>
<text x="22" y="48" fill="#fee2e2" font-family="Arial, sans-serif" font-size="18" font-weight="700">Abort</text>
<text x="22" y="108" fill="#fef2f2" font-family="Arial, sans-serif" font-size="54" font-weight="700">${abort}</text>
</g>
<text x="48" y="382" fill="#e5e7eb" font-family="Arial, sans-serif" font-size="22">Checks: contiguous chunks, checksum evidence, manifest hash, metadata schema, expiry</text>
<text x="48" y="424" fill="#d1d5db" font-family="Arial, sans-serif" font-size="18">Reviewer findings: ${findings}. Outputs: JSON packet, Markdown report, SVG summary, MP4 artifact.</text>
<text x="48" y="478" fill="#9ca3af" font-family="Arial, sans-serif" font-size="16">Synthetic data only. No private datasets, credentials, storage provider calls, or network access.</text>
</svg>
`;

// Write the three report files in a fixed order: JSON packet, Markdown report, SVG summary.
const outputs = [
  ['upload-checkpoint-packet.json', `${packetJson}\n`],
  ['checkpoint-report.md', reviewerReport],
  ['summary.svg', svg],
];
for (const [fileName, contents] of outputs) {
  fs.writeFileSync(path.join(reportsDir, fileName), contents);
}

console.log(`Wrote ${evaluations.length} upload checkpoint evaluations to ${reportsDir}`);
console.log(`Decision counts: commit=${commit}, metadata=${metadata}, resume=${resume}, abort=${abort}`);
console.log(`Reviewer findings: ${findings}`);
216 changes: 216 additions & 0 deletions resumable-upload-checkpoint-guard/index.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,216 @@
/**
 * Coerces a possibly-missing value into an array.
 *
 * @param {*} value - Candidate list.
 * @returns {Array} `value` itself when it is an array, otherwise a new empty array.
 */
function normalizeList(value) {
  if (Array.isArray(value)) {
    return value;
  }
  return [];
}

/**
 * Builds a required-action record for the reviewer packet.
 *
 * @param {string} type - Action identifier.
 * @param {*} target - What the action applies to.
 * @param {string} reason - Why the action is required.
 * @returns {{type: *, target: *, reason: *}} Plain object with keys in the order type, target, reason.
 */
function uploadAction(type, target, reason) {
  const action = {};
  action.type = type;
  action.target = target;
  action.reason = reason;
  return action;
}

/**
 * Lists the chunk indexes in `0 .. expectedChunks - 1` that were not received.
 *
 * @param {number} expectedChunks - Number of chunks the artifact should have.
 * @param {Iterable<*>} chunkIndexes - Indexes of the chunks that were received.
 * @returns {number[]} Missing indexes in ascending order (empty when none are missing).
 */
function missingChunkIndexes(expectedChunks, chunkIndexes) {
  const seen = new Set(chunkIndexes);
  const gaps = [];
  let candidate = 0;
  while (candidate < expectedChunks) {
    if (!seen.has(candidate)) {
      gaps.push(candidate);
    }
    candidate += 1;
  }
  return gaps;
}

/**
 * Tallies findings by their `severity` property.
 *
 * @param {Array<{severity: string}>} findings - Findings to count.
 * @returns {Object<string, number>} Map of severity name to number of findings with that severity.
 */
function severityCounts(findings) {
  const tally = {};
  findings.forEach((finding) => {
    const {severity} = finding;
    const previous = tally[severity] || 0;
    tally[severity] = previous + 1;
  });
  return tally;
}

/**
 * Evaluates multipart upload checkpoint evidence and decides whether the
 * artifact may be committed, must be held, or must be aborted.
 *
 * Checks performed, in order: missing chunk indexes, duplicate chunk indexes,
 * chunk indexes outside `0 .. expectedChunks - 1`, per-chunk checksum evidence,
 * total byte size, final manifest hash, metadata schema, and checkpoint expiry.
 *
 * Decision rule: any `critical` finding yields `abort-and-reupload`; otherwise
 * any `major` finding yields `hold-resume`; otherwise any `metadata` finding
 * yields `hold-metadata`; otherwise `commit-artifact`. The integrity score is
 * `100 - 40 * critical - 25 * major - 10 * metadata`, floored at 0.
 *
 * @param {object} [input] - Checkpoint evidence. A missing value is treated as an empty checkpoint.
 * @param {string} [input.uploadId] - Upload identifier, used as the target of most actions.
 * @param {string} [input.generatedAt] - Timestamp the checkpoint was generated (parsed with `Date.parse`).
 * @param {string} [input.expiresAt] - Timestamp the checkpoint expires (parsed with `Date.parse`).
 * @param {object} [input.artifact] - Manifest fields: `path`, `expectedChunks`,
 *   `expectedSizeBytes`, `finalManifestHash`, `metadataSchema`.
 * @param {Array<object>} [input.chunks] - Chunk records with `index`, `sizeBytes`,
 *   `declaredHash`, `observedHash`. A non-array value is treated as no chunks.
 * @returns {object} `{uploadId, generatedAt, expiresAt, artifactPath, decision,
 *   integrityScore, findings, requiredActions, summary}`.
 */
function evaluateUploadCheckpoint(input) {
  // Tolerate a missing checkpoint so callers get findings instead of a TypeError.
  const checkpoint = input || {};
  const artifact = checkpoint.artifact || {};
  const chunks = normalizeList(checkpoint.chunks);
  const expectedChunks = Number(artifact.expectedChunks || 0);
  const chunkIndexes = chunks.map((chunk) => chunk.index);
  const findings = [];
  const requiredActions = [];
  const missingChunks = missingChunkIndexes(expectedChunks, chunkIndexes);
  const receivedBytes = chunks.reduce((sum, chunk) => sum + Number(chunk.sizeBytes || 0), 0);

  // Count each index once; a Map keeps first-seen order and avoids rescanning
  // the whole index list for every unique index.
  const indexCounts = new Map();
  for (const index of chunkIndexes) {
    indexCounts.set(index, (indexCounts.get(index) || 0) + 1);
  }
  const uniqueChunkIndexes = [...indexCounts.keys()];

  // Indexes that cannot belong to the artifact. Only checked when the manifest
  // declares a positive chunk count; with no declared count nothing is "unexpected".
  const isExpectedIndex = (index) =>
    Number.isInteger(index) && index >= 0 && index < expectedChunks;
  const unexpectedChunks = expectedChunks > 0
    ? uniqueChunkIndexes.filter((index) => !isExpectedIndex(index))
    : [];
  // Coverage counts only indexes that actually fall inside the expected range,
  // so stray chunks can never mask a gap or push coverage above 100%.
  const coveredChunks = uniqueChunkIndexes.length - unexpectedChunks.length;

  if (missingChunks.length > 0) {
    findings.push({
      type: 'missing-upload-chunk',
      severity: 'critical',
      missingChunks,
      message: `Upload checkpoint is missing chunk indexes ${missingChunks.join(', ')}`,
    });
    requiredActions.push(uploadAction(
      'abort_incomplete_checkpoint',
      checkpoint.uploadId,
      'durable artifact commits require contiguous multipart coverage'
    ));
  }

  const duplicateChunks = uniqueChunkIndexes.filter((index) => indexCounts.get(index) > 1);
  if (duplicateChunks.length > 0) {
    findings.push({
      type: 'duplicate-upload-chunk',
      severity: 'major',
      duplicateChunks,
      message: `Upload checkpoint repeats chunk indexes ${duplicateChunks.join(', ')}`,
    });
    requiredActions.push(uploadAction(
      'deduplicate_checkpoint_chunks',
      checkpoint.uploadId,
      'resume state must have one authoritative checksum per chunk index'
    ));
  }

  if (unexpectedChunks.length > 0) {
    findings.push({
      type: 'unexpected-upload-chunk',
      severity: 'major',
      unexpectedChunks,
      message: `Upload checkpoint contains unexpected chunk indexes ${unexpectedChunks.join(', ')}`,
    });
    requiredActions.push(uploadAction(
      'discard_unexpected_chunks',
      checkpoint.uploadId,
      'chunk indexes must fall within the expected multipart range'
    ));
  }

  for (const chunk of chunks) {
    if (chunk.declaredHash !== chunk.observedHash) {
      findings.push({
        type: 'chunk-checksum-mismatch',
        severity: 'major',
        chunk: chunk.index,
        declaredHash: chunk.declaredHash,
        observedHash: chunk.observedHash,
        message: `Chunk ${chunk.index} declares ${chunk.declaredHash} but observed ${chunk.observedHash}`,
      });
      requiredActions.push(uploadAction(
        'reupload_chunk',
        `${checkpoint.uploadId}:chunk-${chunk.index}`,
        'chunk checksum evidence must match before upload resume or artifact commit'
      ));
    } else if (!chunk.declaredHash) {
      // Both hashes are equal but empty/absent: there is no evidence at all,
      // which must not be mistaken for a verified chunk.
      findings.push({
        type: 'missing-chunk-checksum',
        severity: 'major',
        chunk: chunk.index,
        message: `Chunk ${chunk.index} has no declared or observed checksum evidence`,
      });
      requiredActions.push(uploadAction(
        'reupload_chunk',
        `${checkpoint.uploadId}:chunk-${chunk.index}`,
        'chunk checksum evidence must match before upload resume or artifact commit'
      ));
    }
  }

  // The size check is skipped when the manifest declares no (or a zero) expected size.
  if (artifact.expectedSizeBytes && receivedBytes !== artifact.expectedSizeBytes) {
    findings.push({
      type: 'upload-size-mismatch',
      severity: 'major',
      expectedSizeBytes: artifact.expectedSizeBytes,
      receivedBytes,
      message: `Received ${receivedBytes} bytes but artifact manifest expects ${artifact.expectedSizeBytes}`,
    });
    requiredActions.push(uploadAction(
      'reconcile_upload_size',
      checkpoint.uploadId,
      'chunk byte totals must match the artifact manifest'
    ));
  }

  if (!artifact.finalManifestHash) {
    findings.push({
      type: 'missing-final-manifest-hash',
      severity: 'metadata',
      message: 'Artifact lacks final manifest hash',
    });
    requiredActions.push(uploadAction(
      'record_final_manifest_hash',
      artifact.path || checkpoint.uploadId,
      'durable commits need a stable manifest hash for replay and DOI metadata'
    ));
  }

  if (!artifact.metadataSchema) {
    findings.push({
      type: 'missing-metadata-schema',
      severity: 'metadata',
      message: 'Artifact lacks DataCite/schema.org metadata schema evidence',
    });
    requiredActions.push(uploadAction(
      'attach_metadata_schema',
      artifact.path || checkpoint.uploadId,
      'hosted research artifacts need machine-readable metadata before commit'
    ));
  }

  // Expiry is judged against the checkpoint's own generatedAt, not the wall
  // clock, so results are reproducible. Unparseable timestamps compare as NaN
  // and therefore never raise this finding.
  if (
    checkpoint.generatedAt &&
    checkpoint.expiresAt &&
    Date.parse(checkpoint.generatedAt) > Date.parse(checkpoint.expiresAt)
  ) {
    findings.push({
      type: 'stale-upload-checkpoint',
      severity: 'critical',
      expiresAt: checkpoint.expiresAt,
      message: `Upload checkpoint expired at ${checkpoint.expiresAt}`,
    });
    requiredActions.push(uploadAction(
      'restart_expired_upload',
      checkpoint.uploadId,
      'expired resume state cannot safely become a durable artifact'
    ));
  }

  const counts = severityCounts(findings);
  const criticalCount = counts.critical || 0;
  const majorCount = counts.major || 0;
  const metadataCount = counts.metadata || 0;

  // The most severe class of finding present determines the decision.
  let decision = 'commit-artifact';
  if (criticalCount > 0) {
    decision = 'abort-and-reupload';
  } else if (majorCount > 0) {
    decision = 'hold-resume';
  } else if (metadataCount > 0) {
    decision = 'hold-metadata';
  }
  const integrityScore = Math.max(0, 100 - criticalCount * 40 - majorCount * 25 - metadataCount * 10);

  return {
    uploadId: checkpoint.uploadId,
    generatedAt: checkpoint.generatedAt,
    expiresAt: checkpoint.expiresAt,
    artifactPath: artifact.path,
    decision,
    integrityScore,
    findings,
    requiredActions,
    summary: {
      expectedChunks,
      receivedChunks: coveredChunks,
      coverage: expectedChunks === 0 ? 1 : coveredChunks / expectedChunks,
      expectedSizeBytes: artifact.expectedSizeBytes || 0,
      receivedBytes,
      severityCounts: counts,
    },
  };
}

/**
 * Formats a ratio as a whole-number percentage string.
 *
 * @param {number} value - Ratio, where 1 means 100%.
 * @returns {string} The ratio times 100, rounded with `Math.round`, followed by `%`.
 */
function percent(value) {
  const wholePercent = Math.round(value * 100);
  return `${wholePercent}%`;
}

/**
 * Renders one evaluation result as a Markdown report.
 *
 * The report has a header block, an "Upload Coverage" section, a "Findings"
 * section and a "Required Actions" section; the last two show `- None` when
 * their list is empty. The returned text ends with a newline.
 *
 * @param {object} result - Evaluation result with `uploadId`, `artifactPath`,
 *   `generatedAt`, `expiresAt`, `decision`, `integrityScore`, `findings`,
 *   `requiredActions` and `summary`.
 * @returns {string} Markdown text with lines joined by `\n`.
 */
function buildCheckpointReport(result) {
  const {summary, findings, requiredActions} = result;

  // An empty bullet list is rendered as a single "- None" entry.
  const bulletsOrNone = (bullets) => (bullets.length ? bullets : ['- None']);

  const header = [
    '# Resumable Upload Checkpoint Guard Report',
    '',
    `Upload: ${result.uploadId}`,
    `Artifact: ${result.artifactPath}`,
    `Generated: ${result.generatedAt}`,
    `Expires: ${result.expiresAt}`,
    `Decision: ${result.decision}`,
    `Integrity score: ${result.integrityScore}`,
    '',
  ];

  const coverageSection = [
    '## Upload Coverage',
    '',
    `Chunks: ${summary.receivedChunks}/${summary.expectedChunks}`,
    `Coverage: ${percent(summary.coverage)}`,
    `Bytes: ${summary.receivedBytes}/${summary.expectedSizeBytes}`,
    `Findings: ${findings.length}`,
    '',
  ];

  const findingBullets = findings.map(
    (finding) => `- ${finding.severity}: ${finding.type} - ${finding.message}`
  );
  const findingsSection = ['## Findings', '', ...bulletsOrNone(findingBullets), ''];

  const actionBullets = requiredActions.map(
    (action) => `- ${action.type}: ${action.target} (${action.reason})`
  );
  // The trailing empty entry gives the report its final newline.
  const actionsSection = ['## Required Actions', '', ...bulletsOrNone(actionBullets), ''];

  return [...header, ...coverageSection, ...findingsSection, ...actionsSection].join('\n');
}

// Public API: the checkpoint evaluator and the Markdown report renderer.
// The other functions in this file are internal helpers and are not exported.
module.exports = {
evaluateUploadCheckpoint,
buildCheckpointReport,
};
Loading