From 7cc92395f3ccb843d43d22a097cc217143410662 Mon Sep 17 00:00:00 2001 From: Triple7 Date: Wed, 13 May 2026 04:33:01 -0700 Subject: [PATCH] Add modular forensic processor and batch downloads --- README.md | 7 ++ app.tsx | 13 ++- server.js | 170 +++++++++++++-------------------------- server/cleanup.js | 14 ++++ server/downloadTokens.js | 12 +++ server/metadataRules.js | 35 ++++++++ server/processor.js | 61 ++++++++++++++ 7 files changed, 196 insertions(+), 116 deletions(-) create mode 100644 server/cleanup.js create mode 100644 server/downloadTokens.js create mode 100644 server/metadataRules.js create mode 100644 server/processor.js diff --git a/README.md b/README.md index 2818ffb..c802924 100644 --- a/README.md +++ b/README.md @@ -114,3 +114,10 @@ docker run --rm -p 3001:3001 \ - Production must use real Stripe configuration; do not rely on mock checkout in production. Never commit real secrets to source control. + +## Batch processing API + +- `POST /api/process-batch` (authenticated): processes up to 20 uploaded files sequentially for paid plans (Creator/Studio). Free plan returns `403`. +- `GET /api/download/:token` (authenticated): one-time secure download for batch outputs. +- MP3 server cleanse remains unsupported (`422` for single process, per-file error in batch); use Quick Cleanse Browser for MP3. +- Batch requests enforce a 2GB post-upload soft guard; production deployments should still enforce proxy/body-size/disk limits. diff --git a/app.tsx b/app.tsx index ba0b0af..6b85fe8 100644 --- a/app.tsx +++ b/app.tsx @@ -27,6 +27,17 @@ interface UsageState { limit: number | null; // null = unlimited } +interface MarkerHit { ruleId: string; category: string; severity: 'critical' | 'high' | 'medium'; matchedTag: string; matchedValue: string; } +interface ResidualTag { tag: string; markerCategory: string; severity: string; } +interface ForensicReport { + removedCount: number; removedTags: string[]; timestamp: string; + status?: 'clean' | 'clean_with_notes' | 'review_required'; summary?: string; + wipeVerificationPassed?: boolean; finalVerificationPassed?: boolean; + detectedMarkersBefore?: MarkerHit[]; detectedMarkersFinal?: MarkerHit[]; + suspiciousResidual?: ResidualTag[]; unexpectedDescriptive?: string[]; + allowedInjectedTags?: string[]; rewrittenTags?: string[]; +} + interface QueueItem { id: string; file: File; @@ -34,7 +45,7 @@ interface QueueItem { seo: { title: string; description: string; tags: string }; downloadUrl: string | null; downloadName: string | null; - report: { removedCount: number; removedTags: string[]; timestamp: string } | null; + report: ForensicReport | null; error: string | null; analysis: { format: string; title: string; artist: string; genre: string; provenanceRisk: RiskLevel; detectedMarkers: string[]; parseError?: string | null } | null; logs: string[]; diff --git a/server.js b/server.js index f28fcbb..9abcb7a 100644 --- a/server.js +++ b/server.js @@ -5,6 +5,10 @@ const cors = require('cors'); const path = require('path'); const fs = require('fs-extra'); const { exiftool } = require('exiftool-vendored'); +const { processMediaFile } = require('./server/processor'); +const cleanup = require('./server/cleanup'); +const downloadTokens = require('./server/downloadTokens'); +const crypto = require('crypto'); const bcrypt = require('bcryptjs'); const jwt = require('jsonwebtoken'); const Database = require('better-sqlite3'); @@ -69,6 +73,9 @@ db.exec(` ); `); + +cleanup.init(db); +downloadTokens.init(db); // ───────────────────────────────────────────────────────────────────────────── // Usage helpers // ───────────────────────────────────────────────────────────────────────────── @@ -449,149 +456,82 @@ app.post('/api/generate-seo', requireAuth, async (req, res) => { // ───────────────────────────────────────────────────────────────────────────── app.post('/api/process', requireAuth, upload.single('file'), async (req, res) => { if (!req.file) return res.status(400).json({ error: 'No file uploaded' }); - const userId = req.user.sub; const inputPath = req.file.path; const originalName = req.file.originalname || ''; const ext = path.extname(originalName).toLowerCase() || '.mp3'; const mime = (req.file.mimetype || '').toLowerCase(); const isMp3 = ext === '.mp3' || mime === 'audio/mpeg'; - if (isMp3) { await fs.remove(inputPath).catch(() => {}); - return res.status(422).json({ - error: 'MP3 server cleanse is not supported', - detail: 'Use Quick Cleanse (Browser) for MP3 metadata rewriting, or upload MP4/M4A/WAV/FLAC for Full Server Cleanse.', - }); + return res.status(422).json({ error: 'MP3 server cleanse is not supported', detail: 'Use Quick Cleanse (Browser) for MP3 metadata rewriting, or upload MP4/M4A/WAV/FLAC for Full Server Cleanse.' }); } - - // ── Tier-based usage enforcement ───────────────────────────────────────── - // Always re-read plan from DB so upgrades (via webhook) take effect - // immediately without forcing a re-login. - const dbUser = db.prepare('SELECT plan FROM users WHERE id = ?').get(userId); + const dbUser = db.prepare('SELECT plan FROM users WHERE id = ?').get(userId); const userPlan = dbUser?.plan ?? 'free'; - if (userPlan === 'free') { const usedThisMonth = getMonthlyJobCount(userId); if (usedThisMonth >= FREE_MONTHLY_LIMIT) { await fs.remove(req.file.path).catch(() => {}); - return res.status(402).json({ - error: 'Monthly limit reached', - detail: `Free accounts are limited to ${FREE_MONTHLY_LIMIT} files per month. Upgrade to continue processing.`, - usedThisMonth, - limit: FREE_MONTHLY_LIMIT, - upgradeRequired: true, - }); + return res.status(402).json({ error: 'Monthly limit reached', detail: `Free accounts are limited to ${FREE_MONTHLY_LIMIT} files per month. Upgrade to continue processing.`, usedThisMonth, limit: FREE_MONTHLY_LIMIT, upgradeRequired: true }); } } - // ── End enforcement ─────────────────────────────────────────────────────── - const { title, description, tags, artist, genre, lyrics, platform = 'General' } = req.body; const outputPath = path.join('uploads', `out_${Date.now()}${ext}`); - - try { - await fs.copy(inputPath, outputPath); - } catch (err) { - await fs.remove(inputPath).catch(() => {}); - return res.status(500).json({ error: 'File copy failed' }); - } - + try { await fs.copy(inputPath, outputPath); } catch { await fs.remove(inputPath).catch(() => {}); return res.status(500).json({ error: 'File copy failed' }); } try { - // Phase 1: Forensic before-state - const beforeTags = await exiftool.read(outputPath); - const beforeKeys = new Set(Object.keys(beforeTags)); - - // Phase 2: Nuclear wipe (supported exiftool-vendored path only) - try { - await exiftool.write( - outputPath, - {}, - ['-all=', '-XMP:all=', '-IPTC:all=', '-overwrite_original'] - ); - } catch (wipeErr) { - console.warn('Primary metadata wipe failed:', wipeErr.message); - await fs.remove(inputPath).catch(() => {}); - await fs.remove(outputPath).catch(() => {}); - return res.status(422).json({ - error: 'Server cleanse unsupported for this format', - detail: 'This file format cannot be safely metadata-wiped on the server. Use Quick Cleanse (Browser) for MP3 or try MP4/M4A/WAV/FLAC for Full Server Cleanse.', - }); - } - - // Phase 3: Platform-aware SEO injection - const tagsArray = (tags || '').split(',').map(t => t.trim()).filter(Boolean); - const year = new Date().getFullYear(); - const safeArtist = (artist || 'Creator').substring(0, 255); - const safeTitle = (title || 'Untitled').substring(0, 255); - const safeDescription = (description || '').substring(0, 1000); - const safeGenre = (genre || '').substring(0, 100); - - const metaToWrite = { - Title: safeTitle, - Artist: safeArtist, - Copyright: `© ${year} ${safeArtist}`, - Keywords: tagsArray, - Genre: safeGenre, - }; - - switch (platform) { - case 'YouTube': - metaToWrite.Description = safeDescription; - metaToWrite.Comment = safeDescription; - break; - case 'Spotify': - case 'Apple Music': - metaToWrite.Description = safeDescription; - metaToWrite.Album = safeTitle; - metaToWrite.Year = year; - if (lyrics) metaToWrite['Lyrics-eng'] = lyrics.substring(0, 5000); - break; - case 'TikTok': - metaToWrite.Comment = `${safeTitle} ${tagsArray.map(t => `#${t.replace(/\s/g, '')}`).join(' ')}`.substring(0, 300); - break; - default: - metaToWrite.Description = safeDescription; - metaToWrite.Comment = safeDescription; - } - - await exiftool.write(outputPath, metaToWrite, ['-overwrite_original']); - - // Phase 4: Forensic diff - const afterTags = await exiftool.read(outputPath); - const afterKeys = new Set(Object.keys(afterTags)); - const removedKeys = [...beforeKeys].filter(k => !afterKeys.has(k)); - - // Phase 5: Record job (AFTER processing – only count successful deliveries) - try { - db.prepare( - 'INSERT INTO jobs (user_id, filename, platform) VALUES (?, ?, ?)' - ).run(userId, req.file.originalname, platform); - } catch (dbErr) { - console.error('Job record failed (non-fatal):', dbErr); - } - - // Phase 6: Send file with usage headers + const { report } = await processMediaFile({ outputPath, originalName: req.file.originalname, platform, metadata: { title, description, tags, artist, genre, lyrics } }); + try { db.prepare('INSERT INTO jobs (user_id, filename, platform) VALUES (?, ?, ?)').run(userId, req.file.originalname, platform); } catch (dbErr) { console.error('Job record failed (non-fatal):', dbErr); } const usedNow = getMonthlyJobCount(userId); - res.setHeader('X-Forensic-Removed', removedKeys.length); - res.setHeader('X-Forensic-Tags', JSON.stringify(removedKeys.slice(0, 50))); - res.setHeader('X-Forensic-Status', 'Sanitized'); + res.setHeader('X-Forensic-Removed', report.removedCount); + res.setHeader('X-Forensic-Tags', JSON.stringify(report.removedTags.slice(0, 50))); + res.setHeader('X-Forensic-Status', report.status || 'Sanitized'); res.setHeader('X-Usage-This-Month', usedNow); - res.setHeader('X-Usage-Limit', userPlan === 'free' ? FREE_MONTHLY_LIMIT : 'unlimited'); - - res.download(outputPath, `cleansed_${req.file.originalname}`, async (err) => { - if (err) console.error('Download stream error:', err); - await fs.remove(inputPath).catch(() => {}); - await fs.remove(outputPath).catch(() => {}); - }); - + res.setHeader('X-Usage-Limit', userPlan === 'free' ? FREE_MONTHLY_LIMIT : 'unlimited'); + cleanup.registerForCleanup([outputPath]); + res.download(outputPath, `cleansed_${req.file.originalname}`, async (err) => { if (err) console.error('Download stream error:', err); await fs.remove(inputPath).catch(() => {}); await cleanup.deleteImmediately(outputPath); }); } catch (err) { console.error('Processing error:', err); - res.status(500).json({ error: 'Processing failed', detail: err.message }); + const status = err.statusCode || 500; + res.status(status).json({ error: status === 422 ? err.message : 'Processing failed', detail: err.publicDetail || err.message }); await fs.remove(inputPath).catch(() => {}); await fs.remove(outputPath).catch(() => {}); } }); +app.post('/api/process-batch', requireAuth, upload.array('files', 20), async (req, res) => { + const userId = req.user.sub; + const files = req.files || []; + const dbUser = db.prepare('SELECT plan FROM users WHERE id = ?').get(userId); + const userPlan = dbUser?.plan ?? 'free'; + if (userPlan === 'free') { await Promise.all(files.map((f) => fs.remove(f.path).catch(() => {}))); return res.status(403).json({ error: 'Batch processing requires Creator or Studio plan.' }); } + const totalBytes = files.reduce((n, f) => n + (f.size || 0), 0); + // 2GB is a post-Multer soft guard; deployment/proxy/body-size limits are still required. + if (totalBytes > 2 * 1024 * 1024 * 1024) { await Promise.all(files.map((f) => fs.remove(f.path).catch(() => {}))); return res.status(400).json({ error: 'Batch total exceeds 2GB limit.' }); } + const { title, description, tags, artist, genre, lyrics, platform = 'General' } = req.body; + const results = []; + for (const file of files) { + const ext = path.extname(file.originalname || '').toLowerCase() || '.mp4'; + const mime = (file.mimetype || '').toLowerCase(); + const isMp3 = ext === '.mp3' || mime === 'audio/mpeg'; + if (isMp3) { await fs.remove(file.path).catch(() => {}); results.push({ originalName: file.originalname, error: 'MP3 server cleanse is not supported. Use Quick Cleanse (Browser) for MP3.' }); continue; } + const outputPath = path.join('uploads', `out_batch_${Date.now()}_${crypto.randomUUID()}${ext}`); + try { await fs.copy(file.path, outputPath); const { report } = await processMediaFile({ outputPath, originalName: file.originalname, platform, metadata: { title, description, tags, artist, genre, lyrics } }); db.prepare('INSERT INTO jobs (user_id, filename, platform) VALUES (?, ?, ?)').run(userId, file.originalname, platform); cleanup.registerForCleanup([outputPath]); const token = downloadTokens.createToken({ userId, filePath: outputPath, downloadName: `cleansed_${file.originalname}` }); results.push({ originalName: file.originalname, report, downloadToken: token }); } catch (err) { await fs.remove(outputPath).catch(() => {}); results.push({ originalName: file.originalname, error: err.publicDetail || err.message }); } finally { await fs.remove(file.path).catch(() => {}); } + } + const usedNow = getMonthlyJobCount(userId); + res.setHeader('X-Usage-This-Month', usedNow); + res.setHeader('X-Usage-Limit', userPlan === 'free' ? FREE_MONTHLY_LIMIT : 'unlimited'); + return res.json({ results, usage: { thisMonth: usedNow, limit: null } }); +}); + +app.get('/api/download/:token', requireAuth, async (req, res) => { + const userId = req.user.sub; + const consumed = downloadTokens.consumeToken(req.params.token, userId); + if (consumed.error) return res.status(consumed.code).json({ error: consumed.error }); + const { filePath, downloadName } = consumed; + if (!await fs.pathExists(filePath)) return res.status(410).json({ error: 'File is no longer available. It may have already been downloaded or cleaned up.' }); + res.download(filePath, downloadName, async (err) => { if (err) console.error('Download stream error:', err); await cleanup.deleteImmediately(filePath); }); +}); + // ───────────────────────────────────────────────────────────────────────────── // Error handlers // ───────────────────────────────────────────────────────────────────────────── diff --git a/server/cleanup.js b/server/cleanup.js new file mode 100644 index 0000000..e999068 --- /dev/null +++ b/server/cleanup.js @@ -0,0 +1,14 @@ +"use strict"; +const path = require('path'); +const fs = require('fs-extra'); +const UPLOAD_DIR = path.resolve('uploads'); +const TTL_MS = 60 * 60 * 1000; +const SWEEP_INTERVAL = 10 * 60 * 1000; +const ROW_MAX_AGE_MS = 7 * 24 * 60 * 60 * 1000; +let db; let timer; +const isWithinUploadDir = (filePath) => { const resolved = path.resolve(filePath); const relative = path.relative(UPLOAD_DIR, resolved); return relative === '' || (!relative.startsWith('..') && !path.isAbsolute(relative)); }; +function runSweeper() { if (!db) return; const now = Date.now(); const rows = db.prepare('SELECT id, file_path, registered_at FROM cleanup_queue WHERE deleted=0').all(); for (const r of rows) { if (now - r.registered_at > TTL_MS) { if (isWithinUploadDir(r.file_path)) fs.remove(path.resolve(r.file_path)).catch(() => {}); db.prepare('UPDATE cleanup_queue SET deleted=1 WHERE id=?').run(r.id); } } db.prepare('DELETE FROM cleanup_queue WHERE deleted=1 AND registered_at < ?').run(now - ROW_MAX_AGE_MS); } +function init(existingDb){ db = existingDb; db.exec(`CREATE TABLE IF NOT EXISTS cleanup_queue (id INTEGER PRIMARY KEY AUTOINCREMENT,file_path TEXT NOT NULL,registered_at INTEGER NOT NULL,deleted INTEGER NOT NULL DEFAULT 0)`); runSweeper(); timer = setInterval(runSweeper, SWEEP_INTERVAL); if (timer?.unref) timer.unref(); } +function registerForCleanup(filePaths){ if (!db || !Array.isArray(filePaths)) return; const stmt = db.prepare('INSERT INTO cleanup_queue (file_path, registered_at, deleted) VALUES (?, ?, 0)'); for (const f of filePaths) { if (!f) continue; if (!isWithinUploadDir(f)) { console.warn('Refusing cleanup registration outside uploads:', f); continue; } stmt.run(path.resolve(f), Date.now()); } } +async function deleteImmediately(filePath){ if (!filePath || !isWithinUploadDir(filePath)) { if (filePath) console.warn('Refusing cleanup delete outside uploads:', filePath); return; } const resolved = path.resolve(filePath); await fs.remove(resolved).catch(() => {}); if (db) db.prepare('UPDATE cleanup_queue SET deleted=1 WHERE file_path=?').run(resolved); } +module.exports = { init, registerForCleanup, deleteImmediately }; diff --git a/server/downloadTokens.js b/server/downloadTokens.js new file mode 100644 index 0000000..12cbaf9 --- /dev/null +++ b/server/downloadTokens.js @@ -0,0 +1,12 @@ +"use strict"; +const crypto = require('crypto'); +const path = require('path'); +const UPLOAD_DIR = path.resolve('uploads'); +const TOKEN_TTL_MS = 30 * 60 * 1000; +let db; let timer; +const isWithinUploadDir = (filePath) => { const resolved = path.resolve(filePath); const relative = path.relative(UPLOAD_DIR, resolved); return relative === '' || (!relative.startsWith('..') && !path.isAbsolute(relative)); }; +function sweepExpired(){ if (!db) return; db.prepare('DELETE FROM download_tokens WHERE used=1 OR expires_at < ?').run(Date.now()); } +function init(existingDb){ db = existingDb; db.exec(`CREATE TABLE IF NOT EXISTS download_tokens (token TEXT PRIMARY KEY,user_id INTEGER NOT NULL,file_path TEXT NOT NULL,download_name TEXT NOT NULL,expires_at INTEGER NOT NULL,used INTEGER NOT NULL DEFAULT 0)`); sweepExpired(); timer = setInterval(sweepExpired, 15*60*1000); if (timer?.unref) timer.unref(); } +function createToken({ userId, filePath, downloadName }){ const resolved = path.resolve(filePath); if (!isWithinUploadDir(resolved)) throw new Error('Invalid download path.'); const token = crypto.randomUUID(); db.prepare('INSERT INTO download_tokens (token,user_id,file_path,download_name,expires_at,used) VALUES (?,?,?,?,?,0)').run(token, userId, resolved, downloadName, Date.now()+TOKEN_TTL_MS); return token; } +function consumeToken(token, userId){ const row = db.prepare('SELECT * FROM download_tokens WHERE token=? AND used=0').get(token); if (!row) return { error:'Download link not found.', code:404 }; if (row.expires_at < Date.now()) return { error:'Download link has expired.', code:410 }; if (Number(row.user_id)!==Number(userId)) return { error:'Download link is not valid for this account.', code:403 }; if (!isWithinUploadDir(row.file_path)) return { error:'Invalid download path.', code:403 }; db.prepare('UPDATE download_tokens SET used=1 WHERE token=?').run(token); return { filePath: row.file_path, downloadName: row.download_name, code:200 }; } +module.exports = { init, createToken, consumeToken }; diff --git a/server/metadataRules.js b/server/metadataRules.js new file mode 100644 index 0000000..b489c70 --- /dev/null +++ b/server/metadataRules.js @@ -0,0 +1,35 @@ +"use strict"; + +const MARKER_RULES = [ + { id: 'c2pa-jumbf', category: 'AI Provenance', severity: 'critical', patterns: [/jumbf/i, /c2pa/i, /manifest/i, /assertion/i] }, + { id: 'xmp-creator-tool', category: 'XMP Origin', severity: 'high', patterns: [/CreatorTool/i, /DerivedFrom/i, /MetadataDate/i, /HistoryAction/i] }, + { id: 'iptc-synthetic', category: 'Synthetic Media Flag', severity: 'high', patterns: [/DigitalSourceType/i, /trainedAlgorithmicMedia/i] }, + { id: 'ai-brand', category: 'AI Brand Residue', severity: 'high', valueOnly: true, patterns: [/\bSuno\b/i, /\bUdio\b/i, /\bRunway\b/i, /\bLuma\b/i, /\bPika\b/i, /\bSora\b/i, /\bMidjourney\b/i, /\bDALL-E\b/i, /\bOpenAI\b/i, /\bChatGPT\b/i, /\bElevenLabs\b/i, /\bStable Diffusion\b/i, /\bAIVA\b/i, /\bMubert\b/i] }, + { id: 'encoder-software', category: 'Encoder / Software Residue', severity: 'medium', patterns: [/WrittenBy/i, /EncoderSettings/i] }, + { id: 'id3-provenance', category: 'ID3 Provenance Frames', severity: 'medium', patterns: [/^TSSE$/i, /^TXXX$/i] }, + { id: 'xmp-history', category: 'XMP Edit History', severity: 'medium', patterns: [/XMP\.History/i, /HistorySoftwareAgent/i, /HistoryChanged/i] }, +]; + +const BENIGN_TAG_PATTERNS = [ + /^SourceFile$/i, /^ExifToolVersion$/i, /^FileSize$/i, /^FileType$/i, /^FileTypeExtension$/i, /^MIMEType$/i, + /^FileAccessDate$/i, /^FileModifyDate$/i, /^FileInodeChangeDate$/i, /^errors$/i, /^warnings$/i, + /^Duration$/i, /^BitRate$/i, /^AvgBitrate$/i, /^MaxBitrate$/i, /^SampleRate$/i, /^AudioSampleRate$/i, + /^AudioChannels$/i, /^BitsPerSample$/i, /^AudioBitrate$/i, /^Balance$/i, /^EncoderDelay$/i, /^ZeroPadding$/i, + /^VideoFrameRate$/i, /^ImageWidth$/i, /^ImageHeight$/i, /^MajorBrand$/i, /^MinorVersion$/i, + /^CompatibleBrands$/i, /^MovieHeaderVersion$/i, /^TrackHeaderVersion$/i, /^MediaHeaderVersion$/i, + /^CreateDate$/i, /^ModifyDate$/i, /^TrackCreateDate$/i, /^TrackModifyDate$/i, /^MediaCreateDate$/i, + /^MediaModifyDate$/i, /^TrackDuration$/i, /^MediaDuration$/i, /^HandlerType$/i, /^HandlerDescription$/i, + /^CompressorID$/i, /^MatrixStructure$/i, /^XResolution$/i, /^YResolution$/i, +]; + +const ALLOWED_INJECTED_TAGS = new Set(['Title', 'Artist', 'Copyright', 'Keywords', 'Genre', 'Description', 'Comment', 'Album', 'Year', 'Lyrics-eng']); + +function isBenign(tagName) { + return BENIGN_TAG_PATTERNS.some((p) => p.test(String(tagName || ''))); +} + +function isAllowedInjected(tagName) { + return ALLOWED_INJECTED_TAGS.has(String(tagName || '')); +} + +module.exports = { MARKER_RULES, ALLOWED_INJECTED_TAGS, isBenign, isAllowedInjected }; diff --git a/server/processor.js b/server/processor.js new file mode 100644 index 0000000..41a481a --- /dev/null +++ b/server/processor.js @@ -0,0 +1,61 @@ +"use strict"; +const { exiftool } = require('exiftool-vendored'); +const { MARKER_RULES, isBenign, isAllowedInjected } = require('./metadataRules'); + +function unsupportedCleanseError(message, detail) { + const err = new Error(message); + err.statusCode = 422; + err.publicDetail = detail; + return err; +} + +function detectMarkers(tags = {}) { const hits = []; for (const [tag, raw] of Object.entries(tags)) { const value = raw == null ? '' : String(raw); for (const rule of MARKER_RULES) { const source = rule.valueOnly ? [value] : [tag, value]; if (rule.patterns.some((p) => source.some((s) => p.test(s)))) hits.push({ ruleId: rule.id, category: rule.category, severity: rule.severity, matchedTag: tag, matchedValue: value.substring(0, 120) }); } } return hits; } + +function verifyFinalState(tags = {}) { + const filtered = {}; + const unexpectedDescriptive = []; + for (const [tag, value] of Object.entries(tags)) { + if (isBenign(tag) || isAllowedInjected(tag)) continue; + filtered[tag] = value; + if (!tag.startsWith('Unknown')) unexpectedDescriptive.push(tag); + } + const detected = detectMarkers(filtered); + const suspiciousResidual = detected.map((h) => ({ tag: h.matchedTag, markerCategory: h.category, severity: h.severity })); + return { passed: suspiciousResidual.length === 0, suspiciousResidual, unexpectedDescriptive: [...new Set(unexpectedDescriptive)] }; +} + +function buildMetaToWrite(platform, metadata = {}) { + const { title, description, tags, artist, genre, lyrics } = metadata; + const safeArtist = (artist || 'Creator').substring(0, 255); + const safeTitle = (title || 'Untitled').substring(0, 255); + const safeDescription = (description || '').substring(0, 1000); + const safeGenre = (genre || '').substring(0, 100); + const year = new Date().getFullYear(); + const tagsArray = (Array.isArray(tags) ? tags : String(tags || '').split(',')).map((t) => String(t).trim()).filter(Boolean); + const metaToWrite = { Title: safeTitle, Artist: safeArtist, Copyright: `© ${year} ${safeArtist}`, Keywords: tagsArray, Genre: safeGenre }; + switch (platform) { case 'YouTube': metaToWrite.Description = safeDescription; metaToWrite.Comment = safeDescription; break; case 'Spotify': case 'Apple Music': metaToWrite.Description = safeDescription; metaToWrite.Album = safeTitle; metaToWrite.Year = year; if (lyrics) metaToWrite['Lyrics-eng'] = String(lyrics).substring(0, 5000); break; case 'TikTok': metaToWrite.Comment = `${safeTitle} ${tagsArray.map((t) => `#${t.replace(/\s/g, '')}`).join(' ')}`.substring(0, 300); break; default: metaToWrite.Description = safeDescription; metaToWrite.Comment = safeDescription; } + return metaToWrite; +} + +async function processMediaFile({ outputPath, platform = 'General', metadata = {} }) { + const beforeTags = await exiftool.read(outputPath); + const beforeKeys = Object.keys(beforeTags); + const beforeMarkers = detectMarkers(beforeTags); + try { await exiftool.write(outputPath, {}, ['-all=', '-XMP:all=', '-IPTC:all=', '-overwrite_original']); } catch { throw unsupportedCleanseError('Server cleanse unsupported for this format', 'This file format cannot be safely metadata-wiped on the server. Use Quick Cleanse (Browser) for MP3 or try MP4/M4A/WAV/FLAC for Full Server Cleanse.'); } + const wipeTags = await exiftool.read(outputPath); + const wipeMarkers = detectMarkers(wipeTags); + const wipeVerificationPassed = wipeMarkers.length === 0; + const metaToWrite = buildMetaToWrite(platform, metadata); + await exiftool.write(outputPath, metaToWrite, ['-overwrite_original']); + const finalTags = await exiftool.read(outputPath); + const finalMarkers = detectMarkers(finalTags); + const verification = verifyFinalState(finalTags); + const removedTags = beforeKeys.filter((k) => !(k in finalTags)); + const removedCount = beforeMarkers.length; + const status = (!wipeVerificationPassed || finalMarkers.length > 0) ? 'review_required' : (verification.unexpectedDescriptive.length > 0 ? 'clean_with_notes' : 'clean'); + const seo = ''; + const summary = status === 'review_required' ? `Residual provenance markers detected. Manual review required.${seo}` : status === 'clean_with_notes' ? `${removedCount} marker(s) removed. Some non-standard tags remain (not provenance).${seo}` : `${removedCount} forensic marker(s) removed. Verification passed.${seo}`; + return { report: { removedCount, removedTags, timestamp: new Date().toISOString(), status, summary, wipeVerificationPassed, finalVerificationPassed: verification.passed, detectedMarkersBefore: beforeMarkers, detectedMarkersFinal: finalMarkers, suspiciousResidual: verification.suspiciousResidual, unexpectedDescriptive: verification.unexpectedDescriptive, allowedInjectedTags: Object.keys(metaToWrite).filter(isAllowedInjected), rewrittenTags: Object.keys(metaToWrite) } }; +} + +module.exports = { processMediaFile, detectMarkers, verifyFinalState, buildMetaToWrite, unsupportedCleanseError };