From beac7fa616aaf56862f010120b7d648eb9f8b146 Mon Sep 17 00:00:00 2001 From: Triple7 Date: Mon, 4 May 2026 04:01:11 -0700 Subject: [PATCH 1/2] Harden metadata parsing and marker detection --- src/utils/metadata.d.ts | 1 + src/utils/metadata.js | 54 +++++++++++++++++++++++++++++++++-------- 2 files changed, 45 insertions(+), 10 deletions(-) diff --git a/src/utils/metadata.d.ts b/src/utils/metadata.d.ts index 2702ff4..1e146da 100644 --- a/src/utils/metadata.d.ts +++ b/src/utils/metadata.d.ts @@ -6,6 +6,7 @@ export interface FileMetadataResult { detectedMarkers: string[]; provenanceRisk: 'High' | 'Low'; raw: unknown; + parseError?: string | null; } export function readFileMetadata(file: File): Promise; diff --git a/src/utils/metadata.js b/src/utils/metadata.js index a921079..95e1707 100644 --- a/src/utils/metadata.js +++ b/src/utils/metadata.js @@ -2,6 +2,21 @@ import { parseBlob } from 'music-metadata-browser'; import ID3Writer from 'browser-id3-writer'; const AI_MARKERS = ['ai','generated','suno','udio','boomy','aiva','soundraw','mubert','stable audio','provenance','c2pa','content credentials','watermark','synthetic','elevenlabs']; +const MARKER_REGEX_CACHE = new Map(); + +function escapeRegex(value) { + return value.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'); +} + +function markerToRegex(marker) { + if (MARKER_REGEX_CACHE.has(marker)) return MARKER_REGEX_CACHE.get(marker); + const escaped = escapeRegex(marker); + const regex = marker.length <= 2 + ? new RegExp(`\\b${escaped}\\b`, 'i') + : new RegExp(`(?:^|\\W)${escaped}(?:$|\\W)`, 'i'); + MARKER_REGEX_CACHE.set(marker, regex); + return regex; +} function collectStrings(metadata) { const common = metadata?.common || {}; @@ -20,17 +35,26 @@ function collectStrings(metadata) { } export async function readFileMetadata(file) { - const parsed = await parseBlob(file); + let parsed = null; + let parseError = null; + + try { + parsed = await parseBlob(file); + } catch (error) { + parseError = error; + } + const searchable = collectStrings(parsed); - const detectedMarkers = AI_MARKERS.filter(marker => searchable.includes(marker)); + const detectedMarkers = AI_MARKERS.filter((marker) => markerToRegex(marker).test(searchable)); return { - format: parsed.format?.container || file.type || 'unknown', - title: parsed.common?.title || file.name.replace(/\.[^.]+$/, ''), - artist: parsed.common?.artist || '', - genre: parsed.common?.genre?.[0] || '', + format: parsed?.format?.container || file.type || 'unknown', + title: parsed?.common?.title || file.name.replace(/\.[^.]+$/, ''), + artist: parsed?.common?.artist || '', + genre: parsed?.common?.genre?.[0] || '', detectedMarkers, provenanceRisk: detectedMarkers.length > 0 ? 'High' : 'Low', raw: parsed, + parseError: parseError ? String(parseError?.message || parseError) : null, }; } @@ -38,10 +62,20 @@ export async function writeMP3Metadata(file, metadata) { const buffer = await file.arrayBuffer(); const writer = new ID3Writer(buffer); writer.removeTag(); - if (metadata.title) writer.setFrame('TIT2', metadata.title); - if (metadata.artist) writer.setFrame('TPE1', [metadata.artist]); - if (metadata.genre) writer.setFrame('TCON', [metadata.genre]); - writer.setFrame('TENC', 'SpectraCleanseAI Browser Quick Cleanse'); + + const safeText = (value) => { + if (typeof value !== 'string') return ''; + return value.replace(/\u0000/g, '').trim().slice(0, 500); + }; + + const title = safeText(metadata?.title); + const artist = safeText(metadata?.artist); + const genre = safeText(metadata?.genre); + + if (title) writer.setFrame('TIT2', title); + if (artist) writer.setFrame('TPE1', [artist]); + if (genre) writer.setFrame('TCON', [genre]); + if (title || artist || genre) writer.setFrame('TENC', 'SpectraCleanseAI Browser Quick Cleanse'); writer.addTag(); return new Blob([writer.getBlob()], { type: 'audio/mpeg' }); } From dc05817267abb52d3e3bf85c7c28863a0b0a579c Mon Sep 17 00:00:00 2001 From: Triple7 Date: Mon, 4 May 2026 04:10:06 -0700 Subject: [PATCH 2/2] Fix metadata parser import resolution for build stability --- src/utils/metadata.js | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/src/utils/metadata.js b/src/utils/metadata.js index 95e1707..36cb772 100644 --- a/src/utils/metadata.js +++ b/src/utils/metadata.js @@ -1,9 +1,22 @@ -import { parseBlob } from 'music-metadata-browser'; import ID3Writer from 'browser-id3-writer'; const AI_MARKERS = ['ai','generated','suno','udio','boomy','aiva','soundraw','mubert','stable audio','provenance','c2pa','content credentials','watermark','synthetic','elevenlabs']; const MARKER_REGEX_CACHE = new Map(); +let parseBlobLoader = null; + +async function getParseBlob() { + if (parseBlobLoader) return parseBlobLoader; + parseBlobLoader = import('music-metadata-browser').then((mod) => { + const fn = mod?.parseBlob || mod?.default?.parseBlob; + if (typeof fn !== 'function') { + throw new Error('music-metadata-browser parseBlob export not found'); + } + return fn; + }); + return parseBlobLoader; +} + function escapeRegex(value) { return value.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'); } @@ -18,7 +31,7 @@ function markerToRegex(marker) { return regex; } -function collectStrings(metadata) { +function collectStrings(metadata, fileName = '') { const common = metadata?.common || {}; const native = metadata?.native || {}; const values = [common.title,common.artist,common.album,...(common.genre || []),...(common.comment || []),common.encodedby,common.publisher] @@ -31,6 +44,7 @@ function collectStrings(metadata) { if (frame?.value && typeof frame.value === 'object') values.push(JSON.stringify(frame.value)); }); }); + if (fileName) values.push(String(fileName)); return values.join(' | ').toLowerCase(); } @@ -39,12 +53,13 @@ export async function readFileMetadata(file) { let parseError = null; try { + const parseBlob = await getParseBlob(); parsed = await parseBlob(file); } catch (error) { parseError = error; } - const searchable = collectStrings(parsed); + const searchable = collectStrings(parsed, file?.name || ''); const detectedMarkers = AI_MARKERS.filter((marker) => markerToRegex(marker).test(searchable)); return { format: parsed?.format?.container || file.type || 'unknown',