diff --git a/apps/backend/services/lml/lml.client.ts b/apps/backend/services/lml/lml.client.ts index 9e101f67..6b69ebf4 100644 --- a/apps/backend/services/lml/lml.client.ts +++ b/apps/backend/services/lml/lml.client.ts @@ -52,7 +52,18 @@ class LmlClientError extends Error { export { LmlClientError }; -const TIMEOUT_MS = 5000; +// BS#873: matches `jobs/flowsheet-metadata-backfill/lml-fetch.ts` so the +// runtime path tolerates the same cold-cache LML cascade the backfill +// already accepts. Safe because every BS caller of /lookup runs +// fire-and-forget after the HTTP response is sent — a 30 s budget holds +// a Node promise + LML socket, not a user-visible request. Earlier 5 s +// budget was set when LML comfortably fit inside it; cold-cache +// compilations now land at 8-18 s (measured 2026-05-19, pre-A2/A3/A8 +// promotion). The catch arm in `services/metadata/enrichment.service.ts` +// stamps the row with synthesized search URLs (sans +// `metadata_attempt_at`) so even a 30 s timeout doesn't leave a fully +// blank row for the listener. +const TIMEOUT_MS = 30000; /** * Rate awareness of LML's Discogs ceilings (BS#906 / G4). diff --git a/apps/backend/services/metadata/enrichment.service.ts b/apps/backend/services/metadata/enrichment.service.ts index baa8592e..a5a8ab36 100644 --- a/apps/backend/services/metadata/enrichment.service.ts +++ b/apps/backend/services/metadata/enrichment.service.ts @@ -21,6 +21,9 @@ import * as Sentry from '@sentry/node'; import { sql } from 'drizzle-orm'; import { db, flowsheet } from '@wxyc/database'; import { fetchMetadata } from './metadata.service.js'; +import { SearchUrlProvider } from './providers/search-urls.provider.js'; + +const searchUrls = new SearchUrlProvider(); export interface EnrichmentInput { flowsheetId: number; @@ -131,7 +134,7 @@ export function fireAndForgetMetadataForRow(input: EnrichmentInput): void { // landing wins. .where(sql`"id" = ${input.flowsheetId} AND "metadata_attempt_at" IS NULL`); }) - .catch((err) => { + .catch(async (err) => { console.error('[Flowsheet] Metadata fetch failed:', err); Sentry.captureException(err, { tags: { subsystem: 'metadata' }, @@ -141,6 +144,35 @@ export function fireAndForgetMetadataForRow(input: EnrichmentInput): void { albumTitle: input.albumTitle, }, }); + // BS#873: best-effort fallback — write the three synthesized search + // URLs so the iOS playlist isn't fully blank while the LML cascade + // recovers. Crucially, do NOT stamp metadata_attempt_at: the row + // must stay eligible for the recurring drift-repair sweep so the + // real artwork/Discogs/Spotify/Apple match can land on a future + // attempt. The IS NULL gate matches the success path's idempotency + // contract (`jobs/flowsheet-metadata-backfill/enrich.ts:173`) so a + // race between the backfill and this catch arm leaves whichever + // landed first intact. + const urls = searchUrls.getAllSearchUrls( + input.artistName, + input.albumTitle ?? undefined, + input.trackTitle ?? undefined + ); + try { + await db + .update(flowsheet) + .set({ + youtube_music_url: urls.youtubeMusicUrl, + bandcamp_url: urls.bandcampUrl, + soundcloud_url: urls.soundcloudUrl, + }) + .where(sql`"id" = ${input.flowsheetId} AND "metadata_attempt_at" IS NULL`); + } catch (writeErr) { + // Observability path must never escalate a database failure on + // the fallback into an unhandled rejection — the original LML + // error has already reached Sentry; this is best-effort only. + console.error('[Flowsheet] Fallback URL write failed:', writeErr); + } }); inFlightEnrichments.add(promise); diff --git a/tests/integration/metadata-lml.spec.js b/tests/integration/metadata-lml.spec.js index da8569b9..6506620f 100644 --- a/tests/integration/metadata-lml.spec.js +++ b/tests/integration/metadata-lml.spec.js @@ -177,7 +177,7 @@ describe('Metadata via LML (Mock API)', () => { }); describe('LML failure handling', () => { - test('LML 500 error: entry created, no metadata columns populated, no attempt stamp', async () => { + test('LML 500 error: entry created, synthesized search URLs populated, attempt stamp left NULL (BS#873)', async () => { if (!mockApiAvailable) return; // Wait for any in-flight fire-and-forget to settle, then reset + simulate @@ -199,22 +199,33 @@ describe('Metadata via LML (Mock API)', () => { // doesn't depend on metadata enrichment. expect(addRes.body.id).toBeDefined(); - // Per #639, fetchMetadata re-throws on LML failure. The .catch branch - // in enrichment.service.ts logs to Sentry and deliberately does not - // touch the row, so: - // - metadata_attempt_at stays NULL (the row stays retryable by the - // historical drain in #638 and the recurring drift-repair sweep); - // - none of the metadata URL columns are populated, including the - // synthesized search URLs (they used to be filled here, which - // conflated tried-and-no-match with tried-and-LML-failed). + // BS#873: fetchMetadata re-throws on LML failure (per #639). The + // .catch branch in enrichment.service.ts now writes the three free + // synthesized YouTube/Bandcamp/SoundCloud search URLs so the listener + // isn't left with a fully blank row while the LML cascade recovers. + // Crucially, metadata_attempt_at stays NULL — the row remains + // eligible for the recurring drift-repair sweep so the real + // artwork/Discogs/Spotify/Apple match can land on a future attempt. // Give the fire-and-forget enough time to settle into the .catch. await new Promise((r) => setTimeout(r, 500)); const res = await request.get('/flowsheet').query({ limit: 20 }).send(); const entry = res.body.entries?.find((e) => e.id === addRes.body.id); expect(entry).toBeDefined(); - expect(entry.youtube_music_url).toBeNull(); - expect(entry.bandcamp_url).toBeNull(); - expect(entry.soundcloud_url).toBeNull(); + // Synthesized search URLs ARE populated on the catch path. + expect(entry.youtube_music_url).toContain('music.youtube.com'); + expect(entry.bandcamp_url).toContain('bandcamp.com'); + expect(entry.soundcloud_url).toContain('soundcloud.com'); + // LML-dependent URLs remain NULL — they can't be synthesized client-side. + expect(entry.artwork_url).toBeNull(); + expect(entry.discogs_url).toBeNull(); + expect(entry.spotify_url).toBeNull(); + expect(entry.apple_music_url).toBeNull(); + expect(entry.artist_bio).toBeNull(); + expect(entry.artist_wikipedia_url).toBeNull(); + // The retryability invariant (`metadata_attempt_at IS NULL`) is asserted + // at the SQL chunk level by `tests/unit/services/metadata.enrichment.test.ts` + // — `flowsheet.controller.ts:40` deliberately omits the column from + // `IFSEntry` so we can't see it from the HTTP response. }); }); }); diff --git a/tests/unit/services/metadata.enrichment.test.ts b/tests/unit/services/metadata.enrichment.test.ts index cd2f2d34..d16297d1 100644 --- a/tests/unit/services/metadata.enrichment.test.ts +++ b/tests/unit/services/metadata.enrichment.test.ts @@ -178,19 +178,60 @@ describe('fireAndForgetMetadataForRow', () => { expect(renderSql(setArgs.metadata_attempt_at)).toMatch(/NOW\(\)/i); }); - it('does NOT stamp metadata_attempt_at when fetchMetadata throws', async () => { + it('on LML failure: writes synthesized search URLs without stamping metadata_attempt_at (BS#873)', async () => { + // When the LML lookup rejects (timeout, 502, etc.), the row should not be + // left fully empty. Synthesize the three free YouTube/Bandcamp/SoundCloud + // search URLs and write them. Crucially, do NOT stamp metadata_attempt_at + // — the row stays eligible for the recurring drift-repair sweep so the + // real artwork/Discogs match can still land on a future attempt. mockFetchMetadata.mockRejectedValue(new Error('LML responded with 502')); fireAndForgetMetadataForRow({ flowsheetId: 44, artistName: 'King Crimson', albumTitle: 'Discipline', + trackTitle: 'Elephant Talk', }); await new Promise((resolve) => setImmediate(resolve)); - expect(mockDb.update).not.toHaveBeenCalled(); - expect(mockDb._chain.set).not.toHaveBeenCalled(); + expect(mockDb.update).toHaveBeenCalled(); + const setArgs = mockDb._chain.set.mock.calls[0]?.[0] as Record; + expect(setArgs).toBeDefined(); + expect(setArgs).not.toHaveProperty('metadata_attempt_at'); + expect(setArgs.youtube_music_url).toMatch(/^https:\/\/music\.youtube\.com\/search\?q=/); + expect(setArgs.bandcamp_url).toMatch(/^https:\/\/bandcamp\.com\/search\?q=/); + expect(setArgs.soundcloud_url).toMatch(/^https:\/\/soundcloud\.com\/search\?q=/); + // Discogs/Spotify/Apple/artist columns can't be synthesized without + // LML and must NOT be touched by the fallback path (a follow-up + // success on the backfill sweep needs to fill them). + expect(setArgs).not.toHaveProperty('artwork_url'); + expect(setArgs).not.toHaveProperty('discogs_url'); + expect(setArgs).not.toHaveProperty('spotify_url'); + expect(setArgs).not.toHaveProperty('apple_music_url'); + expect(setArgs).not.toHaveProperty('artist_bio'); + expect(setArgs).not.toHaveProperty('artist_wikipedia_url'); + }); + + it('on LML failure: UPDATE narrows on metadata_attempt_at IS NULL (idempotent with backfill, BS#873)', async () => { + // Mirror the success-path WHERE clause so a sweep UPDATE that already + // landed (real artwork + stamped attempt_at) cannot be clobbered by the + // catch-arm fallback. Row-lock granularity in PG means the second + // writer's UPDATE resolves to 0 rows, preserving the prior value. + mockFetchMetadata.mockRejectedValue(new Error('LML timed out')); + + fireAndForgetMetadataForRow({ + flowsheetId: 44, + artistName: 'King Crimson', + albumTitle: 'Discipline', + }); + + await new Promise((resolve) => setImmediate(resolve)); + + const whereCalls = mockDb._chain.where.mock.calls; + expect(whereCalls.length).toBeGreaterThan(0); + const lastWhereArg = whereCalls[whereCalls.length - 1][0]; + expect(renderSql(lastWhereArg)).toMatch(/metadata_attempt_at.*IS\s+NULL/i); }); it('skips the DB update when fetchMetadata returns null', async () => {