Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 12 additions & 1 deletion apps/backend/services/lml/lml.client.ts
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,18 @@ class LmlClientError extends Error {

export { LmlClientError };

const TIMEOUT_MS = 5000;
// BS#873: matches `jobs/flowsheet-metadata-backfill/lml-fetch.ts` so the
// runtime path tolerates the same cold-cache LML cascade the backfill
// already accepts. Safe because every BS caller of /lookup runs
// fire-and-forget after the HTTP response is sent — a 30 s budget holds
// a Node promise + LML socket, not a user-visible request. Earlier 5 s
// budget was set when LML comfortably fit inside it; cold-cache
// compilations now land at 8-18 s (measured 2026-05-19, pre-A2/A3/A8
// promotion). The catch arm in `services/metadata/enrichment.service.ts`
// stamps the row with synthesized search URLs (sans
// `metadata_attempt_at`) so even a 30 s timeout doesn't leave a fully
// blank row for the listener.
const TIMEOUT_MS = 30000;

/**
* Rate awareness of LML's Discogs ceilings (BS#906 / G4).
Expand Down
34 changes: 33 additions & 1 deletion apps/backend/services/metadata/enrichment.service.ts
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,9 @@ import * as Sentry from '@sentry/node';
import { sql } from 'drizzle-orm';
import { db, flowsheet } from '@wxyc/database';
import { fetchMetadata } from './metadata.service.js';
import { SearchUrlProvider } from './providers/search-urls.provider.js';

const searchUrls = new SearchUrlProvider();

export interface EnrichmentInput {
flowsheetId: number;
Expand Down Expand Up @@ -131,7 +134,7 @@ export function fireAndForgetMetadataForRow(input: EnrichmentInput): void {
// landing wins.
.where(sql`"id" = ${input.flowsheetId} AND "metadata_attempt_at" IS NULL`);
})
.catch((err) => {
.catch(async (err) => {
console.error('[Flowsheet] Metadata fetch failed:', err);
Sentry.captureException(err, {
tags: { subsystem: 'metadata' },
Expand All @@ -141,6 +144,35 @@ export function fireAndForgetMetadataForRow(input: EnrichmentInput): void {
albumTitle: input.albumTitle,
},
});
// BS#873: best-effort fallback — write the three synthesized search
// URLs so the iOS playlist isn't fully blank while the LML cascade
// recovers. Crucially, do NOT stamp metadata_attempt_at: the row
// must stay eligible for the recurring drift-repair sweep so the
// real artwork/Discogs/Spotify/Apple match can land on a future
// attempt. The IS NULL gate matches the success path's idempotency
// contract (`jobs/flowsheet-metadata-backfill/enrich.ts:173`) so a
// race between the backfill and this catch arm leaves whichever
// landed first intact.
const urls = searchUrls.getAllSearchUrls(
input.artistName,
input.albumTitle ?? undefined,
input.trackTitle ?? undefined
);
try {
await db
.update(flowsheet)
.set({
youtube_music_url: urls.youtubeMusicUrl,
bandcamp_url: urls.bandcampUrl,
soundcloud_url: urls.soundcloudUrl,
})
.where(sql`"id" = ${input.flowsheetId} AND "metadata_attempt_at" IS NULL`);
} catch (writeErr) {
// Observability path must never escalate a database failure on
// the fallback into an unhandled rejection — the original LML
// error has already reached Sentry; this is best-effort only.
console.error('[Flowsheet] Fallback URL write failed:', writeErr);
}
});

inFlightEnrichments.add(promise);
Expand Down
35 changes: 23 additions & 12 deletions tests/integration/metadata-lml.spec.js
Original file line number Diff line number Diff line change
Expand Up @@ -177,7 +177,7 @@ describe('Metadata via LML (Mock API)', () => {
});

describe('LML failure handling', () => {
test('LML 500 error: entry created, no metadata columns populated, no attempt stamp', async () => {
test('LML 500 error: entry created, synthesized search URLs populated, attempt stamp left NULL (BS#873)', async () => {
if (!mockApiAvailable) return;

// Wait for any in-flight fire-and-forget to settle, then reset + simulate
Expand All @@ -199,22 +199,33 @@ describe('Metadata via LML (Mock API)', () => {
// doesn't depend on metadata enrichment.
expect(addRes.body.id).toBeDefined();

// Per #639, fetchMetadata re-throws on LML failure. The .catch branch
// in enrichment.service.ts logs to Sentry and deliberately does not
// touch the row, so:
// - metadata_attempt_at stays NULL (the row stays retryable by the
// historical drain in #638 and the recurring drift-repair sweep);
// - none of the metadata URL columns are populated, including the
// synthesized search URLs (they used to be filled here, which
// conflated tried-and-no-match with tried-and-LML-failed).
// BS#873: fetchMetadata re-throws on LML failure (per #639). The
// .catch branch in enrichment.service.ts now writes the three free
// synthesized YouTube/Bandcamp/SoundCloud search URLs so the listener
// isn't left with a fully blank row while the LML cascade recovers.
// Crucially, metadata_attempt_at stays NULL — the row remains
// eligible for the recurring drift-repair sweep so the real
// artwork/Discogs/Spotify/Apple match can land on a future attempt.
// Give the fire-and-forget enough time to settle into the .catch.
await new Promise((r) => setTimeout(r, 500));
const res = await request.get('/flowsheet').query({ limit: 20 }).send();
const entry = res.body.entries?.find((e) => e.id === addRes.body.id);
expect(entry).toBeDefined();
expect(entry.youtube_music_url).toBeNull();
expect(entry.bandcamp_url).toBeNull();
expect(entry.soundcloud_url).toBeNull();
// Synthesized search URLs ARE populated on the catch path.
expect(entry.youtube_music_url).toContain('music.youtube.com');
expect(entry.bandcamp_url).toContain('bandcamp.com');
expect(entry.soundcloud_url).toContain('soundcloud.com');
// LML-dependent URLs remain NULL — they can't be synthesized client-side.
expect(entry.artwork_url).toBeNull();
expect(entry.discogs_url).toBeNull();
expect(entry.spotify_url).toBeNull();
expect(entry.apple_music_url).toBeNull();
expect(entry.artist_bio).toBeNull();
expect(entry.artist_wikipedia_url).toBeNull();
// The retryability invariant (`metadata_attempt_at IS NULL`) is asserted
// at the SQL chunk level by `tests/unit/services/metadata.enrichment.test.ts`
// — `flowsheet.controller.ts:40` deliberately omits the column from
// `IFSEntry` so we can't see it from the HTTP response.
});
});
});
Expand Down
47 changes: 44 additions & 3 deletions tests/unit/services/metadata.enrichment.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -178,19 +178,60 @@ describe('fireAndForgetMetadataForRow', () => {
expect(renderSql(setArgs.metadata_attempt_at)).toMatch(/NOW\(\)/i);
});

it('does NOT stamp metadata_attempt_at when fetchMetadata throws', async () => {
it('on LML failure: writes synthesized search URLs without stamping metadata_attempt_at (BS#873)', async () => {
// When the LML lookup rejects (timeout, 502, etc.), the row should not be
// left fully empty. Synthesize the three free YouTube/Bandcamp/SoundCloud
// search URLs and write them. Crucially, do NOT stamp metadata_attempt_at
// — the row stays eligible for the recurring drift-repair sweep so the
// real artwork/Discogs match can still land on a future attempt.
mockFetchMetadata.mockRejectedValue(new Error('LML responded with 502'));

fireAndForgetMetadataForRow({
flowsheetId: 44,
artistName: 'King Crimson',
albumTitle: 'Discipline',
trackTitle: 'Elephant Talk',
});

await new Promise((resolve) => setImmediate(resolve));

expect(mockDb.update).not.toHaveBeenCalled();
expect(mockDb._chain.set).not.toHaveBeenCalled();
expect(mockDb.update).toHaveBeenCalled();
const setArgs = mockDb._chain.set.mock.calls[0]?.[0] as Record<string, unknown>;
expect(setArgs).toBeDefined();
expect(setArgs).not.toHaveProperty('metadata_attempt_at');
expect(setArgs.youtube_music_url).toMatch(/^https:\/\/music\.youtube\.com\/search\?q=/);
expect(setArgs.bandcamp_url).toMatch(/^https:\/\/bandcamp\.com\/search\?q=/);
expect(setArgs.soundcloud_url).toMatch(/^https:\/\/soundcloud\.com\/search\?q=/);
// Discogs/Spotify/Apple/artist columns can't be synthesized without
// LML and must NOT be touched by the fallback path (a follow-up
// success on the backfill sweep needs to fill them).
expect(setArgs).not.toHaveProperty('artwork_url');
expect(setArgs).not.toHaveProperty('discogs_url');
expect(setArgs).not.toHaveProperty('spotify_url');
expect(setArgs).not.toHaveProperty('apple_music_url');
expect(setArgs).not.toHaveProperty('artist_bio');
expect(setArgs).not.toHaveProperty('artist_wikipedia_url');
});

it('on LML failure: UPDATE narrows on metadata_attempt_at IS NULL (idempotent with backfill, BS#873)', async () => {
// Mirror the success-path WHERE clause so a sweep UPDATE that already
// landed (real artwork + stamped attempt_at) cannot be clobbered by the
// catch-arm fallback. Row-lock granularity in PG means the second
// writer's UPDATE resolves to 0 rows, preserving the prior value.
mockFetchMetadata.mockRejectedValue(new Error('LML timed out'));

fireAndForgetMetadataForRow({
flowsheetId: 44,
artistName: 'King Crimson',
albumTitle: 'Discipline',
});

await new Promise((resolve) => setImmediate(resolve));

const whereCalls = mockDb._chain.where.mock.calls;
expect(whereCalls.length).toBeGreaterThan(0);
const lastWhereArg = whereCalls[whereCalls.length - 1][0];
expect(renderSql(lastWhereArg)).toMatch(/metadata_attempt_at.*IS\s+NULL/i);
});

it('skips the DB update when fetchMetadata returns null', async () => {
Expand Down
Loading