From c69d920f3298843ca73314d7d24415ac11be7286 Mon Sep 17 00:00:00 2001 From: Jake Bromberg Date: Tue, 19 May 2026 12:10:34 -0700 Subject: [PATCH 1/3] Pin env-var surface parity between docker-compose ci backend and GHA workflow (BS#958) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Per BS#164, the GHA integration suite runs the backend as a host node process with env from `.github/workflows/test.yml`; `npm run ci:testmock` runs it as a docker container with env from `dev_env/docker-compose.yml`. The two env surfaces drift independently — BS#955 was the most recent symptom (G4's `LML_CLIENT_*` overrides made it into the unit harness but neither CI surface) and the manual fix in BS#957 pinned the limiter env vars on both sides with a per-PR source-grep test. This commit generalizes that pattern. `tests/unit/scripts/ci-env-surface-parity.test.ts` extracts the env-key set from both surfaces and asserts that any divergence falls within an explicit allowlist. Each allowlist entry carries a one-line `Why:` justifying the divergence — the deliberate-thought receipt. Adding a new env var to one surface now fails CI unless either (a) the var is mirrored to the other or (b) the allowlist is amended with a justification. The allowlist captures today's intentional divergence (18 compose-only keys, 9 workflow-only keys). The lml-limiter-test-env.test.ts value-pin remains useful and is not retired — this guard enforces key presence, not value equality. Verified the test catches a regression: temporarily added a `FAKE_GUARD_TEST_KEY` to the compose backend env, ran the test, observed a clean diff failure showing the unauthorized key. --- .../scripts/ci-env-surface-parity.test.ts | 200 ++++++++++++++++++ 1 file changed, 200 insertions(+) create mode 100644 tests/unit/scripts/ci-env-surface-parity.test.ts diff --git a/tests/unit/scripts/ci-env-surface-parity.test.ts b/tests/unit/scripts/ci-env-surface-parity.test.ts new file mode 100644 index 00000000..0b232562 --- /dev/null +++ b/tests/unit/scripts/ci-env-surface-parity.test.ts @@ -0,0 +1,200 @@ +/** + * Pin the env-var surface parity between the two CI integration-test paths + * (BS#958): + * + * 1. `dev_env/docker-compose.yml` — the `backend` service env, used by + * `npm run ci:testmock` (the local docker-based repro). + * 2. `.github/workflows/test.yml` — the workflow-level `env:` block plus + * the `Start services` step `env:` block, used by GHA CI which runs + * the backend as a host node process and does NOT load env from + * docker-compose. + * + * Per BS#164, the host-process model on CI is intentional. But the two env + * surfaces drift independently and a new var landing in one but not the + * other can silently break the integration suite — as happened in BS#955 + * with G4's `LML_CLIENT_MAX_CONCURRENT` + `LML_CLIENT_RATE_PER_MIN`. This + * test treats today's divergence as the explicit allowlist; any new key + * appearing in only one surface fails CI with a clear message pointing the + * dev at either mirroring the var or amending the allowlist (with a brief + * `Why:` line). + * + * Source-grep test (no docker, no PG). Same style as the adjacent + * `lml-limiter-test-env.test.ts` and `docker-compose-db-port.test.ts`. + * + * # Updating the allowlist + * + * When you add a new env var to one of the two surfaces, the test will + * fail. Two options: + * + * - Preferred: mirror the var into the other surface. The integration + * suite gets the same behavior on both paths and the parity holds. + * - Otherwise: add the key to the appropriate `EXPECTED_ONLY_*` array + * below with a one-line `// Why:` comment explaining the divergence + * (e.g. "test-harness-only, never read by backend"). + * + * The allowlist is intentionally explicit. Don't silently expand it — the + * one-line `Why:` is the deliberate-thought receipt. + */ + +import * as fs from 'fs'; +import * as path from 'path'; + +const repoRoot = path.resolve(__dirname, '../../..'); +const composePath = path.join(repoRoot, 'dev_env/docker-compose.yml'); +const workflowPath = path.join(repoRoot, '.github/workflows/test.yml'); + +/** + * Keys present in the compose `backend` env block but NOT in the workflow + * env surface (top-level + Start services). Each entry must be justified. + */ +const EXPECTED_ONLY_IN_COMPOSE = [ + // Why: BETTER_AUTH service URLs differ between the docker network (auth:8080) + // and host-process model (localhost:8083). The host-process path relies on + // backend defaults that match its localhost network; the compose path + // overrides for the docker hostname `auth`. + 'BETTER_AUTH_AUDIENCE', + 'BETTER_AUTH_ISSUER', + 'BETTER_AUTH_JWKS_URL', + + // Why: backend-side feature flags that activate the full track-search + // fallback cascade in `tests/integration/library.spec.js` (Track 1 = CTA, + // Track 2 = LML cross-ref). The workflow path doesn't set these; whether + // that's a coverage gap on the workflow path is tracked separately and + // out of scope for this guard. + 'CATALOG_TRACK_SEARCH_CTA_ENABLED', + 'CATALOG_TRACK_SEARCH_DISCOGS_ENABLED', + + // Why: AWS Cognito identifiers from the legacy auth flow. Compose sets + // them via `.env` substitution; backend defaults work in the workflow's + // host-process auth-bypass mode. + 'COGNITO_USERPOOL_ID', + 'DJ_APP_CLIENT_ID', + + // Why: DB host/port. Compose uses the docker network hostname `ci-db` on + // in-container port `5432`; workflow uses backend defaults (`localhost` + // + `5432`) that match the host-process network. DB_PORT in compose is + // hardcoded `5432` after BS#959. + 'DB_HOST', + 'DB_PORT', + + // Why: metadata cache sizes. Compose substitutes from `.env` with + // defaults that match prod; workflow relies on backend defaults. + 'METADATA_ALBUM_CACHE_MAX_SIZE', + 'METADATA_ARTIST_CACHE_MAX_SIZE', + 'METADATA_ROTATION_PRIORITY', + + // Why: rate-limit gating. Compose exposes `TEST_RATE_LIMITING` so the + // `ci:env:full` script can flip the cap on and exercise the rate-limit + // middleware; the workflow doesn't run that variant. + 'RATE_LIMIT_REGISTRATION_MAX', + 'RATE_LIMIT_REGISTRATION_WINDOW_MS', + 'RATE_LIMIT_REQUEST_MAX', + 'RATE_LIMIT_REQUEST_WINDOW_MS', + 'SIMULATE_SLACK_FAILURE', + 'TEST_RATE_LIMITING', +]; + +/** + * Keys present in the workflow env surface but NOT in the compose + * `backend` env block. Each entry must be justified. + */ +const EXPECTED_ONLY_IN_WORKFLOW = [ + // Why: integration-test login credential. Read by the test harness (e.g. + // `tests/integration/setup/login.js`), not by the backend process. + 'AUTH_PASSWORD', + + // Why: CI-only auth URL alias used by the test harness when constructing + // host-process URLs. Not read by the backend. + 'CI_BETTER_AUTH_URL', + + // Why: host port mappings. CI_DB_PORT and CI_PORT are read by + // docker-compose externally (port-mapping resolution) on the local path; + // on GHA the workflow exports them as a top-level convention so scripts + // that share docker-compose semantics work in both places. Not read by + // the backend process. + 'CI_DB_PORT', + 'CI_PORT', + + // Why: mock-api server addressing. Backend reads LIBRARY_METADATA_URL + // (mirrored). MOCK_API_PORT + MOCK_API_URL exist for the harness side + // (mock-server start command, test assertions about response bodies). + 'MOCK_API_PORT', + 'MOCK_API_URL', + + // Why: test-mode signal needed on the workflow's host-process model + // (gates rate-limit middleware bypass, etc.). The compose backend + // container doesn't set NODE_ENV but its test-mode behaviors are reached + // via parallel gates (`TEST_RATE_LIMITING=false`, `AUTH_BYPASS=true`). + // If a code path appears that requires NODE_ENV=test specifically and + // can't reach it via those gates, this entry should move to "mirror in + // both" — but that's a backend change, not a CI-env one. + 'NODE_ENV', + + // Why: workflow-level alias for CI_PORT used by certain non-test scripts + // that read PORT (e.g. drizzle-kit migrations); harness-side. + 'PORT', + + // Why: backend-host URL for the integration test harness. Read by tests + // when forming http requests; the backend itself doesn't bind based on + // it (it binds on PORT). + 'TEST_HOST', +]; + +describe('CI env-var surface parity (BS#958)', () => { + const composeKeys = extractComposeBackendEnvKeys(); + const workflowKeys = extractWorkflowEnvKeys(); + + const actualOnlyInCompose = [...composeKeys].filter((k) => !workflowKeys.has(k)).sort(); + const actualOnlyInWorkflow = [...workflowKeys].filter((k) => !composeKeys.has(k)).sort(); + + it('keys present only in dev_env/docker-compose.yml match the allowlist', () => { + // Sort the allowlist for stable comparison; the source order in the + // allowlist is grouped for readability. + expect(actualOnlyInCompose).toEqual([...EXPECTED_ONLY_IN_COMPOSE].sort()); + }); + + it('keys present only in .github/workflows/test.yml match the allowlist', () => { + expect(actualOnlyInWorkflow).toEqual([...EXPECTED_ONLY_IN_WORKFLOW].sort()); + }); +}); + +function extractComposeBackendEnvKeys(): Set { + const compose = fs.readFileSync(composePath, 'utf-8'); + const block = compose.match(/^\s{2}backend:[\s\S]*?(?=^\s{2}\w|(?![\s\S]))/m)?.[0]; + if (!block) { + throw new Error('docker-compose `backend` service block not found'); + } + const envIdx = block.indexOf('environment:'); + if (envIdx === -1) { + throw new Error('docker-compose `backend` service has no `environment:` block'); + } + const tail = block.slice(envIdx); + const matches = [...tail.matchAll(/^\s*-\s*([A-Z][A-Z0-9_]*)=/gm)]; + return new Set(matches.map((m) => m[1])); +} + +function extractWorkflowEnvKeys(): Set { + const workflow = fs.readFileSync(workflowPath, 'utf-8'); + + // Top-level workflow env block (lines 15-28 today). + const topMatch = workflow.match(/^env:\n((?:\s+[A-Z][A-Z0-9_]*:.*\n)+)/m); + if (!topMatch) { + throw new Error('workflow top-level `env:` block not found'); + } + const topKeys = [...topMatch[1].matchAll(/^\s+([A-Z][A-Z0-9_]*):/gm)].map((m) => m[1]); + + // `Start services` step env block. + const startIdx = workflow.indexOf('- name: Start services'); + if (startIdx === -1) { + throw new Error('workflow step `Start services` not found'); + } + const stepSlice = workflow.slice(startIdx); + const runIdx = stepSlice.indexOf('\n run:'); + if (runIdx === -1) { + throw new Error('workflow step `Start services` has no `run:` key'); + } + const stepEnv = stepSlice.slice(0, runIdx); + const stepKeys = [...stepEnv.matchAll(/^\s+([A-Z][A-Z0-9_]*):/gm)].map((m) => m[1]); + + return new Set([...topKeys, ...stepKeys]); +} From 70ea3befdea171222cb47f77423da356449b6e48 Mon Sep 17 00:00:00 2001 From: Jake Bromberg Date: Tue, 19 May 2026 14:59:04 -0700 Subject: [PATCH 2/3] Include Integration-Tests job-level env in workflow surface (review feedback) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The first cut missed the `Integration-Tests:` job-level `env:` block (lines 266-281), which sets DB_HOST, DB_PORT, BETTER_AUTH_AUDIENCE/ISSUER/JWKS_URL, AUTH_PORT, BETTER_AUTH_SECRET, RUN_TESTS on the host-process backend. As a result the prior allowlist marked five vars as "only in compose" with `Why:` comments asserting that the workflow relied on backend defaults — but the workflow does set them, just in the job scope. That misinformation undermined the deliberate-thought-receipt purpose of the `Why:` lines. Fixes: - Split `extractWorkflowEnvKeys()` into three named extractors (`extractTopLevelEnv`, `extractIntegrationTestsJobEnv`, `extractStartServicesStepEnv`), unioned at the call site. Anchored the top-level extractor to the pre-`jobs:` slice so it can't accidentally match a job-level `env:` if file order changes. - Fixed a slice-off-by-one in the job-env extractor: `slice(0, stepsIdx)` excluded the trailing newline of the last env line, silently dropping `BETTER_AUTH_SECRET`. Adjusted to `slice(0, stepsIdx + 1)` and documented the gotcha inline. - Removed BETTER_AUTH_AUDIENCE/ISSUER/JWKS_URL, DB_HOST, DB_PORT from EXPECTED_ONLY_IN_COMPOSE (they're in the workflow job env). - Added AUTH_PORT, BETTER_AUTH_SECRET, RUN_TESTS to EXPECTED_ONLY_IN_WORKFLOW (genuinely workflow-only, with accurate `Why:` lines). - Header docstring updated to describe the three-scope union and dropped the stale reference to the BS#959 sibling test (not in this branch). --- .../scripts/ci-env-surface-parity.test.ts | 126 ++++++++++++++---- 1 file changed, 97 insertions(+), 29 deletions(-) diff --git a/tests/unit/scripts/ci-env-surface-parity.test.ts b/tests/unit/scripts/ci-env-surface-parity.test.ts index 0b232562..291a3959 100644 --- a/tests/unit/scripts/ci-env-surface-parity.test.ts +++ b/tests/unit/scripts/ci-env-surface-parity.test.ts @@ -4,10 +4,12 @@ * * 1. `dev_env/docker-compose.yml` — the `backend` service env, used by * `npm run ci:testmock` (the local docker-based repro). - * 2. `.github/workflows/test.yml` — the workflow-level `env:` block plus - * the `Start services` step `env:` block, used by GHA CI which runs - * the backend as a host node process and does NOT load env from - * docker-compose. + * 2. `.github/workflows/test.yml` — used by GHA CI, which runs the + * backend as a host node process and does NOT load env from + * docker-compose. The workflow surface is the union of three YAML + * scopes the host-process backend inherits: the workflow-top-level + * `env:`, the `Integration-Tests` job-level `env:`, and the + * `Start services` step-level `env:`. * * Per BS#164, the host-process model on CI is intentional. But the two env * surfaces drift independently and a new var landing in one but not the @@ -19,7 +21,8 @@ * `Why:` line). * * Source-grep test (no docker, no PG). Same style as the adjacent - * `lml-limiter-test-env.test.ts` and `docker-compose-db-port.test.ts`. + * `lml-limiter-test-env.test.ts` and the other guards in + * `tests/unit/scripts/`. * * # Updating the allowlist * @@ -44,18 +47,11 @@ const composePath = path.join(repoRoot, 'dev_env/docker-compose.yml'); const workflowPath = path.join(repoRoot, '.github/workflows/test.yml'); /** - * Keys present in the compose `backend` env block but NOT in the workflow - * env surface (top-level + Start services). Each entry must be justified. + * Keys present in the compose `backend` env block but NOT anywhere in the + * workflow env surface (top-level + Integration-Tests job + Start services + * step). Each entry must be justified. */ const EXPECTED_ONLY_IN_COMPOSE = [ - // Why: BETTER_AUTH service URLs differ between the docker network (auth:8080) - // and host-process model (localhost:8083). The host-process path relies on - // backend defaults that match its localhost network; the compose path - // overrides for the docker hostname `auth`. - 'BETTER_AUTH_AUDIENCE', - 'BETTER_AUTH_ISSUER', - 'BETTER_AUTH_JWKS_URL', - // Why: backend-side feature flags that activate the full track-search // fallback cascade in `tests/integration/library.spec.js` (Track 1 = CTA, // Track 2 = LML cross-ref). The workflow path doesn't set these; whether @@ -70,13 +66,6 @@ const EXPECTED_ONLY_IN_COMPOSE = [ 'COGNITO_USERPOOL_ID', 'DJ_APP_CLIENT_ID', - // Why: DB host/port. Compose uses the docker network hostname `ci-db` on - // in-container port `5432`; workflow uses backend defaults (`localhost` - // + `5432`) that match the host-process network. DB_PORT in compose is - // hardcoded `5432` after BS#959. - 'DB_HOST', - 'DB_PORT', - // Why: metadata cache sizes. Compose substitutes from `.env` with // defaults that match prod; workflow relies on backend defaults. 'METADATA_ALBUM_CACHE_MAX_SIZE', @@ -103,6 +92,19 @@ const EXPECTED_ONLY_IN_WORKFLOW = [ // `tests/integration/setup/login.js`), not by the backend process. 'AUTH_PASSWORD', + // Why: auth-service host port (workflow's auth process binds on 8083). + // The backend doesn't consume it directly — it encodes the auth URL via + // BETTER_AUTH_URL (mirrored). Compose puts AUTH_PORT in the auth service + // env, not the backend env. + 'AUTH_PORT', + + // Why: better-auth JWT signing secret needed by the auth service host + // process. Compose's ci-profile auth container omits it and relies on + // better-auth's test-mode default; the workflow sets it explicitly. The + // backend doesn't sign JWTs (only the auth service does), so this is + // workflow-only without a real divergence in backend behavior. + 'BETTER_AUTH_SECRET', + // Why: CI-only auth URL alias used by the test harness when constructing // host-process URLs. Not read by the backend. 'CI_BETTER_AUTH_URL', @@ -134,6 +136,12 @@ const EXPECTED_ONLY_IN_WORKFLOW = [ // that read PORT (e.g. drizzle-kit migrations); harness-side. 'PORT', + // Why: workflow-only gating var sourced from the `detect-changes` job's + // `run-integration` output. Drives `if:` conditions on every step in the + // `Integration-Tests` job; compose doesn't gate steps this way (compose + // runs everything unconditionally; profile selection is the gate). + 'RUN_TESTS', + // Why: backend-host URL for the integration test harness. Read by tests // when forming http requests; the backend itself doesn't bind based on // it (it binds on PORT). @@ -176,14 +184,76 @@ function extractComposeBackendEnvKeys(): Set { function extractWorkflowEnvKeys(): Set { const workflow = fs.readFileSync(workflowPath, 'utf-8'); - // Top-level workflow env block (lines 15-28 today). - const topMatch = workflow.match(/^env:\n((?:\s+[A-Z][A-Z0-9_]*:.*\n)+)/m); + // The host-process backend in GHA CI inherits env from THREE YAML scopes + // (workflow → job → step). Miss any of them and the parity check produces + // false positives. Take the union of all three. + return new Set([ + ...extractTopLevelEnv(workflow), + ...extractIntegrationTestsJobEnv(workflow), + ...extractStartServicesStepEnv(workflow), + ]); +} + +/** + * Workflow-top-level `env:`, scoped to the region above `jobs:` so a + * job-level `env:` can't accidentally match first. Required keys live at + * column 2 (`^ KEY:`). + */ +function extractTopLevelEnv(workflow: string): string[] { + const jobsIdx = workflow.indexOf('\njobs:\n'); + if (jobsIdx === -1) { + throw new Error('workflow has no top-level `jobs:` key'); + } + const preJobs = workflow.slice(0, jobsIdx); + const topMatch = preJobs.match(/^env:\n((?:\s+[A-Z][A-Z0-9_]*:.*\n)+)/m); if (!topMatch) { throw new Error('workflow top-level `env:` block not found'); } - const topKeys = [...topMatch[1].matchAll(/^\s+([A-Z][A-Z0-9_]*):/gm)].map((m) => m[1]); + return [...topMatch[1].matchAll(/^\s+([A-Z][A-Z0-9_]*):/gm)].map((m) => m[1]); +} + +/** + * Job-level `env:` under `Integration-Tests:`. This block (lines 266-281 + * today) sets the host-process backend's DB_HOST / DB_PORT / BETTER_AUTH_* + * etc. — vars the docker-compose path sets in the backend service env + * block. Missing this scope causes false-positive "only in compose" + * entries. + * + * The 4-space `env:` anchor specifically discriminates job-level env from + * the `services.postgres.env:` block (8-space indent) immediately above it + * in the same job — that one populates the postgres SERVICE container's + * env, not the runner-process env, and shouldn't be merged in. + */ +function extractIntegrationTestsJobEnv(workflow: string): string[] { + const jobIdx = workflow.indexOf('\n Integration-Tests:'); + if (jobIdx === -1) { + throw new Error('workflow `Integration-Tests:` job not found'); + } + const jobSlice = workflow.slice(jobIdx); + // Bound the slice at the next job (col 2) or `steps:` (col 4) so the + // regex doesn't run away into later jobs. `steps:` comes after the + // job-level env block. + const stepsIdx = jobSlice.indexOf('\n steps:'); + if (stepsIdx === -1) { + throw new Error('Integration-Tests job has no `steps:` key'); + } + // `+1` includes the trailing newline of the last env line; the + // env-key regex requires each line to terminate with `\n`, so without + // it the LAST entry in the env block (today: BETTER_AUTH_SECRET) is + // silently dropped. + const boundedSlice = jobSlice.slice(0, stepsIdx + 1); + const envMatch = boundedSlice.match(/\n env:\n((?: [A-Z][A-Z0-9_]*:.*\n)+)/); + if (!envMatch) { + throw new Error('Integration-Tests job-level `env:` block not found'); + } + return [...envMatch[1].matchAll(/^ ([A-Z][A-Z0-9_]*):/gm)].map((m) => m[1]); +} - // `Start services` step env block. +/** + * `Start services` step `env:` — the innermost YAML scope, where the + * limiter overrides (BS#955) live today. + */ +function extractStartServicesStepEnv(workflow: string): string[] { const startIdx = workflow.indexOf('- name: Start services'); if (startIdx === -1) { throw new Error('workflow step `Start services` not found'); @@ -194,7 +264,5 @@ function extractWorkflowEnvKeys(): Set { throw new Error('workflow step `Start services` has no `run:` key'); } const stepEnv = stepSlice.slice(0, runIdx); - const stepKeys = [...stepEnv.matchAll(/^\s+([A-Z][A-Z0-9_]*):/gm)].map((m) => m[1]); - - return new Set([...topKeys, ...stepKeys]); + return [...stepEnv.matchAll(/^\s+([A-Z][A-Z0-9_]*):/gm)].map((m) => m[1]); } From 493eb9a2e0f4909e8e25cb222e8d1d750657e34c Mon Sep 17 00:00:00 2001 From: Jake Bromberg Date: Tue, 19 May 2026 15:02:54 -0700 Subject: [PATCH 3/3] Use { n } quantifier for spaces in regex to satisfy no-regex-spaces lint rule --- tests/unit/scripts/ci-env-surface-parity.test.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/unit/scripts/ci-env-surface-parity.test.ts b/tests/unit/scripts/ci-env-surface-parity.test.ts index 291a3959..c167036a 100644 --- a/tests/unit/scripts/ci-env-surface-parity.test.ts +++ b/tests/unit/scripts/ci-env-surface-parity.test.ts @@ -242,11 +242,11 @@ function extractIntegrationTestsJobEnv(workflow: string): string[] { // it the LAST entry in the env block (today: BETTER_AUTH_SECRET) is // silently dropped. const boundedSlice = jobSlice.slice(0, stepsIdx + 1); - const envMatch = boundedSlice.match(/\n env:\n((?: [A-Z][A-Z0-9_]*:.*\n)+)/); + const envMatch = boundedSlice.match(/\n {4}env:\n((?: {6}[A-Z][A-Z0-9_]*:.*\n)+)/); if (!envMatch) { throw new Error('Integration-Tests job-level `env:` block not found'); } - return [...envMatch[1].matchAll(/^ ([A-Z][A-Z0-9_]*):/gm)].map((m) => m[1]); + return [...envMatch[1].matchAll(/^ {6}([A-Z][A-Z0-9_]*):/gm)].map((m) => m[1]); } /**