Skip to content

Commit bb561fa

Browse files
authored
feat(bazel): SOCKET_BAZEL_FORCE_QUERY_FALLBACK env-var gate for deterministic fallback-parser coverage (#1317)
## Summary

Add a `SOCKET_BAZEL_FORCE_QUERY_FALLBACK` environment variable that, when truthy (`1` / `true` / `yes`, case-insensitive), makes `socket manifest bazel` skip the `unsorted_deps.json` fast path and parse extraction output through the `bazel query --output=build` regex fallback instead.

## Why

The Bazel extractor has two parser paths inside `extractFromOneRepo`:

1. **Fast path** — `JSON.parse` over `<externalDir>/<repo>/unsorted_deps.json` (~50 lines)
2. **Fallback path** — six regex patterns over the cached `bazel query --output=build` stdout, in `bazel-build-parser.mts`

Today the choice is decided purely by `existsSync(unsorted_deps.json)`: whichever input Bazel happens to materialize on disk determines which parser runs. The fast path is also the path that runs in 90%+ of real corpus repos, because `rules_jvm_external` materializes `unsorted_deps.json` by default.
1 parent 1597740 commit bb561fa

2 files changed

Lines changed: 192 additions & 3 deletions

File tree

src/commands/manifest/bazel/extract_bazel_to_maven.mts

Lines changed: 26 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -234,6 +234,21 @@ function bazelExternalDir(
234234
}
235235
}
236236

237+
// Internal diagnostic: when truthy, skip the unsorted_deps.json fast path
// and force the bazel-query regex fallback. Used by bazel-bench to
// deterministically exercise parseBazelBuildOutput on every CI run. Truthy
// values are '1', 'true', 'yes' (case-insensitive, surrounding whitespace
// ignored); anything else (unset, '', '0', 'false') is treated as off. Not
// exposed as a user-facing CLI flag, so it is read here rather than added to
// constants.mts.
//
// Returns true only when SOCKET_BAZEL_FORCE_QUERY_FALLBACK holds one of the
// truthy spellings above. The environment is read on every call, so changes
// made to process.env between calls are observed.
function isForceQueryFallbackEnabled(): boolean {
  const raw = process.env['SOCKET_BAZEL_FORCE_QUERY_FALLBACK']
  if (!raw) {
    return false
  }
  // Trim before comparing: shells and CI systems can leave stray whitespace
  // on the value (e.g. `set VAR=1 && cmd` on Windows yields "1 "), and a
  // silent "off" would defeat the purpose of a deterministic gate.
  const normalized = raw.trim().toLowerCase()
  return normalized === '1' || normalized === 'true' || normalized === 'yes'
}
251+
237252
// Tries `external/<repo>/unsorted_deps.json` first; falls back to parsing the
238253
// probe stdout the caller already captured during discovery. Discovery runs
239254
// the same `kind("jvm_import rule|aar_import rule", @<repo>//:*)` query that
@@ -256,9 +271,17 @@ async function extractFromOneRepo(
256271
externalDir ?? '(unresolved — bazel-out symlink absent)',
257272
)
258273
}
259-
const candidates = externalDir
260-
? [path.join(externalDir, repoName, 'unsorted_deps.json')]
261-
: []
274+
const forceFallback = isForceQueryFallbackEnabled()
275+
if (forceFallback && verbose) {
276+
logger.log(
277+
`[VERBOSE] @${repoName}: SOCKET_BAZEL_FORCE_QUERY_FALLBACK set; skipping unsorted_deps.json fast path.`,
278+
)
279+
}
280+
const candidates = forceFallback
281+
? []
282+
: externalDir
283+
? [path.join(externalDir, repoName, 'unsorted_deps.json')]
284+
: []
262285
for (const c of candidates) {
263286
if (existsSync(c)) {
264287
// Bound the read to 1GB to prevent OOM on hostile content while allowing large real-world lockfiles.

src/commands/manifest/bazel/extract_bazel_to_maven.test.mts

Lines changed: 166 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,11 @@
11
import {
22
existsSync,
3+
mkdirSync,
34
mkdtempSync,
45
readFileSync,
56
readdirSync,
67
rmSync,
8+
writeFileSync,
79
} from 'node:fs'
810
import os from 'node:os'
911
import path from 'node:path'
@@ -435,3 +437,167 @@ describe('extractBazelToMaven', () => {
435437
}
436438
})
437439
})
440+
441+
describe('SOCKET_BAZEL_FORCE_QUERY_FALLBACK', () => {
  // Each parser is handed a coordinate the other one cannot produce, so the
  // coordinate that ends up in the written manifest identifies which parser
  // actually ran:
  //   - unsorted_deps.json (fast path)       → `com.example:from-json:9.9.9`
  //   - cached probe stdout (regex fallback) → `com.example:from-regex:1.0.0`
  const ENV_KEY = 'SOCKET_BAZEL_FORCE_QUERY_FALLBACK'
  const JSON_ARTIFACT = 'com.example:from-json'
  const REGEX_ARTIFACT = 'com.example:from-regex'

  const FAST_PATH_JSON = JSON.stringify({
    artifacts: [
      {
        coordinates: 'com.example:from-json:9.9.9',
        url: 'https://example.invalid/from-json-9.9.9.jar',
        sha256:
          '1111111111111111111111111111111111111111111111111111111111111111',
        deps: [],
      },
    ],
  })

  const FALLBACK_PROBE_STDOUT = [
    'jvm_import(',
    ' name = "com_example_from_regex",',
    ' jars = ["@maven//:from-regex-1.0.0.jar"],',
    ' maven_coordinates = "com.example:from-regex:1.0.0",',
    ' deps = [],',
    ')',
    '',
  ].join('\n')

  type Manifest = { artifacts: Record<string, unknown> }

  let tmp: string
  let originalEnv: string | undefined

  // Sets the env var to `value`, or removes it entirely when `value` is
  // undefined (assigning undefined would store the string "undefined").
  const applyEnv = (value: string | undefined): void => {
    if (value === undefined) {
      delete process.env[ENV_KEY]
      return
    }
    process.env[ENV_KEY] = value
  }

  // Runs the extractor against the temp workspace, asserts it succeeded, and
  // returns the parsed maven_install.json it wrote into `tmp`.
  const extractManifest = async (): Promise<Manifest> => {
    const result = await extractBazelToMaven({
      bazelFlags: undefined,
      bazelOutputBase: tmp,
      bazelRc: undefined,
      bin: undefined,
      cwd: tmp,
      out: tmp,
      verbose: false,
    })
    expect(result.ok).toBe(true)
    const manifest: Manifest = JSON.parse(
      readFileSync(path.join(tmp, 'maven_install.json'), 'utf8'),
    )
    return manifest
  }

  // The JSON parser ran: from-json coord is present, from-regex is absent.
  const expectFastPath = (manifest: Manifest): void => {
    expect(manifest.artifacts[JSON_ARTIFACT]).toBeDefined()
    expect(manifest.artifacts[REGEX_ARTIFACT]).toBeUndefined()
  }

  // The regex parser ran: from-regex coord is present, from-json is absent.
  const expectFallback = (manifest: Manifest): void => {
    expect(manifest.artifacts[REGEX_ARTIFACT]).toBeDefined()
    expect(manifest.artifacts[JSON_ARTIFACT]).toBeUndefined()
  }

  beforeEach(() => {
    tmp = mkdtempSync(path.join(os.tmpdir(), 'bazel-extract-fallback-'))
    // Place unsorted_deps.json under <bazelOutputBase>/external/maven/.
    // This is what bazelExternalDir resolves to when bazelOutputBase is set.
    const externalRepoDir = path.join(tmp, 'external', 'maven')
    mkdirSync(externalRepoDir, { recursive: true })
    writeFileSync(
      path.join(externalRepoDir, 'unsorted_deps.json'),
      FAST_PATH_JSON,
      'utf8',
    )
    vi.mocked(detectWorkspaceMode).mockReturnValue({
      bzlmod: true,
      workspace: false,
    })
    vi.mocked(discoverMavenRepos).mockResolvedValue(
      new Map([['maven', FALLBACK_PROBE_STDOUT]]),
    )
    // Remember the ambient value so afterEach can put it back.
    originalEnv = process.env[ENV_KEY]
    process.exitCode = 0
  })

  afterEach(() => {
    applyEnv(originalEnv)
    rmSync(tmp, { recursive: true, force: true })
    vi.resetAllMocks()
    process.exitCode = 0
  })

  it('uses the unsorted_deps.json fast path when the env var is unset', async () => {
    applyEnv(undefined)

    expectFastPath(await extractManifest())
  })

  it('skips the unsorted_deps.json fast path and uses the regex fallback when the env var is "1"', async () => {
    applyEnv('1')

    expectFallback(await extractManifest())
  })

  it.each([
    ['unset', undefined],
    ['empty string', ''],
    ['"0"', '0'],
    ['"false"', 'false'],
  ])('treats %s as falsy and uses the fast path', async (_label, value) => {
    applyEnv(value)

    expectFastPath(await extractManifest())
  })

  it.each([
    ['"1"', '1'],
    ['"true"', 'true'],
    ['"YES"', 'YES'],
  ])('treats %s as truthy and forces the fallback', async (_label, value) => {
    applyEnv(value)

    expectFallback(await extractManifest())
  })
})

0 commit comments

Comments
 (0)