From cb896ee335bd143bce080e7cb8150dc29c412967 Mon Sep 17 00:00:00 2001 From: Mateusz Charytoniuk Date: Thu, 21 May 2026 19:43:47 +0200 Subject: [PATCH 1/4] merge coverage across llvm-cov data[] entries to fix per-binary double-counting --- src/merge-segments-across-entries.mjs | 69 +++++++++++++ src/parse-llvm-cov-json.mjs | 103 +++++++++++++------ tests/fixtures/multi-binary-llvm-cov.json | 61 +++++++++++ tests/merge-segments-across-entries.test.mjs | 56 ++++++++++ tests/parse-llvm-cov-json.test.mjs | 23 +++++ 5 files changed, 279 insertions(+), 33 deletions(-) create mode 100644 src/merge-segments-across-entries.mjs create mode 100644 tests/fixtures/multi-binary-llvm-cov.json create mode 100644 tests/merge-segments-across-entries.test.mjs diff --git a/src/merge-segments-across-entries.mjs b/src/merge-segments-across-entries.mjs new file mode 100644 index 0000000..cf62d02 --- /dev/null +++ b/src/merge-segments-across-entries.mjs @@ -0,0 +1,69 @@ +// llvm-cov export emits one `data[]` entry per profiled binary. The static +// region table for a given source file is identical across entries, so a file +// touched by multiple binaries appears with the same segment coordinates but +// potentially different `count` and `hasCount` values. Aggregating segments by +// position lets `lineCoverageFromSegments` and `regionCoverageFromSegments` +// score the union of coverage across binaries without double-counting the +// denominator. +// +// llvm-cov segment tuple: [line, column, count, hasCount, isRegionEntry, isGapRegion] + +const LINE = 0; +const COLUMN = 1; +const COUNT = 2; +const HAS_COUNT = 3; +const IS_REGION_ENTRY = 4; +const IS_GAP_REGION = 5; + +/** + * @param {import("./line-coverage-from-segments.mjs").Segment} segment + * @returns {string} + */ +function segmentPositionKey(segment) { + return `${segment[LINE]}:${segment[COLUMN]}:${segment[IS_REGION_ENTRY]}:${segment[IS_GAP_REGION]}`; +} + +/** + * @param {import("./line-coverage-from-segments.mjs").Segment} left + * @param {import("./line-coverage-from-segments.mjs").Segment} right + * @returns {number} + */ +function compareSegmentsByPosition(left, right) { + if (left[LINE] !== right[LINE]) { + return left[LINE] - right[LINE]; + } + + return left[COLUMN] - right[COLUMN]; +} + +/** + * @param {import("./line-coverage-from-segments.mjs").Segment[][]} segmentArrays + * @returns {import("./line-coverage-from-segments.mjs").Segment[]} + */ +export function mergeSegmentsAcrossEntries(segmentArrays) { + /** @type {Map} */ + const mergedByPosition = new Map(); + + for (const segments of segmentArrays) { + for (const segment of segments) { + const key = segmentPositionKey(segment); + const existing = mergedByPosition.get(key); + + if (existing) { + existing[COUNT] = Math.max(existing[COUNT], segment[COUNT]); + existing[HAS_COUNT] = existing[HAS_COUNT] || segment[HAS_COUNT]; + } else { + mergedByPosition.set(key, [ + segment[LINE], + segment[COLUMN], + segment[COUNT], + segment[HAS_COUNT], + segment[IS_REGION_ENTRY], + segment[IS_GAP_REGION], + ]); + } + } + } + + return [...mergedByPosition.values()].sort(compareSegmentsByPosition); +} diff --git a/src/parse-llvm-cov-json.mjs b/src/parse-llvm-cov-json.mjs index 95932ec..99c013f 100644 --- a/src/parse-llvm-cov-json.mjs +++ b/src/parse-llvm-cov-json.mjs @@ -2,13 +2,17 @@ import { readFileSync } from "node:fs"; import { lineCoverageFromSegments } from "./line-coverage-from-segments.mjs"; import { mergeFunctionInstantiations } from "./merge-function-instantiations.mjs"; +import { mergeSegmentsAcrossEntries } from "./merge-segments-across-entries.mjs"; import { regionCoverageFromSegments } from "./region-coverage-from-segments.mjs"; import { workspaceRelativeCrate } from "./workspace-relative-crate.mjs"; // llvm-cov's per-file `summary` is computed per instantiation, so it undercounts // a crate that is compiled both as a unit-test binary and as a dependency. The -// `segments` array and the `functions` list, however, carry the merged counts -// across every instantiation, so coverage is derived from those instead. +// `segments` array and the `functions` list carry merged counts within a single +// `data[]` entry, but llvm-cov export emits one entry per profiled binary, so a +// file or function touched by multiple binaries appears in multiple entries. +// Coverage is therefore derived by aggregating segments and function entries +// across all data entries before computing per-crate stats. /** * @typedef {object} CrateStats @@ -72,6 +76,46 @@ function crateStatsFor(crateStats, crateName) { return fresh; } +/** + * @param {LlvmCovJson} data + * @returns {Map} + */ +function collectSegmentsByFilename(data) { + /** @type {Map} */ + const segmentsByFilename = new Map(); + + for (const dataEntry of data.data) { + for (const fileEntry of dataEntry.files) { + const existing = segmentsByFilename.get(fileEntry.filename); + + if (existing) { + existing.push(fileEntry.segments); + } else { + segmentsByFilename.set(fileEntry.filename, [fileEntry.segments]); + } + } + } + + return segmentsByFilename; +} + +/** + * @param {LlvmCovJson} data + * @returns {import("./merge-function-instantiations.mjs").FunctionEntry[]} + */ +function collectFunctionEntries(data) { + /** @type {import("./merge-function-instantiations.mjs").FunctionEntry[]} */ + const functionEntries = []; + + for (const dataEntry of data.data) { + for (const functionEntry of dataEntry.functions) { + functionEntries.push(functionEntry); + } + } + + return functionEntries; +} + /** * @param {string} jsonPath * @param {string} workspaceRoot @@ -83,45 +127,38 @@ export function parseLlvmCovJson(jsonPath, workspaceRoot) { /** @type {Map} */ const crateStats = new Map(); - for (const dataEntry of data.data) { - for (const fileEntry of dataEntry.files) { - const crateName = workspaceRelativeCrate( - fileEntry.filename, - workspaceRoot, - ); + for (const [filename, segmentArrays] of collectSegmentsByFilename(data)) { + const crateName = workspaceRelativeCrate(filename, workspaceRoot); - if (crateName === null) { - continue; - } + if (crateName === null) { + continue; + } - const stats = crateStatsFor(crateStats, crateName); + const mergedSegments = mergeSegmentsAcrossEntries(segmentArrays); + const stats = crateStatsFor(crateStats, crateName); - addCoverage( - stats.regions, - regionCoverageFromSegments(fileEntry.segments), - ); - addCoverage(stats.lines, lineCoverageFromSegments(fileEntry.segments)); - } + addCoverage(stats.regions, regionCoverageFromSegments(mergedSegments)); + addCoverage(stats.lines, lineCoverageFromSegments(mergedSegments)); + } - for (const mergedFunction of mergeFunctionInstantiations( - dataEntry.functions, - )) { - const crateName = workspaceRelativeCrate( - mergedFunction.filename, - workspaceRoot, - ); + for (const mergedFunction of mergeFunctionInstantiations( + collectFunctionEntries(data), + )) { + const crateName = workspaceRelativeCrate( + mergedFunction.filename, + workspaceRoot, + ); - if (crateName === null) { - continue; - } + if (crateName === null) { + continue; + } - const stats = crateStatsFor(crateStats, crateName); + const stats = crateStatsFor(crateStats, crateName); - stats.functions.count += 1; + stats.functions.count += 1; - if (mergedFunction.covered) { - stats.functions.covered += 1; - } + if (mergedFunction.covered) { + stats.functions.covered += 1; } } diff --git a/tests/fixtures/multi-binary-llvm-cov.json b/tests/fixtures/multi-binary-llvm-cov.json new file mode 100644 index 0000000..969b611 --- /dev/null +++ b/tests/fixtures/multi-binary-llvm-cov.json @@ -0,0 +1,61 @@ +{ + "data": [ + { + "files": [ + { + "filename": "/workspace/shared_crate/src/lib.rs", + "segments": [ + [10, 5, 3, true, true, false], + [12, 6, 0, false, false, false], + [20, 5, 0, true, true, false], + [22, 6, 0, false, false, false] + ] + } + ], + "functions": [ + { + "name": "_RNvCsbinary0_shared_crate_foo", + "count": 3, + "filenames": ["/workspace/shared_crate/src/lib.rs"], + "regions": [[10, 5, 12, 6, 3, 0, 0, 0]] + }, + { + "name": "_RNvCsbinary0_shared_crate_bar", + "count": 0, + "filenames": ["/workspace/shared_crate/src/lib.rs"], + "regions": [[20, 5, 22, 6, 0, 0, 0, 0]] + } + ] + }, + { + "files": [ + { + "filename": "/workspace/shared_crate/src/lib.rs", + "segments": [ + [10, 5, 0, true, true, false], + [12, 6, 0, false, false, false], + [20, 5, 0, true, true, false], + [22, 6, 0, false, false, false] + ] + } + ], + "functions": [ + { + "name": "_RNvCsbinary1_shared_crate_foo", + "count": 0, + "filenames": ["/workspace/shared_crate/src/lib.rs"], + "regions": [[10, 5, 12, 6, 0, 0, 0, 0]] + }, + { + "name": "_RNvCsbinary1_shared_crate_bar", + "count": 0, + "filenames": ["/workspace/shared_crate/src/lib.rs"], + "regions": [[20, 5, 22, 6, 0, 0, 0, 0]] + } + ] + } + ], + "type": "llvm.coverage.json.export", + "version": "3.0.1", + "cargo_llvm_cov": {} +} diff --git a/tests/merge-segments-across-entries.test.mjs b/tests/merge-segments-across-entries.test.mjs new file mode 100644 index 0000000..590ade9 --- /dev/null +++ b/tests/merge-segments-across-entries.test.mjs @@ -0,0 +1,56 @@ +import { strict as assert } from "node:assert"; +import { test } from "node:test"; + +import { mergeSegmentsAcrossEntries } from "../src/merge-segments-across-entries.mjs"; + +// llvm-cov segment tuple: [line, column, count, hasCount, isRegionEntry, isGapRegion] + +test("keeps the highest count when the same position appears in multiple entries", () => { + const merged = mergeSegmentsAcrossEntries([ + [[10, 5, 0, true, true, false]], + [[10, 5, 7, true, true, false]], + ]); + + assert.deepEqual(merged, [[10, 5, 7, true, true, false]]); +}); + +test("OR's hasCount across entries when one entry lacks a counted segment at the position", () => { + const merged = mergeSegmentsAcrossEntries([ + [[10, 5, 0, false, true, false]], + [[10, 5, 0, true, true, false]], + ]); + + assert.deepEqual(merged, [[10, 5, 0, true, true, false]]); +}); + +test("preserves distinct positions and returns them sorted by line then column", () => { + const merged = mergeSegmentsAcrossEntries([ + [ + [20, 1, 0, true, true, false], + [10, 5, 0, true, true, false], + ], + [[10, 9, 2, true, true, false]], + ]); + + assert.deepEqual(merged, [ + [10, 5, 0, true, true, false], + [10, 9, 2, true, true, false], + [20, 1, 0, true, true, false], + ]); +}); + +test("treats a gap-region segment as distinct from a non-gap segment at the same position", () => { + const merged = mergeSegmentsAcrossEntries([ + [[10, 5, 0, true, true, false]], + [[10, 5, 0, true, true, true]], + ]); + + assert.deepEqual(merged, [ + [10, 5, 0, true, true, false], + [10, 5, 0, true, true, true], + ]); +}); + +test("returns an empty array when no entries are provided", () => { + assert.deepEqual(mergeSegmentsAcrossEntries([]), []); +}); diff --git a/tests/parse-llvm-cov-json.test.mjs b/tests/parse-llvm-cov-json.test.mjs index 9a8578c..cbde381 100644 --- a/tests/parse-llvm-cov-json.test.mjs +++ b/tests/parse-llvm-cov-json.test.mjs @@ -17,6 +17,12 @@ const WINDOWS_FIXTURE_PATH = resolve( "windows-llvm-cov.json", ); +const MULTI_BINARY_FIXTURE_PATH = resolve( + dirname(fileURLToPath(import.meta.url)), + "fixtures", + "multi-binary-llvm-cov.json", +); + const WORKSPACE_ROOT = "/workspace"; test("groups files by their first path component", () => { @@ -59,3 +65,20 @@ test("groups files by crate for a Windows-style llvm-cov report", () => { assert.ok(crateStats.has("spiffe_svid_manager")); assert.ok(crateStats.has("spiffe_svid_manager_tests")); }); + +test("merges coverage across data entries when the same source file is reported by multiple test binaries", () => { + // The multi-binary fixture reports `shared_crate/src/lib.rs` twice: binary 0 + // executes `foo` (count > 0) while binary 1 only compiles it (count 0). Both + // binaries see `bar` as never executed. Without cross-entry merging, foo and + // bar would each be counted twice and the file's 2 regions / 6 lines would + // be doubled to 4 / 12. + const crateStats = parseLlvmCovJson( + MULTI_BINARY_FIXTURE_PATH, + WORKSPACE_ROOT, + ); + const sharedCrate = crateStats.get("shared_crate"); + + assert.deepEqual(sharedCrate.functions, { count: 2, covered: 1 }); + assert.deepEqual(sharedCrate.regions, { count: 2, covered: 1 }); + assert.deepEqual(sharedCrate.lines, { count: 6, covered: 3 }); +}); From 8009ff317c3a125ff2d80b3cab5987dd1bca6c35 Mon Sep 17 00:00:00 2001 From: Mateusz Charytoniuk Date: Thu, 21 May 2026 19:59:08 +0200 Subject: [PATCH 2/4] make segment merge order deterministic for tied positions --- src/merge-segments-across-entries.mjs | 10 +++++++++- tests/merge-segments-across-entries.test.mjs | 16 ++++++++++++++++ 2 files changed, 25 insertions(+), 1 deletion(-) diff --git a/src/merge-segments-across-entries.mjs b/src/merge-segments-across-entries.mjs index cf62d02..475ad40 100644 --- a/src/merge-segments-across-entries.mjs +++ b/src/merge-segments-across-entries.mjs @@ -33,7 +33,15 @@ function compareSegmentsByPosition(left, right) { return left[LINE] - right[LINE]; } - return left[COLUMN] - right[COLUMN]; + if (left[COLUMN] !== right[COLUMN]) { + return left[COLUMN] - right[COLUMN]; + } + + if (left[IS_REGION_ENTRY] !== right[IS_REGION_ENTRY]) { + return Number(left[IS_REGION_ENTRY]) - Number(right[IS_REGION_ENTRY]); + } + + return Number(left[IS_GAP_REGION]) - Number(right[IS_GAP_REGION]); } /** diff --git a/tests/merge-segments-across-entries.test.mjs b/tests/merge-segments-across-entries.test.mjs index 590ade9..1169780 100644 --- a/tests/merge-segments-across-entries.test.mjs +++ b/tests/merge-segments-across-entries.test.mjs @@ -54,3 +54,19 @@ test("treats a gap-region segment as distinct from a non-gap segment at the same test("returns an empty array when no entries are provided", () => { assert.deepEqual(mergeSegmentsAcrossEntries([]), []); }); + +test("produces identical output regardless of the order of data[] entries with distinct-flag segments at the same line and column", () => { + const regionEntrySegment = [10, 5, 3, true, true, false]; + const nonRegionSegment = [10, 5, 0, true, false, false]; + + const forward = mergeSegmentsAcrossEntries([ + [regionEntrySegment], + [nonRegionSegment], + ]); + const reverse = mergeSegmentsAcrossEntries([ + [nonRegionSegment], + [regionEntrySegment], + ]); + + assert.deepEqual(forward, reverse); +}); From aae83fd0789154eb5f8685099bf712a9b1eb854c Mon Sep 17 00:00:00 2001 From: Mateusz Charytoniuk Date: Thu, 21 May 2026 20:13:16 +0200 Subject: [PATCH 3/4] sort region-entry segments first to preserve skipped-region semantics --- src/merge-segments-across-entries.mjs | 2 +- tests/merge-segments-across-entries.test.mjs | 12 ++++++++++++ 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/src/merge-segments-across-entries.mjs b/src/merge-segments-across-entries.mjs index 475ad40..892ceff 100644 --- a/src/merge-segments-across-entries.mjs +++ b/src/merge-segments-across-entries.mjs @@ -38,7 +38,7 @@ function compareSegmentsByPosition(left, right) { } if (left[IS_REGION_ENTRY] !== right[IS_REGION_ENTRY]) { - return Number(left[IS_REGION_ENTRY]) - Number(right[IS_REGION_ENTRY]); + return Number(right[IS_REGION_ENTRY]) - Number(left[IS_REGION_ENTRY]); } return Number(left[IS_GAP_REGION]) - Number(right[IS_GAP_REGION]); diff --git a/tests/merge-segments-across-entries.test.mjs b/tests/merge-segments-across-entries.test.mjs index 1169780..1476aba 100644 --- a/tests/merge-segments-across-entries.test.mjs +++ b/tests/merge-segments-across-entries.test.mjs @@ -55,6 +55,18 @@ test("returns an empty array when no entries are provided", () => { assert.deepEqual(mergeSegmentsAcrossEntries([]), []); }); +test("orders region-entry segments before non-region segments at the same position so startsSkippedRegion sees the structural marker first", () => { + const skippedRegionMarker = [10, 5, 0, false, true, false]; + const nonRegionSegment = [10, 5, 5, true, false, false]; + + const merged = mergeSegmentsAcrossEntries([ + [nonRegionSegment], + [skippedRegionMarker], + ]); + + assert.deepEqual(merged, [skippedRegionMarker, nonRegionSegment]); +}); + test("produces identical output regardless of the order of data[] entries with distinct-flag segments at the same line and column", () => { const regionEntrySegment = [10, 5, 3, true, true, false]; const nonRegionSegment = [10, 5, 0, true, false, false]; From 53133b6fb9ea2d5d5049131ba42ebc022ea1d538 Mon Sep 17 00:00:00 2001 From: Mateusz Charytoniuk Date: Fri, 22 May 2026 16:23:55 +0200 Subject: [PATCH 4/4] fix test name grammar --- tests/merge-segments-across-entries.test.mjs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/merge-segments-across-entries.test.mjs b/tests/merge-segments-across-entries.test.mjs index 1476aba..2162845 100644 --- a/tests/merge-segments-across-entries.test.mjs +++ b/tests/merge-segments-across-entries.test.mjs @@ -14,7 +14,7 @@ test("keeps the highest count when the same position appears in multiple entries assert.deepEqual(merged, [[10, 5, 7, true, true, false]]); }); -test("OR's hasCount across entries when one entry lacks a counted segment at the position", () => { +test("ORs hasCount across entries when one entry lacks a counted segment at the position", () => { const merged = mergeSegmentsAcrossEntries([ [[10, 5, 0, false, true, false]], [[10, 5, 0, true, true, false]],