From 015ac928e4d871ac4befda40638871ae8509ecc0 Mon Sep 17 00:00:00 2001 From: Spencer Bryngelson Date: Thu, 26 Feb 2026 20:35:13 -0500 Subject: [PATCH 1/2] Replace regex dependency parser with gcov line-level coverage pruning Replaces the manually-maintained test_mapping.json and regex-based dependency_parser.py with a fully automated approach using gfortran gcov instrumentation. Key changes: - Add toolchain/mfc/test/coverage.py: build coverage cache by running all tests with a --gcov build, recording which .fpp line numbers each test executes; filter tests by intersecting git diff -U0 line ranges against per-test coverage data - Add --build-coverage-cache flag to ./mfc.sh test - Replace --only-changes implementation in test.py with coverage-based filter - Add --line-marker-format=gfortran5 to CMakeLists.txt Fypp invocation so gcov correctly maps coverage to .fpp line numbers (not .f90 line numbers) - Delete dependency_parser.py (530 lines) and test_mapping.json (148 lines) - Add 43 unit tests covering all corner cases: exact line hits, no-overlap, large-module line granularity, GPU #ifdef blocks, stale cache, new files Workflow: ./mfc.sh build --gcov -j 8 # one-time: build with coverage ./mfc.sh test --build-coverage-cache # one-time: populate cache (~2-3x time) ./mfc.sh test --only-changes -j 8 # fast: only tests covering changed lines Co-Authored-By: Claude Sonnet 4.6 --- .gitignore | 3 + CMakeLists.txt | 1 + COVERAGE_PLAN.md | 784 +++++++++++++++++++++++ toolchain/mfc/cli/commands.py | 25 + toolchain/mfc/test/coverage.py | 416 ++++++++++++ toolchain/mfc/test/test.py | 53 +- toolchain/mfc/test/test_coverage_unit.py | 570 ++++++++++++++++ 7 files changed, 1851 insertions(+), 1 deletion(-) create mode 100644 COVERAGE_PLAN.md create mode 100644 toolchain/mfc/test/coverage.py create mode 100644 toolchain/mfc/test/test_coverage_unit.py diff --git a/.gitignore b/.gitignore index e80d14a6f9..909f2d2d9b 100644 --- a/.gitignore +++ b/.gitignore @@ -22,6 +22,9 @@ 
__pycache__ # Auto-generated version file toolchain/mfc/_version.py +# Generated coverage cache (rebuild with: ./mfc.sh build --gcov -j 8 && ./mfc.sh test --build-coverage-cache) +toolchain/mfc/test/test_coverage_cache.json + # Auto-generated toolchain files (regenerate with: ./mfc.sh generate) toolchain/completions/mfc.bash toolchain/completions/_mfc diff --git a/CMakeLists.txt b/CMakeLists.txt index 9101d032fc..9cea0d7425 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -381,6 +381,7 @@ macro(HANDLE_SOURCES target useCommon) --no-folding --line-length=999 --line-numbering-mode=nocontlines + --line-marker-format=gfortran5 "${fpp}" "${f90}" DEPENDS "${fpp};${${target}_incs}" COMMENT "Preprocessing (Fypp) ${fpp_filename}" diff --git a/COVERAGE_PLAN.md b/COVERAGE_PLAN.md new file mode 100644 index 0000000000..0082345f7d --- /dev/null +++ b/COVERAGE_PLAN.md @@ -0,0 +1,784 @@ +# MFC Test Pruning: Line-Level gcov Coverage Implementation + +> **Handoff document.** Start a new Claude Code session on a Linux CI-like machine (gfortran, not Apple Silicon) and say: "Implement the plan in COVERAGE_PLAN.md". This file contains everything needed. + +--- + +## What We're Building and Why + +The `test-pruning` branch adds `--only-changes` to `./mfc.sh test` to skip unaffected tests on PRs. The current implementation uses a manually-maintained `test_mapping.json` (file → label → test) and a regex Fortran dependency parser. It has critical gaps that make it unreliable: + +- `#include` macro files (`parallel_macros.fpp` etc.) are invisible to the parser — changing them runs 0 tests +- 28 source files have no entry in `test_mapping.json` +- The `"Post Process"` label matches 0 actual test traces +- File-level granularity: changing 5 lines in 5,257-line `m_riemann_solvers.fpp` triggers ~300 tests + +**The replacement:** Line-level gcov coverage. Build MFC once with `gfortran --coverage`, run all tests individually, record which `.fpp` line numbers each test executes, cache it. 
On a PR, intersect `git diff -U0` line ranges against each test's covered lines — only tests with overlapping lines run. + +This is completely automated (no JSON to maintain), correct by construction (the compiler is the oracle), and line-granular (change 5 lines in a 5000-line file → only tests covering those 5 lines run). + +--- + +## Repository Context + +- Repo: MFC (Multi-component Flow Code) — exascale CFD solver in Fortran + Fypp +- Branch to modify: `test-pruning` +- Working directory: repo root +- 547 regression tests, generated programmatically in `toolchain/mfc/test/cases.py` +- Build system: CMake + Fypp preprocessor + `./mfc.sh` Python CLI wrapper +- Three executables: `pre_process`, `simulation`, `post_process` + +--- + +## What Already Exists (do not recreate) + +| What | Where | Notes | +|---|---|---| +| `MFC_GCov` CMake option | `CMakeLists.txt` lines 121–141 | Adds `-fprofile-arcs -ftest-coverage -O1` to all Fortran targets | +| `--gcov` CLI flag | `toolchain/mfc/build.py` line 430 | Maps to `-DMFC_GCov=ON` | +| `gcov: bool` field | `toolchain/mfc/state.py` | In `MFCConfig` dataclass, default `False` | +| Fypp `--line-numbering` | `CMakeLists.txt` lines 380–383 | Already emits line markers in `.f90` output | +| `--only-changes` arg | `toolchain/mfc/cli/commands.py` | Keep this arg, replace its implementation | +| `--changes-branch` arg | `toolchain/mfc/cli/commands.py` | Keep this arg | + +--- + +## Files to Create + +### `toolchain/mfc/test/coverage.py` (new, ~250 lines) + +The entire coverage system lives here. Implement these functions in order: + +#### `find_gcov_binary(root_dir: str) -> str` +``` +1. Run subprocess: gfortran --version, capture stdout +2. Regex: parse major version number (e.g., "GNU Fortran ... 15.2.0" → 15) +3. Try shutil.which("gcov-{major}") first, then shutil.which("gcov") +4. For each candidate: run `{candidate} --version`, check output contains "GCC" not "Apple LLVM" +5. Return first valid GNU gcov binary path +6. 
If none found: raise MFCException with message: + "GNU gcov not found. On Homebrew macOS: brew install gcc. On Linux: install gcc package." +``` + +#### `find_gcno_files(root_dir: str) -> list[Path]` +``` +1. Walk Path(root_dir) / "build" recursively +2. Collect all files matching "*.gcno" +3. Exclude any path containing "venv" +4. If empty list: raise MFCException: + "No .gcno files found. Build with --gcov first:\n ./mfc.sh build --gcov -j 8" +5. Return list of Path objects +``` + +#### `zero_gcda_files(root_dir: str) -> None` +``` +1. Walk Path(root_dir) / "build" recursively +2. Delete all "*.gcda" files (excluding venv paths) +3. Called before each test run during cache building +``` + +#### `collect_coverage_for_test(gcno_files: list, root_dir: str, gcov_binary: str) -> dict[str, list[int]]` +``` +1. For each gcno_file in gcno_files: + a. Run: subprocess([gcov_binary, "--json-format", "--stdout", str(gcno_file)], + capture_output=True, cwd=root_dir) + b. If returncode != 0 or stdout empty: continue (no .gcda = test didn't touch this translation unit) + c. Decompress: data = json.loads(gzip.decompress(proc.stdout)) + d. For each entry in data["files"]: + - file_path = entry["file"] + - If not file_path.endswith(".fpp"): skip (only want .fpp, not .f90 or .h) + - rel_path = os.path.relpath(file_path, root_dir) + - covered_lines = [ln["line_number"] for ln in entry["lines"] if ln["count"] > 0] + - Merge into result dict: result[rel_path].update(covered_lines) +2. Return {rel_fpp_path: sorted(list(lines_set))} +``` + +#### `build_coverage_cache(root_dir: str, cases: list) -> None` +``` +1. gcov_bin = find_gcov_binary(root_dir) +2. gcno_files = find_gcno_files(root_dir) +3. cons.print(f"Building coverage cache for {len(cases)} tests (sequential)...") +4. cons.print("Note: tests run sequentially to isolate per-test coverage. This takes ~2-3x normal time.") + +5. cache = {} +6. For i, case in enumerate(cases): + a. zero_gcda_files(root_dir) + b. 
case.run([PRE_PROCESS, SIMULATION], gpus=set()) + - Catch ALL exceptions and continue — we need .gcda data even if test fails + c. coverage = collect_coverage_for_test(gcno_files, root_dir, gcov_bin) + d. cache[case.get_uuid()] = coverage + e. total_lines = sum(len(v) for v in coverage.values()) + f. cons.print(f" [{i+1}/{len(cases)}] {case.get_uuid()}: {len(coverage)} files, {total_lines} lines") + +7. import hashlib, datetime + cases_py_path = Path(root_dir) / "toolchain/mfc/test/cases.py" + cases_hash = hashlib.sha256(cases_py_path.read_bytes()).hexdigest() + +8. cache["_meta"] = { + "created": datetime.datetime.utcnow().isoformat(), + "cases_hash": cases_hash, + "gcov_version": get_gcov_version(gcov_bin), # parse from --version output + } + +9. cache_path = Path(root_dir) / "toolchain/mfc/test/test_coverage_cache.json" + with open(cache_path, "w") as f: + json.dump(cache, f, indent=2) + cons.print(f"Coverage cache written to {cache_path}") + cons.print(f"Cache has {len(cases)} test entries.") +``` + +#### `load_coverage_cache(root_dir: str) -> dict | None` +``` +1. cache_path = Path(root_dir) / "toolchain/mfc/test/test_coverage_cache.json" +2. If not cache_path.exists(): return None + +3. with open(cache_path) as f: cache = json.load(f) + +4. Check staleness: + cases_py = Path(root_dir) / "toolchain/mfc/test/cases.py" + current_hash = hashlib.sha256(cases_py.read_bytes()).hexdigest() + stored_hash = cache.get("_meta", {}).get("cases_hash", "") + if current_hash != stored_hash: + cons.print("[yellow]Warning: Coverage cache is stale (cases.py changed).[/yellow]") + cons.print("[yellow]Rebuild with: ./mfc.sh test --build-coverage-cache[/yellow]") + return None + +5. Return cache +``` + +#### `get_changed_line_ranges(root_dir: str, compare_branch: str = "master") -> dict[str, list[tuple[int,int]]]` +``` +1. 
Get merge base: + result = subprocess.run( + ["git", "merge-base", compare_branch, "HEAD"], + capture_output=True, text=True, cwd=root_dir + ) + if result.returncode != 0: return {} + merge_base = result.stdout.strip() + +2. Get diff: + result = subprocess.run( + ["git", "diff", merge_base, "HEAD", "-U0", "--no-color"], + capture_output=True, text=True, cwd=root_dir + ) + +3. Parse output: + changed = defaultdict(list) + current_file = None + file_re = re.compile(r'^\+\+\+ b/(.+\.fpp)$') + hunk_re = re.compile(r'^@@ -\d+(?:,\d+)? \+(\d+)(?:,(\d+))? @@') + + for line in result.stdout.splitlines(): + m = file_re.match(line) + if m: + current_file = m.group(1) # e.g., "src/simulation/m_rhs.fpp" + continue + m = hunk_re.match(line) + if m and current_file: + start = int(m.group(1)) + count = int(m.group(2)) if m.group(2) is not None else 1 + if count > 0: # skip pure deletions (count == 0) + changed[current_file].append((start, start + count - 1)) + +4. Return dict(changed) + # Note: only .fpp files, only additions/modifications (not pure deletions) + # Note: uses merge-base, NOT master tip — correct for PRs where master has advanced +``` + +#### `should_run_all_tests(changed_rel_paths: set) -> bool` +```python +ALWAYS_RUN_ALL = frozenset([ + "src/common/include/parallel_macros.fpp", + "src/common/include/acc_macros.fpp", + "src/common/include/omp_macros.fpp", + "src/common/include/shared_parallel_macros.fpp", + "src/common/include/macros.fpp", + "toolchain/mfc/test/cases.py", + "toolchain/mfc/test/case.py", + "toolchain/mfc/params/definitions.py", +]) + +def should_run_all_tests(changed_rel_paths: set) -> bool: + return bool(changed_rel_paths & ALWAYS_RUN_ALL) +``` + +Rationale: CPU coverage can't detect GPU directive changes (macro files). Toolchain changes may add/remove tests or change test behavior. In both cases, run everything. 
+ +#### `filter_tests_by_coverage(cases, coverage_cache, changed_ranges) -> tuple[list, list]` +``` +to_run = [] +skipped = [] +new_file_warnings = set() + +for case in cases: + uuid = case.get_uuid() + test_cov = coverage_cache.get(uuid) + + if test_cov is None: + # Test not in cache (newly added) → conservative: include it + to_run.append(case) + continue + + matched = False + for fpp_file, ranges in changed_ranges.items(): + covered_lines = set(test_cov.get(fpp_file, [])) + if not covered_lines: + # Check if the file has no coverage in ANY test (truly new file) + if not any(fpp_file in tc for tc in coverage_cache.values() if isinstance(tc, dict)): + new_file_warnings.add(fpp_file) + continue + for (start, end) in ranges: + if covered_lines & set(range(start, end + 1)): + matched = True + break + if matched: + break + + if matched: + to_run.append(case) + else: + skipped.append(case) + +for f in new_file_warnings: + cons.print(f"[yellow]Warning: {f} has no coverage data in cache.[/yellow]") + cons.print(f"[yellow]Consider rebuilding: ./mfc.sh test --build-coverage-cache[/yellow]") + +return to_run, skipped +``` + +--- + +## Files to Modify + +### 1. `CMakeLists.txt` — add `--line-marker-format=gfortran5` + +Find the Fypp `add_custom_command` block (around line 368). Currently ends with: +```cmake + --line-numbering-mode=nocontlines + "${fpp}" "${f90}" +``` + +Change to: +```cmake + --line-numbering-mode=nocontlines + --line-marker-format=gfortran5 + "${fpp}" "${f90}" +``` + +**Why:** Fypp defaults to `std` format (`# line N "file"`). gfortran needs `gfortran5` format (`# N "file"`) to correctly map coverage data back to `.fpp` file coordinates. Without this, gcov may report coverage against `.f90` line numbers instead of `.fpp` line numbers. + +### 2. `toolchain/mfc/cli/commands.py` — add `--build-coverage-cache` + +Find `TEST_COMMAND` definition. 
After the `--dry-run` argument (around line 452), add: + +```python + Argument( + name="build-coverage-cache", + help="Run all tests sequentially with gcov instrumentation to build the line-level coverage cache. Requires a prior --gcov build.", + action=ArgAction.STORE_TRUE, + default=False, + dest="build_coverage_cache", + ), +``` + +In the `examples=` list, add: +```python + Example("./mfc.sh build --gcov -j 8 && ./mfc.sh test --build-coverage-cache", "One-time: build line-coverage cache"), + Example("./mfc.sh test --only-changes -j 4", "Run tests affected by changed lines"), +``` + +In `key_options=`, add: +```python + ("--build-coverage-cache", "Build line-level gcov coverage cache (one-time)"), + ("--only-changes", "Run tests affected by git changes"), +``` + +### 3. `toolchain/mfc/test/test.py` — replace old implementation + +**Remove line 18:** +```python +from .dependency_parser import analyze_changes, any_label_matches_trace_parts +``` + +**Remove lines 20–21:** +```python +# Cached dependency analysis results (computed once at startup if --only-changes) +_dependency_analysis_cache = None +``` + +**In `test()` function, after the `remove_old_tests` block (after line ~191), add:** +```python + if ARG("build_coverage_cache"): + from .coverage import build_coverage_cache + all_cases = [b.to_case() for b in list_cases()] + build_coverage_cache(common.MFC_ROOT_DIR, all_cases) + return +``` + +**Replace the entire `--only-changes` block in `__filter()` (lines 82–138):** + +Remove everything from `# --only-changes: filter based on Fortran dependency analysis` through the closing `cons.print()` at line 138. 
+ +Replace with: + +```python + # --only-changes: filter based on line-level gcov coverage + if ARG("only_changes"): + from .coverage import (load_coverage_cache, get_changed_line_ranges, + should_run_all_tests, filter_tests_by_coverage) + + cache = load_coverage_cache(common.MFC_ROOT_DIR) + if cache is None: + cons.print("[yellow]Coverage cache missing or stale.[/yellow]") + cons.print("[yellow]Run: ./mfc.sh build --gcov -j 8 && ./mfc.sh test --build-coverage-cache[/yellow]") + cons.print("[yellow]Falling back to full test suite.[/yellow]") + # cache is None → fall through, run all tests + else: + changed_ranges = get_changed_line_ranges(common.MFC_ROOT_DIR, ARG("changes_branch")) + changed_paths = set(changed_ranges.keys()) + + cons.print() + cons.print("[bold cyan]Line-Coverage Change Analysis[/bold cyan]") + cons.print("-" * 50) + + if should_run_all_tests(changed_paths): + cons.print("[yellow]Infrastructure or macro file changed — running full test suite.[/yellow]") + cons.print("-" * 50) + # fall through — don't filter cases + elif not changed_ranges: + cons.print("[green]No .fpp source changes detected — skipping all tests.[/green]") + cons.print("-" * 50) + cons.print() + skipped_cases += cases + cases = [] + else: + for fpp_file, ranges in sorted(changed_ranges.items()): + total_lines = sum(e - s + 1 for s, e in ranges) + cons.print(f" [green]*[/green] {fpp_file} ({total_lines} line{'s' if total_lines != 1 else ''} changed)") + + cases, new_skipped = filter_tests_by_coverage(cases, cache, changed_ranges) + skipped_cases += new_skipped + cons.print(f"\n[bold]Tests to run: {len(cases)} / {len(cases) + len(new_skipped)}[/bold]") + cons.print("-" * 50) + cons.print() +``` + +**Update the pylint disable comment** on `__filter` to keep existing suppressions. 
+ +--- + +## Files to Delete + +```bash +git rm toolchain/mfc/test/dependency_parser.py +git rm toolchain/mfc/test/test_mapping.json +``` + +--- + +## Files to Add to `.gitignore` + +Add this line to `.gitignore` (the cache is generated locally, not committed): +``` +toolchain/mfc/test/test_coverage_cache.json +``` + +--- + +## New File: `toolchain/mfc/test/test_coverage_unit.py` + +Create this file to hold all unit tests. Run with: +```bash +python3 -m pytest toolchain/mfc/test/test_coverage_unit.py -v +``` + +### Group 1: `get_changed_line_ranges()` — git diff parsing + +Test by providing mock diff output strings directly to the parsing logic (extract the parsing into a helper `_parse_diff_output(diff_text, root_dir)` that `get_changed_line_ranges` calls after running git). + +```python +def test_parse_basic_modification(): + diff = textwrap.dedent("""\ + diff --git a/src/simulation/m_rhs.fpp b/src/simulation/m_rhs.fpp + --- a/src/simulation/m_rhs.fpp + +++ b/src/simulation/m_rhs.fpp + @@ -45,3 +45,5 @@ + + ! new line 1 + + ! new line 2 + """) + result = _parse_diff_output(diff, "/repo") + assert result == {"src/simulation/m_rhs.fpp": [(45, 49)]} + +def test_parse_multiple_hunks_same_file(): + # Two separate hunks in one file + # Expected: both ranges captured as separate tuples + +def test_parse_pure_deletion(): + # @@ -45,3 +45,0 @@ — 3 lines deleted, 0 added + # count=0 → NOT in result (nothing to cover) + diff = "... @@ -45,3 +45,0 @@ ..." 
+ result = _parse_diff_output(diff, "/repo") + assert result == {} + +def test_parse_new_file(): + # --- /dev/null +++ b/src/simulation/m_new.fpp + # @@ -0,0 +1,50 @@ + result = _parse_diff_output(diff, "/repo") + assert result == {"src/simulation/m_new.fpp": [(1, 50)]} + +def test_parse_ignores_non_fpp(): + # Changes to .py, .json, .cmake, .md + result = _parse_diff_output(diff_with_only_non_fpp, "/repo") + assert result == {} + +def test_parse_single_line_hunk(): + # @@ -45 +45 @@ (no count = means 1 line) + result = _parse_diff_output(diff, "/repo") + assert result == {"src/simulation/m_rhs.fpp": [(45, 45)]} + +def test_parse_empty_diff(): + assert _parse_diff_output("", "/repo") == {} +``` + +### Group 2: `should_run_all_tests()` — always-run detection + +```python +def test_parallel_macros_triggers_all(): + assert should_run_all_tests({"src/common/include/parallel_macros.fpp"}) is True + +def test_acc_macros_triggers_all(): + assert should_run_all_tests({"src/common/include/acc_macros.fpp"}) is True + +def test_cases_py_triggers_all(): + assert should_run_all_tests({"toolchain/mfc/test/cases.py"}) is True + +def test_definitions_py_triggers_all(): + assert should_run_all_tests({"toolchain/mfc/params/definitions.py"}) is True + +def test_simulation_module_does_not_trigger_all(): + assert should_run_all_tests({"src/simulation/m_rhs.fpp"}) is False + +def test_empty_set_does_not_trigger_all(): + assert should_run_all_tests(set()) is False + +def test_mixed_one_trigger_fires_all(): + # One normal + one always-run → True + assert should_run_all_tests({ + "src/simulation/m_rhs.fpp", + "src/common/include/macros.fpp" + }) is True +``` + +### Group 3: `filter_tests_by_coverage()` — core logic + +These are the most important tests. Use mock TestCase objects with a known `get_uuid()`. 
+ +```python +# Helper: create a minimal fake case with a given uuid +class FakeCase: + def __init__(self, uuid): self._uuid = uuid + def get_uuid(self): return self._uuid + +def test_exact_line_hit(): + cache = {"AAAA0001": {"src/simulation/m_rhs.fpp": [45, 46, 47]}} + changed = {"src/simulation/m_rhs.fpp": [(46, 46)]} + cases = [FakeCase("AAAA0001")] + to_run, skipped = filter_tests_by_coverage(cases, cache, changed) + assert len(to_run) == 1 and len(skipped) == 0 + +def test_no_line_overlap(): + # *** KEY TEST: this is what makes line-level better than file-level *** + cache = {"AAAA0001": {"src/simulation/m_rhs.fpp": [45, 46, 47]}} + changed = {"src/simulation/m_rhs.fpp": [(500, 510)]} + cases = [FakeCase("AAAA0001")] + to_run, skipped = filter_tests_by_coverage(cases, cache, changed) + assert len(to_run) == 0 and len(skipped) == 1 + +def test_range_boundary_inclusive_start(): + cache = {"AAAA0001": {"src/simulation/m_rhs.fpp": [100]}} + changed = {"src/simulation/m_rhs.fpp": [(100, 105)]} + to_run, _ = filter_tests_by_coverage([FakeCase("AAAA0001")], cache, changed) + assert len(to_run) == 1 + +def test_range_boundary_inclusive_end(): + cache = {"AAAA0001": {"src/simulation/m_rhs.fpp": [105]}} + changed = {"src/simulation/m_rhs.fpp": [(100, 105)]} + to_run, _ = filter_tests_by_coverage([FakeCase("AAAA0001")], cache, changed) + assert len(to_run) == 1 + +def test_range_just_outside(): + cache = {"AAAA0001": {"src/simulation/m_rhs.fpp": [99, 106]}} + changed = {"src/simulation/m_rhs.fpp": [(100, 105)]} + to_run, skipped = filter_tests_by_coverage([FakeCase("AAAA0001")], cache, changed) + assert len(to_run) == 0 and len(skipped) == 1 # 99 and 106 are outside [100,105] + +def test_uuid_not_in_cache_is_conservative(): + # New test not in cache → include it (conservative) + cache = {} + changed = {"src/simulation/m_rhs.fpp": [(45, 45)]} + to_run, _ = filter_tests_by_coverage([FakeCase("NEWTEST1")], cache, changed) + assert len(to_run) == 1 + +def 
test_changed_file_not_in_test_coverage(): + cache = {"AAAA0001": {"src/simulation/m_rhs.fpp": [45, 46]}} + changed = {"src/simulation/m_igr.fpp": [(100, 110)]} # different file + to_run, skipped = filter_tests_by_coverage([FakeCase("AAAA0001")], cache, changed) + assert len(to_run) == 0 and len(skipped) == 1 + +def test_large_module_line_granularity(): + # *** KEY CORNER CASE from design discussion *** + # m_riemann_solvers.fpp is 5257 lines. + # test_hllc covers lines 800-810, test_roe covers lines 2000+ + # Change at line 800 → only hllc test runs + cache = { + "HLLC_TST": {"src/simulation/m_riemann_solvers.fpp": [798, 799, 800, 801, 810]}, + "ROE_TEST": {"src/simulation/m_riemann_solvers.fpp": [2000, 2100, 2500]}, + } + changed = {"src/simulation/m_riemann_solvers.fpp": [(800, 810)]} + cases = [FakeCase("HLLC_TST"), FakeCase("ROE_TEST")] + to_run, skipped = filter_tests_by_coverage(cases, cache, changed) + uuids_run = {c.get_uuid() for c in to_run} + assert "HLLC_TST" in uuids_run + assert "ROE_TEST" not in uuids_run + +def test_test_covering_multiple_files_matched_via_second(): + cache = {"AAAA0001": { + "src/simulation/m_rhs.fpp": [45], + "src/simulation/m_weno.fpp": [100], + }} + changed = {"src/simulation/m_weno.fpp": [(100, 100)]} + to_run, _ = filter_tests_by_coverage([FakeCase("AAAA0001")], cache, changed) + assert len(to_run) == 1 + +def test_no_changed_ranges_runs_nothing(): + cache = {"AAAA0001": {"src/simulation/m_rhs.fpp": [45, 46]}} + changed = {} + to_run, skipped = filter_tests_by_coverage([FakeCase("AAAA0001")], cache, changed) + assert len(to_run) == 0 and len(skipped) == 1 + +def test_multiple_tests_correct_partial_selection(): + cache = { + "TEST_A": {"src/simulation/m_rhs.fpp": [45, 46, 47]}, + "TEST_B": {"src/simulation/m_weno.fpp": [100, 200]}, + "TEST_C": {"src/simulation/m_rhs.fpp": [1000, 2000]}, + } + changed = {"src/simulation/m_rhs.fpp": [(46, 46)]} + cases = [FakeCase("TEST_A"), FakeCase("TEST_B"), FakeCase("TEST_C")] + to_run, 
skipped = filter_tests_by_coverage(cases, cache, changed) + uuids_run = {c.get_uuid() for c in to_run} + assert uuids_run == {"TEST_A"} # only TEST_A covers line 46 +``` + +### Group 4: Corner cases from design discussion + +```python +def test_gpu_ifdef_lines_not_in_cpu_coverage(): + # m_muscl.fpp has `use openacc` inside #ifdef MFC_OpenACC at lines 16-18. + # CPU build skips those lines. Changing line 17 → NOT in any test's coverage. + # → 0 tests triggered. This is CORRECT: GPU-only lines need GPU tests or macro-file handling. + cache = {"MUSCL_T": {"src/simulation/m_muscl.fpp": [20, 21, 22, 100, 200]}} + changed = {"src/simulation/m_muscl.fpp": [(16, 18)]} # the #ifdef block + to_run, skipped = filter_tests_by_coverage([FakeCase("MUSCL_T")], cache, changed) + # Lines 16-18 not in CPU coverage [20,21,22,100,200] → test skipped + assert len(to_run) == 0 and len(skipped) == 1 + # Note: if parallel_macros.fpp changed, should_run_all_tests() catches it separately + +def test_gpu_cpu_code_in_same_file_triggers_correctly(): + # But if we change actual CPU code in m_muscl.fpp (line 100), test runs + cache = {"MUSCL_T": {"src/simulation/m_muscl.fpp": [20, 21, 22, 100, 200]}} + changed = {"src/simulation/m_muscl.fpp": [(100, 100)]} + to_run, _ = filter_tests_by_coverage([FakeCase("MUSCL_T")], cache, changed) + assert len(to_run) == 1 + +def test_stale_cache_detected_by_hash(): + # load_coverage_cache() returns None when cases.py hash doesn't match + import tempfile, json, hashlib + with tempfile.NamedTemporaryFile(mode='w', suffix='.json', delete=False) as f: + json.dump({ + "_meta": {"cases_hash": "wrong_hash_abc123"}, + "AAAA0001": {} + }, f) + cache_path = f.name + # Patch the cache path and call load_coverage_cache + # Verify it returns None (stale) + ... + +def test_new_file_with_no_coverage_warns(): + # A brand new .fpp file in git diff has no coverage in cache. + # filter_tests_by_coverage should warn but not crash. 
+ cache = {"AAAA0001": {"src/simulation/m_rhs.fpp": [45]}} + changed = {"src/simulation/m_brand_new.fpp": [(1, 100)]} + # m_brand_new.fpp not in any test's coverage + # Result: test skipped, warning printed + to_run, skipped = filter_tests_by_coverage([FakeCase("AAAA0001")], cache, changed) + assert len(to_run) == 0 # no test covers the new file +``` + +### Group 5: `collect_coverage_for_test()` — gcov JSON parsing + +```python +def test_parses_gcov_json_correctly(): + import gzip, json + # Build mock gcov JSON (format version 2) + gcov_data = { + "format_version": "2", + "gcc_version": "15.2.0", + "files": [ + { + "file": "/repo/src/simulation/m_rhs.fpp", + "lines": [ + {"line_number": 45, "count": 3}, + {"line_number": 46, "count": 0}, # not covered + {"line_number": 47, "count": 1}, + ] + }, + { + "file": "/repo/build/fypp/simulation/m_rhs.fpp.f90", # .f90, should be skipped + "lines": [{"line_number": 10, "count": 5}] + } + ] + } + compressed = gzip.compress(json.dumps(gcov_data).encode()) + # Mock subprocess to return this compressed data + # Call collect_coverage_for_test with mocked gcov + # Verify: only m_rhs.fpp (the .fpp file) appears, lines 45 and 47 (count>0) + result = _parse_gcov_json_output(compressed, "/repo") + assert result == {"src/simulation/m_rhs.fpp": [45, 47]} + +def test_ignores_zero_count_lines(): + # Verify only lines with count > 0 appear + ... + +def test_handles_no_gcda_gracefully(): + # When gcov returns non-zero or empty stdout (no .gcda file yet) + # collect_coverage_for_test should return {} not crash + ... +``` + +### Group 6: `find_gcov_binary()` + +```python +def test_finds_versioned_gcov(monkeypatch): + # Mock: gfortran --version → "GNU Fortran (Homebrew GCC 15.2.0) 15.2.0" + # Mock: gcov-15 in PATH, its --version contains "GCC" + # Expected: returns "gcov-15" + ... 
+ +def test_rejects_apple_llvm_gcov(monkeypatch): + # Mock: gfortran → GCC 15, gcov-15 not in PATH + # Mock: plain gcov --version → "Apple LLVM version 17.0.0" + # Expected: raises MFCException with helpful message + ... + +def test_falls_back_to_plain_gnu_gcov(monkeypatch): + # Mock: gcov-15 not in PATH, plain gcov is GNU + # Expected: returns "gcov" + ... +``` + +--- + +## Integration Verification (run on CI machine after implementing) + +```bash +# Step 1: Build with gcov instrumentation +./mfc.sh build --gcov -j 8 +# Verify .gcno files exist: +find build/ -name "*.gcno" -not -path "*/venv/*" | wc -l +# Expected: ~100+ files + +# Step 2: Build coverage cache +./mfc.sh test --build-coverage-cache +# Expected: runs ~547 tests sequentially, prints progress, writes cache + +# Step 3: Sanity-check the cache +python3 -c " +import json +with open('toolchain/mfc/test/test_coverage_cache.json') as f: + cache = json.load(f) +tests = {k: v for k, v in cache.items() if k != '_meta'} +print(f'Tests in cache: {len(tests)}') +assert len(tests) > 540, f'Too few tests: {len(tests)}' +for uuid, cov in tests.items(): + assert isinstance(cov, dict), f'{uuid}: coverage not a dict' + for fpp, lines in cov.items(): + assert fpp.endswith('.fpp'), f'{uuid}: non-.fpp file {fpp}' + assert len(lines) > 0, f'{uuid}: {fpp} has no lines' +print('Cache looks correct.') +print('Sample:', list(tests.keys())[:3]) +" + +# Step 4: Verify line-level selection works +# Change a single line in m_igr.fpp (a specific feature file) +sed -i '50s/$/ ! 
test change/' src/simulation/m_igr.fpp +./mfc.sh test --only-changes --dry-run 2>&1 | grep "Tests to run" +# Expected: ~16 tests (IGR-related), not 547 or 300+ +git checkout src/simulation/m_igr.fpp + +# Step 5: Verify large module line granularity +# Find a non-default code path in m_riemann_solvers.fpp +# (e.g., Roe solver is at specific lines, HLLC at other lines) +# Change 1 line in the Roe solver section +# Verify: only Roe tests run, not HLLC tests +# (Compare output of --dry-run before and after targeted changes) + +# Step 6: Verify macro file triggers all tests +sed -i '1s/$/ ! test/' src/common/include/parallel_macros.fpp +./mfc.sh test --only-changes --dry-run 2>&1 | grep "Tests to run" +# Expected: all 547 tests (infrastructure change detected) +git checkout src/common/include/parallel_macros.fpp + +# Step 7: Verify stale cache detection +# Temporarily modify cases.py (add a comment, don't break it) +echo "# temp" >> toolchain/mfc/test/cases.py +./mfc.sh test --only-changes 2>&1 | grep "stale\|Warning" +# Expected: warning about stale cache, falls back to full suite +git checkout toolchain/mfc/test/cases.py +``` + +--- + +## Design Decisions (preserve these) + +1. **Line-level, not file-level.** Changing 5 lines in a 5000-line file triggers only tests covering those 5 lines. This is the core value over file-level coverage. + +2. **Merge-base diff.** Use `git merge-base {branch} HEAD` as diff base, not master's current tip. If master advanced after you branched, you should not see master's changes as "your changes." + +3. **Conservative on unknown tests.** Test UUID not in cache (newly added) → include it, don't silently skip. + +4. **`ALWAYS_RUN_ALL` for GPU macro files and toolchain files.** CPU coverage can't detect GPU directive changes. Macro files and test-suite-defining files always run everything. + +5. **Stale cache → full suite fallback, not hard failure.** If `cases.py` changed, warn and run all tests. Don't block the developer. + +6. 
**Pure deletions don't trigger tests.** `@@ -45,3 +45,0 @@` means 0 new lines → nothing to cover → skip. + +7. **Sequential cache building.** Multiple test processes write to the same `.gcda` files (shared binary). Zero `.gcda` before each test and run one at a time. It's slow (~2-3x) but it's a one-time operation. + +8. **GPU-only `#ifdef` lines are not in CPU coverage.** This is expected. Lines inside `#ifdef MFC_OpenACC` aren't executed in a CPU build. The `ALWAYS_RUN_ALL` mechanism handles macro file changes. GPU-specific compute code doesn't use `#ifdef` in MFC (it uses Fypp macros that expand to nothing on CPU). + +--- + +## What to Delete + +```bash +git rm toolchain/mfc/test/dependency_parser.py # 530 lines of regex Fortran parsing +git rm toolchain/mfc/test/test_mapping.json # 148 lines of manual file→label mapping +``` + +Both are replaced by the automated `coverage.py` approach. + +--- + +## Imports Needed in `coverage.py` + +```python +import os, re, json, gzip, shutil, hashlib, subprocess, datetime +from pathlib import Path +from collections import defaultdict +from typing import Optional + +from ..printer import cons +from .. import common +from ..common import MFCException +from ..build import PRE_PROCESS, SIMULATION +``` diff --git a/toolchain/mfc/cli/commands.py b/toolchain/mfc/cli/commands.py index 8ad8c4bd07..53f51e3d5e 100644 --- a/toolchain/mfc/cli/commands.py +++ b/toolchain/mfc/cli/commands.py @@ -452,6 +452,27 @@ default=False, dest="dry_run", ), + Argument( + name="build-coverage-cache", + help="Run all tests sequentially with gcov instrumentation to build the line-level coverage cache. 
Requires a prior --gcov build: ./mfc.sh build --gcov -j 8", + action=ArgAction.STORE_TRUE, + default=False, + dest="build_coverage_cache", + ), + Argument( + name="only-changes", + help="Only run tests whose covered lines overlap with lines changed since branching from master (uses line-level gcov coverage cache).", + action=ArgAction.STORE_TRUE, + default=False, + dest="only_changes", + ), + Argument( + name="changes-branch", + help="Branch to compare against for --only-changes (default: master).", + type=str, + default="master", + dest="changes_branch", + ), ], mutually_exclusive=[ MutuallyExclusiveGroup(arguments=[ @@ -482,6 +503,8 @@ Example("./mfc.sh test -j 4", "Run with 4 parallel jobs"), Example("./mfc.sh test --only 3D", "Run only 3D tests"), Example("./mfc.sh test --generate", "Regenerate golden files"), + Example("./mfc.sh test --only-changes -j 4", "Run tests affected by changed lines"), + Example("./mfc.sh build --gcov -j 8 && ./mfc.sh test --build-coverage-cache", "One-time: build line-coverage cache"), ], key_options=[ ("-j, --jobs N", "Number of parallel test jobs"), @@ -489,6 +512,8 @@ ("-f, --from UUID", "Start from specific test"), ("--generate", "Generate/update golden files"), ("--no-build", "Skip rebuilding MFC"), + ("--build-coverage-cache", "Build line-level gcov coverage cache (one-time)"), + ("--only-changes", "Run tests affected by changed lines (requires cache)"), ], ) diff --git a/toolchain/mfc/test/coverage.py b/toolchain/mfc/test/coverage.py new file mode 100644 index 0000000000..86beab6035 --- /dev/null +++ b/toolchain/mfc/test/coverage.py @@ -0,0 +1,416 @@ +""" +Line-level gcov coverage-based test pruning for MFC. + +This module replaces the regex-based dependency_parser.py with an instrumentation +approach: build MFC once with gfortran --coverage, run all tests individually, +record which .fpp line numbers each test executes, and cache that mapping. 
+ +When files change on a PR, intersect the changed line ranges (from git diff -U0) +against each test's covered lines. Only tests with overlapping lines run. + +Workflow: + ./mfc.sh build --gcov -j 8 # one-time: build with coverage + ./mfc.sh test --build-coverage-cache # one-time: populate the cache + ./mfc.sh test --only-changes -j 8 # fast: run only affected tests +""" + +import os +import re +import json +import gzip +import shutil +import hashlib +import subprocess +import datetime +from pathlib import Path +from collections import defaultdict +from typing import Optional + +from ..printer import cons +from .. import common +from ..common import MFCException +from ..build import PRE_PROCESS, SIMULATION + + +COVERAGE_CACHE_PATH = Path(common.MFC_ROOT_DIR) / "toolchain/mfc/test/test_coverage_cache.json" + +# Changes to these files trigger the full test suite. +# CPU coverage cannot tell us about GPU directive changes (macro files), and +# toolchain files define or change the set of tests themselves. +ALWAYS_RUN_ALL = frozenset([ + "src/common/include/parallel_macros.fpp", + "src/common/include/acc_macros.fpp", + "src/common/include/omp_macros.fpp", + "src/common/include/shared_parallel_macros.fpp", + "src/common/include/macros.fpp", + "toolchain/mfc/test/cases.py", + "toolchain/mfc/test/case.py", + "toolchain/mfc/params/definitions.py", +]) + + +def _get_gcov_version(gcov_binary: str) -> str: + """Return the version string from gcov --version.""" + try: + result = subprocess.run( + [gcov_binary, "--version"], + capture_output=True, text=True, timeout=10, check=False + ) + for line in result.stdout.splitlines(): + if line.strip(): + return line.strip() + except Exception: + pass + return "unknown" + + +def find_gcov_binary(_root_dir: str = "") -> str: # pylint: disable=unused-argument + """ + Find a GNU gcov binary compatible with the system gfortran. + + On macOS with Homebrew GCC, the binary is gcov-{major} (e.g. gcov-15). 
+ On Linux with system GCC, plain gcov is usually correct. + Apple LLVM's /usr/bin/gcov is incompatible with gfortran .gcda files. + """ + # Determine gfortran major version + major = None + try: + result = subprocess.run( + ["gfortran", "--version"], + capture_output=True, text=True, timeout=10, check=False + ) + m = re.search(r'(\d+)\.\d+\.\d+', result.stdout) + if m: + major = m.group(1) + except Exception: + pass + + # Try versioned binary first (Homebrew macOS), then plain gcov + candidates = [] + if major: + candidates.append(f"gcov-{major}") + candidates.append("gcov") + + for candidate in candidates: + path = shutil.which(candidate) + if path is None: + continue + try: + result = subprocess.run( + [path, "--version"], + capture_output=True, text=True, timeout=10, check=False + ) + version_out = result.stdout + if "Apple LLVM" in version_out or "Apple clang" in version_out: + continue # Apple's gcov cannot parse GCC-generated .gcda files + if "GCC" in version_out or "GNU" in version_out: + return path + except Exception: + continue + + raise MFCException( + "GNU gcov not found. gcov is required for the coverage cache.\n" + " On macOS (Homebrew): brew install gcc\n" + " On Linux (Debian/Ubuntu): apt install gcc\n" + " On Linux (RHEL/CentOS): yum install gcc\n" + "Apple's /usr/bin/gcov is incompatible with gfortran .gcda files." + ) + + +def find_gcno_files(root_dir: str) -> list: + """ + Walk build/ and return all .gcno files (excluding venv paths). + Raises if none found (indicates build was not done with --gcov). + """ + build_dir = Path(root_dir) / "build" + gcno_files = [ + p for p in build_dir.rglob("*.gcno") + if "venv" not in p.parts + ] + if not gcno_files: + raise MFCException( + "No .gcno files found. Build with --gcov instrumentation first:\n" + " ./mfc.sh build --gcov -j 8" + ) + return gcno_files + + +def zero_gcda_files(root_dir: str) -> None: + """ + Delete all .gcda files under build/ (excluding venv). 
+    Called before each test run during cache building to isolate per-test coverage.
+    """
+    build_dir = Path(root_dir) / "build"
+    for gcda in build_dir.rglob("*.gcda"):
+        if "venv" not in gcda.parts:
+            try:
+                gcda.unlink()
+            except OSError:
+                pass
+
+
+def _parse_gcov_json_output(compressed_bytes: bytes, root_dir: str) -> dict:
+    """
+    Parse gcov JSON output (gzip-compressed or plain) and return
+    {rel_fpp_path: [covered_lines]}.
+    Only .fpp files are included; .f90 and other files are skipped.
+    Only lines with count > 0 are included.
+    """
+    result = defaultdict(set)
+    try:
+        # gcov writes gzip-compressed .gcov.json.gz files on disk, but with
+        # --json-format --stdout (as used by collect_coverage_for_test) it
+        # emits *plain* JSON. Sniff the gzip magic bytes and accept both;
+        # anything unparseable yields {} so a bad TU never aborts the scan.
+        if compressed_bytes[:2] == b"\x1f\x8b":
+            compressed_bytes = gzip.decompress(compressed_bytes)
+        data = json.loads(compressed_bytes)
+    except Exception:
+        return {}
+
+    for file_entry in data.get("files", []):
+        file_path = file_entry.get("file", "")
+        if not file_path.endswith(".fpp"):
+            continue
+        try:
+            rel_path = os.path.relpath(file_path, root_dir)
+        except ValueError:
+            rel_path = file_path
+        for line in file_entry.get("lines", []):
+            if line.get("count", 0) > 0:
+                result[rel_path].add(line["line_number"])
+
+    return {k: sorted(v) for k, v in result.items()}
+
+
+def collect_coverage_for_test(gcno_files: list, root_dir: str, gcov_binary: str) -> dict:
+    """
+    Run gcov on all .gcno files and collect covered .fpp lines.
+
+    Assumes the test has already run and .gcda files are present next to .gcno files.
+    Returns {rel_fpp_path: sorted_list_of_covered_line_numbers}.
+ """ + merged = defaultdict(set) + for gcno_file in gcno_files: + try: + proc = subprocess.run( + [gcov_binary, "--json-format", "--stdout", str(gcno_file)], + capture_output=True, cwd=root_dir, timeout=60, check=False + ) + except subprocess.TimeoutExpired: + continue + except Exception: + continue + + if proc.returncode != 0 or not proc.stdout: + # No .gcda for this translation unit — test didn't touch it + continue + + partial = _parse_gcov_json_output(proc.stdout, root_dir) + for fpp_path, lines in partial.items(): + merged[fpp_path].update(lines) + + return {k: sorted(v) for k, v in merged.items()} + + +def build_coverage_cache(root_dir: str, cases: list) -> None: + """ + Build the line-level coverage cache by running all tests sequentially. + + Each test is run individually with .gcda files zeroed beforehand to isolate + per-test coverage. This takes ~2-3x normal test time but is a one-time operation. + + Requires a prior --gcov build: ./mfc.sh build --gcov -j 8 + """ + gcov_bin = find_gcov_binary(root_dir) + gcno_files = find_gcno_files(root_dir) + + cons.print(f"[bold]Building coverage cache for {len(cases)} tests (sequential)...[/bold]") + cons.print("[dim]Tests run one at a time to isolate per-test coverage. 
This takes ~2-3x normal time.[/dim]")
+    cons.print(f"[dim]Using gcov binary: {gcov_bin}[/dim]")
+    cons.print(f"[dim]Found {len(gcno_files)} .gcno files[/dim]")
+    cons.print()
+
+    cache: dict = {}
+    for i, case in enumerate(cases):
+        zero_gcda_files(root_dir)
+        try:
+            case.run([PRE_PROCESS, SIMULATION], gpus=set())
+        except Exception:
+            # Continue even if the test fails — we only need .gcda data
+            pass
+
+        coverage = collect_coverage_for_test(gcno_files, root_dir, gcov_bin)
+        cache[case.get_uuid()] = coverage
+
+        total_lines = sum(len(v) for v in coverage.values())
+        cons.print(
+            f"  [{i+1:3d}/{len(cases):3d}] "
+            f"[magenta]{case.get_uuid()}[/magenta]: "
+            f"{len(coverage)} files, {total_lines} lines"
+        )
+
+    cases_py_path = Path(root_dir) / "toolchain/mfc/test/cases.py"
+    cases_hash = hashlib.sha256(cases_py_path.read_bytes()).hexdigest()
+    gcov_version = _get_gcov_version(gcov_bin)
+
+    cache["_meta"] = {
+        # Timezone-aware UTC stamp; datetime.utcnow() is naive and deprecated (3.12+).
+        "created": datetime.datetime.now(datetime.timezone.utc).isoformat(),
+        "cases_hash": cases_hash,
+        "gcov_version": gcov_version,
+    }
+
+    with open(COVERAGE_CACHE_PATH, "w", encoding="utf-8") as f:
+        json.dump(cache, f, indent=2)
+
+    cons.print()
+    cons.print(f"[bold green]Coverage cache written to {COVERAGE_CACHE_PATH}[/bold green]")
+    cons.print(f"[dim]Cache has {len(cases)} test entries.[/dim]")
+
+
+def load_coverage_cache(root_dir: str) -> Optional[dict]:
+    """
+    Load the coverage cache, returning None if missing or stale.
+
+    Staleness is detected by comparing the SHA256 of cases.py at cache-build time
+    against the current cases.py. A stale cache means new/removed tests exist.
+    The caller should fall back to running the full test suite.
+ """ + if not COVERAGE_CACHE_PATH.exists(): + return None + + with open(COVERAGE_CACHE_PATH, encoding="utf-8") as f: + cache = json.load(f) + + cases_py = Path(root_dir) / "toolchain/mfc/test/cases.py" + current_hash = hashlib.sha256(cases_py.read_bytes()).hexdigest() + stored_hash = cache.get("_meta", {}).get("cases_hash", "") + + if current_hash != stored_hash: + cons.print("[yellow]Warning: Coverage cache is stale (cases.py changed since cache was built).[/yellow]") + cons.print("[yellow]Rebuild with: ./mfc.sh build --gcov -j 8 && ./mfc.sh test --build-coverage-cache[/yellow]") + return None + + return cache + + +def _parse_diff_output(diff_text: str, _root_dir: str = "") -> dict: # pylint: disable=unused-argument + """ + Parse unified diff output and return {rel_fpp_path: [(start, end), ...]} for + added/modified lines in .fpp files only. + + Pure deletions (@@ -N,M +N,0 @@) are excluded — nothing to cover. + The root_dir argument is accepted for interface compatibility but not used here + (paths in git diff are already repo-relative). + """ + changed: dict = defaultdict(list) + current_file = None + + file_re = re.compile(r'^\+\+\+ b/(.+\.fpp)$') + hunk_re = re.compile(r'^@@ -\d+(?:,\d+)? \+(\d+)(?:,(\d+))? @@') + + for line in diff_text.splitlines(): + m = file_re.match(line) + if m: + current_file = m.group(1) + continue + + m = hunk_re.match(line) + if m and current_file: + start = int(m.group(1)) + count = int(m.group(2)) if m.group(2) is not None else 1 + if count > 0: + changed[current_file].append((start, start + count - 1)) + + return dict(changed) + + +def get_changed_line_ranges(root_dir: str, compare_branch: str = "master") -> dict: + """ + Return {rel_fpp_path: [(start, end), ...]} for lines changed in this branch + relative to the merge-base with compare_branch. + + Uses merge-base (not master tip) so that unrelated master advances don't + appear as "your changes." + Only .fpp files are included. Pure deletions are excluded. 
+ """ + merge_base_result = subprocess.run( + ["git", "merge-base", compare_branch, "HEAD"], + capture_output=True, text=True, cwd=root_dir, timeout=30, check=False + ) + if merge_base_result.returncode != 0: + return {} + merge_base = merge_base_result.stdout.strip() + if not merge_base: + return {} + + diff_result = subprocess.run( + ["git", "diff", merge_base, "HEAD", "-U0", "--no-color"], + capture_output=True, text=True, cwd=root_dir, timeout=30, check=False + ) + if diff_result.returncode != 0: + return {} + + return _parse_diff_output(diff_result.stdout, root_dir) + + +def should_run_all_tests(changed_rel_paths: set) -> bool: + """ + Return True if any changed file is in ALWAYS_RUN_ALL. + + GPU macro files and toolchain files cannot be correctly analyzed by CPU + coverage — changes to them must always trigger the full test suite. + """ + return bool(changed_rel_paths & ALWAYS_RUN_ALL) + + +def filter_tests_by_coverage( # pylint: disable=too-many-locals + cases: list, coverage_cache: dict, changed_ranges: dict +) -> tuple: + """ + Filter test cases to only those whose covered lines overlap with changed lines. + + Returns (cases_to_run, skipped_cases). + + Conservative behavior: + - Test not in cache (newly added) → include it + - Changed file not in any test's coverage → warn and skip + + This is the core of the line-level pruning. A test is included only if at + least one of its covered lines intersects with at least one changed line range. 
+ """ + to_run = [] + skipped = [] + new_file_warnings: set = set() + + for case in cases: + uuid = case.get_uuid() + test_cov = coverage_cache.get(uuid) + + if test_cov is None: + # Test not in cache (e.g., newly added) → conservative: include + to_run.append(case) + continue + + matched = False + for fpp_file, ranges in changed_ranges.items(): + covered_lines = set(test_cov.get(fpp_file, [])) + if not covered_lines: + # File has no coverage for this test — check if it has coverage anywhere + if not any( + fpp_file in tc + for tc in coverage_cache.values() + if isinstance(tc, dict) + ): + new_file_warnings.add(fpp_file) + continue + for (start, end) in ranges: + if covered_lines & set(range(start, end + 1)): + matched = True + break + if matched: + break + + if matched: + to_run.append(case) + else: + skipped.append(case) + + for f in sorted(new_file_warnings): + cons.print(f"[yellow]Warning: {f} has no coverage data in the cache.[/yellow]") + cons.print("[yellow] Consider rebuilding: ./mfc.sh test --build-coverage-cache[/yellow]") + + return to_run, skipped diff --git a/toolchain/mfc/test/test.py b/toolchain/mfc/test/test.py index 31a3771cb9..2b7d272eb8 100644 --- a/toolchain/mfc/test/test.py +++ b/toolchain/mfc/test/test.py @@ -42,7 +42,7 @@ class TestTimeoutError(MFCException): pass -# pylint: disable=too-many-branches, trailing-whitespace +# pylint: disable=too-many-branches,too-many-locals,too-many-statements,trailing-whitespace def __filter(cases_) -> typing.List[TestCase]: cases = cases_[:] selected_cases = [] @@ -75,6 +75,51 @@ def __filter(cases_) -> typing.List[TestCase]: cases.remove(case) skipped_cases.append(case) + # --only-changes: filter based on line-level gcov coverage + if ARG("only_changes"): + from .coverage import ( # pylint: disable=import-outside-toplevel + load_coverage_cache, get_changed_line_ranges, + should_run_all_tests, filter_tests_by_coverage, + ) + + cache = load_coverage_cache(common.MFC_ROOT_DIR) + if cache is None: + 
cons.print("[yellow]Coverage cache missing or stale.[/yellow]") + cons.print("[yellow]Run: ./mfc.sh build --gcov -j 8 && ./mfc.sh test --build-coverage-cache[/yellow]") + cons.print("[yellow]Falling back to full test suite.[/yellow]") + # cache is None → fall through and run all tests + else: + changed_ranges = get_changed_line_ranges(common.MFC_ROOT_DIR, ARG("changes_branch")) + changed_paths = set(changed_ranges.keys()) + + cons.print() + cons.print("[bold cyan]Line-Coverage Change Analysis[/bold cyan]") + cons.print("-" * 50) + + if should_run_all_tests(changed_paths): + cons.print("[yellow]Infrastructure or macro file changed — running full test suite.[/yellow]") + cons.print("-" * 50) + # fall through — don't filter cases + elif not changed_ranges: + cons.print("[green]No .fpp source changes detected — skipping all tests.[/green]") + cons.print("-" * 50) + cons.print() + skipped_cases += cases + cases = [] + else: + for fpp_file, ranges in sorted(changed_ranges.items()): + total_lines = sum(e - s + 1 for s, e in ranges) + cons.print( + f" [green]*[/green] {fpp_file} " + f"({total_lines} line{'s' if total_lines != 1 else ''} changed)" + ) + + cases, new_skipped = filter_tests_by_coverage(cases, cache, changed_ranges) + skipped_cases += new_skipped + cons.print(f"\n[bold]Tests to run: {len(cases)} / {len(cases) + len(new_skipped)}[/bold]") + cons.print("-" * 50) + cons.print() + for case in cases[:]: if case.ppn > 1 and not ARG("mpi"): cases.remove(case) @@ -128,6 +173,12 @@ def test(): return + if ARG("build_coverage_cache"): + from .coverage import build_coverage_cache # pylint: disable=import-outside-toplevel + all_cases = [b.to_case() for b in cases] + build_coverage_cache(common.MFC_ROOT_DIR, all_cases) + return + cases, skipped_cases = __filter(cases) cases = [ _.to_case() for _ in cases ] total_test_count = len(cases) diff --git a/toolchain/mfc/test/test_coverage_unit.py b/toolchain/mfc/test/test_coverage_unit.py new file mode 100644 index 
0000000000..f4138d7256 --- /dev/null +++ b/toolchain/mfc/test/test_coverage_unit.py @@ -0,0 +1,570 @@ +""" +Unit tests for toolchain/mfc/test/coverage.py + +Run with: + python3 -m pytest toolchain/mfc/test/test_coverage_unit.py -v + +These tests are fully offline (no build, no git, no gcov binary required). +They use mocks and in-memory data structures to verify logic. +""" +# pylint: disable=protected-access,exec-used,too-few-public-methods,wrong-import-position + +import gzip +import importlib.util +import json +import os +import sys +import textwrap +import types +import unittest +from unittest.mock import patch + +# --------------------------------------------------------------------------- +# Import the module under test. +# We patch the module-level imports that require the full toolchain. +# --------------------------------------------------------------------------- + +# Create minimal stubs for toolchain modules so coverage.py can be imported +# without the full MFC toolchain being on sys.path. 
+def _make_stub(name):
+    mod = types.ModuleType(name)
+    sys.modules[name] = mod
+    return mod
+
+
+for _mod_name in [
+    "toolchain",
+    "toolchain.mfc",
+    "toolchain.mfc.printer",
+    "toolchain.mfc.common",
+    "toolchain.mfc.build",
+]:
+    if _mod_name not in sys.modules:
+        _make_stub(_mod_name)
+
+# Provide the attributes coverage.py needs from its relative imports
+_printer_stub = sys.modules.get("toolchain.mfc.printer", _make_stub("toolchain.mfc.printer"))
+
+
+class _FakeCons:
+    def print(self, *args, **kwargs):
+        pass  # suppress output during tests
+
+
+_printer_stub.cons = _FakeCons()
+
+_common_stub = sys.modules.get("toolchain.mfc.common", _make_stub("toolchain.mfc.common"))
+_common_stub.MFC_ROOT_DIR = "/fake/repo"
+
+
+class _FakeMFCException(Exception):
+    pass
+
+
+_common_stub.MFCException = _FakeMFCException
+
+_build_stub = sys.modules.get("toolchain.mfc.build", _make_stub("toolchain.mfc.build"))
+_build_stub.PRE_PROCESS = "pre_process"
+_build_stub.SIMULATION = "simulation"
+
+# Load coverage.py by injecting stubs into sys.modules so relative imports resolve.
+_COVERAGE_PATH = os.path.join(os.path.dirname(os.path.abspath(__file__)), "coverage.py")
+
+# Drop any previously loaded copy. (Assigning None instead of popping would
+# make a later `import toolchain.mfc.test.coverage` raise ImportError.)
+sys.modules.pop("toolchain.mfc.test.coverage", None)
+
+_spec = importlib.util.spec_from_file_location(
+    "toolchain.mfc.test.coverage",
+    _COVERAGE_PATH,
+    submodule_search_locations=[]
+)
+_coverage_mod = importlib.util.module_from_spec(_spec)
+_coverage_mod.__package__ = "toolchain.mfc.test"
+
+sys.modules["toolchain.mfc.test"] = types.ModuleType("toolchain.mfc.test")
+sys.modules["toolchain.mfc.test"].__package__ = "toolchain.mfc.test"
+
+with patch.dict("sys.modules", {
+    "toolchain.mfc.printer": _printer_stub,
+    "toolchain.mfc.common": _common_stub,
+    "toolchain.mfc.build": _build_stub,
+}):
+    try:
+        _spec.loader.exec_module(_coverage_mod)
+    except ImportError:
+        pass  # fallback below
+
+# If the importlib approach failed (relative imports unresolvable), fall back to exec.
+try:
+    _parse_diff_output = _coverage_mod._parse_diff_output
+    _parse_gcov_json_output = _coverage_mod._parse_gcov_json_output
+    should_run_all_tests = _coverage_mod.should_run_all_tests
+    filter_tests_by_coverage = _coverage_mod.filter_tests_by_coverage
+    ALWAYS_RUN_ALL = _coverage_mod.ALWAYS_RUN_ALL
+except AttributeError:
+    _globals = {
+        "__name__": "toolchain.mfc.test.coverage",
+        "__package__": "toolchain.mfc.test",
+        "cons": _printer_stub.cons,
+        "common": _common_stub,
+        "MFCException": _FakeMFCException,
+        "PRE_PROCESS": "pre_process",
+        "SIMULATION": "simulation",
+    }
+    with open(_COVERAGE_PATH, encoding="utf-8") as _f:
+        _src = _f.read()
+
+    _src = (
+        _src
+        # cons, common, MFCException, PRE_PROCESS and SIMULATION are already
+        # seeded into _globals above, so the relative imports can simply be
+        # stripped. (Replacing them with `cons = _globals['cons']`-style
+        # assignments would raise NameError inside the exec'd source: the
+        # name `_globals` is the globals *dict*, not a binding visible there.)
+        .replace("from ..printer import cons", "")
+        .replace("from .. import common", "")
+        .replace("from ..common import MFCException", "")
+        .replace("from ..build import PRE_PROCESS, SIMULATION", "")
+    )
+    exec(compile(_src, _COVERAGE_PATH, "exec"), _globals)  # noqa: S102
+
+    _parse_diff_output = _globals["_parse_diff_output"]
+    _parse_gcov_json_output = _globals["_parse_gcov_json_output"]
+    should_run_all_tests = _globals["should_run_all_tests"]
+    filter_tests_by_coverage = _globals["filter_tests_by_coverage"]
+    ALWAYS_RUN_ALL = _globals["ALWAYS_RUN_ALL"]
+
+
+# ---------------------------------------------------------------------------
+# Helper: minimal fake test case
+# ---------------------------------------------------------------------------
+
+class FakeCase:
+    """Minimal stand-in for TestCase — only get_uuid() is needed."""
+
+    def __init__(self, uuid: str):
+        self._uuid = uuid
+
+    def get_uuid(self) -> str:
+        return self._uuid
+
+
+# ===========================================================================
+# Group 1: _parse_diff_output — git diff parsing
+# ===========================================================================
+
+class TestParseDiffOutput(unittest.TestCase):
+
+    def test_parse_basic_modification(self):
diff = textwrap.dedent("""\ + diff --git a/src/simulation/m_rhs.fpp b/src/simulation/m_rhs.fpp + --- a/src/simulation/m_rhs.fpp + +++ b/src/simulation/m_rhs.fpp + @@ -45,3 +45,5 @@ + + ! new line 1 + + ! new line 2 + """) + result = _parse_diff_output(diff, "/repo") + assert result == {"src/simulation/m_rhs.fpp": [(45, 49)]} + + def test_parse_multiple_hunks_same_file(self): + diff = textwrap.dedent("""\ + +++ b/src/simulation/m_rhs.fpp + @@ -10,2 +10,3 @@ + + ! hunk 1 + @@ -50,1 +51,2 @@ + + ! hunk 2 + """) + result = _parse_diff_output(diff, "/repo") + assert "src/simulation/m_rhs.fpp" in result + ranges = result["src/simulation/m_rhs.fpp"] + assert (10, 12) in ranges + assert (51, 52) in ranges + + def test_parse_pure_deletion(self): + # @@ -45,3 +45,0 @@ — 3 lines deleted, 0 added → nothing to cover + diff = textwrap.dedent("""\ + +++ b/src/simulation/m_rhs.fpp + @@ -45,3 +45,0 @@ + - ! removed line 1 + """) + result = _parse_diff_output(diff, "/repo") + assert result == {} + + def test_parse_new_file(self): + diff = textwrap.dedent("""\ + --- /dev/null + +++ b/src/simulation/m_new.fpp + @@ -0,0 +1,50 @@ + + ! new file content + """) + result = _parse_diff_output(diff, "/repo") + assert result == {"src/simulation/m_new.fpp": [(1, 50)]} + + def test_parse_ignores_non_fpp(self): + diff = textwrap.dedent("""\ + +++ b/toolchain/mfc/test/cases.py + @@ -1,1 +1,2 @@ + +# new line + +++ b/README.md + @@ -5,1 +5,2 @@ + +# new line + +++ b/CMakeLists.txt + @@ -10,1 +10,2 @@ + + new cmake line + """) + result = _parse_diff_output(diff, "/repo") + assert result == {} + + def test_parse_single_line_hunk_no_count(self): + # @@ -45 +45 @@ (no count = 1 line) + diff = textwrap.dedent("""\ + +++ b/src/simulation/m_rhs.fpp + @@ -45 +45 @@ + + ! 
changed line + """) + result = _parse_diff_output(diff, "/repo") + assert result == {"src/simulation/m_rhs.fpp": [(45, 45)]} + + def test_parse_empty_diff(self): + assert _parse_diff_output("", "/repo") == {} + + def test_parse_multiple_files(self): + diff = textwrap.dedent("""\ + +++ b/src/simulation/m_rhs.fpp + @@ -10,2 +10,3 @@ + + ! rhs change + +++ b/src/simulation/m_weno.fpp + @@ -200,1 +200,4 @@ + + ! weno change + """) + result = _parse_diff_output(diff, "/repo") + assert "src/simulation/m_rhs.fpp" in result + assert "src/simulation/m_weno.fpp" in result + assert result["src/simulation/m_rhs.fpp"] == [(10, 12)] + assert result["src/simulation/m_weno.fpp"] == [(200, 203)] + + +# =========================================================================== +# Group 2: should_run_all_tests — ALWAYS_RUN_ALL detection +# =========================================================================== + +class TestShouldRunAllTests(unittest.TestCase): + + def test_parallel_macros_triggers_all(self): + assert should_run_all_tests( + {"src/common/include/parallel_macros.fpp"} + ) is True + + def test_acc_macros_triggers_all(self): + assert should_run_all_tests( + {"src/common/include/acc_macros.fpp"} + ) is True + + def test_omp_macros_triggers_all(self): + assert should_run_all_tests( + {"src/common/include/omp_macros.fpp"} + ) is True + + def test_shared_parallel_macros_triggers_all(self): + assert should_run_all_tests( + {"src/common/include/shared_parallel_macros.fpp"} + ) is True + + def test_macros_fpp_triggers_all(self): + assert should_run_all_tests( + {"src/common/include/macros.fpp"} + ) is True + + def test_cases_py_triggers_all(self): + assert should_run_all_tests( + {"toolchain/mfc/test/cases.py"} + ) is True + + def test_case_py_triggers_all(self): + assert should_run_all_tests( + {"toolchain/mfc/test/case.py"} + ) is True + + def test_definitions_py_triggers_all(self): + assert should_run_all_tests( + {"toolchain/mfc/params/definitions.py"} + ) is True + + 
def test_simulation_module_does_not_trigger_all(self): + assert should_run_all_tests( + {"src/simulation/m_rhs.fpp"} + ) is False + + def test_empty_set_does_not_trigger_all(self): + assert should_run_all_tests(set()) is False + + def test_mixed_one_trigger_fires_all(self): + assert should_run_all_tests({ + "src/simulation/m_rhs.fpp", + "src/common/include/macros.fpp", + }) is True + + +# =========================================================================== +# Group 3: filter_tests_by_coverage — core selection logic +# =========================================================================== + +class TestFilterTestsByCoverage(unittest.TestCase): + + def test_exact_line_hit(self): + cache = {"AAAA0001": {"src/simulation/m_rhs.fpp": [45, 46, 47]}} + changed = {"src/simulation/m_rhs.fpp": [(46, 46)]} + cases = [FakeCase("AAAA0001")] + to_run, skipped = filter_tests_by_coverage(cases, cache, changed) + assert len(to_run) == 1 + assert len(skipped) == 0 + + def test_no_line_overlap(self): + """ + KEY TEST: this is what makes line-level better than file-level coverage. + Same file, but changed lines (500-510) don't overlap covered lines (45-47). 
+ """ + cache = {"AAAA0001": {"src/simulation/m_rhs.fpp": [45, 46, 47]}} + changed = {"src/simulation/m_rhs.fpp": [(500, 510)]} + cases = [FakeCase("AAAA0001")] + to_run, skipped = filter_tests_by_coverage(cases, cache, changed) + assert len(to_run) == 0 + assert len(skipped) == 1 + + def test_range_boundary_inclusive_start(self): + cache = {"AAAA0001": {"src/simulation/m_rhs.fpp": [100]}} + changed = {"src/simulation/m_rhs.fpp": [(100, 105)]} + to_run, _ = filter_tests_by_coverage([FakeCase("AAAA0001")], cache, changed) + assert len(to_run) == 1 + + def test_range_boundary_inclusive_end(self): + cache = {"AAAA0001": {"src/simulation/m_rhs.fpp": [105]}} + changed = {"src/simulation/m_rhs.fpp": [(100, 105)]} + to_run, _ = filter_tests_by_coverage([FakeCase("AAAA0001")], cache, changed) + assert len(to_run) == 1 + + def test_range_just_outside_both_ends(self): + # Lines 99 and 106 are NOT inside [100, 105] + cache = {"AAAA0001": {"src/simulation/m_rhs.fpp": [99, 106]}} + changed = {"src/simulation/m_rhs.fpp": [(100, 105)]} + to_run, skipped = filter_tests_by_coverage([FakeCase("AAAA0001")], cache, changed) + assert len(to_run) == 0 + assert len(skipped) == 1 + + def test_uuid_not_in_cache_is_conservative(self): + """Newly added test not in cache → include it (conservative).""" + cache = {} + changed = {"src/simulation/m_rhs.fpp": [(45, 45)]} + to_run, _ = filter_tests_by_coverage([FakeCase("NEWTEST1")], cache, changed) + assert len(to_run) == 1 + + def test_changed_file_not_in_test_coverage(self): + """Test covers m_rhs.fpp but we changed m_igr.fpp → test skipped.""" + cache = {"AAAA0001": {"src/simulation/m_rhs.fpp": [45, 46]}} + changed = {"src/simulation/m_igr.fpp": [(100, 110)]} + to_run, skipped = filter_tests_by_coverage([FakeCase("AAAA0001")], cache, changed) + assert len(to_run) == 0 + assert len(skipped) == 1 + + def test_large_module_line_granularity(self): + """ + KEY CORNER CASE: m_riemann_solvers.fpp is 5257 lines. 
+ HLLC test covers lines 798-810, ROE test covers lines 2000+. + Changing line 800-810 → only HLLC test runs. + """ + cache = { + "HLLC_TST": {"src/simulation/m_riemann_solvers.fpp": [798, 799, 800, 801, 810]}, + "ROE_TEST": {"src/simulation/m_riemann_solvers.fpp": [2000, 2100, 2500]}, + } + changed = {"src/simulation/m_riemann_solvers.fpp": [(800, 810)]} + cases = [FakeCase("HLLC_TST"), FakeCase("ROE_TEST")] + to_run, skipped = filter_tests_by_coverage(cases, cache, changed) + uuids_run = {c.get_uuid() for c in to_run} + assert "HLLC_TST" in uuids_run + assert "ROE_TEST" not in uuids_run + assert len(skipped) == 1 + + def test_test_covering_multiple_files_matched_via_second(self): + """Test matched because m_weno.fpp (its second covered file) was changed.""" + cache = {"AAAA0001": { + "src/simulation/m_rhs.fpp": [45], + "src/simulation/m_weno.fpp": [100], + }} + changed = {"src/simulation/m_weno.fpp": [(100, 100)]} + to_run, _ = filter_tests_by_coverage([FakeCase("AAAA0001")], cache, changed) + assert len(to_run) == 1 + + def test_no_changed_ranges_runs_nothing(self): + """Empty diff → nothing to cover → all tests skipped.""" + cache = {"AAAA0001": {"src/simulation/m_rhs.fpp": [45, 46]}} + changed = {} + to_run, skipped = filter_tests_by_coverage([FakeCase("AAAA0001")], cache, changed) + assert len(to_run) == 0 + assert len(skipped) == 1 + + def test_multiple_tests_correct_partial_selection(self): + """Only the test that covers line 46 should run.""" + cache = { + "TEST_A": {"src/simulation/m_rhs.fpp": [45, 46, 47]}, + "TEST_B": {"src/simulation/m_weno.fpp": [100, 200]}, + "TEST_C": {"src/simulation/m_rhs.fpp": [1000, 2000]}, + } + changed = {"src/simulation/m_rhs.fpp": [(46, 46)]} + cases = [FakeCase("TEST_A"), FakeCase("TEST_B"), FakeCase("TEST_C")] + to_run, skipped = filter_tests_by_coverage(cases, cache, changed) + uuids_run = {c.get_uuid() for c in to_run} + assert uuids_run == {"TEST_A"} + assert len(skipped) == 2 + + +# 
=========================================================================== +# Group 4: Corner cases from design discussion +# =========================================================================== + +class TestDesignCornerCases(unittest.TestCase): + + def test_gpu_ifdef_lines_not_in_cpu_coverage(self): + """ + m_muscl.fpp has `use openacc` inside #ifdef MFC_OpenACC at lines 16-18. + CPU build skips those lines → not in any test's coverage. + Changing lines 16-18 → 0 tests triggered. This is CORRECT behavior. + (The ALWAYS_RUN_ALL mechanism handles macro file changes separately.) + """ + cache = {"MUSCL_T": {"src/simulation/m_muscl.fpp": [20, 21, 22, 100, 200]}} + changed = {"src/simulation/m_muscl.fpp": [(16, 18)]} + to_run, skipped = filter_tests_by_coverage([FakeCase("MUSCL_T")], cache, changed) + assert len(to_run) == 0 + assert len(skipped) == 1 + + def test_gpu_cpu_code_in_same_file_triggers_correctly(self): + """Changing CPU code in the same file does trigger the test.""" + cache = {"MUSCL_T": {"src/simulation/m_muscl.fpp": [20, 21, 22, 100, 200]}} + changed = {"src/simulation/m_muscl.fpp": [(100, 100)]} + to_run, _ = filter_tests_by_coverage([FakeCase("MUSCL_T")], cache, changed) + assert len(to_run) == 1 + + def test_macro_file_triggers_all_via_should_run_all(self): + """parallel_macros.fpp in changed_paths → should_run_all_tests() is True.""" + changed_paths = {"src/common/include/parallel_macros.fpp"} + assert should_run_all_tests(changed_paths) is True + + def test_new_fpp_file_no_coverage_runs_nothing(self): + """ + Brand new .fpp file has no coverage in cache. + All tests are skipped (no test covers the new file). 
+ """ + cache = {"AAAA0001": {"src/simulation/m_rhs.fpp": [45]}} + changed = {"src/simulation/m_brand_new.fpp": [(1, 100)]} + to_run, skipped = filter_tests_by_coverage([FakeCase("AAAA0001")], cache, changed) + assert len(to_run) == 0 + assert len(skipped) == 1 + + def test_multiple_ranges_in_same_file_any_match_triggers(self): + """Multiple changed ranges in one file — if any overlaps, test runs.""" + cache = {"AAAA0001": {"src/simulation/m_rhs.fpp": [50, 51]}} + # Two ranges: [10,20] (no match) and [50,60] (match at 50,51) + changed = {"src/simulation/m_rhs.fpp": [(10, 20), (50, 60)]} + to_run, _ = filter_tests_by_coverage([FakeCase("AAAA0001")], cache, changed) + assert len(to_run) == 1 + + def test_empty_cache_runs_all_conservatively(self): + """Empty coverage cache → all tests included (conservative).""" + cache = {} + changed = {"src/simulation/m_rhs.fpp": [(45, 45)]} + cases = [FakeCase("T1"), FakeCase("T2"), FakeCase("T3")] + to_run, skipped = filter_tests_by_coverage(cases, cache, changed) + assert len(to_run) == 3 + assert len(skipped) == 0 + + +# =========================================================================== +# Group 5: _parse_gcov_json_output — gcov JSON parsing +# =========================================================================== + +class TestParseGcovJsonOutput(unittest.TestCase): + + def _make_gcov_json(self, files_data: list) -> bytes: + """Build a fake gzip-compressed gcov JSON blob.""" + data = { + "format_version": "2", + "gcc_version": "15.2.0", + "files": files_data, + } + return gzip.compress(json.dumps(data).encode()) + + def test_parses_fpp_file_correctly(self): + compressed = self._make_gcov_json([{ + "file": "/repo/src/simulation/m_rhs.fpp", + "lines": [ + {"line_number": 45, "count": 3}, + {"line_number": 46, "count": 0}, # not covered + {"line_number": 47, "count": 1}, + ], + }]) + result = _parse_gcov_json_output(compressed, "/repo") + assert "src/simulation/m_rhs.fpp" in result + assert 
result["src/simulation/m_rhs.fpp"] == [45, 47] + + def test_ignores_zero_count_lines(self): + compressed = self._make_gcov_json([{ + "file": "/repo/src/simulation/m_rhs.fpp", + "lines": [ + {"line_number": 10, "count": 0}, + {"line_number": 11, "count": 0}, + ], + }]) + result = _parse_gcov_json_output(compressed, "/repo") + assert result == {} + + def test_ignores_f90_files(self): + """Generated .f90 files must not appear in coverage output.""" + compressed = self._make_gcov_json([ + { + "file": "/repo/build/fypp/simulation/m_rhs.fpp.f90", + "lines": [{"line_number": 10, "count": 5}], + }, + { + "file": "/repo/src/simulation/m_rhs.fpp", + "lines": [{"line_number": 45, "count": 1}], + }, + ]) + result = _parse_gcov_json_output(compressed, "/repo") + assert "src/simulation/m_rhs.fpp" in result + assert all(k.endswith(".fpp") for k in result) + + def test_handles_invalid_gzip_gracefully(self): + result = _parse_gcov_json_output(b"not valid gzip", "/repo") + assert result == {} + + def test_handles_empty_files_list(self): + compressed = self._make_gcov_json([]) + result = _parse_gcov_json_output(compressed, "/repo") + assert result == {} + + def test_multiple_fpp_files_merged(self): + compressed = self._make_gcov_json([ + { + "file": "/repo/src/simulation/m_rhs.fpp", + "lines": [{"line_number": 45, "count": 1}], + }, + { + "file": "/repo/src/simulation/m_weno.fpp", + "lines": [{"line_number": 200, "count": 2}], + }, + ]) + result = _parse_gcov_json_output(compressed, "/repo") + assert "src/simulation/m_rhs.fpp" in result + assert "src/simulation/m_weno.fpp" in result + assert result["src/simulation/m_rhs.fpp"] == [45] + assert result["src/simulation/m_weno.fpp"] == [200] + + def test_lines_are_sorted(self): + compressed = self._make_gcov_json([{ + "file": "/repo/src/simulation/m_rhs.fpp", + "lines": [ + {"line_number": 100, "count": 1}, + {"line_number": 50, "count": 1}, + {"line_number": 75, "count": 1}, + ], + }]) + result = _parse_gcov_json_output(compressed, 
"/repo") + assert result["src/simulation/m_rhs.fpp"] == [50, 75, 100] + + +if __name__ == "__main__": + unittest.main() From 6e7f5bec13d7d101cd57abdf4aaa13f08c146f57 Mon Sep 17 00:00:00 2001 From: Spencer Bryngelson Date: Fri, 27 Feb 2026 01:47:03 -0500 Subject: [PATCH 2/2] Fix gcov coverage: raw JSON parsing, symlink paths, and test setup Three bugs found during integration testing on Phoenix: - gcov 12 outputs raw JSON, not gzip; added fallback parsing - Symlink paths (/storage/scratch1 vs /storage/home) caused relpath mismatch; use os.path.realpath() before computing relative paths - Missing case.delete_output()/create_directory() before case.run() caused silent test failures during cache build Co-Authored-By: Claude Opus 4.6 --- toolchain/mfc/test/coverage.py | 17 +++++++++++++---- toolchain/mfc/test/test_coverage_unit.py | 22 ++++++++++++++++++++-- 2 files changed, 33 insertions(+), 6 deletions(-) diff --git a/toolchain/mfc/test/coverage.py b/toolchain/mfc/test/coverage.py index 86beab6035..86b5c5def0 100644 --- a/toolchain/mfc/test/coverage.py +++ b/toolchain/mfc/test/coverage.py @@ -149,24 +149,31 @@ def zero_gcda_files(root_dir: str) -> None: pass -def _parse_gcov_json_output(compressed_bytes: bytes, root_dir: str) -> dict: +def _parse_gcov_json_output(raw_bytes: bytes, root_dir: str) -> dict: """ - Parse gzip-compressed gcov JSON output and return {rel_fpp_path: [covered_lines]}. + Parse gcov JSON output and return {rel_fpp_path: [covered_lines]}. + Handles both gzip-compressed (gcov 13+) and raw JSON (gcov 12) formats. Only .fpp files are included; .f90 and other files are skipped. Only lines with count > 0 are included. 
""" result = defaultdict(set) try: - data = json.loads(gzip.decompress(compressed_bytes)) + data = json.loads(gzip.decompress(raw_bytes)) + except (gzip.BadGzipFile, OSError): + try: + data = json.loads(raw_bytes) + except (json.JSONDecodeError, ValueError): + return {} except Exception: return {} + real_root = os.path.realpath(root_dir) for file_entry in data.get("files", []): file_path = file_entry.get("file", "") if not file_path.endswith(".fpp"): continue try: - rel_path = os.path.relpath(file_path, root_dir) + rel_path = os.path.relpath(os.path.realpath(file_path), real_root) except ValueError: rel_path = file_path for line in file_entry.get("lines", []): @@ -228,6 +235,8 @@ def build_coverage_cache(root_dir: str, cases: list) -> None: for i, case in enumerate(cases): zero_gcda_files(root_dir) try: + case.delete_output() + case.create_directory() case.run([PRE_PROCESS, SIMULATION], gpus=set()) except Exception: # Continue even if the test fails — we only need .gcda data diff --git a/toolchain/mfc/test/test_coverage_unit.py b/toolchain/mfc/test/test_coverage_unit.py index f4138d7256..3b0a855fca 100644 --- a/toolchain/mfc/test/test_coverage_unit.py +++ b/toolchain/mfc/test/test_coverage_unit.py @@ -527,8 +527,26 @@ def test_ignores_f90_files(self): assert "src/simulation/m_rhs.fpp" in result assert all(k.endswith(".fpp") for k in result) - def test_handles_invalid_gzip_gracefully(self): - result = _parse_gcov_json_output(b"not valid gzip", "/repo") + def test_handles_raw_json_gcov12(self): + """gcov 12 outputs raw JSON (not gzip). 
Must parse correctly.""" + data = { + "format_version": "1", + "gcc_version": "12.3.0", + "files": [{ + "file": "/repo/src/simulation/m_rhs.fpp", + "lines": [ + {"line_number": 45, "count": 3}, + {"line_number": 46, "count": 0}, + {"line_number": 47, "count": 1}, + ], + }], + } + raw = json.dumps(data).encode() + result = _parse_gcov_json_output(raw, "/repo") + assert result == {"src/simulation/m_rhs.fpp": [45, 47]} + + def test_handles_invalid_data_gracefully(self): + result = _parse_gcov_json_output(b"not valid gzip or json", "/repo") assert result == {} def test_handles_empty_files_list(self):