From 015ac928e4d871ac4befda40638871ae8509ecc0 Mon Sep 17 00:00:00 2001 From: Spencer Bryngelson Date: Thu, 26 Feb 2026 20:35:13 -0500 Subject: [PATCH 1/2] Replace regex dependency parser with gcov line-level coverage pruning Replaces the manually-maintained test_mapping.json and regex-based dependency_parser.py with a fully automated approach using gfortran gcov instrumentation. Key changes: - Add toolchain/mfc/test/coverage.py: build coverage cache by running all tests with a --gcov build, recording which .fpp line numbers each test executes; filter tests by intersecting git diff -U0 line ranges against per-test coverage data - Add --build-coverage-cache flag to ./mfc.sh test - Replace --only-changes implementation in test.py with coverage-based filter - Add --line-marker-format=gfortran5 to CMakeLists.txt Fypp invocation so gcov correctly maps coverage to .fpp line numbers (not .f90 line numbers) - Delete dependency_parser.py (530 lines) and test_mapping.json (148 lines) - Add 43 unit tests covering all corner cases: exact line hits, no-overlap, large-module line granularity, GPU #ifdef blocks, stale cache, new files Workflow: ./mfc.sh build --gcov -j 8 # one-time: build with coverage ./mfc.sh test --build-coverage-cache # one-time: populate cache (~2-3x time) ./mfc.sh test --only-changes -j 8 # fast: only tests covering changed lines Co-Authored-By: Claude Sonnet 4.6 --- .gitignore | 3 + CMakeLists.txt | 1 + COVERAGE_PLAN.md | 784 +++++++++++++++++++++++ toolchain/mfc/cli/commands.py | 25 + toolchain/mfc/test/coverage.py | 416 ++++++++++++ toolchain/mfc/test/test.py | 53 +- toolchain/mfc/test/test_coverage_unit.py | 570 ++++++++++++++++ 7 files changed, 1851 insertions(+), 1 deletion(-) create mode 100644 COVERAGE_PLAN.md create mode 100644 toolchain/mfc/test/coverage.py create mode 100644 toolchain/mfc/test/test_coverage_unit.py diff --git a/.gitignore b/.gitignore index e80d14a6f9..909f2d2d9b 100644 --- a/.gitignore +++ b/.gitignore @@ -22,6 +22,9 @@ 
__pycache__ # Auto-generated version file toolchain/mfc/_version.py +# Generated coverage cache (rebuild with: ./mfc.sh build --gcov -j 8 && ./mfc.sh test --build-coverage-cache) +toolchain/mfc/test/test_coverage_cache.json + # Auto-generated toolchain files (regenerate with: ./mfc.sh generate) toolchain/completions/mfc.bash toolchain/completions/_mfc diff --git a/CMakeLists.txt b/CMakeLists.txt index 9101d032fc..9cea0d7425 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -381,6 +381,7 @@ macro(HANDLE_SOURCES target useCommon) --no-folding --line-length=999 --line-numbering-mode=nocontlines + --line-marker-format=gfortran5 "${fpp}" "${f90}" DEPENDS "${fpp};${${target}_incs}" COMMENT "Preprocessing (Fypp) ${fpp_filename}" diff --git a/COVERAGE_PLAN.md b/COVERAGE_PLAN.md new file mode 100644 index 0000000000..0082345f7d --- /dev/null +++ b/COVERAGE_PLAN.md @@ -0,0 +1,784 @@ +# MFC Test Pruning: Line-Level gcov Coverage Implementation + +> **Handoff document.** Start a new Claude Code session on a Linux CI-like machine (gfortran, not Apple Silicon) and say: "Implement the plan in COVERAGE_PLAN.md". This file contains everything needed. + +--- + +## What We're Building and Why + +The `test-pruning` branch adds `--only-changes` to `./mfc.sh test` to skip unaffected tests on PRs. The current implementation uses a manually-maintained `test_mapping.json` (file → label → test) and a regex Fortran dependency parser. It has critical gaps that make it unreliable: + +- `#include` macro files (`parallel_macros.fpp` etc.) are invisible to the parser — changing them runs 0 tests +- 28 source files have no entry in `test_mapping.json` +- The `"Post Process"` label matches 0 actual test traces +- File-level granularity: changing 5 lines in 5,257-line `m_riemann_solvers.fpp` triggers ~300 tests + +**The replacement:** Line-level gcov coverage. Build MFC once with `gfortran --coverage`, run all tests individually, record which `.fpp` line numbers each test executes, cache it. 
On a PR, intersect `git diff -U0` line ranges against each test's covered lines — only tests with overlapping lines run. + +This is completely automated (no JSON to maintain), correct by construction (the compiler is the oracle), and line-granular (change 5 lines in a 5000-line file → only tests covering those 5 lines run). + +--- + +## Repository Context + +- Repo: MFC (Multi-component Flow Code) — exascale CFD solver in Fortran + Fypp +- Branch to modify: `test-pruning` +- Working directory: repo root +- 547 regression tests, generated programmatically in `toolchain/mfc/test/cases.py` +- Build system: CMake + Fypp preprocessor + `./mfc.sh` Python CLI wrapper +- Three executables: `pre_process`, `simulation`, `post_process` + +--- + +## What Already Exists (do not recreate) + +| What | Where | Notes | +|---|---|---| +| `MFC_GCov` CMake option | `CMakeLists.txt` lines 121–141 | Adds `-fprofile-arcs -ftest-coverage -O1` to all Fortran targets | +| `--gcov` CLI flag | `toolchain/mfc/build.py` line 430 | Maps to `-DMFC_GCov=ON` | +| `gcov: bool` field | `toolchain/mfc/state.py` | In `MFCConfig` dataclass, default `False` | +| Fypp `--line-numbering` | `CMakeLists.txt` lines 380–383 | Already emits line markers in `.f90` output | +| `--only-changes` arg | `toolchain/mfc/cli/commands.py` | Keep this arg, replace its implementation | +| `--changes-branch` arg | `toolchain/mfc/cli/commands.py` | Keep this arg | + +--- + +## Files to Create + +### `toolchain/mfc/test/coverage.py` (new, ~250 lines) + +The entire coverage system lives here. Implement these functions in order: + +#### `find_gcov_binary(root_dir: str) -> str` +``` +1. Run subprocess: gfortran --version, capture stdout +2. Regex: parse major version number (e.g., "GNU Fortran ... 15.2.0" → 15) +3. Try shutil.which("gcov-{major}") first, then shutil.which("gcov") +4. For each candidate: run `{candidate} --version`, check output contains "GCC" not "Apple LLVM" +5. Return first valid GNU gcov binary path +6. 
If none found: raise MFCException with message: + "GNU gcov not found. On Homebrew macOS: brew install gcc. On Linux: install gcc package." +``` + +#### `find_gcno_files(root_dir: str) -> list[Path]` +``` +1. Walk Path(root_dir) / "build" recursively +2. Collect all files matching "*.gcno" +3. Exclude any path containing "venv" +4. If empty list: raise MFCException: + "No .gcno files found. Build with --gcov first:\n ./mfc.sh build --gcov -j 8" +5. Return list of Path objects +``` + +#### `zero_gcda_files(root_dir: str) -> None` +``` +1. Walk Path(root_dir) / "build" recursively +2. Delete all "*.gcda" files (excluding venv paths) +3. Called before each test run during cache building +``` + +#### `collect_coverage_for_test(gcno_files: list, root_dir: str, gcov_binary: str) -> dict[str, list[int]]` +``` +1. For each gcno_file in gcno_files: + a. Run: subprocess([gcov_binary, "--json-format", "--stdout", str(gcno_file)], + capture_output=True, cwd=root_dir) + b. If returncode != 0 or stdout empty: continue (no .gcda = test didn't touch this translation unit) + c. Decompress: data = json.loads(gzip.decompress(proc.stdout)) + d. For each entry in data["files"]: + - file_path = entry["file"] + - If not file_path.endswith(".fpp"): skip (only want .fpp, not .f90 or .h) + - rel_path = os.path.relpath(file_path, root_dir) + - covered_lines = [ln["line_number"] for ln in entry["lines"] if ln["count"] > 0] + - Merge into result dict: result[rel_path].update(covered_lines) +2. Return {rel_fpp_path: sorted(list(lines_set))} +``` + +#### `build_coverage_cache(root_dir: str, cases: list) -> None` +``` +1. gcov_bin = find_gcov_binary(root_dir) +2. gcno_files = find_gcno_files(root_dir) +3. cons.print(f"Building coverage cache for {len(cases)} tests (sequential)...") +4. cons.print("Note: tests run sequentially to isolate per-test coverage. This takes ~2-3x normal time.") + +5. cache = {} +6. For i, case in enumerate(cases): + a. zero_gcda_files(root_dir) + b. 
case.run([PRE_PROCESS, SIMULATION], gpus=set()) + - Catch ALL exceptions and continue — we need .gcda data even if test fails + c. coverage = collect_coverage_for_test(gcno_files, root_dir, gcov_bin) + d. cache[case.get_uuid()] = coverage + e. total_lines = sum(len(v) for v in coverage.values()) + f. cons.print(f" [{i+1}/{len(cases)}] {case.get_uuid()}: {len(coverage)} files, {total_lines} lines") + +7. import hashlib, datetime + cases_py_path = Path(root_dir) / "toolchain/mfc/test/cases.py" + cases_hash = hashlib.sha256(cases_py_path.read_bytes()).hexdigest() + +8. cache["_meta"] = { + "created": datetime.datetime.utcnow().isoformat(), + "cases_hash": cases_hash, + "gcov_version": get_gcov_version(gcov_bin), # parse from --version output + } + +9. cache_path = Path(root_dir) / "toolchain/mfc/test/test_coverage_cache.json" + with open(cache_path, "w") as f: + json.dump(cache, f, indent=2) + cons.print(f"Coverage cache written to {cache_path}") + cons.print(f"Cache has {len(cases)} test entries.") +``` + +#### `load_coverage_cache(root_dir: str) -> dict | None` +``` +1. cache_path = Path(root_dir) / "toolchain/mfc/test/test_coverage_cache.json" +2. If not cache_path.exists(): return None + +3. with open(cache_path) as f: cache = json.load(f) + +4. Check staleness: + cases_py = Path(root_dir) / "toolchain/mfc/test/cases.py" + current_hash = hashlib.sha256(cases_py.read_bytes()).hexdigest() + stored_hash = cache.get("_meta", {}).get("cases_hash", "") + if current_hash != stored_hash: + cons.print("[yellow]Warning: Coverage cache is stale (cases.py changed).[/yellow]") + cons.print("[yellow]Rebuild with: ./mfc.sh test --build-coverage-cache[/yellow]") + return None + +5. Return cache +``` + +#### `get_changed_line_ranges(root_dir: str, compare_branch: str = "master") -> dict[str, list[tuple[int,int]]]` +``` +1. 
Get merge base: + result = subprocess.run( + ["git", "merge-base", compare_branch, "HEAD"], + capture_output=True, text=True, cwd=root_dir + ) + if result.returncode != 0: return {} + merge_base = result.stdout.strip() + +2. Get diff: + result = subprocess.run( + ["git", "diff", merge_base, "HEAD", "-U0", "--no-color"], + capture_output=True, text=True, cwd=root_dir + ) + +3. Parse output: + changed = defaultdict(list) + current_file = None + file_re = re.compile(r'^\+\+\+ b/(.+\.fpp)$') + hunk_re = re.compile(r'^@@ -\d+(?:,\d+)? \+(\d+)(?:,(\d+))? @@') + + for line in result.stdout.splitlines(): + m = file_re.match(line) + if m: + current_file = m.group(1) # e.g., "src/simulation/m_rhs.fpp" + continue + m = hunk_re.match(line) + if m and current_file: + start = int(m.group(1)) + count = int(m.group(2)) if m.group(2) is not None else 1 + if count > 0: # skip pure deletions (count == 0) + changed[current_file].append((start, start + count - 1)) + +4. Return dict(changed) + # Note: only .fpp files, only additions/modifications (not pure deletions) + # Note: uses merge-base, NOT master tip — correct for PRs where master has advanced +``` + +#### `should_run_all_tests(changed_rel_paths: set) -> bool` +```python +ALWAYS_RUN_ALL = frozenset([ + "src/common/include/parallel_macros.fpp", + "src/common/include/acc_macros.fpp", + "src/common/include/omp_macros.fpp", + "src/common/include/shared_parallel_macros.fpp", + "src/common/include/macros.fpp", + "toolchain/mfc/test/cases.py", + "toolchain/mfc/test/case.py", + "toolchain/mfc/params/definitions.py", +]) + +def should_run_all_tests(changed_rel_paths: set) -> bool: + return bool(changed_rel_paths & ALWAYS_RUN_ALL) +``` + +Rationale: CPU coverage can't detect GPU directive changes (macro files). Toolchain changes may add/remove tests or change test behavior. In both cases, run everything. 
+ +#### `filter_tests_by_coverage(cases, coverage_cache, changed_ranges) -> tuple[list, list]` +``` +to_run = [] +skipped = [] +new_file_warnings = set() + +for case in cases: + uuid = case.get_uuid() + test_cov = coverage_cache.get(uuid) + + if test_cov is None: + # Test not in cache (newly added) → conservative: include it + to_run.append(case) + continue + + matched = False + for fpp_file, ranges in changed_ranges.items(): + covered_lines = set(test_cov.get(fpp_file, [])) + if not covered_lines: + # Check if the file has no coverage in ANY test (truly new file) + if not any(fpp_file in tc for tc in coverage_cache.values() if isinstance(tc, dict)): + new_file_warnings.add(fpp_file) + continue + for (start, end) in ranges: + if covered_lines & set(range(start, end + 1)): + matched = True + break + if matched: + break + + if matched: + to_run.append(case) + else: + skipped.append(case) + +for f in new_file_warnings: + cons.print(f"[yellow]Warning: {f} has no coverage data in cache.[/yellow]") + cons.print(f"[yellow]Consider rebuilding: ./mfc.sh test --build-coverage-cache[/yellow]") + +return to_run, skipped +``` + +--- + +## Files to Modify + +### 1. `CMakeLists.txt` — add `--line-marker-format=gfortran5` + +Find the Fypp `add_custom_command` block (around line 368). Currently ends with: +```cmake + --line-numbering-mode=nocontlines + "${fpp}" "${f90}" +``` + +Change to: +```cmake + --line-numbering-mode=nocontlines + --line-marker-format=gfortran5 + "${fpp}" "${f90}" +``` + +**Why:** Fypp defaults to `std` format (`# line N "file"`). gfortran needs `gfortran5` format (`# N "file"`) to correctly map coverage data back to `.fpp` file coordinates. Without this, gcov may report coverage against `.f90` line numbers instead of `.fpp` line numbers. + +### 2. `toolchain/mfc/cli/commands.py` — add `--build-coverage-cache` + +Find `TEST_COMMAND` definition. 
After the `--dry-run` argument (around line 452), add: + +```python + Argument( + name="build-coverage-cache", + help="Run all tests sequentially with gcov instrumentation to build the line-level coverage cache. Requires a prior --gcov build.", + action=ArgAction.STORE_TRUE, + default=False, + dest="build_coverage_cache", + ), +``` + +In the `examples=` list, add: +```python + Example("./mfc.sh build --gcov -j 8 && ./mfc.sh test --build-coverage-cache", "One-time: build line-coverage cache"), + Example("./mfc.sh test --only-changes -j 4", "Run tests affected by changed lines"), +``` + +In `key_options=`, add: +```python + ("--build-coverage-cache", "Build line-level gcov coverage cache (one-time)"), + ("--only-changes", "Run tests affected by git changes"), +``` + +### 3. `toolchain/mfc/test/test.py` — replace old implementation + +**Remove line 18:** +```python +from .dependency_parser import analyze_changes, any_label_matches_trace_parts +``` + +**Remove lines 20–21:** +```python +# Cached dependency analysis results (computed once at startup if --only-changes) +_dependency_analysis_cache = None +``` + +**In `test()` function, after the `remove_old_tests` block (after line ~191), add:** +```python + if ARG("build_coverage_cache"): + from .coverage import build_coverage_cache + all_cases = [b.to_case() for b in list_cases()] + build_coverage_cache(common.MFC_ROOT_DIR, all_cases) + return +``` + +**Replace the entire `--only-changes` block in `__filter()` (lines 82–138):** + +Remove everything from `# --only-changes: filter based on Fortran dependency analysis` through the closing `cons.print()` at line 138. 
+ +Replace with: + +```python + # --only-changes: filter based on line-level gcov coverage + if ARG("only_changes"): + from .coverage import (load_coverage_cache, get_changed_line_ranges, + should_run_all_tests, filter_tests_by_coverage) + + cache = load_coverage_cache(common.MFC_ROOT_DIR) + if cache is None: + cons.print("[yellow]Coverage cache missing or stale.[/yellow]") + cons.print("[yellow]Run: ./mfc.sh build --gcov -j 8 && ./mfc.sh test --build-coverage-cache[/yellow]") + cons.print("[yellow]Falling back to full test suite.[/yellow]") + # cache is None → fall through, run all tests + else: + changed_ranges = get_changed_line_ranges(common.MFC_ROOT_DIR, ARG("changes_branch")) + changed_paths = set(changed_ranges.keys()) + + cons.print() + cons.print("[bold cyan]Line-Coverage Change Analysis[/bold cyan]") + cons.print("-" * 50) + + if should_run_all_tests(changed_paths): + cons.print("[yellow]Infrastructure or macro file changed — running full test suite.[/yellow]") + cons.print("-" * 50) + # fall through — don't filter cases + elif not changed_ranges: + cons.print("[green]No .fpp source changes detected — skipping all tests.[/green]") + cons.print("-" * 50) + cons.print() + skipped_cases += cases + cases = [] + else: + for fpp_file, ranges in sorted(changed_ranges.items()): + total_lines = sum(e - s + 1 for s, e in ranges) + cons.print(f" [green]*[/green] {fpp_file} ({total_lines} line{'s' if total_lines != 1 else ''} changed)") + + cases, new_skipped = filter_tests_by_coverage(cases, cache, changed_ranges) + skipped_cases += new_skipped + cons.print(f"\n[bold]Tests to run: {len(cases)} / {len(cases) + len(new_skipped)}[/bold]") + cons.print("-" * 50) + cons.print() +``` + +**Update the pylint disable comment** on `__filter` to keep existing suppressions. 
+ +--- + +## Files to Delete + +```bash +git rm toolchain/mfc/test/dependency_parser.py +git rm toolchain/mfc/test/test_mapping.json +``` + +--- + +## Files to Add to `.gitignore` + +Add this line to `.gitignore` (the cache is generated locally, not committed): +``` +toolchain/mfc/test/test_coverage_cache.json +``` + +--- + +## New File: `toolchain/mfc/test/test_coverage_unit.py` + +Create this file to hold all unit tests. Run with: +```bash +python3 -m pytest toolchain/mfc/test/test_coverage_unit.py -v +``` + +### Group 1: `get_changed_line_ranges()` — git diff parsing + +Test by providing mock diff output strings directly to the parsing logic (extract the parsing into a helper `_parse_diff_output(diff_text, root_dir)` that `get_changed_line_ranges` calls after running git). + +```python +def test_parse_basic_modification(): + diff = textwrap.dedent("""\ + diff --git a/src/simulation/m_rhs.fpp b/src/simulation/m_rhs.fpp + --- a/src/simulation/m_rhs.fpp + +++ b/src/simulation/m_rhs.fpp + @@ -45,3 +45,5 @@ + + ! new line 1 + + ! new line 2 + """) + result = _parse_diff_output(diff, "/repo") + assert result == {"src/simulation/m_rhs.fpp": [(45, 49)]} + +def test_parse_multiple_hunks_same_file(): + # Two separate hunks in one file + # Expected: both ranges captured as separate tuples + +def test_parse_pure_deletion(): + # @@ -45,3 +45,0 @@ — 3 lines deleted, 0 added + # count=0 → NOT in result (nothing to cover) + diff = "... @@ -45,3 +45,0 @@ ..." 
+ result = _parse_diff_output(diff, "/repo") + assert result == {} + +def test_parse_new_file(): + # --- /dev/null +++ b/src/simulation/m_new.fpp + # @@ -0,0 +1,50 @@ + result = _parse_diff_output(diff, "/repo") + assert result == {"src/simulation/m_new.fpp": [(1, 50)]} + +def test_parse_ignores_non_fpp(): + # Changes to .py, .json, .cmake, .md + result = _parse_diff_output(diff_with_only_non_fpp, "/repo") + assert result == {} + +def test_parse_single_line_hunk(): + # @@ -45 +45 @@ (no count = means 1 line) + result = _parse_diff_output(diff, "/repo") + assert result == {"src/simulation/m_rhs.fpp": [(45, 45)]} + +def test_parse_empty_diff(): + assert _parse_diff_output("", "/repo") == {} +``` + +### Group 2: `should_run_all_tests()` — always-run detection + +```python +def test_parallel_macros_triggers_all(): + assert should_run_all_tests({"src/common/include/parallel_macros.fpp"}) is True + +def test_acc_macros_triggers_all(): + assert should_run_all_tests({"src/common/include/acc_macros.fpp"}) is True + +def test_cases_py_triggers_all(): + assert should_run_all_tests({"toolchain/mfc/test/cases.py"}) is True + +def test_definitions_py_triggers_all(): + assert should_run_all_tests({"toolchain/mfc/params/definitions.py"}) is True + +def test_simulation_module_does_not_trigger_all(): + assert should_run_all_tests({"src/simulation/m_rhs.fpp"}) is False + +def test_empty_set_does_not_trigger_all(): + assert should_run_all_tests(set()) is False + +def test_mixed_one_trigger_fires_all(): + # One normal + one always-run → True + assert should_run_all_tests({ + "src/simulation/m_rhs.fpp", + "src/common/include/macros.fpp" + }) is True +``` + +### Group 3: `filter_tests_by_coverage()` — core logic + +These are the most important tests. Use mock TestCase objects with a known `get_uuid()`. 
+ +```python +# Helper: create a minimal fake case with a given uuid +class FakeCase: + def __init__(self, uuid): self._uuid = uuid + def get_uuid(self): return self._uuid + +def test_exact_line_hit(): + cache = {"AAAA0001": {"src/simulation/m_rhs.fpp": [45, 46, 47]}} + changed = {"src/simulation/m_rhs.fpp": [(46, 46)]} + cases = [FakeCase("AAAA0001")] + to_run, skipped = filter_tests_by_coverage(cases, cache, changed) + assert len(to_run) == 1 and len(skipped) == 0 + +def test_no_line_overlap(): + # *** KEY TEST: this is what makes line-level better than file-level *** + cache = {"AAAA0001": {"src/simulation/m_rhs.fpp": [45, 46, 47]}} + changed = {"src/simulation/m_rhs.fpp": [(500, 510)]} + cases = [FakeCase("AAAA0001")] + to_run, skipped = filter_tests_by_coverage(cases, cache, changed) + assert len(to_run) == 0 and len(skipped) == 1 + +def test_range_boundary_inclusive_start(): + cache = {"AAAA0001": {"src/simulation/m_rhs.fpp": [100]}} + changed = {"src/simulation/m_rhs.fpp": [(100, 105)]} + to_run, _ = filter_tests_by_coverage([FakeCase("AAAA0001")], cache, changed) + assert len(to_run) == 1 + +def test_range_boundary_inclusive_end(): + cache = {"AAAA0001": {"src/simulation/m_rhs.fpp": [105]}} + changed = {"src/simulation/m_rhs.fpp": [(100, 105)]} + to_run, _ = filter_tests_by_coverage([FakeCase("AAAA0001")], cache, changed) + assert len(to_run) == 1 + +def test_range_just_outside(): + cache = {"AAAA0001": {"src/simulation/m_rhs.fpp": [99, 106]}} + changed = {"src/simulation/m_rhs.fpp": [(100, 105)]} + to_run, skipped = filter_tests_by_coverage([FakeCase("AAAA0001")], cache, changed) + assert len(to_run) == 0 and len(skipped) == 1 # 99 and 106 are outside [100,105] + +def test_uuid_not_in_cache_is_conservative(): + # New test not in cache → include it (conservative) + cache = {} + changed = {"src/simulation/m_rhs.fpp": [(45, 45)]} + to_run, _ = filter_tests_by_coverage([FakeCase("NEWTEST1")], cache, changed) + assert len(to_run) == 1 + +def 
test_changed_file_not_in_test_coverage(): + cache = {"AAAA0001": {"src/simulation/m_rhs.fpp": [45, 46]}} + changed = {"src/simulation/m_igr.fpp": [(100, 110)]} # different file + to_run, skipped = filter_tests_by_coverage([FakeCase("AAAA0001")], cache, changed) + assert len(to_run) == 0 and len(skipped) == 1 + +def test_large_module_line_granularity(): + # *** KEY CORNER CASE from design discussion *** + # m_riemann_solvers.fpp is 5257 lines. + # test_hllc covers lines 800-810, test_roe covers lines 2000+ + # Change at line 800 → only hllc test runs + cache = { + "HLLC_TST": {"src/simulation/m_riemann_solvers.fpp": [798, 799, 800, 801, 810]}, + "ROE_TEST": {"src/simulation/m_riemann_solvers.fpp": [2000, 2100, 2500]}, + } + changed = {"src/simulation/m_riemann_solvers.fpp": [(800, 810)]} + cases = [FakeCase("HLLC_TST"), FakeCase("ROE_TEST")] + to_run, skipped = filter_tests_by_coverage(cases, cache, changed) + uuids_run = {c.get_uuid() for c in to_run} + assert "HLLC_TST" in uuids_run + assert "ROE_TEST" not in uuids_run + +def test_test_covering_multiple_files_matched_via_second(): + cache = {"AAAA0001": { + "src/simulation/m_rhs.fpp": [45], + "src/simulation/m_weno.fpp": [100], + }} + changed = {"src/simulation/m_weno.fpp": [(100, 100)]} + to_run, _ = filter_tests_by_coverage([FakeCase("AAAA0001")], cache, changed) + assert len(to_run) == 1 + +def test_no_changed_ranges_runs_nothing(): + cache = {"AAAA0001": {"src/simulation/m_rhs.fpp": [45, 46]}} + changed = {} + to_run, skipped = filter_tests_by_coverage([FakeCase("AAAA0001")], cache, changed) + assert len(to_run) == 0 and len(skipped) == 1 + +def test_multiple_tests_correct_partial_selection(): + cache = { + "TEST_A": {"src/simulation/m_rhs.fpp": [45, 46, 47]}, + "TEST_B": {"src/simulation/m_weno.fpp": [100, 200]}, + "TEST_C": {"src/simulation/m_rhs.fpp": [1000, 2000]}, + } + changed = {"src/simulation/m_rhs.fpp": [(46, 46)]} + cases = [FakeCase("TEST_A"), FakeCase("TEST_B"), FakeCase("TEST_C")] + to_run, 
skipped = filter_tests_by_coverage(cases, cache, changed) + uuids_run = {c.get_uuid() for c in to_run} + assert uuids_run == {"TEST_A"} # only TEST_A covers line 46 +``` + +### Group 4: Corner cases from design discussion + +```python +def test_gpu_ifdef_lines_not_in_cpu_coverage(): + # m_muscl.fpp has `use openacc` inside #ifdef MFC_OpenACC at lines 16-18. + # CPU build skips those lines. Changing line 17 → NOT in any test's coverage. + # → 0 tests triggered. This is CORRECT: GPU-only lines need GPU tests or macro-file handling. + cache = {"MUSCL_T": {"src/simulation/m_muscl.fpp": [20, 21, 22, 100, 200]}} + changed = {"src/simulation/m_muscl.fpp": [(16, 18)]} # the #ifdef block + to_run, skipped = filter_tests_by_coverage([FakeCase("MUSCL_T")], cache, changed) + # Lines 16-18 not in CPU coverage [20,21,22,100,200] → test skipped + assert len(to_run) == 0 and len(skipped) == 1 + # Note: if parallel_macros.fpp changed, should_run_all_tests() catches it separately + +def test_gpu_cpu_code_in_same_file_triggers_correctly(): + # But if we change actual CPU code in m_muscl.fpp (line 100), test runs + cache = {"MUSCL_T": {"src/simulation/m_muscl.fpp": [20, 21, 22, 100, 200]}} + changed = {"src/simulation/m_muscl.fpp": [(100, 100)]} + to_run, _ = filter_tests_by_coverage([FakeCase("MUSCL_T")], cache, changed) + assert len(to_run) == 1 + +def test_stale_cache_detected_by_hash(): + # load_coverage_cache() returns None when cases.py hash doesn't match + import tempfile, json, hashlib + with tempfile.NamedTemporaryFile(mode='w', suffix='.json', delete=False) as f: + json.dump({ + "_meta": {"cases_hash": "wrong_hash_abc123"}, + "AAAA0001": {} + }, f) + cache_path = f.name + # Patch the cache path and call load_coverage_cache + # Verify it returns None (stale) + ... + +def test_new_file_with_no_coverage_warns(): + # A brand new .fpp file in git diff has no coverage in cache. + # filter_tests_by_coverage should warn but not crash. 
+ cache = {"AAAA0001": {"src/simulation/m_rhs.fpp": [45]}} + changed = {"src/simulation/m_brand_new.fpp": [(1, 100)]} + # m_brand_new.fpp not in any test's coverage + # Result: test skipped, warning printed + to_run, skipped = filter_tests_by_coverage([FakeCase("AAAA0001")], cache, changed) + assert len(to_run) == 0 # no test covers the new file +``` + +### Group 5: `collect_coverage_for_test()` — gcov JSON parsing + +```python +def test_parses_gcov_json_correctly(): + import gzip, json + # Build mock gcov JSON (format version 2) + gcov_data = { + "format_version": "2", + "gcc_version": "15.2.0", + "files": [ + { + "file": "/repo/src/simulation/m_rhs.fpp", + "lines": [ + {"line_number": 45, "count": 3}, + {"line_number": 46, "count": 0}, # not covered + {"line_number": 47, "count": 1}, + ] + }, + { + "file": "/repo/build/fypp/simulation/m_rhs.fpp.f90", # .f90, should be skipped + "lines": [{"line_number": 10, "count": 5}] + } + ] + } + compressed = gzip.compress(json.dumps(gcov_data).encode()) + # Mock subprocess to return this compressed data + # Call collect_coverage_for_test with mocked gcov + # Verify: only m_rhs.fpp (the .fpp file) appears, lines 45 and 47 (count>0) + result = _parse_gcov_json_output(compressed, "/repo") + assert result == {"src/simulation/m_rhs.fpp": [45, 47]} + +def test_ignores_zero_count_lines(): + # Verify only lines with count > 0 appear + ... + +def test_handles_no_gcda_gracefully(): + # When gcov returns non-zero or empty stdout (no .gcda file yet) + # collect_coverage_for_test should return {} not crash + ... +``` + +### Group 6: `find_gcov_binary()` + +```python +def test_finds_versioned_gcov(monkeypatch): + # Mock: gfortran --version → "GNU Fortran (Homebrew GCC 15.2.0) 15.2.0" + # Mock: gcov-15 in PATH, its --version contains "GCC" + # Expected: returns "gcov-15" + ... 
+ +def test_rejects_apple_llvm_gcov(monkeypatch): + # Mock: gfortran → GCC 15, gcov-15 not in PATH + # Mock: plain gcov --version → "Apple LLVM version 17.0.0" + # Expected: raises MFCException with helpful message + ... + +def test_falls_back_to_plain_gnu_gcov(monkeypatch): + # Mock: gcov-15 not in PATH, plain gcov is GNU + # Expected: returns "gcov" + ... +``` + +--- + +## Integration Verification (run on CI machine after implementing) + +```bash +# Step 1: Build with gcov instrumentation +./mfc.sh build --gcov -j 8 +# Verify .gcno files exist: +find build/ -name "*.gcno" -not -path "*/venv/*" | wc -l +# Expected: ~100+ files + +# Step 2: Build coverage cache +./mfc.sh test --build-coverage-cache +# Expected: runs ~547 tests sequentially, prints progress, writes cache + +# Step 3: Sanity-check the cache +python3 -c " +import json +with open('toolchain/mfc/test/test_coverage_cache.json') as f: + cache = json.load(f) +tests = {k: v for k, v in cache.items() if k != '_meta'} +print(f'Tests in cache: {len(tests)}') +assert len(tests) > 540, f'Too few tests: {len(tests)}' +for uuid, cov in tests.items(): + assert isinstance(cov, dict), f'{uuid}: coverage not a dict' + for fpp, lines in cov.items(): + assert fpp.endswith('.fpp'), f'{uuid}: non-.fpp file {fpp}' + assert len(lines) > 0, f'{uuid}: {fpp} has no lines' +print('Cache looks correct.') +print('Sample:', list(tests.keys())[:3]) +" + +# Step 4: Verify line-level selection works +# Change a single line in m_igr.fpp (a specific feature file) +sed -i '50s/$/ ! 
test change/' src/simulation/m_igr.fpp +./mfc.sh test --only-changes --dry-run 2>&1 | grep "Tests to run" +# Expected: ~16 tests (IGR-related), not 547 or 300+ +git checkout src/simulation/m_igr.fpp + +# Step 5: Verify large module line granularity +# Find a non-default code path in m_riemann_solvers.fpp +# (e.g., Roe solver is at specific lines, HLLC at other lines) +# Change 1 line in the Roe solver section +# Verify: only Roe tests run, not HLLC tests +# (Compare output of --dry-run before and after targeted changes) + +# Step 6: Verify macro file triggers all tests +sed -i '1s/$/ ! test/' src/common/include/parallel_macros.fpp +./mfc.sh test --only-changes --dry-run 2>&1 | grep "Tests to run" +# Expected: all 547 tests (infrastructure change detected) +git checkout src/common/include/parallel_macros.fpp + +# Step 7: Verify stale cache detection +# Temporarily modify cases.py (add a comment, don't break it) +echo "# temp" >> toolchain/mfc/test/cases.py +./mfc.sh test --only-changes 2>&1 | grep "stale\|Warning" +# Expected: warning about stale cache, falls back to full suite +git checkout toolchain/mfc/test/cases.py +``` + +--- + +## Design Decisions (preserve these) + +1. **Line-level, not file-level.** Changing 5 lines in a 5000-line file triggers only tests covering those 5 lines. This is the core value over file-level coverage. + +2. **Merge-base diff.** Use `git merge-base {branch} HEAD` as diff base, not master's current tip. If master advanced after you branched, you should not see master's changes as "your changes." + +3. **Conservative on unknown tests.** Test UUID not in cache (newly added) → include it, don't silently skip. + +4. **`ALWAYS_RUN_ALL` for GPU macro files and toolchain files.** CPU coverage can't detect GPU directive changes. Macro files and test-suite-defining files always run everything. + +5. **Stale cache → full suite fallback, not hard failure.** If `cases.py` changed, warn and run all tests. Don't block the developer. + +6. 
**Pure deletions don't trigger tests.** `@@ -45,3 +45,0 @@` means 0 new lines → nothing to cover → skip. + +7. **Sequential cache building.** Multiple test processes write to the same `.gcda` files (shared binary). Zero `.gcda` before each test and run one at a time. It's slow (~2-3x) but it's a one-time operation. + +8. **GPU-only `#ifdef` lines are not in CPU coverage.** This is expected. Lines inside `#ifdef MFC_OpenACC` aren't executed in a CPU build. The `ALWAYS_RUN_ALL` mechanism handles macro file changes. GPU-specific compute code doesn't use `#ifdef` in MFC (it uses Fypp macros that expand to nothing on CPU). + +--- + +## What to Delete + +```bash +git rm toolchain/mfc/test/dependency_parser.py # 530 lines of regex Fortran parsing +git rm toolchain/mfc/test/test_mapping.json # 148 lines of manual file→label mapping +``` + +Both are replaced by the automated `coverage.py` approach. + +--- + +## Imports Needed in `coverage.py` + +```python +import os, re, json, gzip, shutil, hashlib, subprocess, datetime +from pathlib import Path +from collections import defaultdict +from typing import Optional + +from ..printer import cons +from .. import common +from ..common import MFCException +from ..build import PRE_PROCESS, SIMULATION +``` diff --git a/toolchain/mfc/cli/commands.py b/toolchain/mfc/cli/commands.py index 8ad8c4bd07..53f51e3d5e 100644 --- a/toolchain/mfc/cli/commands.py +++ b/toolchain/mfc/cli/commands.py @@ -452,6 +452,27 @@ default=False, dest="dry_run", ), + Argument( + name="build-coverage-cache", + help="Run all tests sequentially with gcov instrumentation to build the line-level coverage cache. 
Requires a prior --gcov build: ./mfc.sh build --gcov -j 8", + action=ArgAction.STORE_TRUE, + default=False, + dest="build_coverage_cache", + ), + Argument( + name="only-changes", + help="Only run tests whose covered lines overlap with lines changed since branching from master (uses line-level gcov coverage cache).", + action=ArgAction.STORE_TRUE, + default=False, + dest="only_changes", + ), + Argument( + name="changes-branch", + help="Branch to compare against for --only-changes (default: master).", + type=str, + default="master", + dest="changes_branch", + ), ], mutually_exclusive=[ MutuallyExclusiveGroup(arguments=[ @@ -482,6 +503,8 @@ Example("./mfc.sh test -j 4", "Run with 4 parallel jobs"), Example("./mfc.sh test --only 3D", "Run only 3D tests"), Example("./mfc.sh test --generate", "Regenerate golden files"), + Example("./mfc.sh test --only-changes -j 4", "Run tests affected by changed lines"), + Example("./mfc.sh build --gcov -j 8 && ./mfc.sh test --build-coverage-cache", "One-time: build line-coverage cache"), ], key_options=[ ("-j, --jobs N", "Number of parallel test jobs"), @@ -489,6 +512,8 @@ ("-f, --from UUID", "Start from specific test"), ("--generate", "Generate/update golden files"), ("--no-build", "Skip rebuilding MFC"), + ("--build-coverage-cache", "Build line-level gcov coverage cache (one-time)"), + ("--only-changes", "Run tests affected by changed lines (requires cache)"), ], ) diff --git a/toolchain/mfc/test/coverage.py b/toolchain/mfc/test/coverage.py new file mode 100644 index 0000000000..86beab6035 --- /dev/null +++ b/toolchain/mfc/test/coverage.py @@ -0,0 +1,416 @@ +""" +Line-level gcov coverage-based test pruning for MFC. + +This module replaces the regex-based dependency_parser.py with an instrumentation +approach: build MFC once with gfortran --coverage, run all tests individually, +record which .fpp line numbers each test executes, and cache that mapping. 
+ +When files change on a PR, intersect the changed line ranges (from git diff -U0) +against each test's covered lines. Only tests with overlapping lines run. + +Workflow: + ./mfc.sh build --gcov -j 8 # one-time: build with coverage + ./mfc.sh test --build-coverage-cache # one-time: populate the cache + ./mfc.sh test --only-changes -j 8 # fast: run only affected tests +""" + +import os +import re +import json +import gzip +import shutil +import hashlib +import subprocess +import datetime +from pathlib import Path +from collections import defaultdict +from typing import Optional + +from ..printer import cons +from .. import common +from ..common import MFCException +from ..build import PRE_PROCESS, SIMULATION + + +COVERAGE_CACHE_PATH = Path(common.MFC_ROOT_DIR) / "toolchain/mfc/test/test_coverage_cache.json" + +# Changes to these files trigger the full test suite. +# CPU coverage cannot tell us about GPU directive changes (macro files), and +# toolchain files define or change the set of tests themselves. +ALWAYS_RUN_ALL = frozenset([ + "src/common/include/parallel_macros.fpp", + "src/common/include/acc_macros.fpp", + "src/common/include/omp_macros.fpp", + "src/common/include/shared_parallel_macros.fpp", + "src/common/include/macros.fpp", + "toolchain/mfc/test/cases.py", + "toolchain/mfc/test/case.py", + "toolchain/mfc/params/definitions.py", +]) + + +def _get_gcov_version(gcov_binary: str) -> str: + """Return the version string from gcov --version.""" + try: + result = subprocess.run( + [gcov_binary, "--version"], + capture_output=True, text=True, timeout=10, check=False + ) + for line in result.stdout.splitlines(): + if line.strip(): + return line.strip() + except Exception: + pass + return "unknown" + + +def find_gcov_binary(_root_dir: str = "") -> str: # pylint: disable=unused-argument + """ + Find a GNU gcov binary compatible with the system gfortran. + + On macOS with Homebrew GCC, the binary is gcov-{major} (e.g. gcov-15). 
+ On Linux with system GCC, plain gcov is usually correct. + Apple LLVM's /usr/bin/gcov is incompatible with gfortran .gcda files. + """ + # Determine gfortran major version + major = None + try: + result = subprocess.run( + ["gfortran", "--version"], + capture_output=True, text=True, timeout=10, check=False + ) + m = re.search(r'(\d+)\.\d+\.\d+', result.stdout) + if m: + major = m.group(1) + except Exception: + pass + + # Try versioned binary first (Homebrew macOS), then plain gcov + candidates = [] + if major: + candidates.append(f"gcov-{major}") + candidates.append("gcov") + + for candidate in candidates: + path = shutil.which(candidate) + if path is None: + continue + try: + result = subprocess.run( + [path, "--version"], + capture_output=True, text=True, timeout=10, check=False + ) + version_out = result.stdout + if "Apple LLVM" in version_out or "Apple clang" in version_out: + continue # Apple's gcov cannot parse GCC-generated .gcda files + if "GCC" in version_out or "GNU" in version_out: + return path + except Exception: + continue + + raise MFCException( + "GNU gcov not found. gcov is required for the coverage cache.\n" + " On macOS (Homebrew): brew install gcc\n" + " On Linux (Debian/Ubuntu): apt install gcc\n" + " On Linux (RHEL/CentOS): yum install gcc\n" + "Apple's /usr/bin/gcov is incompatible with gfortran .gcda files." + ) + + +def find_gcno_files(root_dir: str) -> list: + """ + Walk build/ and return all .gcno files (excluding venv paths). + Raises if none found (indicates build was not done with --gcov). + """ + build_dir = Path(root_dir) / "build" + gcno_files = [ + p for p in build_dir.rglob("*.gcno") + if "venv" not in p.parts + ] + if not gcno_files: + raise MFCException( + "No .gcno files found. Build with --gcov instrumentation first:\n" + " ./mfc.sh build --gcov -j 8" + ) + return gcno_files + + +def zero_gcda_files(root_dir: str) -> None: + """ + Delete all .gcda files under build/ (excluding venv). 
+    Called before each test run during cache building to isolate per-test coverage.
+    """
+    build_dir = Path(root_dir) / "build"
+    for gcda in build_dir.rglob("*.gcda"):
+        if "venv" not in gcda.parts:
+            try:
+                gcda.unlink()
+            except OSError:
+                pass
+
+
+def _parse_gcov_json_output(compressed_bytes: bytes, root_dir: str) -> dict:
+    """
+    Parse gcov JSON output (gzip-compressed or plain) and return
+    {rel_fpp_path: [covered_lines]}.
+    Only .fpp files are included; .f90 and other files are skipped.
+    Only lines with count > 0 are included.
+    """
+    result = defaultdict(set)
+    try:
+        # gcov writes gzip-compressed .gcov.json.gz files on disk, but with
+        # --json-format --stdout (as used by collect_coverage_for_test) it
+        # emits *plain* JSON. Sniff the gzip magic bytes and accept both;
+        # anything unparseable yields {} so a bad TU never aborts the scan.
+        if compressed_bytes[:2] == b"\x1f\x8b":
+            compressed_bytes = gzip.decompress(compressed_bytes)
+        data = json.loads(compressed_bytes)
+    except Exception:
+        return {}
+
+    for file_entry in data.get("files", []):
+        file_path = file_entry.get("file", "")
+        if not file_path.endswith(".fpp"):
+            continue
+        try:
+            rel_path = os.path.relpath(file_path, root_dir)
+        except ValueError:
+            rel_path = file_path
+        for line in file_entry.get("lines", []):
+            if line.get("count", 0) > 0:
+                result[rel_path].add(line["line_number"])
+
+    return {k: sorted(v) for k, v in result.items()}
+
+
+def collect_coverage_for_test(gcno_files: list, root_dir: str, gcov_binary: str) -> dict:
+    """
+    Run gcov on all .gcno files and collect covered .fpp lines.
+
+    Assumes the test has already run and .gcda files are present next to .gcno files.
+    Returns {rel_fpp_path: sorted_list_of_covered_line_numbers}.
+ """ + merged = defaultdict(set) + for gcno_file in gcno_files: + try: + proc = subprocess.run( + [gcov_binary, "--json-format", "--stdout", str(gcno_file)], + capture_output=True, cwd=root_dir, timeout=60, check=False + ) + except subprocess.TimeoutExpired: + continue + except Exception: + continue + + if proc.returncode != 0 or not proc.stdout: + # No .gcda for this translation unit — test didn't touch it + continue + + partial = _parse_gcov_json_output(proc.stdout, root_dir) + for fpp_path, lines in partial.items(): + merged[fpp_path].update(lines) + + return {k: sorted(v) for k, v in merged.items()} + + +def build_coverage_cache(root_dir: str, cases: list) -> None: + """ + Build the line-level coverage cache by running all tests sequentially. + + Each test is run individually with .gcda files zeroed beforehand to isolate + per-test coverage. This takes ~2-3x normal test time but is a one-time operation. + + Requires a prior --gcov build: ./mfc.sh build --gcov -j 8 + """ + gcov_bin = find_gcov_binary(root_dir) + gcno_files = find_gcno_files(root_dir) + + cons.print(f"[bold]Building coverage cache for {len(cases)} tests (sequential)...[/bold]") + cons.print("[dim]Tests run one at a time to isolate per-test coverage. 
This takes ~2-3x normal time.[/dim]")
+    cons.print(f"[dim]Using gcov binary: {gcov_bin}[/dim]")
+    cons.print(f"[dim]Found {len(gcno_files)} .gcno files[/dim]")
+    cons.print()
+
+    cache: dict = {}
+    for i, case in enumerate(cases):
+        zero_gcda_files(root_dir)
+        try:
+            case.run([PRE_PROCESS, SIMULATION], gpus=set())
+        except Exception:
+            # Continue even if the test fails — we only need .gcda data
+            pass
+
+        coverage = collect_coverage_for_test(gcno_files, root_dir, gcov_bin)
+        cache[case.get_uuid()] = coverage
+
+        total_lines = sum(len(v) for v in coverage.values())
+        cons.print(
+            f"  [{i+1:3d}/{len(cases):3d}] "
+            f"[magenta]{case.get_uuid()}[/magenta]: "
+            f"{len(coverage)} files, {total_lines} lines"
+        )
+
+    cases_py_path = Path(root_dir) / "toolchain/mfc/test/cases.py"
+    cases_hash = hashlib.sha256(cases_py_path.read_bytes()).hexdigest()
+    gcov_version = _get_gcov_version(gcov_bin)
+
+    cache["_meta"] = {
+        # Timezone-aware UTC stamp; datetime.utcnow() is naive and deprecated (3.12+).
+        "created": datetime.datetime.now(datetime.timezone.utc).isoformat(),
+        "cases_hash": cases_hash,
+        "gcov_version": gcov_version,
+    }
+
+    with open(COVERAGE_CACHE_PATH, "w", encoding="utf-8") as f:
+        json.dump(cache, f, indent=2)
+
+    cons.print()
+    cons.print(f"[bold green]Coverage cache written to {COVERAGE_CACHE_PATH}[/bold green]")
+    cons.print(f"[dim]Cache has {len(cases)} test entries.[/dim]")
+
+
+def load_coverage_cache(root_dir: str) -> Optional[dict]:
+    """
+    Load the coverage cache, returning None if missing or stale.
+
+    Staleness is detected by comparing the SHA256 of cases.py at cache-build time
+    against the current cases.py. A stale cache means new/removed tests exist.
+    The caller should fall back to running the full test suite.
+ """ + if not COVERAGE_CACHE_PATH.exists(): + return None + + with open(COVERAGE_CACHE_PATH, encoding="utf-8") as f: + cache = json.load(f) + + cases_py = Path(root_dir) / "toolchain/mfc/test/cases.py" + current_hash = hashlib.sha256(cases_py.read_bytes()).hexdigest() + stored_hash = cache.get("_meta", {}).get("cases_hash", "") + + if current_hash != stored_hash: + cons.print("[yellow]Warning: Coverage cache is stale (cases.py changed since cache was built).[/yellow]") + cons.print("[yellow]Rebuild with: ./mfc.sh build --gcov -j 8 && ./mfc.sh test --build-coverage-cache[/yellow]") + return None + + return cache + + +def _parse_diff_output(diff_text: str, _root_dir: str = "") -> dict: # pylint: disable=unused-argument + """ + Parse unified diff output and return {rel_fpp_path: [(start, end), ...]} for + added/modified lines in .fpp files only. + + Pure deletions (@@ -N,M +N,0 @@) are excluded — nothing to cover. + The root_dir argument is accepted for interface compatibility but not used here + (paths in git diff are already repo-relative). + """ + changed: dict = defaultdict(list) + current_file = None + + file_re = re.compile(r'^\+\+\+ b/(.+\.fpp)$') + hunk_re = re.compile(r'^@@ -\d+(?:,\d+)? \+(\d+)(?:,(\d+))? @@') + + for line in diff_text.splitlines(): + m = file_re.match(line) + if m: + current_file = m.group(1) + continue + + m = hunk_re.match(line) + if m and current_file: + start = int(m.group(1)) + count = int(m.group(2)) if m.group(2) is not None else 1 + if count > 0: + changed[current_file].append((start, start + count - 1)) + + return dict(changed) + + +def get_changed_line_ranges(root_dir: str, compare_branch: str = "master") -> dict: + """ + Return {rel_fpp_path: [(start, end), ...]} for lines changed in this branch + relative to the merge-base with compare_branch. + + Uses merge-base (not master tip) so that unrelated master advances don't + appear as "your changes." + Only .fpp files are included. Pure deletions are excluded. 
+ """ + merge_base_result = subprocess.run( + ["git", "merge-base", compare_branch, "HEAD"], + capture_output=True, text=True, cwd=root_dir, timeout=30, check=False + ) + if merge_base_result.returncode != 0: + return {} + merge_base = merge_base_result.stdout.strip() + if not merge_base: + return {} + + diff_result = subprocess.run( + ["git", "diff", merge_base, "HEAD", "-U0", "--no-color"], + capture_output=True, text=True, cwd=root_dir, timeout=30, check=False + ) + if diff_result.returncode != 0: + return {} + + return _parse_diff_output(diff_result.stdout, root_dir) + + +def should_run_all_tests(changed_rel_paths: set) -> bool: + """ + Return True if any changed file is in ALWAYS_RUN_ALL. + + GPU macro files and toolchain files cannot be correctly analyzed by CPU + coverage — changes to them must always trigger the full test suite. + """ + return bool(changed_rel_paths & ALWAYS_RUN_ALL) + + +def filter_tests_by_coverage( # pylint: disable=too-many-locals + cases: list, coverage_cache: dict, changed_ranges: dict +) -> tuple: + """ + Filter test cases to only those whose covered lines overlap with changed lines. + + Returns (cases_to_run, skipped_cases). + + Conservative behavior: + - Test not in cache (newly added) → include it + - Changed file not in any test's coverage → warn and skip + + This is the core of the line-level pruning. A test is included only if at + least one of its covered lines intersects with at least one changed line range. 
+ """ + to_run = [] + skipped = [] + new_file_warnings: set = set() + + for case in cases: + uuid = case.get_uuid() + test_cov = coverage_cache.get(uuid) + + if test_cov is None: + # Test not in cache (e.g., newly added) → conservative: include + to_run.append(case) + continue + + matched = False + for fpp_file, ranges in changed_ranges.items(): + covered_lines = set(test_cov.get(fpp_file, [])) + if not covered_lines: + # File has no coverage for this test — check if it has coverage anywhere + if not any( + fpp_file in tc + for tc in coverage_cache.values() + if isinstance(tc, dict) + ): + new_file_warnings.add(fpp_file) + continue + for (start, end) in ranges: + if covered_lines & set(range(start, end + 1)): + matched = True + break + if matched: + break + + if matched: + to_run.append(case) + else: + skipped.append(case) + + for f in sorted(new_file_warnings): + cons.print(f"[yellow]Warning: {f} has no coverage data in the cache.[/yellow]") + cons.print("[yellow] Consider rebuilding: ./mfc.sh test --build-coverage-cache[/yellow]") + + return to_run, skipped diff --git a/toolchain/mfc/test/test.py b/toolchain/mfc/test/test.py index 31a3771cb9..2b7d272eb8 100644 --- a/toolchain/mfc/test/test.py +++ b/toolchain/mfc/test/test.py @@ -42,7 +42,7 @@ class TestTimeoutError(MFCException): pass -# pylint: disable=too-many-branches, trailing-whitespace +# pylint: disable=too-many-branches,too-many-locals,too-many-statements,trailing-whitespace def __filter(cases_) -> typing.List[TestCase]: cases = cases_[:] selected_cases = [] @@ -75,6 +75,51 @@ def __filter(cases_) -> typing.List[TestCase]: cases.remove(case) skipped_cases.append(case) + # --only-changes: filter based on line-level gcov coverage + if ARG("only_changes"): + from .coverage import ( # pylint: disable=import-outside-toplevel + load_coverage_cache, get_changed_line_ranges, + should_run_all_tests, filter_tests_by_coverage, + ) + + cache = load_coverage_cache(common.MFC_ROOT_DIR) + if cache is None: + 
cons.print("[yellow]Coverage cache missing or stale.[/yellow]") + cons.print("[yellow]Run: ./mfc.sh build --gcov -j 8 && ./mfc.sh test --build-coverage-cache[/yellow]") + cons.print("[yellow]Falling back to full test suite.[/yellow]") + # cache is None → fall through and run all tests + else: + changed_ranges = get_changed_line_ranges(common.MFC_ROOT_DIR, ARG("changes_branch")) + changed_paths = set(changed_ranges.keys()) + + cons.print() + cons.print("[bold cyan]Line-Coverage Change Analysis[/bold cyan]") + cons.print("-" * 50) + + if should_run_all_tests(changed_paths): + cons.print("[yellow]Infrastructure or macro file changed — running full test suite.[/yellow]") + cons.print("-" * 50) + # fall through — don't filter cases + elif not changed_ranges: + cons.print("[green]No .fpp source changes detected — skipping all tests.[/green]") + cons.print("-" * 50) + cons.print() + skipped_cases += cases + cases = [] + else: + for fpp_file, ranges in sorted(changed_ranges.items()): + total_lines = sum(e - s + 1 for s, e in ranges) + cons.print( + f" [green]*[/green] {fpp_file} " + f"({total_lines} line{'s' if total_lines != 1 else ''} changed)" + ) + + cases, new_skipped = filter_tests_by_coverage(cases, cache, changed_ranges) + skipped_cases += new_skipped + cons.print(f"\n[bold]Tests to run: {len(cases)} / {len(cases) + len(new_skipped)}[/bold]") + cons.print("-" * 50) + cons.print() + for case in cases[:]: if case.ppn > 1 and not ARG("mpi"): cases.remove(case) @@ -128,6 +173,12 @@ def test(): return + if ARG("build_coverage_cache"): + from .coverage import build_coverage_cache # pylint: disable=import-outside-toplevel + all_cases = [b.to_case() for b in cases] + build_coverage_cache(common.MFC_ROOT_DIR, all_cases) + return + cases, skipped_cases = __filter(cases) cases = [ _.to_case() for _ in cases ] total_test_count = len(cases) diff --git a/toolchain/mfc/test/test_coverage_unit.py b/toolchain/mfc/test/test_coverage_unit.py new file mode 100644 index 
0000000000..f4138d7256 --- /dev/null +++ b/toolchain/mfc/test/test_coverage_unit.py @@ -0,0 +1,570 @@ +""" +Unit tests for toolchain/mfc/test/coverage.py + +Run with: + python3 -m pytest toolchain/mfc/test/test_coverage_unit.py -v + +These tests are fully offline (no build, no git, no gcov binary required). +They use mocks and in-memory data structures to verify logic. +""" +# pylint: disable=protected-access,exec-used,too-few-public-methods,wrong-import-position + +import gzip +import importlib.util +import json +import os +import sys +import textwrap +import types +import unittest +from unittest.mock import patch + +# --------------------------------------------------------------------------- +# Import the module under test. +# We patch the module-level imports that require the full toolchain. +# --------------------------------------------------------------------------- + +# Create minimal stubs for toolchain modules so coverage.py can be imported +# without the full MFC toolchain being on sys.path. 
+def _make_stub(name):
+    mod = types.ModuleType(name)
+    sys.modules[name] = mod
+    return mod
+
+
+for _mod_name in [
+    "toolchain",
+    "toolchain.mfc",
+    "toolchain.mfc.printer",
+    "toolchain.mfc.common",
+    "toolchain.mfc.build",
+]:
+    if _mod_name not in sys.modules:
+        _make_stub(_mod_name)
+
+# Provide the attributes coverage.py needs from its relative imports
+_printer_stub = sys.modules.get("toolchain.mfc.printer", _make_stub("toolchain.mfc.printer"))
+
+
+class _FakeCons:
+    def print(self, *args, **kwargs):
+        pass  # suppress output during tests
+
+
+_printer_stub.cons = _FakeCons()
+
+_common_stub = sys.modules.get("toolchain.mfc.common", _make_stub("toolchain.mfc.common"))
+_common_stub.MFC_ROOT_DIR = "/fake/repo"
+
+
+class _FakeMFCException(Exception):
+    pass
+
+
+_common_stub.MFCException = _FakeMFCException
+
+_build_stub = sys.modules.get("toolchain.mfc.build", _make_stub("toolchain.mfc.build"))
+_build_stub.PRE_PROCESS = "pre_process"
+_build_stub.SIMULATION = "simulation"
+
+# Load coverage.py by injecting stubs into sys.modules so relative imports resolve.
+_COVERAGE_PATH = os.path.join(os.path.dirname(os.path.abspath(__file__)), "coverage.py")
+
+# Drop any previously loaded copy. (Assigning None instead of popping would
+# make a later `import toolchain.mfc.test.coverage` raise ImportError.)
+sys.modules.pop("toolchain.mfc.test.coverage", None)
+
+_spec = importlib.util.spec_from_file_location(
+    "toolchain.mfc.test.coverage",
+    _COVERAGE_PATH,
+    submodule_search_locations=[]
+)
+_coverage_mod = importlib.util.module_from_spec(_spec)
+_coverage_mod.__package__ = "toolchain.mfc.test"
+
+sys.modules["toolchain.mfc.test"] = types.ModuleType("toolchain.mfc.test")
+sys.modules["toolchain.mfc.test"].__package__ = "toolchain.mfc.test"
+
+with patch.dict("sys.modules", {
+    "toolchain.mfc.printer": _printer_stub,
+    "toolchain.mfc.common": _common_stub,
+    "toolchain.mfc.build": _build_stub,
+}):
+    try:
+        _spec.loader.exec_module(_coverage_mod)
+    except ImportError:
+        pass  # fallback below
+
+# If the importlib approach failed (relative imports unresolvable), fall back to exec.
+try:
+    _parse_diff_output = _coverage_mod._parse_diff_output
+    _parse_gcov_json_output = _coverage_mod._parse_gcov_json_output
+    should_run_all_tests = _coverage_mod.should_run_all_tests
+    filter_tests_by_coverage = _coverage_mod.filter_tests_by_coverage
+    ALWAYS_RUN_ALL = _coverage_mod.ALWAYS_RUN_ALL
+except AttributeError:
+    _globals = {
+        "__name__": "toolchain.mfc.test.coverage",
+        "__package__": "toolchain.mfc.test",
+        "cons": _printer_stub.cons,
+        "common": _common_stub,
+        "MFCException": _FakeMFCException,
+        "PRE_PROCESS": "pre_process",
+        "SIMULATION": "simulation",
+    }
+    with open(_COVERAGE_PATH, encoding="utf-8") as _f:
+        _src = _f.read()
+
+    _src = (
+        _src
+        # cons, common, MFCException, PRE_PROCESS and SIMULATION are already
+        # seeded into _globals above, so the relative imports can simply be
+        # stripped. (Replacing them with `cons = _globals['cons']`-style
+        # assignments would raise NameError inside the exec'd source: the
+        # name `_globals` is the globals *dict*, not a binding visible there.)
+        .replace("from ..printer import cons", "")
+        .replace("from .. import common", "")
+        .replace("from ..common import MFCException", "")
+        .replace("from ..build import PRE_PROCESS, SIMULATION", "")
+    )
+    exec(compile(_src, _COVERAGE_PATH, "exec"), _globals)  # noqa: S102
+
+    _parse_diff_output = _globals["_parse_diff_output"]
+    _parse_gcov_json_output = _globals["_parse_gcov_json_output"]
+    should_run_all_tests = _globals["should_run_all_tests"]
+    filter_tests_by_coverage = _globals["filter_tests_by_coverage"]
+    ALWAYS_RUN_ALL = _globals["ALWAYS_RUN_ALL"]
+
+
+# ---------------------------------------------------------------------------
+# Helper: minimal fake test case
+# ---------------------------------------------------------------------------
+
+class FakeCase:
+    """Minimal stand-in for TestCase — only get_uuid() is needed."""
+
+    def __init__(self, uuid: str):
+        self._uuid = uuid
+
+    def get_uuid(self) -> str:
+        return self._uuid
+
+
+# ===========================================================================
+# Group 1: _parse_diff_output — git diff parsing
+# ===========================================================================
+
+class TestParseDiffOutput(unittest.TestCase):
+
+    def test_parse_basic_modification(self):
diff = textwrap.dedent("""\ + diff --git a/src/simulation/m_rhs.fpp b/src/simulation/m_rhs.fpp + --- a/src/simulation/m_rhs.fpp + +++ b/src/simulation/m_rhs.fpp + @@ -45,3 +45,5 @@ + + ! new line 1 + + ! new line 2 + """) + result = _parse_diff_output(diff, "/repo") + assert result == {"src/simulation/m_rhs.fpp": [(45, 49)]} + + def test_parse_multiple_hunks_same_file(self): + diff = textwrap.dedent("""\ + +++ b/src/simulation/m_rhs.fpp + @@ -10,2 +10,3 @@ + + ! hunk 1 + @@ -50,1 +51,2 @@ + + ! hunk 2 + """) + result = _parse_diff_output(diff, "/repo") + assert "src/simulation/m_rhs.fpp" in result + ranges = result["src/simulation/m_rhs.fpp"] + assert (10, 12) in ranges + assert (51, 52) in ranges + + def test_parse_pure_deletion(self): + # @@ -45,3 +45,0 @@ — 3 lines deleted, 0 added → nothing to cover + diff = textwrap.dedent("""\ + +++ b/src/simulation/m_rhs.fpp + @@ -45,3 +45,0 @@ + - ! removed line 1 + """) + result = _parse_diff_output(diff, "/repo") + assert result == {} + + def test_parse_new_file(self): + diff = textwrap.dedent("""\ + --- /dev/null + +++ b/src/simulation/m_new.fpp + @@ -0,0 +1,50 @@ + + ! new file content + """) + result = _parse_diff_output(diff, "/repo") + assert result == {"src/simulation/m_new.fpp": [(1, 50)]} + + def test_parse_ignores_non_fpp(self): + diff = textwrap.dedent("""\ + +++ b/toolchain/mfc/test/cases.py + @@ -1,1 +1,2 @@ + +# new line + +++ b/README.md + @@ -5,1 +5,2 @@ + +# new line + +++ b/CMakeLists.txt + @@ -10,1 +10,2 @@ + + new cmake line + """) + result = _parse_diff_output(diff, "/repo") + assert result == {} + + def test_parse_single_line_hunk_no_count(self): + # @@ -45 +45 @@ (no count = 1 line) + diff = textwrap.dedent("""\ + +++ b/src/simulation/m_rhs.fpp + @@ -45 +45 @@ + + ! 
changed line + """) + result = _parse_diff_output(diff, "/repo") + assert result == {"src/simulation/m_rhs.fpp": [(45, 45)]} + + def test_parse_empty_diff(self): + assert _parse_diff_output("", "/repo") == {} + + def test_parse_multiple_files(self): + diff = textwrap.dedent("""\ + +++ b/src/simulation/m_rhs.fpp + @@ -10,2 +10,3 @@ + + ! rhs change + +++ b/src/simulation/m_weno.fpp + @@ -200,1 +200,4 @@ + + ! weno change + """) + result = _parse_diff_output(diff, "/repo") + assert "src/simulation/m_rhs.fpp" in result + assert "src/simulation/m_weno.fpp" in result + assert result["src/simulation/m_rhs.fpp"] == [(10, 12)] + assert result["src/simulation/m_weno.fpp"] == [(200, 203)] + + +# =========================================================================== +# Group 2: should_run_all_tests — ALWAYS_RUN_ALL detection +# =========================================================================== + +class TestShouldRunAllTests(unittest.TestCase): + + def test_parallel_macros_triggers_all(self): + assert should_run_all_tests( + {"src/common/include/parallel_macros.fpp"} + ) is True + + def test_acc_macros_triggers_all(self): + assert should_run_all_tests( + {"src/common/include/acc_macros.fpp"} + ) is True + + def test_omp_macros_triggers_all(self): + assert should_run_all_tests( + {"src/common/include/omp_macros.fpp"} + ) is True + + def test_shared_parallel_macros_triggers_all(self): + assert should_run_all_tests( + {"src/common/include/shared_parallel_macros.fpp"} + ) is True + + def test_macros_fpp_triggers_all(self): + assert should_run_all_tests( + {"src/common/include/macros.fpp"} + ) is True + + def test_cases_py_triggers_all(self): + assert should_run_all_tests( + {"toolchain/mfc/test/cases.py"} + ) is True + + def test_case_py_triggers_all(self): + assert should_run_all_tests( + {"toolchain/mfc/test/case.py"} + ) is True + + def test_definitions_py_triggers_all(self): + assert should_run_all_tests( + {"toolchain/mfc/params/definitions.py"} + ) is True + + 
def test_simulation_module_does_not_trigger_all(self): + assert should_run_all_tests( + {"src/simulation/m_rhs.fpp"} + ) is False + + def test_empty_set_does_not_trigger_all(self): + assert should_run_all_tests(set()) is False + + def test_mixed_one_trigger_fires_all(self): + assert should_run_all_tests({ + "src/simulation/m_rhs.fpp", + "src/common/include/macros.fpp", + }) is True + + +# =========================================================================== +# Group 3: filter_tests_by_coverage — core selection logic +# =========================================================================== + +class TestFilterTestsByCoverage(unittest.TestCase): + + def test_exact_line_hit(self): + cache = {"AAAA0001": {"src/simulation/m_rhs.fpp": [45, 46, 47]}} + changed = {"src/simulation/m_rhs.fpp": [(46, 46)]} + cases = [FakeCase("AAAA0001")] + to_run, skipped = filter_tests_by_coverage(cases, cache, changed) + assert len(to_run) == 1 + assert len(skipped) == 0 + + def test_no_line_overlap(self): + """ + KEY TEST: this is what makes line-level better than file-level coverage. + Same file, but changed lines (500-510) don't overlap covered lines (45-47). 
+ """ + cache = {"AAAA0001": {"src/simulation/m_rhs.fpp": [45, 46, 47]}} + changed = {"src/simulation/m_rhs.fpp": [(500, 510)]} + cases = [FakeCase("AAAA0001")] + to_run, skipped = filter_tests_by_coverage(cases, cache, changed) + assert len(to_run) == 0 + assert len(skipped) == 1 + + def test_range_boundary_inclusive_start(self): + cache = {"AAAA0001": {"src/simulation/m_rhs.fpp": [100]}} + changed = {"src/simulation/m_rhs.fpp": [(100, 105)]} + to_run, _ = filter_tests_by_coverage([FakeCase("AAAA0001")], cache, changed) + assert len(to_run) == 1 + + def test_range_boundary_inclusive_end(self): + cache = {"AAAA0001": {"src/simulation/m_rhs.fpp": [105]}} + changed = {"src/simulation/m_rhs.fpp": [(100, 105)]} + to_run, _ = filter_tests_by_coverage([FakeCase("AAAA0001")], cache, changed) + assert len(to_run) == 1 + + def test_range_just_outside_both_ends(self): + # Lines 99 and 106 are NOT inside [100, 105] + cache = {"AAAA0001": {"src/simulation/m_rhs.fpp": [99, 106]}} + changed = {"src/simulation/m_rhs.fpp": [(100, 105)]} + to_run, skipped = filter_tests_by_coverage([FakeCase("AAAA0001")], cache, changed) + assert len(to_run) == 0 + assert len(skipped) == 1 + + def test_uuid_not_in_cache_is_conservative(self): + """Newly added test not in cache → include it (conservative).""" + cache = {} + changed = {"src/simulation/m_rhs.fpp": [(45, 45)]} + to_run, _ = filter_tests_by_coverage([FakeCase("NEWTEST1")], cache, changed) + assert len(to_run) == 1 + + def test_changed_file_not_in_test_coverage(self): + """Test covers m_rhs.fpp but we changed m_igr.fpp → test skipped.""" + cache = {"AAAA0001": {"src/simulation/m_rhs.fpp": [45, 46]}} + changed = {"src/simulation/m_igr.fpp": [(100, 110)]} + to_run, skipped = filter_tests_by_coverage([FakeCase("AAAA0001")], cache, changed) + assert len(to_run) == 0 + assert len(skipped) == 1 + + def test_large_module_line_granularity(self): + """ + KEY CORNER CASE: m_riemann_solvers.fpp is 5257 lines. 
+ HLLC test covers lines 798-810, ROE test covers lines 2000+. + Changing line 800-810 → only HLLC test runs. + """ + cache = { + "HLLC_TST": {"src/simulation/m_riemann_solvers.fpp": [798, 799, 800, 801, 810]}, + "ROE_TEST": {"src/simulation/m_riemann_solvers.fpp": [2000, 2100, 2500]}, + } + changed = {"src/simulation/m_riemann_solvers.fpp": [(800, 810)]} + cases = [FakeCase("HLLC_TST"), FakeCase("ROE_TEST")] + to_run, skipped = filter_tests_by_coverage(cases, cache, changed) + uuids_run = {c.get_uuid() for c in to_run} + assert "HLLC_TST" in uuids_run + assert "ROE_TEST" not in uuids_run + assert len(skipped) == 1 + + def test_test_covering_multiple_files_matched_via_second(self): + """Test matched because m_weno.fpp (its second covered file) was changed.""" + cache = {"AAAA0001": { + "src/simulation/m_rhs.fpp": [45], + "src/simulation/m_weno.fpp": [100], + }} + changed = {"src/simulation/m_weno.fpp": [(100, 100)]} + to_run, _ = filter_tests_by_coverage([FakeCase("AAAA0001")], cache, changed) + assert len(to_run) == 1 + + def test_no_changed_ranges_runs_nothing(self): + """Empty diff → nothing to cover → all tests skipped.""" + cache = {"AAAA0001": {"src/simulation/m_rhs.fpp": [45, 46]}} + changed = {} + to_run, skipped = filter_tests_by_coverage([FakeCase("AAAA0001")], cache, changed) + assert len(to_run) == 0 + assert len(skipped) == 1 + + def test_multiple_tests_correct_partial_selection(self): + """Only the test that covers line 46 should run.""" + cache = { + "TEST_A": {"src/simulation/m_rhs.fpp": [45, 46, 47]}, + "TEST_B": {"src/simulation/m_weno.fpp": [100, 200]}, + "TEST_C": {"src/simulation/m_rhs.fpp": [1000, 2000]}, + } + changed = {"src/simulation/m_rhs.fpp": [(46, 46)]} + cases = [FakeCase("TEST_A"), FakeCase("TEST_B"), FakeCase("TEST_C")] + to_run, skipped = filter_tests_by_coverage(cases, cache, changed) + uuids_run = {c.get_uuid() for c in to_run} + assert uuids_run == {"TEST_A"} + assert len(skipped) == 2 + + +# 
=========================================================================== +# Group 4: Corner cases from design discussion +# =========================================================================== + +class TestDesignCornerCases(unittest.TestCase): + + def test_gpu_ifdef_lines_not_in_cpu_coverage(self): + """ + m_muscl.fpp has `use openacc` inside #ifdef MFC_OpenACC at lines 16-18. + CPU build skips those lines → not in any test's coverage. + Changing lines 16-18 → 0 tests triggered. This is CORRECT behavior. + (The ALWAYS_RUN_ALL mechanism handles macro file changes separately.) + """ + cache = {"MUSCL_T": {"src/simulation/m_muscl.fpp": [20, 21, 22, 100, 200]}} + changed = {"src/simulation/m_muscl.fpp": [(16, 18)]} + to_run, skipped = filter_tests_by_coverage([FakeCase("MUSCL_T")], cache, changed) + assert len(to_run) == 0 + assert len(skipped) == 1 + + def test_gpu_cpu_code_in_same_file_triggers_correctly(self): + """Changing CPU code in the same file does trigger the test.""" + cache = {"MUSCL_T": {"src/simulation/m_muscl.fpp": [20, 21, 22, 100, 200]}} + changed = {"src/simulation/m_muscl.fpp": [(100, 100)]} + to_run, _ = filter_tests_by_coverage([FakeCase("MUSCL_T")], cache, changed) + assert len(to_run) == 1 + + def test_macro_file_triggers_all_via_should_run_all(self): + """parallel_macros.fpp in changed_paths → should_run_all_tests() is True.""" + changed_paths = {"src/common/include/parallel_macros.fpp"} + assert should_run_all_tests(changed_paths) is True + + def test_new_fpp_file_no_coverage_runs_nothing(self): + """ + Brand new .fpp file has no coverage in cache. + All tests are skipped (no test covers the new file). 
+ """ + cache = {"AAAA0001": {"src/simulation/m_rhs.fpp": [45]}} + changed = {"src/simulation/m_brand_new.fpp": [(1, 100)]} + to_run, skipped = filter_tests_by_coverage([FakeCase("AAAA0001")], cache, changed) + assert len(to_run) == 0 + assert len(skipped) == 1 + + def test_multiple_ranges_in_same_file_any_match_triggers(self): + """Multiple changed ranges in one file — if any overlaps, test runs.""" + cache = {"AAAA0001": {"src/simulation/m_rhs.fpp": [50, 51]}} + # Two ranges: [10,20] (no match) and [50,60] (match at 50,51) + changed = {"src/simulation/m_rhs.fpp": [(10, 20), (50, 60)]} + to_run, _ = filter_tests_by_coverage([FakeCase("AAAA0001")], cache, changed) + assert len(to_run) == 1 + + def test_empty_cache_runs_all_conservatively(self): + """Empty coverage cache → all tests included (conservative).""" + cache = {} + changed = {"src/simulation/m_rhs.fpp": [(45, 45)]} + cases = [FakeCase("T1"), FakeCase("T2"), FakeCase("T3")] + to_run, skipped = filter_tests_by_coverage(cases, cache, changed) + assert len(to_run) == 3 + assert len(skipped) == 0 + + +# =========================================================================== +# Group 5: _parse_gcov_json_output — gcov JSON parsing +# =========================================================================== + +class TestParseGcovJsonOutput(unittest.TestCase): + + def _make_gcov_json(self, files_data: list) -> bytes: + """Build a fake gzip-compressed gcov JSON blob.""" + data = { + "format_version": "2", + "gcc_version": "15.2.0", + "files": files_data, + } + return gzip.compress(json.dumps(data).encode()) + + def test_parses_fpp_file_correctly(self): + compressed = self._make_gcov_json([{ + "file": "/repo/src/simulation/m_rhs.fpp", + "lines": [ + {"line_number": 45, "count": 3}, + {"line_number": 46, "count": 0}, # not covered + {"line_number": 47, "count": 1}, + ], + }]) + result = _parse_gcov_json_output(compressed, "/repo") + assert "src/simulation/m_rhs.fpp" in result + assert 
result["src/simulation/m_rhs.fpp"] == [45, 47] + + def test_ignores_zero_count_lines(self): + compressed = self._make_gcov_json([{ + "file": "/repo/src/simulation/m_rhs.fpp", + "lines": [ + {"line_number": 10, "count": 0}, + {"line_number": 11, "count": 0}, + ], + }]) + result = _parse_gcov_json_output(compressed, "/repo") + assert result == {} + + def test_ignores_f90_files(self): + """Generated .f90 files must not appear in coverage output.""" + compressed = self._make_gcov_json([ + { + "file": "/repo/build/fypp/simulation/m_rhs.fpp.f90", + "lines": [{"line_number": 10, "count": 5}], + }, + { + "file": "/repo/src/simulation/m_rhs.fpp", + "lines": [{"line_number": 45, "count": 1}], + }, + ]) + result = _parse_gcov_json_output(compressed, "/repo") + assert "src/simulation/m_rhs.fpp" in result + assert all(k.endswith(".fpp") for k in result) + + def test_handles_invalid_gzip_gracefully(self): + result = _parse_gcov_json_output(b"not valid gzip", "/repo") + assert result == {} + + def test_handles_empty_files_list(self): + compressed = self._make_gcov_json([]) + result = _parse_gcov_json_output(compressed, "/repo") + assert result == {} + + def test_multiple_fpp_files_merged(self): + compressed = self._make_gcov_json([ + { + "file": "/repo/src/simulation/m_rhs.fpp", + "lines": [{"line_number": 45, "count": 1}], + }, + { + "file": "/repo/src/simulation/m_weno.fpp", + "lines": [{"line_number": 200, "count": 2}], + }, + ]) + result = _parse_gcov_json_output(compressed, "/repo") + assert "src/simulation/m_rhs.fpp" in result + assert "src/simulation/m_weno.fpp" in result + assert result["src/simulation/m_rhs.fpp"] == [45] + assert result["src/simulation/m_weno.fpp"] == [200] + + def test_lines_are_sorted(self): + compressed = self._make_gcov_json([{ + "file": "/repo/src/simulation/m_rhs.fpp", + "lines": [ + {"line_number": 100, "count": 1}, + {"line_number": 50, "count": 1}, + {"line_number": 75, "count": 1}, + ], + }]) + result = _parse_gcov_json_output(compressed, 
"/repo") + assert result["src/simulation/m_rhs.fpp"] == [50, 75, 100] + + +if __name__ == "__main__": + unittest.main() From 6e7f5bec13d7d101cd57abdf4aaa13f08c146f57 Mon Sep 17 00:00:00 2001 From: Spencer Bryngelson Date: Fri, 27 Feb 2026 01:47:03 -0500 Subject: [PATCH 2/2] Fix gcov coverage: raw JSON parsing, symlink paths, and test setup Three bugs found during integration testing on Phoenix: - gcov 12 outputs raw JSON, not gzip; added fallback parsing - Symlink paths (/storage/scratch1 vs /storage/home) caused relpath mismatch; use os.path.realpath() before computing relative paths - Missing case.delete_output()/create_directory() before case.run() caused silent test failures during cache build Co-Authored-By: Claude Opus 4.6 --- toolchain/mfc/test/coverage.py | 17 +++++++++++++---- toolchain/mfc/test/test_coverage_unit.py | 22 ++++++++++++++++++++-- 2 files changed, 33 insertions(+), 6 deletions(-) diff --git a/toolchain/mfc/test/coverage.py b/toolchain/mfc/test/coverage.py index 86beab6035..86b5c5def0 100644 --- a/toolchain/mfc/test/coverage.py +++ b/toolchain/mfc/test/coverage.py @@ -149,24 +149,31 @@ def zero_gcda_files(root_dir: str) -> None: pass -def _parse_gcov_json_output(compressed_bytes: bytes, root_dir: str) -> dict: +def _parse_gcov_json_output(raw_bytes: bytes, root_dir: str) -> dict: """ - Parse gzip-compressed gcov JSON output and return {rel_fpp_path: [covered_lines]}. + Parse gcov JSON output and return {rel_fpp_path: [covered_lines]}. + Handles both gzip-compressed (gcov 13+) and raw JSON (gcov 12) formats. Only .fpp files are included; .f90 and other files are skipped. Only lines with count > 0 are included. 
""" result = defaultdict(set) try: - data = json.loads(gzip.decompress(compressed_bytes)) + data = json.loads(gzip.decompress(raw_bytes)) + except (gzip.BadGzipFile, OSError): + try: + data = json.loads(raw_bytes) + except (json.JSONDecodeError, ValueError): + return {} except Exception: return {} + real_root = os.path.realpath(root_dir) for file_entry in data.get("files", []): file_path = file_entry.get("file", "") if not file_path.endswith(".fpp"): continue try: - rel_path = os.path.relpath(file_path, root_dir) + rel_path = os.path.relpath(os.path.realpath(file_path), real_root) except ValueError: rel_path = file_path for line in file_entry.get("lines", []): @@ -228,6 +235,8 @@ def build_coverage_cache(root_dir: str, cases: list) -> None: for i, case in enumerate(cases): zero_gcda_files(root_dir) try: + case.delete_output() + case.create_directory() case.run([PRE_PROCESS, SIMULATION], gpus=set()) except Exception: # Continue even if the test fails — we only need .gcda data diff --git a/toolchain/mfc/test/test_coverage_unit.py b/toolchain/mfc/test/test_coverage_unit.py index f4138d7256..3b0a855fca 100644 --- a/toolchain/mfc/test/test_coverage_unit.py +++ b/toolchain/mfc/test/test_coverage_unit.py @@ -527,8 +527,26 @@ def test_ignores_f90_files(self): assert "src/simulation/m_rhs.fpp" in result assert all(k.endswith(".fpp") for k in result) - def test_handles_invalid_gzip_gracefully(self): - result = _parse_gcov_json_output(b"not valid gzip", "/repo") + def test_handles_raw_json_gcov12(self): + """gcov 12 outputs raw JSON (not gzip). 
Must parse correctly.""" + data = { + "format_version": "1", + "gcc_version": "12.3.0", + "files": [{ + "file": "/repo/src/simulation/m_rhs.fpp", + "lines": [ + {"line_number": 45, "count": 3}, + {"line_number": 46, "count": 0}, + {"line_number": 47, "count": 1}, + ], + }], + } + raw = json.dumps(data).encode() + result = _parse_gcov_json_output(raw, "/repo") + assert result == {"src/simulation/m_rhs.fpp": [45, 47]} + + def test_handles_invalid_data_gracefully(self): + result = _parse_gcov_json_output(b"not valid gzip or json", "/repo") assert result == {} def test_handles_empty_files_list(self):