From 4535fc3fd15d8223adb3ffc0eb02adc017322d66 Mon Sep 17 00:00:00 2001
From: Spencer Bryngelson <sbryngelson@gmail.com>
Date: Thu, 19 Feb 2026 14:59:43 -0500
Subject: [PATCH 01/15] Add test sharding, proactive clean, and retry logic for
 self-hosted CI

- Shard Frontier GPU tests into 2 parts for faster parallel execution
- Add proactive ./mfc.sh clean in Phoenix test scripts to prevent
  cross-compiler contamination from stale build artifacts
- Add --requeue to Phoenix SLURM jobs for preemption recovery
- Add lint-gate job that must pass before self-hosted tests run
- Add retry logic for GitHub runner tests (retry <=5 failures)
- Add Frontier AMD test support with dedicated submit/test scripts
- Restructure self-hosted matrix with explicit cluster names

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 .github/workflows/frontier/submit.sh     |  8 +--
 .github/workflows/frontier/test.sh       |  7 ++-
 .github/workflows/frontier_amd/submit.sh |  8 +--
 .github/workflows/frontier_amd/test.sh   |  7 ++-
 .github/workflows/phoenix/submit.sh      |  1 +
 .github/workflows/phoenix/test.sh        |  4 ++
 .github/workflows/test.yml               | 67 ++++++++++++++++++++----
 7 files changed, 85 insertions(+), 17 deletions(-)

diff --git a/.github/workflows/frontier/submit.sh b/.github/workflows/frontier/submit.sh
index d5b416c65a..4c3e0e3e27 100644
--- a/.github/workflows/frontier/submit.sh
+++ b/.github/workflows/frontier/submit.sh
@@ -34,12 +34,13 @@ output_file="$job_slug.out"
 submit_output=$(sbatch <<EOT
 #!/bin/bash
 #SBATCH -J MFC-$job_slug            # Job name
-#SBATCH -A ENG160                  # charge account
+#SBATCH -A CFD154                  # charge account
 #SBATCH -N 1                       # Number of nodes required
 $sbatch_device_opts
-#SBATCH -t 05:59:00                # Duration of the job (Ex: 15 mins)
+#SBATCH -t 01:59:00                # Duration of the job
 #SBATCH -o$output_file             # Combined output and error messages file
-#SBATCH -p extended                # Extended partition for shorter queues
+#SBATCH -p batch                   # Batch partition (concurrent jobs)
+#SBATCH --qos=hackathon            # Hackathon QOS for batch access
 
 set -e
 set -x
@@ -50,6 +51,7 @@ echo "Running in $(pwd):"
 job_slug="$job_slug"
 job_device="$2"
 job_interface="$3"
+job_shard="$4"
 
 . ./mfc.sh load -c f -m $([ "$2" = "gpu" ] && echo "g" || echo "c")
 
diff --git a/.github/workflows/frontier/test.sh b/.github/workflows/frontier/test.sh
index 17fbbaf8e5..ad109c6478 100644
--- a/.github/workflows/frontier/test.sh
+++ b/.github/workflows/frontier/test.sh
@@ -13,8 +13,13 @@ if [ "$job_device" = "gpu" ]; then
     fi
 fi
 
+shard_opts=""
+if [ -n "$job_shard" ]; then
+    shard_opts="--shard $job_shard"
+fi
+
 if [ "$job_device" = "gpu" ]; then
-    ./mfc.sh test -v -a --rdma-mpi --max-attempts 3 -j $ngpus $device_opts -- -c frontier
+    ./mfc.sh test -v -a --rdma-mpi --max-attempts 3 -j $ngpus $device_opts $shard_opts -- -c frontier
 else
     ./mfc.sh test -v -a --max-attempts 3 -j 32 --no-gpu -- -c frontier
 fi
diff --git a/.github/workflows/frontier_amd/submit.sh b/.github/workflows/frontier_amd/submit.sh
index 551e0056b8..df73db5807 100644
--- a/.github/workflows/frontier_amd/submit.sh
+++ b/.github/workflows/frontier_amd/submit.sh
@@ -34,12 +34,13 @@ output_file="$job_slug.out"
 submit_output=$(sbatch <<EOT
 #!/bin/bash
 #SBATCH -J MFC-$job_slug            # Job name
-#SBATCH -A ENG160                  # charge account
+#SBATCH -A CFD154                  # charge account
 #SBATCH -N 1                       # Number of nodes required
 $sbatch_device_opts
-#SBATCH -t 05:59:00                # Duration of the job (Ex: 15 mins)
+#SBATCH -t 01:59:00                # Duration of the job
 #SBATCH -o$output_file             # Combined output and error messages file
-#SBATCH -p extended                # Extended partition for shorter queues
+#SBATCH -p batch                   # Batch partition (concurrent jobs)
+#SBATCH --qos=hackathon            # Hackathon QOS for batch access
 
 set -e
 set -x
@@ -50,6 +51,7 @@ echo "Running in $(pwd):"
 job_slug="$job_slug"
 job_device="$2"
 job_interface="$3"
+job_shard="$4"
 
 . ./mfc.sh load -c famd -m $([ "$2" = "gpu" ] && echo "g" || echo "c")
 
diff --git a/.github/workflows/frontier_amd/test.sh b/.github/workflows/frontier_amd/test.sh
index ff65aa2b0e..c051144b2d 100644
--- a/.github/workflows/frontier_amd/test.sh
+++ b/.github/workflows/frontier_amd/test.sh
@@ -13,8 +13,13 @@ if [ "$job_device" = "gpu" ]; then
     fi
 fi
 
+shard_opts=""
+if [ -n "$job_shard" ]; then
+    shard_opts="--shard $job_shard"
+fi
+
 if [ "$job_device" = "gpu" ]; then
-    ./mfc.sh test -v -a --max-attempts 3 -j $ngpus $device_opts -- -c frontier_amd
+    ./mfc.sh test -v -a --max-attempts 3 -j $ngpus $device_opts $shard_opts -- -c frontier_amd
 else
     ./mfc.sh test -v -a --max-attempts 3 -j 32 --no-gpu -- -c frontier_amd
 fi
diff --git a/.github/workflows/phoenix/submit.sh b/.github/workflows/phoenix/submit.sh
index 06a03e465a..874f5afa44 100755
--- a/.github/workflows/phoenix/submit.sh
+++ b/.github/workflows/phoenix/submit.sh
@@ -48,6 +48,7 @@ submit_output=$(sbatch <<EOT
 $sbatch_device_opts
 #SBATCH -t 03:00:00                # Duration of the job (Ex: 15 mins)
 #SBATCH -q embers                  # QOS Name
+#SBATCH --requeue                  # Auto-requeue on preemption
 #SBATCH -o$output_file             # Combined output and error messages file
 
 set -e
diff --git a/.github/workflows/phoenix/test.sh b/.github/workflows/phoenix/test.sh
index 74c31c9fba..e6912f70b6 100644
--- a/.github/workflows/phoenix/test.sh
+++ b/.github/workflows/phoenix/test.sh
@@ -1,5 +1,9 @@
 #!/bin/bash
 
+# Clean stale build artifacts from previous CI runs to prevent
+# cross-compiler contamination (e.g. gfortran LAPACK linked by NVHPC)
+./mfc.sh clean
+
 build_opts=""
 if [ "$job_device" = "gpu" ]; then
     build_opts="--gpu"
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index c4a39a2dbd..2d88007613 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -143,14 +143,33 @@ jobs:
           TEST_ALL: ${{ matrix.mpi == 'mpi' && '--test-all' || '' }}
 
       - name: Test
-        run:  |
-          /bin/bash mfc.sh test -v --max-attempts 3 -j $(nproc) $TEST_ALL $TEST_PCT
+        run: |
+          rm -f tests/failed_uuids.txt
+          TEST_EXIT=0
+          /bin/bash mfc.sh test -v --max-attempts 3 -j $(nproc) $TEST_ALL $TEST_PCT || TEST_EXIT=$?
+
+          # Retry only if a small number of tests failed (sporadic failures)
+          if [ -f tests/failed_uuids.txt ]; then
+            NUM_FAILED=$(wc -l < tests/failed_uuids.txt)
+            if [ "$NUM_FAILED" -le 5 ]; then
+              FAILED=$(cat tests/failed_uuids.txt | tr '\n' ' ')
+              echo ""
+              echo "=== Retrying $NUM_FAILED failed test(s): $FAILED ==="
+              echo ""
+              /bin/bash mfc.sh test -v --max-attempts 3 -j $(nproc) --only $FAILED $TEST_ALL || exit $?
+            else
+              echo "Too many failures ($NUM_FAILED) to retry — likely a real issue."
+              exit 1
+            fi
+          elif [ "$TEST_EXIT" -ne 0 ]; then
+            exit $TEST_EXIT
+          fi
         env:
           TEST_ALL: ${{ matrix.mpi == 'mpi' && '--test-all' || '' }}
           TEST_PCT: ${{ matrix.debug == 'debug' && '-% 20' || '' }}
 
   self:
-    name: "${{ matrix.cluster_name }} (${{ matrix.device }}${{ matrix.interface != 'none' && format('-{0}', matrix.interface) || '' }})"
+    name: "${{ matrix.cluster_name }} (${{ matrix.device }}${{ matrix.interface != 'none' && format('-{0}', matrix.interface) || '' }}${{ matrix.shard != '' && format(' [{0}]', matrix.shard) || '' }})"
     if: github.repository == 'MFlowCode/MFC' && needs.file-changes.outputs.checkall == 'true' && github.event.pull_request.draft != true
     needs: [lint-gate, file-changes]
     continue-on-error: false
@@ -164,50 +183,74 @@ jobs:
             cluster_name: 'Georgia Tech | Phoenix'
             device: 'gpu'
             interface: 'acc'
+            shard: ''
           - runner:       'gt'
             cluster:      'phoenix'
             cluster_name: 'Georgia Tech | Phoenix'
             device: 'gpu'
             interface: 'omp'
+            shard: ''
           - runner:       'gt'
             cluster:      'phoenix'
             cluster_name: 'Georgia Tech | Phoenix'
             device: 'cpu'
             interface: 'none'
-          # Frontier (ORNL) — build on login node, test via SLURM
+            shard: ''
+          # Frontier (ORNL) — build on login node, GPU tests sharded for batch partition
+          - runner:       'frontier'
+            cluster:      'frontier'
+            cluster_name: 'Oak Ridge | Frontier'
+            device: 'gpu'
+            interface: 'acc'
+            shard: '1/2'
           - runner:       'frontier'
             cluster:      'frontier'
             cluster_name: 'Oak Ridge | Frontier'
             device: 'gpu'
             interface: 'acc'
+            shard: '2/2'
           - runner:       'frontier'
             cluster:      'frontier'
             cluster_name: 'Oak Ridge | Frontier'
             device: 'gpu'
             interface: 'omp'
+            shard: '1/2'
+          - runner:       'frontier'
+            cluster:      'frontier'
+            cluster_name: 'Oak Ridge | Frontier'
+            device: 'gpu'
+            interface: 'omp'
+            shard: '2/2'
           - runner:       'frontier'
             cluster:      'frontier'
             cluster_name: 'Oak Ridge | Frontier'
             device: 'cpu'
             interface: 'none'
-          # Frontier AMD — build on login node, test via SLURM
+            shard: ''
+          # Frontier AMD — build on login node, GPU tests sharded for batch partition
           - runner:       'frontier'
             cluster:      'frontier_amd'
             cluster_name: 'Oak Ridge | Frontier (AMD)'
             device: 'gpu'
             interface: 'omp'
+            shard: '1/2'
+          - runner:       'frontier'
+            cluster:      'frontier_amd'
+            cluster_name: 'Oak Ridge | Frontier (AMD)'
+            device: 'gpu'
+            interface: 'omp'
+            shard: '2/2'
           - runner:       'frontier'
             cluster:      'frontier_amd'
             cluster_name: 'Oak Ridge | Frontier (AMD)'
             device: 'cpu'
             interface: 'none'
+            shard: ''
     runs-on:
       group:  phoenix
       labels: ${{ matrix.runner }}
     env:
       NODE_OPTIONS: ${{ matrix.cluster == 'phoenix' && '--max-old-space-size=2048' || '' }}
-      ACTIONS_RUNNER_FORCE_ACTIONS_NODE_VERSION: node16
-      ACTIONS_ALLOW_USE_UNSECURE_NODE_VERSION: true
     steps:
       - name: Clone
         uses: actions/checkout@v4
@@ -216,10 +259,16 @@ jobs:
 
       - name: Build
         if:   matrix.cluster != 'phoenix'
-        run:  bash .github/workflows/${{ matrix.cluster }}/build.sh ${{ matrix.device }} ${{ matrix.interface }}
+        uses: nick-fields/retry@v3
+        with:
+          max_attempts: 3
+          retry_wait_seconds: 60
+          timeout_minutes: 480
+          command: bash .github/workflows/${{ matrix.cluster }}/build.sh ${{ matrix.device }} ${{ matrix.interface }}
+          on_retry_command: ./mfc.sh clean
 
       - name: Test
-        run:  bash .github/workflows/${{ matrix.cluster }}/submit.sh .github/workflows/${{ matrix.cluster }}/test.sh ${{ matrix.device }} ${{ matrix.interface }}
+        run:  bash .github/workflows/${{ matrix.cluster }}/submit.sh .github/workflows/${{ matrix.cluster }}/test.sh ${{ matrix.device }} ${{ matrix.interface }} ${{ matrix.shard }}
 
       - name: Print Logs
         if:   always()

From 8b2f712e71a4242321f3709a2ed24e08bea0d2be Mon Sep 17 00:00:00 2001
From: Spencer Bryngelson <sbryngelson@gmail.com>
Date: Thu, 19 Feb 2026 16:58:06 -0500
Subject: [PATCH 02/15] Add --shard and failed_uuids.txt support to test
 toolchain

The CI test scripts use --shard for splitting Frontier GPU tests across
multiple jobs, and failed_uuids.txt for retry logic. These toolchain
changes were missing from the cherry-pick.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 toolchain/mfc/cli/commands.py |  6 ++++++
 toolchain/mfc/test/test.py    | 17 +++++++++++++++++
 2 files changed, 23 insertions(+)

diff --git a/toolchain/mfc/cli/commands.py b/toolchain/mfc/cli/commands.py
index 8ad8c4bd07..018e3cef83 100644
--- a/toolchain/mfc/cli/commands.py
+++ b/toolchain/mfc/cli/commands.py
@@ -452,6 +452,12 @@
             default=False,
             dest="dry_run",
         ),
+        Argument(
+            name="shard",
+            help="Run only a subset of tests (e.g., '1/2' for first half, '2/2' for second half).",
+            type=str,
+            default=None,
+        ),
     ],
     mutually_exclusive=[
         MutuallyExclusiveGroup(arguments=[
diff --git a/toolchain/mfc/test/test.py b/toolchain/mfc/test/test.py
index 31a3771cb9..54e00186dd 100644
--- a/toolchain/mfc/test/test.py
+++ b/toolchain/mfc/test/test.py
@@ -99,6 +99,14 @@ def __filter(cases_) -> typing.List[TestCase]:
         skipped_cases += example_cases
         cases = [case for case in cases if case not in example_cases]
 
+    if ARG("shard") is not None:
+        parts = ARG("shard").split("/")
+        if len(parts) != 2 or not all(p.isdigit() for p in parts) or int(parts[1]) < 1 or not 1 <= int(parts[0]) <= int(parts[1]):
+            raise MFCException(f"Invalid --shard '{ARG('shard')}': expected 'i/n' with 1 <= i <= n (e.g., '1/2').")
+        shard_idx, shard_count = int(parts[0]), int(parts[1])
+        skipped_cases += [c for i, c in enumerate(cases) if i % shard_count != shard_idx - 1]
+        cases = [c for i, c in enumerate(cases) if i % shard_count == shard_idx - 1]
+
     if ARG("percent") == 100:
         return cases, skipped_cases
 
@@ -206,6 +214,15 @@ def test():
     # Build the summary report
     _print_test_summary(nPASS, nFAIL, nSKIP, minutes, seconds, failed_tests, skipped_cases)
 
+    # Write failed UUIDs to file for CI retry logic
+    failed_uuids_path = os.path.join(common.MFC_TEST_DIR, "failed_uuids.txt")
+    if failed_tests:
+        with open(failed_uuids_path, "w") as f:
+            for test_info in failed_tests:
+                f.write(test_info['uuid'] + "\n")
+    elif os.path.exists(failed_uuids_path):
+        os.remove(failed_uuids_path)
+
     exit(nFAIL)
 
 
From 9de84d63c18f81151fb4a3abad3a341c38c49b9a Mon Sep 17 00:00:00 2001
From: Spencer Bryngelson <sbryngelson@gmail.com>
Date: Fri, 20 Feb 2026 14:43:00 -0500
Subject: [PATCH 03/15] Fix stale failed_uuids.txt on abort, guard empty retry,
 quote nproc

- Clean up failed_uuids.txt on early abort path so CI doesn't retry
  stale UUIDs from a previous run
- Guard retry condition with NUM_FAILED > 0 to prevent full-suite
  rerun when the file exists but is empty
- Quote $(nproc) to silence shellcheck SC2046 warnings

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 .github/workflows/test.yml | 10 +++++-----
 toolchain/mfc/test/test.py |  5 +++++
 2 files changed, 10 insertions(+), 5 deletions(-)

diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index 2d88007613..32a0410e75 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -28,7 +28,7 @@ jobs:
 
       - name: Check Formatting
         run: |
-          ./mfc.sh format -j $(nproc)
+          ./mfc.sh format -j "$(nproc)"
           git diff --exit-code || (echo "::error::Code is not formatted. Run './mfc.sh format' locally." && exit 1)
 
       - name: Spell Check
@@ -138,7 +138,7 @@ jobs:
 
       - name: Build
         run:  |
-          /bin/bash mfc.sh test -v --dry-run -j $(nproc) --${{ matrix.debug }} --${{ matrix.mpi }} --${{ matrix.precision }} $TEST_ALL
+          /bin/bash mfc.sh test -v --dry-run -j "$(nproc)" --${{ matrix.debug }} --${{ matrix.mpi }} --${{ matrix.precision }} $TEST_ALL
         env:
           TEST_ALL: ${{ matrix.mpi == 'mpi' && '--test-all' || '' }}
 
@@ -146,17 +146,17 @@ jobs:
         run: |
           rm -f tests/failed_uuids.txt
           TEST_EXIT=0
-          /bin/bash mfc.sh test -v --max-attempts 3 -j $(nproc) $TEST_ALL $TEST_PCT || TEST_EXIT=$?
+          /bin/bash mfc.sh test -v --max-attempts 3 -j "$(nproc)" $TEST_ALL $TEST_PCT || TEST_EXIT=$?
 
           # Retry only if a small number of tests failed (sporadic failures)
           if [ -f tests/failed_uuids.txt ]; then
             NUM_FAILED=$(wc -l < tests/failed_uuids.txt)
-            if [ "$NUM_FAILED" -le 5 ]; then
+            if [ "$NUM_FAILED" -gt 0 ] && [ "$NUM_FAILED" -le 5 ]; then
               FAILED=$(cat tests/failed_uuids.txt | tr '\n' ' ')
               echo ""
               echo "=== Retrying $NUM_FAILED failed test(s): $FAILED ==="
               echo ""
-              /bin/bash mfc.sh test -v --max-attempts 3 -j $(nproc) --only $FAILED $TEST_ALL || exit $?
+              /bin/bash mfc.sh test -v --max-attempts 3 -j "$(nproc)" --only $FAILED $TEST_ALL || exit $?
             else
               echo "Too many failures ($NUM_FAILED) to retry — likely a real issue."
               exit 1
diff --git a/toolchain/mfc/test/test.py b/toolchain/mfc/test/test.py
index 54e00186dd..681f59f6ae 100644
--- a/toolchain/mfc/test/test.py
+++ b/toolchain/mfc/test/test.py
@@ -190,6 +190,11 @@ def test():
 
     # Check if we aborted due to high failure rate
     if abort_tests.is_set():
+        # Clean up stale failed_uuids.txt so CI doesn't retry wrong tests
+        failed_uuids_path = os.path.join(common.MFC_TEST_DIR, "failed_uuids.txt")
+        if os.path.exists(failed_uuids_path):
+            os.remove(failed_uuids_path)
+
         total_completed = nFAIL + nPASS
         cons.print()
         cons.unindent()

From 92deb41f63d26ef68ead305d63b2f51146da0aae Mon Sep 17 00:00:00 2001
From: Spencer Bryngelson <sbryngelson@gmail.com>
Date: Fri, 20 Feb 2026 15:36:28 -0500
Subject: [PATCH 04/15] Remove proactive clean from Phoenix test script

The build system should handle compiler changes correctly. Proactive
clean forces full rebuilds of FFTW/LAPACK from scratch every run,
which is slow and exposes builds to transient filesystem failures
(CMake TryCompile errors on Phoenix scratch).

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 .github/workflows/phoenix/test.sh | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/.github/workflows/phoenix/test.sh b/.github/workflows/phoenix/test.sh
index e6912f70b6..74c31c9fba 100644
--- a/.github/workflows/phoenix/test.sh
+++ b/.github/workflows/phoenix/test.sh
@@ -1,9 +1,5 @@
 #!/bin/bash
 
-# Clean stale build artifacts from previous CI runs to prevent
-# cross-compiler contamination (e.g. gfortran LAPACK linked by NVHPC)
-./mfc.sh clean
-
 build_opts=""
 if [ "$job_device" = "gpu" ]; then
     build_opts="--gpu"

From 877127b68131b7bb2ac6f756f35ac20acd04338b Mon Sep 17 00:00:00 2001
From: Spencer Bryngelson <sbryngelson@gmail.com>
Date: Fri, 20 Feb 2026 22:20:18 -0500
Subject: [PATCH 05/15] Skip benchmark workflow for bot review events

Bot reviews (AI code reviewers) were triggering the benchmark workflow,
and the concurrency group was cancelling the real benchmark run from
the pull_request event. Gate the workflow early by skipping it when the
review author's account type is Bot.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 .github/workflows/bench.yml | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/.github/workflows/bench.yml b/.github/workflows/bench.yml
index 56735da9c1..b45fc45e40 100644
--- a/.github/workflows/bench.yml
+++ b/.github/workflows/bench.yml
@@ -13,6 +13,9 @@ concurrency:
 jobs:
   file-changes:
     name: Detect File Changes
+    if: >
+      github.event_name != 'pull_request_review' ||
+      github.event.review.user.type != 'Bot'
     runs-on: 'ubuntu-latest'
     outputs:
       checkall: ${{ steps.changes.outputs.checkall }}

From eaab95ae87012ab7be3834b3e0724841a33d6e1c Mon Sep 17 00:00:00 2001
From: Spencer Bryngelson <sbryngelson@gmail.com>
Date: Sat, 21 Feb 2026 00:16:32 -0500
Subject: [PATCH 06/15] Fix CI edge cases: guard os.remove, skip bare -- flag,
 use -s for empty file check

- Wrap os.remove() in try/except OSError on abort path so permission errors
  don't mask the real MFCException
- Only pass the --<matrix.precision> flag when matrix.precision is
  non-empty, to avoid an invalid bare -- argument
- Use -s instead of -f for failed_uuids.txt to skip retry when file exists
  but is empty

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 .github/workflows/test.yml | 4 ++--
 toolchain/mfc/test/test.py | 7 +++++--
 2 files changed, 7 insertions(+), 4 deletions(-)

diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index 32a0410e75..e90bd1d300 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -138,7 +138,7 @@ jobs:
 
       - name: Build
         run:  |
-          /bin/bash mfc.sh test -v --dry-run -j "$(nproc)" --${{ matrix.debug }} --${{ matrix.mpi }} --${{ matrix.precision }} $TEST_ALL
+          /bin/bash mfc.sh test -v --dry-run -j "$(nproc)" --${{ matrix.debug }} --${{ matrix.mpi }} ${{ matrix.precision != '' && format('--{0}', matrix.precision) || '' }} $TEST_ALL
         env:
           TEST_ALL: ${{ matrix.mpi == 'mpi' && '--test-all' || '' }}
 
@@ -149,7 +149,7 @@ jobs:
           /bin/bash mfc.sh test -v --max-attempts 3 -j "$(nproc)" $TEST_ALL $TEST_PCT || TEST_EXIT=$?
 
           # Retry only if a small number of tests failed (sporadic failures)
-          if [ -f tests/failed_uuids.txt ]; then
+          if [ -s tests/failed_uuids.txt ]; then
             NUM_FAILED=$(wc -l < tests/failed_uuids.txt)
             if [ "$NUM_FAILED" -gt 0 ] && [ "$NUM_FAILED" -le 5 ]; then
               FAILED=$(cat tests/failed_uuids.txt | tr '\n' ' ')
diff --git a/toolchain/mfc/test/test.py b/toolchain/mfc/test/test.py
index 681f59f6ae..26be08fb8a 100644
--- a/toolchain/mfc/test/test.py
+++ b/toolchain/mfc/test/test.py
@@ -192,8 +192,11 @@ def test():
     if abort_tests.is_set():
         # Clean up stale failed_uuids.txt so CI doesn't retry wrong tests
         failed_uuids_path = os.path.join(common.MFC_TEST_DIR, "failed_uuids.txt")
-        if os.path.exists(failed_uuids_path):
-            os.remove(failed_uuids_path)
+        try:
+            if os.path.exists(failed_uuids_path):
+                os.remove(failed_uuids_path)
+        except OSError:
+            pass
 
         total_completed = nFAIL + nPASS
         cons.print()

From aa26048e4233123d9b72023f5961912a1538e422 Mon Sep 17 00:00:00 2001
From: Spencer Bryngelson <sbryngelson@gmail.com>
Date: Mon, 23 Feb 2026 09:28:33 -0500
Subject: [PATCH 07/15] Fix --only filter silently matching zero tests with
 multiple UUIDs

The subset check required ALL passed UUIDs to match a single test
case's trace, which is impossible since each case has one UUID.
With 2+ failed tests, the CI retry selected 0 tests and exited 0,
silently masking real failures. Changed to intersection so each
case is kept if ANY of the passed UUIDs matches.

Side effect: the same check also covers trace labels, so multiple
--only terms of any kind now select cases matching ANY term rather
than ALL of them.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 toolchain/mfc/test/test.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/toolchain/mfc/test/test.py b/toolchain/mfc/test/test.py
index 26be08fb8a..9fb0bd8eaf 100644
--- a/toolchain/mfc/test/test.py
+++ b/toolchain/mfc/test/test.py
@@ -71,7 +71,7 @@ def __filter(cases_) -> typing.List[TestCase]:
 
             checkCase = case.trace.split(" -> ")
             checkCase.append(case.get_uuid())
-            if not set(ARG("only")).issubset(set(checkCase)):
+            if not set(ARG("only")).intersection(set(checkCase)):
                 cases.remove(case)
                 skipped_cases.append(case)
 

From b5c095fb2b7c7db4d788b0bfafea2a4a65f93681 Mon Sep 17 00:00:00 2001
From: Spencer Bryngelson <sbryngelson@gmail.com>
Date: Wed, 25 Feb 2026 21:23:43 -0500
Subject: [PATCH 08/15] Remove redundant NUM_FAILED > 0 guard in test retry
 logic

The -s check already guarantees the file is non-empty, and the test
toolchain writes every failed UUID with a trailing newline, so
NUM_FAILED > 0 (as counted by wc -l) is always true in that branch.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 .github/workflows/test.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index e90bd1d300..aa3c29d6ff 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -151,7 +151,7 @@ jobs:
           # Retry only if a small number of tests failed (sporadic failures)
           if [ -s tests/failed_uuids.txt ]; then
             NUM_FAILED=$(wc -l < tests/failed_uuids.txt)
-            if [ "$NUM_FAILED" -gt 0 ] && [ "$NUM_FAILED" -le 5 ]; then
+            if [ "$NUM_FAILED" -le 5 ]; then
               FAILED=$(cat tests/failed_uuids.txt | tr '\n' ' ')
               echo ""
               echo "=== Retrying $NUM_FAILED failed test(s): $FAILED ==="

From 6b43b9b9e6ce2e5df9dfbf252d05dba8a668b1be Mon Sep 17 00:00:00 2001
From: Spencer Bryngelson <sbryngelson@gmail.com>
Date: Wed, 25 Feb 2026 21:40:15 -0500
Subject: [PATCH 09/15] Address review findings: shard slug collision, script
 consolidation, zero-match guard

- Include shard in SLURM job_slug to prevent output file collisions
  between parallel shards (e.g., test-gpu-acc-1-of-2.out)
- Consolidate frontier/ and frontier_amd/ submit.sh and test.sh into
  identical scripts that derive the compiler flag and cluster config
  from the name of the directory they live in
- Add $shard_opts to CPU test branch for future-proofing
- Add zero-match guard for --only filter to fail loudly instead of
  silently exiting 0 when no tests match
- Hoist failed_uuids_path to single definition at top of test()
- Compute log slug dynamically in test.yml for shard-aware filenames
- Remove unnecessary shard: '' from non-sharded matrix entries
- Replace useless cat|tr pipeline with tr < file

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 .github/workflows/frontier/submit.sh     | 22 +++++++++++++++++-----
 .github/workflows/frontier/test.sh       |  8 ++++++--
 .github/workflows/frontier_amd/submit.sh | 22 +++++++++++++++++-----
 .github/workflows/frontier_amd/test.sh   |  8 ++++++--
 .github/workflows/test.yml               | 24 +++++++++++++++---------
 toolchain/mfc/test/test.py               | 11 ++++++++---
 6 files changed, 69 insertions(+), 26 deletions(-)

diff --git a/.github/workflows/frontier/submit.sh b/.github/workflows/frontier/submit.sh
index 4c3e0e3e27..ef0289696c 100644
--- a/.github/workflows/frontier/submit.sh
+++ b/.github/workflows/frontier/submit.sh
@@ -5,8 +5,17 @@ set -e
 # Ignore SIGHUP to survive login node session drops
 trap '' HUP
 
+# Determine compiler flag from directory name
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+cluster_name="$(basename "$SCRIPT_DIR")"
+case "$cluster_name" in
+    frontier)     compiler_flag="f" ;;
+    frontier_amd) compiler_flag="famd" ;;
+    *) echo "ERROR: Unknown cluster '$cluster_name'"; exit 1 ;;
+esac
+
 usage() {
-    echo "Usage: $0 [script.sh] [cpu|gpu]"
+    echo "Usage: $0 [script.sh] [cpu|gpu] [none|acc|omp] [shard]"
 }
 
 if [ ! -z "$1" ]; then
@@ -27,8 +36,11 @@ else
     exit 1
 fi
 
-
-job_slug="`basename "$1" | sed 's/\.sh$//' | sed 's/[^a-zA-Z0-9]/-/g'`-$2-$3"
+shard_suffix=""
+if [ -n "$4" ]; then
+    shard_suffix="-$(echo "$4" | sed 's|/|-of-|')"
+fi
+job_slug="`basename "$1" | sed 's/\.sh$//' | sed 's/[^a-zA-Z0-9]/-/g'`-$2-$3${shard_suffix}"
 output_file="$job_slug.out"
 
 submit_output=$(sbatch <<EOT
@@ -52,8 +64,9 @@ job_slug="$job_slug"
 job_device="$2"
 job_interface="$3"
 job_shard="$4"
+job_cluster="$cluster_name"
 
-. ./mfc.sh load -c f -m $([ "$2" = "gpu" ] && echo "g" || echo "c")
+. ./mfc.sh load -c $compiler_flag -m $([ "$2" = "gpu" ] && echo "g" || echo "c")
 
 $sbatch_script_contents
 
@@ -70,5 +83,4 @@ fi
 echo "Submitted batch job $job_id"
 
 # Use resilient monitoring instead of sbatch -W
-SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
 bash "$SCRIPT_DIR/../../scripts/monitor_slurm_job.sh" "$job_id" "$output_file"
diff --git a/.github/workflows/frontier/test.sh b/.github/workflows/frontier/test.sh
index ad109c6478..f2c0591b3b 100644
--- a/.github/workflows/frontier/test.sh
+++ b/.github/workflows/frontier/test.sh
@@ -19,7 +19,11 @@ if [ -n "$job_shard" ]; then
 fi
 
 if [ "$job_device" = "gpu" ]; then
-    ./mfc.sh test -v -a --rdma-mpi --max-attempts 3 -j $ngpus $device_opts $shard_opts -- -c frontier
+    rdma_opts=""
+    if [ "$job_cluster" = "frontier" ]; then
+        rdma_opts="--rdma-mpi"
+    fi
+    ./mfc.sh test -v -a $rdma_opts --max-attempts 3 -j $ngpus $device_opts $shard_opts -- -c $job_cluster
 else
-    ./mfc.sh test -v -a --max-attempts 3 -j 32 --no-gpu -- -c frontier
+    ./mfc.sh test -v -a --max-attempts 3 -j 32 --no-gpu $shard_opts -- -c $job_cluster
 fi
diff --git a/.github/workflows/frontier_amd/submit.sh b/.github/workflows/frontier_amd/submit.sh
index df73db5807..ef0289696c 100644
--- a/.github/workflows/frontier_amd/submit.sh
+++ b/.github/workflows/frontier_amd/submit.sh
@@ -5,8 +5,17 @@ set -e
 # Ignore SIGHUP to survive login node session drops
 trap '' HUP
 
+# Determine compiler flag from directory name
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+cluster_name="$(basename "$SCRIPT_DIR")"
+case "$cluster_name" in
+    frontier)     compiler_flag="f" ;;
+    frontier_amd) compiler_flag="famd" ;;
+    *) echo "ERROR: Unknown cluster '$cluster_name'"; exit 1 ;;
+esac
+
 usage() {
-    echo "Usage: $0 [script.sh] [cpu|gpu]"
+    echo "Usage: $0 [script.sh] [cpu|gpu] [none|acc|omp] [shard]"
 }
 
 if [ ! -z "$1" ]; then
@@ -27,8 +36,11 @@ else
     exit 1
 fi
 
-
-job_slug="`basename "$1" | sed 's/\.sh$//' | sed 's/[^a-zA-Z0-9]/-/g'`-$2-$3"
+shard_suffix=""
+if [ -n "$4" ]; then
+    shard_suffix="-$(echo "$4" | sed 's|/|-of-|')"
+fi
+job_slug="`basename "$1" | sed 's/\.sh$//' | sed 's/[^a-zA-Z0-9]/-/g'`-$2-$3${shard_suffix}"
 output_file="$job_slug.out"
 
 submit_output=$(sbatch <<EOT
@@ -52,8 +64,9 @@ job_slug="$job_slug"
 job_device="$2"
 job_interface="$3"
 job_shard="$4"
+job_cluster="$cluster_name"
 
-. ./mfc.sh load -c famd -m $([ "$2" = "gpu" ] && echo "g" || echo "c")
+. ./mfc.sh load -c $compiler_flag -m $([ "$2" = "gpu" ] && echo "g" || echo "c")
 
 $sbatch_script_contents
 
@@ -70,5 +83,4 @@ fi
 echo "Submitted batch job $job_id"
 
 # Use resilient monitoring instead of sbatch -W
-SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
 bash "$SCRIPT_DIR/../../scripts/monitor_slurm_job.sh" "$job_id" "$output_file"
diff --git a/.github/workflows/frontier_amd/test.sh b/.github/workflows/frontier_amd/test.sh
index c051144b2d..f2c0591b3b 100644
--- a/.github/workflows/frontier_amd/test.sh
+++ b/.github/workflows/frontier_amd/test.sh
@@ -19,7 +19,11 @@ if [ -n "$job_shard" ]; then
 fi
 
 if [ "$job_device" = "gpu" ]; then
-    ./mfc.sh test -v -a --max-attempts 3 -j $ngpus $device_opts $shard_opts -- -c frontier_amd
+    rdma_opts=""
+    if [ "$job_cluster" = "frontier" ]; then
+        rdma_opts="--rdma-mpi"
+    fi
+    ./mfc.sh test -v -a $rdma_opts --max-attempts 3 -j $ngpus $device_opts $shard_opts -- -c $job_cluster
 else
-    ./mfc.sh test -v -a --max-attempts 3 -j 32 --no-gpu -- -c frontier_amd
+    ./mfc.sh test -v -a --max-attempts 3 -j 32 --no-gpu $shard_opts -- -c $job_cluster
 fi
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index aa3c29d6ff..3b0cd4e465 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -152,7 +152,7 @@ jobs:
           if [ -s tests/failed_uuids.txt ]; then
             NUM_FAILED=$(wc -l < tests/failed_uuids.txt)
             if [ "$NUM_FAILED" -le 5 ]; then
-              FAILED=$(cat tests/failed_uuids.txt | tr '\n' ' ')
+              FAILED=$(tr '\n' ' ' < tests/failed_uuids.txt)
               echo ""
               echo "=== Retrying $NUM_FAILED failed test(s): $FAILED ==="
               echo ""
@@ -183,19 +183,16 @@ jobs:
             cluster_name: 'Georgia Tech | Phoenix'
             device: 'gpu'
             interface: 'acc'
-            shard: ''
           - runner:       'gt'
             cluster:      'phoenix'
             cluster_name: 'Georgia Tech | Phoenix'
             device: 'gpu'
             interface: 'omp'
-            shard: ''
           - runner:       'gt'
             cluster:      'phoenix'
             cluster_name: 'Georgia Tech | Phoenix'
             device: 'cpu'
             interface: 'none'
-            shard: ''
           # Frontier (ORNL) — build on login node, GPU tests sharded for batch partition
           - runner:       'frontier'
             cluster:      'frontier'
@@ -226,7 +223,6 @@ jobs:
             cluster_name: 'Oak Ridge | Frontier'
             device: 'cpu'
             interface: 'none'
-            shard: ''
           # Frontier AMD — build on login node, GPU tests sharded for batch partition
           - runner:       'frontier'
             cluster:      'frontier_amd'
@@ -245,7 +241,6 @@ jobs:
             cluster_name: 'Oak Ridge | Frontier (AMD)'
             device: 'cpu'
             interface: 'none'
-            shard: ''
     runs-on:
       group:  phoenix
       labels: ${{ matrix.runner }}
@@ -270,13 +265,24 @@ jobs:
       - name: Test
         run:  bash .github/workflows/${{ matrix.cluster }}/submit.sh .github/workflows/${{ matrix.cluster }}/test.sh ${{ matrix.device }} ${{ matrix.interface }} ${{ matrix.shard }}
 
+      - name: Compute Log Slug
+        if:   always()
+        id:   log
+        run:  |
+          SLUG="test-${{ matrix.device }}-${{ matrix.interface }}"
+          SHARD="${{ matrix.shard }}"
+          if [ -n "$SHARD" ]; then
+            SLUG="${SLUG}-$(echo "$SHARD" | sed 's|/|-of-|')"
+          fi
+          echo "slug=${SLUG}" >> "$GITHUB_OUTPUT"
+
       - name: Print Logs
         if:   always()
-        run:  cat test-${{ matrix.device }}-${{ matrix.interface }}.out
+        run:  cat ${{ steps.log.outputs.slug }}.out
 
       - name: Archive Logs
         uses: actions/upload-artifact@v4
         if:   matrix.cluster != 'phoenix'
         with:
-          name: logs-${{ strategy.job-index }}-${{ matrix.device }}-${{ matrix.interface }}
-          path: test-${{ matrix.device }}-${{ matrix.interface }}.out
+          name: logs-${{ strategy.job-index }}-${{ steps.log.outputs.slug }}
+          path: ${{ steps.log.outputs.slug }}.out
diff --git a/toolchain/mfc/test/test.py b/toolchain/mfc/test/test.py
index 9fb0bd8eaf..73dd8f3414 100644
--- a/toolchain/mfc/test/test.py
+++ b/toolchain/mfc/test/test.py
@@ -42,7 +42,7 @@
 class TestTimeoutError(MFCException):
     pass
 
-# pylint: disable=too-many-branches, trailing-whitespace
+# pylint: disable=too-many-branches, too-many-statements, trailing-whitespace
 def __filter(cases_) -> typing.List[TestCase]:
     cases = cases_[:]
     selected_cases = []
@@ -75,6 +75,12 @@ def __filter(cases_) -> typing.List[TestCase]:
                 cases.remove(case)
                 skipped_cases.append(case)
 
+        if not cases:
+            raise MFCException(
+                f"--only filter matched zero test cases. "
+                f"Specified: {ARG('only')}. Check that UUIDs/names are valid."
+            )
+
     for case in cases[:]:
         if case.ppn > 1 and not ARG("mpi"):
             cases.remove(case)
@@ -123,6 +129,7 @@ def test():
     global errors, failed_tests, test_start_time
 
     test_start_time = time.time()  # Start timing
+    failed_uuids_path = os.path.join(common.MFC_TEST_DIR, "failed_uuids.txt")
     cases = list_cases()
 
     # Delete UUIDs that are not in the list of cases from tests/
@@ -191,7 +198,6 @@ def test():
     # Check if we aborted due to high failure rate
     if abort_tests.is_set():
         # Clean up stale failed_uuids.txt so CI doesn't retry wrong tests
-        failed_uuids_path = os.path.join(common.MFC_TEST_DIR, "failed_uuids.txt")
         try:
             if os.path.exists(failed_uuids_path):
                 os.remove(failed_uuids_path)
@@ -223,7 +229,6 @@ def test():
     _print_test_summary(nPASS, nFAIL, nSKIP, minutes, seconds, failed_tests, skipped_cases)
 
     # Write failed UUIDs to file for CI retry logic
-    failed_uuids_path = os.path.join(common.MFC_TEST_DIR, "failed_uuids.txt")
     if failed_tests:
         with open(failed_uuids_path, "w") as f:
             for test_info in failed_tests:

From adac6887452b7e8013ceea93ed13f939c326c73f Mon Sep 17 00:00:00 2001
From: Spencer Bryngelson <sbryngelson@gmail.com>
Date: Wed, 25 Feb 2026 22:24:49 -0500
Subject: [PATCH 10/15] Use AND logic for labels and OR logic for UUIDs in
 --only filter

The --only filter now detects whether each term is a UUID (8-char hex)
or a trace label and applies appropriate matching:
  - Labels: AND logic (--only 2D Bubbles matches tests with both)
  - UUIDs: OR logic (--only UUID1 UUID2 matches tests with either)
  - Mixed: keep case if all labels match OR any UUID matches

This preserves the documented behavior for label filtering while
correctly supporting the CI retry path that passes multiple UUIDs.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 toolchain/mfc/test/test.py | 43 +++++++++++++++++++++++++++++++-------
 1 file changed, 35 insertions(+), 8 deletions(-)

diff --git a/toolchain/mfc/test/test.py b/toolchain/mfc/test/test.py
index 73dd8f3414..def4305bdd 100644
--- a/toolchain/mfc/test/test.py
+++ b/toolchain/mfc/test/test.py
@@ -42,6 +42,40 @@
 class TestTimeoutError(MFCException):
     pass
 
+def _filter_only(cases, skipped_cases):
+    """Filter cases by --only terms using AND for labels, OR for UUIDs.
+
+    Labels (non-UUID terms) must ALL match; UUIDs (8-char hex) need ANY match;
+    mixed: keep if all labels match OR any UUID matches. Rejected cases are
+    moved from cases to skipped_cases in place; both lists are returned.
+    """
+    def is_uuid(term):
+        return len(term) == 8 and all(c in '0123456789abcdefABCDEF' for c in term)
+
+    uuids  = [t for t in ARG("only") if is_uuid(t)]
+    labels = [t for t in ARG("only") if not is_uuid(t)]
+
+    for case in cases[:]:  # iterate over a copy: cases is mutated below
+        check = set(case.trace.split(" -> "))
+        check.add(case.get_uuid())
+
+        label_ok = all(l in check for l in labels) if labels else True
+        uuid_ok  = any(u in check for u in uuids)  if uuids  else True
+
+        if labels and uuids:
+            keep = label_ok or uuid_ok  # mixed terms: either criterion suffices
+        elif labels:
+            keep = label_ok
+        else:
+            keep = uuid_ok
+
+        if not keep:
+            cases.remove(case)
+            skipped_cases.append(case)
+
+    return cases, skipped_cases
+
+
 # pylint: disable=too-many-branches, too-many-statements, trailing-whitespace
 def __filter(cases_) -> typing.List[TestCase]:
     cases = cases_[:]
@@ -66,14 +100,7 @@ def __filter(cases_) -> typing.List[TestCase]:
         raise MFCException("Testing: Your specified range [--from,--to] is incorrect. Please ensure both IDs exist and are in the correct order.")
 
     if len(ARG("only")) > 0:
-        for case in cases[:]:
-            case: TestCase
-
-            checkCase = case.trace.split(" -> ")
-            checkCase.append(case.get_uuid())
-            if not set(ARG("only")).intersection(set(checkCase)):
-                cases.remove(case)
-                skipped_cases.append(case)
+        cases, skipped_cases = _filter_only(cases, skipped_cases)
 
         if not cases:
             raise MFCException(

From 06c0641029342b84fea968b4dc86cc7b3b00a54e Mon Sep 17 00:00:00 2001
From: Spencer Bryngelson <sbryngelson@gmail.com>
Date: Wed, 25 Feb 2026 22:47:33 -0500
Subject: [PATCH 11/15] Consolidate CI submit scripts: merge submit-bench.sh
 into submit.sh

submit.sh now auto-detects job type (bench vs test) from the submitted
script's basename, selecting the appropriate SBATCH account, time limit,
and partition. This eliminates three submit-bench.sh files and makes
frontier/ and frontier_amd/ scripts byte-identical via directory-name
detection for compiler flags and cluster-specific options.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 .github/scripts/submit_and_monitor_bench.sh   | 51 ++++-----------
 .github/workflows/frontier/bench.sh           |  4 +-
 .github/workflows/frontier/build.sh           | 15 ++++-
 .github/workflows/frontier/submit-bench.sh    | 54 ---------------
 .github/workflows/frontier/submit.sh          | 28 ++++++--
 .github/workflows/frontier_amd/bench.sh       |  4 +-
 .github/workflows/frontier_amd/build.sh       | 15 ++++-
 .../workflows/frontier_amd/submit-bench.sh    | 54 ---------------
 .github/workflows/frontier_amd/submit.sh      | 28 ++++++--
 .github/workflows/phoenix/submit-bench.sh     | 65 -------------------
 .github/workflows/phoenix/submit.sh           | 21 +++++-
 11 files changed, 109 insertions(+), 230 deletions(-)
 delete mode 100644 .github/workflows/frontier/submit-bench.sh
 delete mode 100644 .github/workflows/frontier_amd/submit-bench.sh
 delete mode 100644 .github/workflows/phoenix/submit-bench.sh

diff --git a/.github/scripts/submit_and_monitor_bench.sh b/.github/scripts/submit_and_monitor_bench.sh
index 80790752d7..c081c8692a 100755
--- a/.github/scripts/submit_and_monitor_bench.sh
+++ b/.github/scripts/submit_and_monitor_bench.sh
@@ -14,50 +14,27 @@ device="$2"
 interface="$3"
 cluster="$4"
 
-# Get the directory where this script lives
-SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
-
 echo "[$dir] Submitting benchmark for $device-$interface on $cluster..."
 cd "$dir"
 
-# Submit job
-submit_output=$(bash .github/workflows/$cluster/submit-bench.sh \
-  .github/workflows/$cluster/bench.sh "$device" "$interface" 2>&1)
-
-job_id=$(echo "$submit_output" | sed -n 's/.*Submitted batch job \([0-9][0-9]*\).*/\1/p')
-job_slug="bench-$device-$interface"
-output_file="${job_slug}.out"
-
-if [ -z "$job_id" ]; then
-  echo "[$dir] ERROR: Failed to submit job"
-  echo "$submit_output"
-  exit 1
-fi
-
-echo "[$dir] Job ID: $job_id, monitoring output file: $output_file"
-
-# Use the monitoring script from PR (where this script lives)
-monitor_exit=0
-bash "${SCRIPT_DIR}/monitor_slurm_job.sh" "$job_id" "$output_file" || monitor_exit=$?
-if [ "$monitor_exit" -ne 0 ]; then
-  echo "[$dir] WARNING: SLURM job exited with code $monitor_exit"
-else
-  echo "[$dir] Monitoring complete for job $job_id"
-fi
+# Submit and monitor job (submit.sh auto-detects bench mode from script name)
+bash .github/workflows/$cluster/submit.sh \
+    .github/workflows/$cluster/bench.sh "$device" "$interface"
 
 # Verify the YAML output file was created
+job_slug="bench-$device-$interface"
 yaml_file="${job_slug}.yaml"
 if [ ! -f "$yaml_file" ]; then
-  echo "[$dir] ERROR: Expected output file not found: $yaml_file"
-  echo "[$dir] Directory contents:"
-  ls -la *.yaml 2>/dev/null || echo "  No YAML files found"
-  echo ""
-  echo "[$dir] Last 100 lines of job output ($output_file):"
-  echo "----------------------------------------"
-  tail -n 100 "$output_file" 2>/dev/null || echo "  Could not read output file"
-  echo "----------------------------------------"
-  exit 1
+    echo "[$dir] ERROR: Expected output file not found: $yaml_file"
+    echo "[$dir] Directory contents:"
+    ls -la *.yaml 2>/dev/null || echo "  No YAML files found"
+    echo ""
+    output_file="${job_slug}.out"
+    echo "[$dir] Last 100 lines of job output ($output_file):"
+    echo "----------------------------------------"
+    tail -n 100 "$output_file" 2>/dev/null || echo "  Could not read output file"
+    echo "----------------------------------------"
+    exit 1
 fi
 
 echo "[$dir] Verified output file exists: $yaml_file ($(stat -f%z "$yaml_file" 2>/dev/null || stat -c%s "$yaml_file" 2>/dev/null) bytes)"
-
diff --git a/.github/workflows/frontier/bench.sh b/.github/workflows/frontier/bench.sh
index 35b4c5950e..a79f1a2fc5 100644
--- a/.github/workflows/frontier/bench.sh
+++ b/.github/workflows/frontier/bench.sh
@@ -16,7 +16,7 @@ if [ "$job_device" = "gpu" ]; then
 fi
 
 if [ "$job_device" = "gpu" ]; then
-    ./mfc.sh bench --mem 12 -j $n_ranks -o "$job_slug.yaml" -- -c frontier $device_opts -n $n_ranks
+    ./mfc.sh bench --mem 12 -j $n_ranks -o "$job_slug.yaml" -- -c $job_cluster $device_opts -n $n_ranks
 else
-    ./mfc.sh bench --mem 1 -j $(nproc) -o "$job_slug.yaml" -- -c frontier $device_opts -n $n_ranks
+    ./mfc.sh bench --mem 1 -j $(nproc) -o "$job_slug.yaml" -- -c $job_cluster $device_opts -n $n_ranks
 fi
diff --git a/.github/workflows/frontier/build.sh b/.github/workflows/frontier/build.sh
index ca09c2a116..84036641c6 100644
--- a/.github/workflows/frontier/build.sh
+++ b/.github/workflows/frontier/build.sh
@@ -3,6 +3,15 @@
 # Ignore SIGHUP to survive login node session drops
 trap '' HUP
 
+# Determine compiler flag from directory name
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+cluster_name="$(basename "$SCRIPT_DIR")"
+case "$cluster_name" in
+    frontier)     compiler_flag="f" ;;
+    frontier_amd) compiler_flag="famd" ;;
+    *) echo "ERROR: Unknown cluster '$cluster_name'"; exit 1 ;;
+esac
+
 job_device=$1
 job_interface=$2
 run_bench=$3
@@ -16,11 +25,11 @@ if [ "$job_device" = "gpu" ]; then
   fi
 fi
 
-. ./mfc.sh load -c f -m g
+. ./mfc.sh load -c $compiler_flag -m g
 
 # Only set up build cache for test suite, not benchmarks
 if [ "$run_bench" != "bench" ]; then
-    source .github/scripts/setup-build-cache.sh frontier "$job_device" "$job_interface"
+    source .github/scripts/setup-build-cache.sh "$cluster_name" "$job_device" "$job_interface"
 fi
 
 max_attempts=3
@@ -37,7 +46,7 @@ while [ $attempt -le $max_attempts ]; do
             fi
         done
     else
-        if ./mfc.sh test -v -a --dry-run --rdma-mpi -j 8 $build_opts; then
+        if ./mfc.sh test -v -a --dry-run $([ "$cluster_name" = "frontier" ] && echo "--rdma-mpi") -j 8 $build_opts; then
             build_cmd_ok=true
         else
             build_cmd_ok=false
diff --git a/.github/workflows/frontier/submit-bench.sh b/.github/workflows/frontier/submit-bench.sh
deleted file mode 100644
index 81b9b274e6..0000000000
--- a/.github/workflows/frontier/submit-bench.sh
+++ /dev/null
@@ -1,54 +0,0 @@
-#!/bin/bash
-
-set -e
-
-usage() {
-    echo "Usage: $0 [script.sh] [cpu|gpu]"
-}
-
-if [ ! -z "$1" ]; then
-    sbatch_script_contents=`cat $1`
-else
-    usage
-    exit 1
-fi
-
-if [ "$2" = "cpu" ]; then
-    sbatch_device_opts="\
-#SBATCH -n 32                       # Number of cores required"
-elif [ "$2" = "gpu" ]; then
-    sbatch_device_opts="\
-#SBATCH -n 8                       # Number of cores required"
-else
-    usage; exit 1
-fi
-
-
-job_slug="`basename "$1" | sed 's/\.sh$//' | sed 's/[^a-zA-Z0-9]/-/g'`-$2-$3"
-
-sbatch <<EOT
-#!/bin/bash
-#SBATCH -JMFC-$job_slug            # Job name
-#SBATCH -A ENG160                  # charge account
-#SBATCH -N 1                       # Number of nodes required
-$sbatch_device_opts
-#SBATCH -t 05:59:00                # Duration of the job (Ex: 15 mins)
-#SBATCH -o$job_slug.out            # Combined output and error messages file
-#SBATCH -p extended                # Extended partition for shorter queues
-
-set -e
-set -x
-
-cd "\$SLURM_SUBMIT_DIR"
-echo "Running in $(pwd):"
-
-job_slug="$job_slug"
-job_device="$2"
-job_interface="$3"
-
-. ./mfc.sh load -c f -m g
-
-$sbatch_script_contents
-
-EOT
-
diff --git a/.github/workflows/frontier/submit.sh b/.github/workflows/frontier/submit.sh
index ef0289696c..13018c595d 100644
--- a/.github/workflows/frontier/submit.sh
+++ b/.github/workflows/frontier/submit.sh
@@ -25,6 +25,13 @@ else
     exit 1
 fi
 
+# Detect job type from submitted script basename
+script_basename="$(basename "$1" .sh)"
+case "$script_basename" in
+    bench*) job_type="bench" ;;
+    *)      job_type="test"  ;;
+esac
+
 if [ "$2" = "cpu" ]; then
     sbatch_device_opts="\
 #SBATCH -n 32                       # Number of cores required"
@@ -36,6 +43,19 @@ else
     exit 1
 fi
 
+# Select SBATCH params based on job type
+if [ "$job_type" = "bench" ]; then
+    sbatch_account="#SBATCH -A ENG160"
+    sbatch_time="#SBATCH -t 05:59:00"
+    sbatch_partition="#SBATCH -p extended"
+    sbatch_extra=""
+else
+    sbatch_account="#SBATCH -A CFD154"
+    sbatch_time="#SBATCH -t 01:59:00"
+    sbatch_partition="#SBATCH -p batch"
+    sbatch_extra="#SBATCH --qos=hackathon"
+fi
+
 shard_suffix=""
 if [ -n "$4" ]; then
     shard_suffix="-$(echo "$4" | sed 's|/|-of-|')"
@@ -46,13 +66,13 @@ output_file="$job_slug.out"
 submit_output=$(sbatch <<EOT
 #!/bin/bash
 #SBATCH -J MFC-$job_slug            # Job name
-#SBATCH -A CFD154                  # charge account
+$sbatch_account
 #SBATCH -N 1                       # Number of nodes required
 $sbatch_device_opts
-#SBATCH -t 01:59:00                # Duration of the job
+$sbatch_time
 #SBATCH -o$output_file             # Combined output and error messages file
-#SBATCH -p batch                   # Batch partition (concurrent jobs)
-#SBATCH --qos=hackathon            # Hackathon QOS for batch access
+$sbatch_partition
+$sbatch_extra
 
 set -e
 set -x
diff --git a/.github/workflows/frontier_amd/bench.sh b/.github/workflows/frontier_amd/bench.sh
index 6e01687e79..a79f1a2fc5 100644
--- a/.github/workflows/frontier_amd/bench.sh
+++ b/.github/workflows/frontier_amd/bench.sh
@@ -16,7 +16,7 @@ if [ "$job_device" = "gpu" ]; then
 fi
 
 if [ "$job_device" = "gpu" ]; then
-    ./mfc.sh bench --mem 12 -j $n_ranks -o "$job_slug.yaml" -- -c frontier_amd $device_opts -n $n_ranks
+    ./mfc.sh bench --mem 12 -j $n_ranks -o "$job_slug.yaml" -- -c $job_cluster $device_opts -n $n_ranks
 else
-    ./mfc.sh bench --mem 1 -j $(nproc) -o "$job_slug.yaml" -- -c frontier_amd $device_opts -n $n_ranks
+    ./mfc.sh bench --mem 1 -j $(nproc) -o "$job_slug.yaml" -- -c $job_cluster $device_opts -n $n_ranks
 fi
diff --git a/.github/workflows/frontier_amd/build.sh b/.github/workflows/frontier_amd/build.sh
index 6036f73fc2..84036641c6 100644
--- a/.github/workflows/frontier_amd/build.sh
+++ b/.github/workflows/frontier_amd/build.sh
@@ -3,6 +3,15 @@
 # Ignore SIGHUP to survive login node session drops
 trap '' HUP
 
+# Determine compiler flag from directory name
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+cluster_name="$(basename "$SCRIPT_DIR")"
+case "$cluster_name" in
+    frontier)     compiler_flag="f" ;;
+    frontier_amd) compiler_flag="famd" ;;
+    *) echo "ERROR: Unknown cluster '$cluster_name'"; exit 1 ;;
+esac
+
 job_device=$1
 job_interface=$2
 run_bench=$3
@@ -16,11 +25,11 @@ if [ "$job_device" = "gpu" ]; then
   fi
 fi
 
-. ./mfc.sh load -c famd -m g
+. ./mfc.sh load -c $compiler_flag -m g
 
 # Only set up build cache for test suite, not benchmarks
 if [ "$run_bench" != "bench" ]; then
-    source .github/scripts/setup-build-cache.sh frontier_amd "$job_device" "$job_interface"
+    source .github/scripts/setup-build-cache.sh "$cluster_name" "$job_device" "$job_interface"
 fi
 
 max_attempts=3
@@ -37,7 +46,7 @@ while [ $attempt -le $max_attempts ]; do
             fi
         done
     else
-        if ./mfc.sh test -v -a --dry-run -j 8 $build_opts; then
+        if ./mfc.sh test -v -a --dry-run $([ "$cluster_name" = "frontier" ] && echo "--rdma-mpi") -j 8 $build_opts; then
             build_cmd_ok=true
         else
             build_cmd_ok=false
diff --git a/.github/workflows/frontier_amd/submit-bench.sh b/.github/workflows/frontier_amd/submit-bench.sh
deleted file mode 100644
index 109052abeb..0000000000
--- a/.github/workflows/frontier_amd/submit-bench.sh
+++ /dev/null
@@ -1,54 +0,0 @@
-#!/bin/bash
-
-set -e
-
-usage() {
-    echo "Usage: $0 [script.sh] [cpu|gpu]"
-}
-
-if [ ! -z "$1" ]; then
-    sbatch_script_contents=`cat $1`
-else
-    usage
-    exit 1
-fi
-
-if [ "$2" = "cpu" ]; then
-    sbatch_device_opts="\
-#SBATCH -n 32                       # Number of cores required"
-elif [ "$2" = "gpu" ]; then
-    sbatch_device_opts="\
-#SBATCH -n 8                       # Number of cores required"
-else
-    usage; exit 1
-fi
-
-
-job_slug="`basename "$1" | sed 's/\.sh$//' | sed 's/[^a-zA-Z0-9]/-/g'`-$2-$3"
-
-sbatch <<EOT
-#!/bin/bash
-#SBATCH -JMFC-$job_slug            # Job name
-#SBATCH -A ENG160                  # charge account
-#SBATCH -N 1                       # Number of nodes required
-$sbatch_device_opts
-#SBATCH -t 05:59:00                # Duration of the job (Ex: 15 mins)
-#SBATCH -o$job_slug.out            # Combined output and error messages file
-#SBATCH -p extended                # Extended partition for shorter queues
-
-set -e
-set -x
-
-cd "\$SLURM_SUBMIT_DIR"
-echo "Running in $(pwd):"
-
-job_slug="$job_slug"
-job_device="$2"
-job_interface="$3"
-
-. ./mfc.sh load -c famd -m g
-
-$sbatch_script_contents
-
-EOT
-
diff --git a/.github/workflows/frontier_amd/submit.sh b/.github/workflows/frontier_amd/submit.sh
index ef0289696c..13018c595d 100644
--- a/.github/workflows/frontier_amd/submit.sh
+++ b/.github/workflows/frontier_amd/submit.sh
@@ -25,6 +25,13 @@ else
     exit 1
 fi
 
+# Detect job type from submitted script basename
+script_basename="$(basename "$1" .sh)"
+case "$script_basename" in
+    bench*) job_type="bench" ;;
+    *)      job_type="test"  ;;
+esac
+
 if [ "$2" = "cpu" ]; then
     sbatch_device_opts="\
 #SBATCH -n 32                       # Number of cores required"
@@ -36,6 +43,19 @@ else
     exit 1
 fi
 
+# Select SBATCH params based on job type
+if [ "$job_type" = "bench" ]; then
+    sbatch_account="#SBATCH -A ENG160"
+    sbatch_time="#SBATCH -t 05:59:00"
+    sbatch_partition="#SBATCH -p extended"
+    sbatch_extra=""
+else
+    sbatch_account="#SBATCH -A CFD154"
+    sbatch_time="#SBATCH -t 01:59:00"
+    sbatch_partition="#SBATCH -p batch"
+    sbatch_extra="#SBATCH --qos=hackathon"
+fi
+
 shard_suffix=""
 if [ -n "$4" ]; then
     shard_suffix="-$(echo "$4" | sed 's|/|-of-|')"
@@ -46,13 +66,13 @@ output_file="$job_slug.out"
 submit_output=$(sbatch <<EOT
 #!/bin/bash
 #SBATCH -J MFC-$job_slug            # Job name
-#SBATCH -A CFD154                  # charge account
+$sbatch_account
 #SBATCH -N 1                       # Number of nodes required
 $sbatch_device_opts
-#SBATCH -t 01:59:00                # Duration of the job
+$sbatch_time
 #SBATCH -o$output_file             # Combined output and error messages file
-#SBATCH -p batch                   # Batch partition (concurrent jobs)
-#SBATCH --qos=hackathon            # Hackathon QOS for batch access
+$sbatch_partition
+$sbatch_extra
 
 set -e
 set -x
diff --git a/.github/workflows/phoenix/submit-bench.sh b/.github/workflows/phoenix/submit-bench.sh
deleted file mode 100644
index a3830f5050..0000000000
--- a/.github/workflows/phoenix/submit-bench.sh
+++ /dev/null
@@ -1,65 +0,0 @@
-#!/bin/bash
-
-set -e
-
-usage() {
-    echo "Usage: $0 [script.sh] [cpu|gpu] [none|acc|omp]"
-}
-
-if [ ! -z "$1" ]; then
-    sbatch_script_contents=`cat $1`
-else
-    usage
-    exit 1
-fi
-
-sbatch_cpu_opts="\
-#SBATCH -p cpu-small               # partition
-#SBATCH --ntasks-per-node=24       # Number of cores per node required
-#SBATCH --mem-per-cpu=2G           # Memory per core\
-"
-
-sbatch_gpu_opts="\
-#SBATCH -CL40S
-#SBATCH --ntasks-per-node=4       # Number of cores per node required
-#SBATCH -G2\
-"
-
-if [ "$2" = "cpu" ]; then
-    sbatch_device_opts="$sbatch_cpu_opts"
-elif [ "$2" = "gpu" ]; then
-    sbatch_device_opts="$sbatch_gpu_opts"
-else
-    usage
-    exit 1
-fi
-
-job_slug="`basename "$1" | sed 's/\.sh$//' | sed 's/[^a-zA-Z0-9]/-/g'`-$2-$3"
-
-sbatch <<EOT
-#!/bin/bash
-#SBATCH -Jshb-$job_slug            # Job name
-#SBATCH --account=gts-sbryngelson3 # charge account
-#SBATCH -N1                        # Number of nodes required
-$sbatch_device_opts
-#SBATCH -t 04:00:00                # Duration of the job (Ex: 15 mins)
-#SBATCH -q embers                  # QOS Name
-#SBATCH --requeue                  # Auto-requeue on preemption
-#SBATCH -o$job_slug.out            # Combined output and error messages file
-
-set -e
-set -x
-
-cd "\$SLURM_SUBMIT_DIR"
-echo "Running in $(pwd):"
-
-job_slug="$job_slug"
-job_device="$2"
-job_interface="$3"
-
-. ./mfc.sh load -c p -m $2
-
-$sbatch_script_contents
-
-EOT
-
diff --git a/.github/workflows/phoenix/submit.sh b/.github/workflows/phoenix/submit.sh
index 874f5afa44..5b7162fef7 100755
--- a/.github/workflows/phoenix/submit.sh
+++ b/.github/workflows/phoenix/submit.sh
@@ -16,17 +16,34 @@ else
     exit 1
 fi
 
+# Detect job type from submitted script basename
+script_basename="$(basename "$1" .sh)"
+case "$script_basename" in
+    bench*) job_type="bench" ;;
+    *)      job_type="test"  ;;
+esac
+
 sbatch_cpu_opts="\
 #SBATCH -p cpu-small               # partition
 #SBATCH --ntasks-per-node=24       # Number of cores per node required
 #SBATCH --mem-per-cpu=2G           # Memory per core\
 "
 
-sbatch_gpu_opts="\
+if [ "$job_type" = "bench" ]; then
+    sbatch_gpu_opts="\
+#SBATCH -CL40S
+#SBATCH --ntasks-per-node=4       # Number of cores per node required
+#SBATCH -G2\
+"
+    sbatch_time="#SBATCH -t 04:00:00"
+else
+    sbatch_gpu_opts="\
 #SBATCH -p gpu-v100,gpu-a100,gpu-h100,gpu-l40s
 #SBATCH --ntasks-per-node=4       # Number of cores per node required
 #SBATCH -G2\
 "
+    sbatch_time="#SBATCH -t 03:00:00"
+fi
 
 if [ "$2" = "cpu" ]; then
     sbatch_device_opts="$sbatch_cpu_opts"
@@ -46,7 +63,7 @@ submit_output=$(sbatch <<EOT
 #SBATCH --account=gts-sbryngelson3 # charge account
 #SBATCH -N1                        # Number of nodes required
 $sbatch_device_opts
-#SBATCH -t 03:00:00                # Duration of the job (Ex: 15 mins)
+$sbatch_time
 #SBATCH -q embers                  # QOS Name
 #SBATCH --requeue                  # Auto-requeue on preemption
 #SBATCH -o$output_file             # Combined output and error messages file

From a1c55ede5160f32ec503b4e154c0ee04f2a3e4da Mon Sep 17 00:00:00 2001
From: Spencer Bryngelson <sbryngelson@gmail.com>
Date: Thu, 26 Feb 2026 09:40:38 -0500
Subject: [PATCH 12/15] Use normal QOS instead of hackathon for Frontier test
 jobs

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 .github/workflows/frontier/submit.sh     | 2 +-
 .github/workflows/frontier_amd/submit.sh | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/frontier/submit.sh b/.github/workflows/frontier/submit.sh
index 13018c595d..16d4f0d73c 100644
--- a/.github/workflows/frontier/submit.sh
+++ b/.github/workflows/frontier/submit.sh
@@ -53,7 +53,7 @@ else
     sbatch_account="#SBATCH -A CFD154"
     sbatch_time="#SBATCH -t 01:59:00"
     sbatch_partition="#SBATCH -p batch"
-    sbatch_extra="#SBATCH --qos=hackathon"
+    sbatch_extra="#SBATCH --qos=normal"
 fi
 
 shard_suffix=""
diff --git a/.github/workflows/frontier_amd/submit.sh b/.github/workflows/frontier_amd/submit.sh
index 13018c595d..16d4f0d73c 100644
--- a/.github/workflows/frontier_amd/submit.sh
+++ b/.github/workflows/frontier_amd/submit.sh
@@ -53,7 +53,7 @@ else
     sbatch_account="#SBATCH -A CFD154"
     sbatch_time="#SBATCH -t 01:59:00"
     sbatch_partition="#SBATCH -p batch"
-    sbatch_extra="#SBATCH --qos=hackathon"
+    sbatch_extra="#SBATCH --qos=normal"
 fi
 
 shard_suffix=""

From d5612834faa19df101d72fe29bd1408bbe7d8d06 Mon Sep 17 00:00:00 2001
From: Spencer Bryngelson <sbryngelson@gmail.com>
Date: Thu, 26 Feb 2026 16:59:51 -0500
Subject: [PATCH 13/15] Add zero-test guard after shard filtering and pin retry
 action to SHA

- Raise MFCException when --shard produces zero cases (prevents
  silent green CI with nothing executed)
- Pin nick-fields/retry to commit SHA for security on self-hosted
  runners with cluster credentials

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 .github/workflows/test.yml | 2 +-
 toolchain/mfc/test/test.py | 6 ++++++
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index 3b0cd4e465..b6aee7e204 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -254,7 +254,7 @@ jobs:
 
       - name: Build
         if:   matrix.cluster != 'phoenix'
-        uses: nick-fields/retry@v3
+        uses: nick-fields/retry@ce71cc2ab81d554ebbe88c79ab5975992d79ba08 # v3
         with:
           max_attempts: 3
           retry_wait_seconds: 60
diff --git a/toolchain/mfc/test/test.py b/toolchain/mfc/test/test.py
index def4305bdd..9a97018300 100644
--- a/toolchain/mfc/test/test.py
+++ b/toolchain/mfc/test/test.py
@@ -140,6 +140,12 @@ def __filter(cases_) -> typing.List[TestCase]:
         skipped_cases += [c for i, c in enumerate(cases) if i % shard_count != shard_idx - 1]
         cases = [c for i, c in enumerate(cases) if i % shard_count == shard_idx - 1]
 
+        if not cases:
+            raise MFCException(
+                f"--shard {ARG('shard')} matched zero test cases. "
+                f"Total cases before sharding may be less than shard count."
+            )
+
     if ARG("percent") == 100:
         return cases, skipped_cases
 

From 46dcd73d350cb7f227dd390a637ee90d2bde67b9 Mon Sep 17 00:00:00 2001
From: Spencer Bryngelson <sbryngelson@gmail.com>
Date: Thu, 26 Feb 2026 18:20:50 -0500
Subject: [PATCH 14/15] Trigger CI


From a2431bf29381f903327b1f4e1da0d598bed6d517 Mon Sep 17 00:00:00 2001
From: Spencer Bryngelson <sbryngelson@gmail.com>
Date: Thu, 26 Feb 2026 19:15:40 -0500
Subject: [PATCH 15/15] Rename ambiguous single-letter variable `l` to `label`
 in _filter_only

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 toolchain/mfc/test/test.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/toolchain/mfc/test/test.py b/toolchain/mfc/test/test.py
index 9a97018300..049af9e560 100644
--- a/toolchain/mfc/test/test.py
+++ b/toolchain/mfc/test/test.py
@@ -59,7 +59,7 @@ def is_uuid(term):
         check = set(case.trace.split(" -> "))
         check.add(case.get_uuid())
 
-        label_ok = all(l in check for l in labels) if labels else True
+        label_ok = all(label in check for label in labels) if labels else True
         uuid_ok  = any(u in check for u in uuids)  if uuids  else True
 
         if labels and uuids: