diff --git a/.github/scripts/submit_and_monitor_bench.sh b/.github/scripts/submit_and_monitor_bench.sh
index 80790752d7..c081c8692a 100755
--- a/.github/scripts/submit_and_monitor_bench.sh
+++ b/.github/scripts/submit_and_monitor_bench.sh
@@ -14,50 +14,27 @@ device="$2"
 interface="$3"
 cluster="$4"
 
-# Get the directory where this script lives
-SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
-
 echo "[$dir] Submitting benchmark for $device-$interface on $cluster..."
 cd "$dir"
 
-# Submit job
-submit_output=$(bash .github/workflows/$cluster/submit-bench.sh \
-  .github/workflows/$cluster/bench.sh "$device" "$interface" 2>&1)
-
-job_id=$(echo "$submit_output" | sed -n 's/.*Submitted batch job \([0-9][0-9]*\).*/\1/p')
-job_slug="bench-$device-$interface"
-output_file="${job_slug}.out"
-
-if [ -z "$job_id" ]; then
-  echo "[$dir] ERROR: Failed to submit job"
-  echo "$submit_output"
-  exit 1
-fi
-
-echo "[$dir] Job ID: $job_id, monitoring output file: $output_file"
-
-# Use the monitoring script from PR (where this script lives)
-monitor_exit=0
-bash "${SCRIPT_DIR}/monitor_slurm_job.sh" "$job_id" "$output_file" || monitor_exit=$?
-if [ "$monitor_exit" -ne 0 ]; then
-  echo "[$dir] WARNING: SLURM job exited with code $monitor_exit"
-else
-  echo "[$dir] Monitoring complete for job $job_id"
-fi
+# Submit and monitor job (submit.sh auto-detects bench mode from script name)
+bash ".github/workflows/$cluster/submit.sh" \
+    ".github/workflows/$cluster/bench.sh" "$device" "$interface"
 
 # Verify the YAML output file was created
+job_slug="bench-$device-$interface"
 yaml_file="${job_slug}.yaml"
 if [ ! -f "$yaml_file" ]; then
-  echo "[$dir] ERROR: Expected output file not found: $yaml_file"
-  echo "[$dir] Directory contents:"
-  ls -la *.yaml 2>/dev/null || echo "  No YAML files found"
-  echo ""
-  echo "[$dir] Last 100 lines of job output ($output_file):"
-  echo "----------------------------------------"
-  tail -n 100 "$output_file" 2>/dev/null || echo "  Could not read output file"
-  echo "----------------------------------------"
-  exit 1
+    echo "[$dir] ERROR: Expected output file not found: $yaml_file"
+    echo "[$dir] Directory contents:"
+    ls -la ./*.yaml 2>/dev/null || echo "  No YAML files found"  # ./ keeps names starting with '-' from being read as options
+    echo ""
+    output_file="${job_slug}.out"
+    echo "[$dir] Last 100 lines of job output ($output_file):"
+    echo "----------------------------------------"
+    tail -n 100 "$output_file" 2>/dev/null || echo "  Could not read output file"
+    echo "----------------------------------------"
+    exit 1
 fi
 
 echo "[$dir] Verified output file exists: $yaml_file ($(stat -f%z "$yaml_file" 2>/dev/null || stat -c%s "$yaml_file" 2>/dev/null) bytes)"
-
diff --git a/.github/workflows/bench.yml b/.github/workflows/bench.yml
index 56735da9c1..b45fc45e40 100644
--- a/.github/workflows/bench.yml
+++ b/.github/workflows/bench.yml
@@ -13,6 +13,9 @@ concurrency:
 jobs:
   file-changes:
     name: Detect File Changes
+    if: >  # run unless the triggering event is a pull_request_review whose reviewer is of type 'Bot'
+      github.event_name != 'pull_request_review' ||
+      github.event.review.user.type != 'Bot'
     runs-on: 'ubuntu-latest'
     outputs:
       checkall: ${{ steps.changes.outputs.checkall }}
diff --git a/.github/workflows/frontier/bench.sh b/.github/workflows/frontier/bench.sh
index 35b4c5950e..a79f1a2fc5 100644
--- a/.github/workflows/frontier/bench.sh
+++ b/.github/workflows/frontier/bench.sh
@@ -16,7 +16,7 @@ if [ "$job_device" = "gpu" ]; then
 fi
 
 if [ "$job_device" = "gpu" ]; then
-    ./mfc.sh bench --mem 12 -j $n_ranks -o "$job_slug.yaml" -- -c frontier $device_opts -n $n_ranks
+    ./mfc.sh bench --mem 12 -j "$n_ranks" -o "$job_slug.yaml" -- -c "$job_cluster" $device_opts -n "$n_ranks"
 else
-    ./mfc.sh bench --mem 1 -j $(nproc) -o "$job_slug.yaml" -- -c frontier $device_opts -n $n_ranks
+    ./mfc.sh bench --mem 1 -j "$(nproc)" -o "$job_slug.yaml" -- -c "$job_cluster" $device_opts -n "$n_ranks"
 fi
diff --git a/.github/workflows/frontier/build.sh b/.github/workflows/frontier/build.sh
index ca09c2a116..84036641c6 100644
--- a/.github/workflows/frontier/build.sh
+++ b/.github/workflows/frontier/build.sh
@@ -3,6 +3,15 @@
 # Ignore SIGHUP to survive login node session drops
 trap '' HUP
 
+# Determine cluster name and compiler flag from the directory holding this script
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+cluster_name="$(basename "$SCRIPT_DIR")"
+case "$cluster_name" in
+    frontier)     compiler_flag="f" ;;
+    frontier_amd) compiler_flag="famd" ;;
+    *) echo "ERROR: Unknown cluster '$cluster_name'" >&2; exit 1 ;;  # diagnostics belong on stderr
+esac
+
 job_device=$1
 job_interface=$2
 run_bench=$3
@@ -16,11 +25,11 @@ if [ "$job_device" = "gpu" ]; then
   fi
 fi
 
-. ./mfc.sh load -c f -m g
+. ./mfc.sh load -c $compiler_flag -m g
 
 # Only set up build cache for test suite, not benchmarks
 if [ "$run_bench" != "bench" ]; then
-    source .github/scripts/setup-build-cache.sh frontier "$job_device" "$job_interface"
+    source .github/scripts/setup-build-cache.sh "$cluster_name" "$job_device" "$job_interface"
 fi
 
 max_attempts=3
@@ -37,7 +46,7 @@ while [ $attempt -le $max_attempts ]; do
             fi
         done
     else
-        if ./mfc.sh test -v -a --dry-run --rdma-mpi -j 8 $build_opts; then
+        if ./mfc.sh test -v -a --dry-run $([ "$cluster_name" = "frontier" ] && echo "--rdma-mpi") -j 8 $build_opts; then
             build_cmd_ok=true
         else
             build_cmd_ok=false
diff --git a/.github/workflows/frontier/submit-bench.sh b/.github/workflows/frontier/submit-bench.sh
deleted file mode 100644
index 81b9b274e6..0000000000
--- a/.github/workflows/frontier/submit-bench.sh
+++ /dev/null
@@ -1,54 +0,0 @@
-#!/bin/bash
-
-set -e
-
-usage() {
-    echo "Usage: $0 [script.sh] [cpu|gpu]"
-}
-
-if [ ! -z "$1" ]; then
-    sbatch_script_contents=`cat $1`
-else
-    usage
-    exit 1
-fi
-
-if [ "$2" = "cpu" ]; then
-    sbatch_device_opts="\
-#SBATCH -n 32                       # Number of cores required"
-elif [ "$2" = "gpu" ]; then
-    sbatch_device_opts="\
-#SBATCH -n 8                       # Number of cores required"
-else
-    usage; exit 1
-fi
-
-
-job_slug="`basename "$1" | sed 's/\.sh$//' | sed 's/[^a-zA-Z0-9]/-/g'`-$2-$3"
-
-sbatch <<EOT
-#!/bin/bash
-#SBATCH -JMFC-$job_slug            # Job name
-#SBATCH -A ENG160                  # charge account
-#SBATCH -N 1                       # Number of nodes required
-$sbatch_device_opts
-#SBATCH -t 05:59:00                # Duration of the job (Ex: 15 mins)
-#SBATCH -o$job_slug.out            # Combined output and error messages file
-#SBATCH -p extended                # Extended partition for shorter queues
-
-set -e
-set -x
-
-cd "\$SLURM_SUBMIT_DIR"
-echo "Running in $(pwd):"
-
-job_slug="$job_slug"
-job_device="$2"
-job_interface="$3"
-
-. ./mfc.sh load -c f -m g
-
-$sbatch_script_contents
-
-EOT
-
diff --git a/.github/workflows/frontier/submit.sh b/.github/workflows/frontier/submit.sh
index d5b416c65a..16d4f0d73c 100644
--- a/.github/workflows/frontier/submit.sh
+++ b/.github/workflows/frontier/submit.sh
@@ -5,8 +5,17 @@ set -e
 # Ignore SIGHUP to survive login node session drops
 trap '' HUP
 
+# Determine cluster name and compiler flag from the directory holding this script
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+cluster_name="$(basename "$SCRIPT_DIR")"
+case "$cluster_name" in
+    frontier)     compiler_flag="f" ;;
+    frontier_amd) compiler_flag="famd" ;;
+    *) echo "ERROR: Unknown cluster '$cluster_name'" >&2; exit 1 ;;  # diagnostics belong on stderr
+esac
+
 usage() {
-    echo "Usage: $0 [script.sh] [cpu|gpu]"
+    echo "Usage: $0 [script.sh] [cpu|gpu] [none|acc|omp] [shard]"
 }
 
 if [ ! -z "$1" ]; then
@@ -16,6 +25,13 @@ else
     exit 1
 fi
 
+# Detect job type from submitted script basename
+script_basename="$(basename "$1" .sh)"
+case "$script_basename" in
+    bench*) job_type="bench" ;;
+    *)      job_type="test"  ;;
+esac
+
 if [ "$2" = "cpu" ]; then
     sbatch_device_opts="\
 #SBATCH -n 32                       # Number of cores required"
@@ -27,19 +43,36 @@ else
     exit 1
 fi
 
+# Select SBATCH params based on job type
+if [ "$job_type" = "bench" ]; then
+    sbatch_account="#SBATCH -A ENG160"
+    sbatch_time="#SBATCH -t 05:59:00"
+    sbatch_partition="#SBATCH -p extended"
+    sbatch_extra=""
+else
+    sbatch_account="#SBATCH -A CFD154"
+    sbatch_time="#SBATCH -t 01:59:00"
+    sbatch_partition="#SBATCH -p batch"
+    sbatch_extra="#SBATCH --qos=normal"
+fi
 
-job_slug="`basename "$1" | sed 's/\.sh$//' | sed 's/[^a-zA-Z0-9]/-/g'`-$2-$3"
+shard_suffix=""
+if [ -n "$4" ]; then
+    shard_suffix="-${4/\//-of-}"  # first '/' becomes '-of-', e.g. shard "1/2" -> "-1-of-2"
+fi
+job_slug="${script_basename//[^a-zA-Z0-9]/-}-$2-$3${shard_suffix}"  # script_basename already has the .sh suffix stripped
 output_file="$job_slug.out"
 
 submit_output=$(sbatch <<EOT
 #!/bin/bash
 #SBATCH -J MFC-$job_slug            # Job name
-#SBATCH -A ENG160                  # charge account
+$sbatch_account
 #SBATCH -N 1                       # Number of nodes required
 $sbatch_device_opts
-#SBATCH -t 05:59:00                # Duration of the job (Ex: 15 mins)
+$sbatch_time
 #SBATCH -o$output_file             # Combined output and error messages file
-#SBATCH -p extended                # Extended partition for shorter queues
+$sbatch_partition
+$sbatch_extra
 
 set -e
 set -x
@@ -50,8 +83,10 @@ echo "Running in $(pwd):"
 job_slug="$job_slug"
 job_device="$2"
 job_interface="$3"
+job_shard="$4"
+job_cluster="$cluster_name"
 
-. ./mfc.sh load -c f -m $([ "$2" = "gpu" ] && echo "g" || echo "c")
+. ./mfc.sh load -c $compiler_flag -m $([ "$2" = "gpu" ] && echo "g" || echo "c")
 
 $sbatch_script_contents
 
@@ -68,5 +103,4 @@ fi
 echo "Submitted batch job $job_id"
 
 # Use resilient monitoring instead of sbatch -W
-SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
 bash "$SCRIPT_DIR/../../scripts/monitor_slurm_job.sh" "$job_id" "$output_file"
diff --git a/.github/workflows/frontier/test.sh b/.github/workflows/frontier/test.sh
index 17fbbaf8e5..f2c0591b3b 100644
--- a/.github/workflows/frontier/test.sh
+++ b/.github/workflows/frontier/test.sh
@@ -13,8 +13,17 @@ if [ "$job_device" = "gpu" ]; then
     fi
 fi
 
+shard_opts=()  # optional flags live in arrays so they expand to zero or more exact words
+if [ -n "$job_shard" ]; then
+    shard_opts=(--shard "$job_shard")
+fi
+
 
 if [ "$job_device" = "gpu" ]; then
-    ./mfc.sh test -v -a --rdma-mpi --max-attempts 3 -j $ngpus $device_opts -- -c frontier
+    rdma_opts=()
+    if [ "$job_cluster" = "frontier" ]; then
+        rdma_opts=(--rdma-mpi)
+    fi
+    ./mfc.sh test -v -a "${rdma_opts[@]}" --max-attempts 3 -j "$ngpus" $device_opts "${shard_opts[@]}" -- -c "$job_cluster"
 else
-    ./mfc.sh test -v -a --max-attempts 3 -j 32 --no-gpu -- -c frontier
+    ./mfc.sh test -v -a --max-attempts 3 -j 32 --no-gpu "${shard_opts[@]}" -- -c "$job_cluster"
 fi
diff --git a/.github/workflows/frontier_amd/bench.sh b/.github/workflows/frontier_amd/bench.sh
index 6e01687e79..a79f1a2fc5 100644
--- a/.github/workflows/frontier_amd/bench.sh
+++ b/.github/workflows/frontier_amd/bench.sh
@@ -16,7 +16,7 @@ if [ "$job_device" = "gpu" ]; then
 fi
 
 if [ "$job_device" = "gpu" ]; then
-    ./mfc.sh bench --mem 12 -j $n_ranks -o "$job_slug.yaml" -- -c frontier_amd $device_opts -n $n_ranks
+    ./mfc.sh bench --mem 12 -j "$n_ranks" -o "$job_slug.yaml" -- -c "$job_cluster" $device_opts -n "$n_ranks"
 else
-    ./mfc.sh bench --mem 1 -j $(nproc) -o "$job_slug.yaml" -- -c frontier_amd $device_opts -n $n_ranks
+    ./mfc.sh bench --mem 1 -j "$(nproc)" -o "$job_slug.yaml" -- -c "$job_cluster" $device_opts -n "$n_ranks"
 fi
diff --git a/.github/workflows/frontier_amd/build.sh b/.github/workflows/frontier_amd/build.sh
index 6036f73fc2..84036641c6 100644
--- a/.github/workflows/frontier_amd/build.sh
+++ b/.github/workflows/frontier_amd/build.sh
@@ -3,6 +3,15 @@
 # Ignore SIGHUP to survive login node session drops
 trap '' HUP
 
+# Determine cluster name and compiler flag from the directory holding this script
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+cluster_name="$(basename "$SCRIPT_DIR")"
+case "$cluster_name" in
+    frontier)     compiler_flag="f" ;;
+    frontier_amd) compiler_flag="famd" ;;
+    *) echo "ERROR: Unknown cluster '$cluster_name'" >&2; exit 1 ;;  # diagnostics belong on stderr
+esac
+
 job_device=$1
 job_interface=$2
 run_bench=$3
@@ -16,11 +25,11 @@ if [ "$job_device" = "gpu" ]; then
   fi
 fi
 
-. ./mfc.sh load -c famd -m g
+. ./mfc.sh load -c $compiler_flag -m g
 
 # Only set up build cache for test suite, not benchmarks
 if [ "$run_bench" != "bench" ]; then
-    source .github/scripts/setup-build-cache.sh frontier_amd "$job_device" "$job_interface"
+    source .github/scripts/setup-build-cache.sh "$cluster_name" "$job_device" "$job_interface"
 fi
 
 max_attempts=3
@@ -37,7 +46,7 @@ while [ $attempt -le $max_attempts ]; do
             fi
         done
     else
-        if ./mfc.sh test -v -a --dry-run -j 8 $build_opts; then
+        if ./mfc.sh test -v -a --dry-run $([ "$cluster_name" = "frontier" ] && echo "--rdma-mpi") -j 8 $build_opts; then
             build_cmd_ok=true
         else
             build_cmd_ok=false
diff --git a/.github/workflows/frontier_amd/submit-bench.sh b/.github/workflows/frontier_amd/submit-bench.sh
deleted file mode 100644
index 109052abeb..0000000000
--- a/.github/workflows/frontier_amd/submit-bench.sh
+++ /dev/null
@@ -1,54 +0,0 @@
-#!/bin/bash
-
-set -e
-
-usage() {
-    echo "Usage: $0 [script.sh] [cpu|gpu]"
-}
-
-if [ ! -z "$1" ]; then
-    sbatch_script_contents=`cat $1`
-else
-    usage
-    exit 1
-fi
-
-if [ "$2" = "cpu" ]; then
-    sbatch_device_opts="\
-#SBATCH -n 32                       # Number of cores required"
-elif [ "$2" = "gpu" ]; then
-    sbatch_device_opts="\
-#SBATCH -n 8                       # Number of cores required"
-else
-    usage; exit 1
-fi
-
-
-job_slug="`basename "$1" | sed 's/\.sh$//' | sed 's/[^a-zA-Z0-9]/-/g'`-$2-$3"
-
-sbatch <<EOT
-#!/bin/bash
-#SBATCH -JMFC-$job_slug            # Job name
-#SBATCH -A ENG160                  # charge account
-#SBATCH -N 1                       # Number of nodes required
-$sbatch_device_opts
-#SBATCH -t 05:59:00                # Duration of the job (Ex: 15 mins)
-#SBATCH -o$job_slug.out            # Combined output and error messages file
-#SBATCH -p extended                # Extended partition for shorter queues
-
-set -e
-set -x
-
-cd "\$SLURM_SUBMIT_DIR"
-echo "Running in $(pwd):"
-
-job_slug="$job_slug"
-job_device="$2"
-job_interface="$3"
-
-. ./mfc.sh load -c famd -m g
-
-$sbatch_script_contents
-
-EOT
-
diff --git a/.github/workflows/frontier_amd/submit.sh b/.github/workflows/frontier_amd/submit.sh
index 551e0056b8..16d4f0d73c 100644
--- a/.github/workflows/frontier_amd/submit.sh
+++ b/.github/workflows/frontier_amd/submit.sh
@@ -5,8 +5,17 @@ set -e
 # Ignore SIGHUP to survive login node session drops
 trap '' HUP
 
+# Determine cluster name and compiler flag from the directory holding this script
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+cluster_name="$(basename "$SCRIPT_DIR")"
+case "$cluster_name" in
+    frontier)     compiler_flag="f" ;;
+    frontier_amd) compiler_flag="famd" ;;
+    *) echo "ERROR: Unknown cluster '$cluster_name'" >&2; exit 1 ;;  # diagnostics belong on stderr
+esac
+
 usage() {
-    echo "Usage: $0 [script.sh] [cpu|gpu]"
+    echo "Usage: $0 [script.sh] [cpu|gpu] [none|acc|omp] [shard]"
 }
 
 if [ ! -z "$1" ]; then
@@ -16,6 +25,13 @@ else
     exit 1
 fi
 
+# Detect job type from submitted script basename
+script_basename="$(basename "$1" .sh)"
+case "$script_basename" in
+    bench*) job_type="bench" ;;
+    *)      job_type="test"  ;;
+esac
+
 if [ "$2" = "cpu" ]; then
     sbatch_device_opts="\
 #SBATCH -n 32                       # Number of cores required"
@@ -27,19 +43,36 @@ else
     exit 1
 fi
 
+# Select SBATCH params based on job type
+if [ "$job_type" = "bench" ]; then
+    sbatch_account="#SBATCH -A ENG160"
+    sbatch_time="#SBATCH -t 05:59:00"
+    sbatch_partition="#SBATCH -p extended"
+    sbatch_extra=""
+else
+    sbatch_account="#SBATCH -A CFD154"
+    sbatch_time="#SBATCH -t 01:59:00"
+    sbatch_partition="#SBATCH -p batch"
+    sbatch_extra="#SBATCH --qos=normal"
+fi
 
-job_slug="`basename "$1" | sed 's/\.sh$//' | sed 's/[^a-zA-Z0-9]/-/g'`-$2-$3"
+shard_suffix=""
+if [ -n "$4" ]; then
+    shard_suffix="-${4/\//-of-}"  # first '/' becomes '-of-', e.g. shard "1/2" -> "-1-of-2"
+fi
+job_slug="${script_basename//[^a-zA-Z0-9]/-}-$2-$3${shard_suffix}"  # script_basename already has the .sh suffix stripped
 output_file="$job_slug.out"
 
 submit_output=$(sbatch <<EOT
 #!/bin/bash
 #SBATCH -J MFC-$job_slug            # Job name
-#SBATCH -A ENG160                  # charge account
+$sbatch_account
 #SBATCH -N 1                       # Number of nodes required
 $sbatch_device_opts
-#SBATCH -t 05:59:00                # Duration of the job (Ex: 15 mins)
+$sbatch_time
 #SBATCH -o$output_file             # Combined output and error messages file
-#SBATCH -p extended                # Extended partition for shorter queues
+$sbatch_partition
+$sbatch_extra
 
 set -e
 set -x
@@ -50,8 +83,10 @@ echo "Running in $(pwd):"
 job_slug="$job_slug"
 job_device="$2"
 job_interface="$3"
+job_shard="$4"
+job_cluster="$cluster_name"
 
-. ./mfc.sh load -c famd -m $([ "$2" = "gpu" ] && echo "g" || echo "c")
+. ./mfc.sh load -c $compiler_flag -m $([ "$2" = "gpu" ] && echo "g" || echo "c")
 
 $sbatch_script_contents
 
@@ -68,5 +103,4 @@ fi
 echo "Submitted batch job $job_id"
 
 # Use resilient monitoring instead of sbatch -W
-SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
 bash "$SCRIPT_DIR/../../scripts/monitor_slurm_job.sh" "$job_id" "$output_file"
diff --git a/.github/workflows/frontier_amd/test.sh b/.github/workflows/frontier_amd/test.sh
index ff65aa2b0e..f2c0591b3b 100644
--- a/.github/workflows/frontier_amd/test.sh
+++ b/.github/workflows/frontier_amd/test.sh
@@ -13,8 +13,17 @@ if [ "$job_device" = "gpu" ]; then
     fi
 fi
 
+shard_opts=()  # optional flags live in arrays so they expand to zero or more exact words
+if [ -n "$job_shard" ]; then
+    shard_opts=(--shard "$job_shard")
+fi
+
 
 if [ "$job_device" = "gpu" ]; then
-    ./mfc.sh test -v -a --max-attempts 3 -j $ngpus $device_opts -- -c frontier_amd
+    rdma_opts=()
+    if [ "$job_cluster" = "frontier" ]; then
+        rdma_opts=(--rdma-mpi)
+    fi
+    ./mfc.sh test -v -a "${rdma_opts[@]}" --max-attempts 3 -j "$ngpus" $device_opts "${shard_opts[@]}" -- -c "$job_cluster"
 else
-    ./mfc.sh test -v -a --max-attempts 3 -j 32 --no-gpu -- -c frontier_amd
+    ./mfc.sh test -v -a --max-attempts 3 -j 32 --no-gpu "${shard_opts[@]}" -- -c "$job_cluster"
 fi
diff --git a/.github/workflows/phoenix/submit-bench.sh b/.github/workflows/phoenix/submit-bench.sh
deleted file mode 100644
index a3830f5050..0000000000
--- a/.github/workflows/phoenix/submit-bench.sh
+++ /dev/null
@@ -1,65 +0,0 @@
-#!/bin/bash
-
-set -e
-
-usage() {
-    echo "Usage: $0 [script.sh] [cpu|gpu] [none|acc|omp]"
-}
-
-if [ ! -z "$1" ]; then
-    sbatch_script_contents=`cat $1`
-else
-    usage
-    exit 1
-fi
-
-sbatch_cpu_opts="\
-#SBATCH -p cpu-small               # partition
-#SBATCH --ntasks-per-node=24       # Number of cores per node required
-#SBATCH --mem-per-cpu=2G           # Memory per core\
-"
-
-sbatch_gpu_opts="\
-#SBATCH -CL40S
-#SBATCH --ntasks-per-node=4       # Number of cores per node required
-#SBATCH -G2\
-"
-
-if [ "$2" = "cpu" ]; then
-    sbatch_device_opts="$sbatch_cpu_opts"
-elif [ "$2" = "gpu" ]; then
-    sbatch_device_opts="$sbatch_gpu_opts"
-else
-    usage
-    exit 1
-fi
-
-job_slug="`basename "$1" | sed 's/\.sh$//' | sed 's/[^a-zA-Z0-9]/-/g'`-$2-$3"
-
-sbatch <<EOT
-#!/bin/bash
-#SBATCH -Jshb-$job_slug            # Job name
-#SBATCH --account=gts-sbryngelson3 # charge account
-#SBATCH -N1                        # Number of nodes required
-$sbatch_device_opts
-#SBATCH -t 04:00:00                # Duration of the job (Ex: 15 mins)
-#SBATCH -q embers                  # QOS Name
-#SBATCH --requeue                  # Auto-requeue on preemption
-#SBATCH -o$job_slug.out            # Combined output and error messages file
-
-set -e
-set -x
-
-cd "\$SLURM_SUBMIT_DIR"
-echo "Running in $(pwd):"
-
-job_slug="$job_slug"
-job_device="$2"
-job_interface="$3"
-
-. ./mfc.sh load -c p -m $2
-
-$sbatch_script_contents
-
-EOT
-
diff --git a/.github/workflows/phoenix/submit.sh b/.github/workflows/phoenix/submit.sh
index 06a03e465a..5b7162fef7 100755
--- a/.github/workflows/phoenix/submit.sh
+++ b/.github/workflows/phoenix/submit.sh
@@ -16,17 +16,34 @@ else
     exit 1
 fi
 
+# Detect job type from submitted script basename
+script_basename="$(basename "$1" .sh)"
+case "$script_basename" in
+    bench*) job_type="bench" ;;
+    *)      job_type="test"  ;;
+esac
+
 sbatch_cpu_opts="\
 #SBATCH -p cpu-small               # partition
 #SBATCH --ntasks-per-node=24       # Number of cores per node required
 #SBATCH --mem-per-cpu=2G           # Memory per core\
 "
 
-sbatch_gpu_opts="\
+if [ "$job_type" = "bench" ]; then
+    sbatch_gpu_opts="\
+#SBATCH -CL40S
+#SBATCH --ntasks-per-node=4       # Number of cores per node required
+#SBATCH -G2\
+"
+    sbatch_time="#SBATCH -t 04:00:00"
+else
+    sbatch_gpu_opts="\
 #SBATCH -p gpu-v100,gpu-a100,gpu-h100,gpu-l40s
 #SBATCH --ntasks-per-node=4       # Number of cores per node required
 #SBATCH -G2\
 "
+    sbatch_time="#SBATCH -t 03:00:00"
+fi
 
 if [ "$2" = "cpu" ]; then
     sbatch_device_opts="$sbatch_cpu_opts"
@@ -46,8 +63,9 @@ submit_output=$(sbatch <<EOT
 #SBATCH --account=gts-sbryngelson3 # charge account
 #SBATCH -N1                        # Number of nodes required
 $sbatch_device_opts
-#SBATCH -t 03:00:00                # Duration of the job (Ex: 15 mins)
+$sbatch_time
 #SBATCH -q embers                  # QOS Name
+#SBATCH --requeue                  # Auto-requeue on preemption
 #SBATCH -o$output_file             # Combined output and error messages file
 
 set -e
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index c4a39a2dbd..b6aee7e204 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -28,7 +28,7 @@ jobs:
 
       - name: Check Formatting
         run: |
-          ./mfc.sh format -j $(nproc)
+          ./mfc.sh format -j "$(nproc)"
           git diff --exit-code || (echo "::error::Code is not formatted. Run './mfc.sh format' locally." && exit 1)
 
       - name: Spell Check
@@ -138,19 +138,38 @@ jobs:
 
       - name: Build
         run:  |
-          /bin/bash mfc.sh test -v --dry-run -j $(nproc) --${{ matrix.debug }} --${{ matrix.mpi }} --${{ matrix.precision }} $TEST_ALL
+          /bin/bash mfc.sh test -v --dry-run -j "$(nproc)" --${{ matrix.debug }} --${{ matrix.mpi }} ${{ matrix.precision != '' && format('--{0}', matrix.precision) || '' }} $TEST_ALL
         env:
           TEST_ALL: ${{ matrix.mpi == 'mpi' && '--test-all' || '' }}
 
       - name: Test
-        run:  |
-          /bin/bash mfc.sh test -v --max-attempts 3 -j $(nproc) $TEST_ALL $TEST_PCT
+        run: |
+          rm -f tests/failed_uuids.txt
+          TEST_EXIT=0
+          /bin/bash mfc.sh test -v --max-attempts 3 -j "$(nproc)" $TEST_ALL $TEST_PCT || TEST_EXIT=$?
+
+          # Retry only if a small number of tests failed (sporadic failures)
+          if [ -s tests/failed_uuids.txt ]; then
+            NUM_FAILED=$(wc -l < tests/failed_uuids.txt)
+            if [ "$NUM_FAILED" -le 5 ]; then
+              FAILED=$(tr '\n' ' ' < tests/failed_uuids.txt)
+              echo ""
+              echo "=== Retrying $NUM_FAILED failed test(s): $FAILED ==="
+              echo ""
+              /bin/bash mfc.sh test -v --max-attempts 3 -j "$(nproc)" --only $FAILED $TEST_ALL || exit $?
+            else
+              echo "Too many failures ($NUM_FAILED) to retry — likely a real issue."
+              exit 1
+            fi
+          elif [ "$TEST_EXIT" -ne 0 ]; then
+            exit $TEST_EXIT
+          fi
         env:
           TEST_ALL: ${{ matrix.mpi == 'mpi' && '--test-all' || '' }}
           TEST_PCT: ${{ matrix.debug == 'debug' && '-% 20' || '' }}
 
   self:
-    name: "${{ matrix.cluster_name }} (${{ matrix.device }}${{ matrix.interface != 'none' && format('-{0}', matrix.interface) || '' }})"
+    name: "${{ matrix.cluster_name }} (${{ matrix.device }}${{ matrix.interface != 'none' && format('-{0}', matrix.interface) || '' }}${{ matrix.shard != '' && format(' [{0}]', matrix.shard) || '' }})"
     if: github.repository == 'MFlowCode/MFC' && needs.file-changes.outputs.checkall == 'true' && github.event.pull_request.draft != true
     needs: [lint-gate, file-changes]
     continue-on-error: false
@@ -174,28 +193,49 @@ jobs:
             cluster_name: 'Georgia Tech | Phoenix'
             device: 'cpu'
             interface: 'none'
-          # Frontier (ORNL) — build on login node, test via SLURM
+          # Frontier (ORNL) — build on login node, GPU tests sharded for batch partition
           - runner:       'frontier'
             cluster:      'frontier'
             cluster_name: 'Oak Ridge | Frontier'
             device: 'gpu'
             interface: 'acc'
+            shard: '1/2'
+          - runner:       'frontier'
+            cluster:      'frontier'
+            cluster_name: 'Oak Ridge | Frontier'
+            device: 'gpu'
+            interface: 'acc'
+            shard: '2/2'
+          - runner:       'frontier'
+            cluster:      'frontier'
+            cluster_name: 'Oak Ridge | Frontier'
+            device: 'gpu'
+            interface: 'omp'
+            shard: '1/2'
           - runner:       'frontier'
             cluster:      'frontier'
             cluster_name: 'Oak Ridge | Frontier'
             device: 'gpu'
             interface: 'omp'
+            shard: '2/2'
           - runner:       'frontier'
             cluster:      'frontier'
             cluster_name: 'Oak Ridge | Frontier'
             device: 'cpu'
             interface: 'none'
-          # Frontier AMD — build on login node, test via SLURM
+          # Frontier AMD — build on login node, GPU tests sharded for batch partition
           - runner:       'frontier'
             cluster:      'frontier_amd'
             cluster_name: 'Oak Ridge | Frontier (AMD)'
             device: 'gpu'
             interface: 'omp'
+            shard: '1/2'
+          - runner:       'frontier'
+            cluster:      'frontier_amd'
+            cluster_name: 'Oak Ridge | Frontier (AMD)'
+            device: 'gpu'
+            interface: 'omp'
+            shard: '2/2'
           - runner:       'frontier'
             cluster:      'frontier_amd'
             cluster_name: 'Oak Ridge | Frontier (AMD)'
@@ -206,8 +246,6 @@ jobs:
       labels: ${{ matrix.runner }}
     env:
       NODE_OPTIONS: ${{ matrix.cluster == 'phoenix' && '--max-old-space-size=2048' || '' }}
-      ACTIONS_RUNNER_FORCE_ACTIONS_NODE_VERSION: node16
-      ACTIONS_ALLOW_USE_UNSECURE_NODE_VERSION: true
     steps:
       - name: Clone
         uses: actions/checkout@v4
@@ -216,18 +254,35 @@ jobs:
 
       - name: Build
         if:   matrix.cluster != 'phoenix'
-        run:  bash .github/workflows/${{ matrix.cluster }}/build.sh ${{ matrix.device }} ${{ matrix.interface }}
+        uses: nick-fields/retry@ce71cc2ab81d554ebbe88c79ab5975992d79ba08 # v3
+        with:
+          max_attempts: 3
+          retry_wait_seconds: 60
+          timeout_minutes: 480
+          command: bash .github/workflows/${{ matrix.cluster }}/build.sh ${{ matrix.device }} ${{ matrix.interface }}
+          on_retry_command: ./mfc.sh clean
 
       - name: Test
-        run:  bash .github/workflows/${{ matrix.cluster }}/submit.sh .github/workflows/${{ matrix.cluster }}/test.sh ${{ matrix.device }} ${{ matrix.interface }}
+        run:  bash .github/workflows/${{ matrix.cluster }}/submit.sh .github/workflows/${{ matrix.cluster }}/test.sh ${{ matrix.device }} ${{ matrix.interface }} ${{ matrix.shard }}
+
+      - name: Compute Log Slug
+        if:   always()
+        id:   log
+        run:  |
+          SLUG="test-${{ matrix.device }}-${{ matrix.interface }}"
+          SHARD="${{ matrix.shard }}"
+          if [ -n "$SHARD" ]; then
+            SLUG="${SLUG}-$(echo "$SHARD" | sed 's|/|-of-|')"
+          fi
+          echo "slug=${SLUG}" >> "$GITHUB_OUTPUT"
 
       - name: Print Logs
         if:   always()
-        run:  cat test-${{ matrix.device }}-${{ matrix.interface }}.out
+        run:  cat ${{ steps.log.outputs.slug }}.out
 
       - name: Archive Logs
         uses: actions/upload-artifact@v4
         if:   matrix.cluster != 'phoenix'
         with:
-          name: logs-${{ strategy.job-index }}-${{ matrix.device }}-${{ matrix.interface }}
-          path: test-${{ matrix.device }}-${{ matrix.interface }}.out
+          name: logs-${{ strategy.job-index }}-${{ steps.log.outputs.slug }}
+          path: ${{ steps.log.outputs.slug }}.out
diff --git a/toolchain/mfc/cli/commands.py b/toolchain/mfc/cli/commands.py
index 8ad8c4bd07..018e3cef83 100644
--- a/toolchain/mfc/cli/commands.py
+++ b/toolchain/mfc/cli/commands.py
@@ -452,6 +452,12 @@
             default=False,
             dest="dry_run",
         ),
+        Argument(
+            name="shard",
+            help="Run only a subset of tests (e.g., '1/2' for first half, '2/2' for second half).",
+            type=str,
+            default=None,
+        ),
     ],
     mutually_exclusive=[
         MutuallyExclusiveGroup(arguments=[
diff --git a/toolchain/mfc/test/test.py b/toolchain/mfc/test/test.py
index 31a3771cb9..049af9e560 100644
--- a/toolchain/mfc/test/test.py
+++ b/toolchain/mfc/test/test.py
@@ -42,7 +42,41 @@
 class TestTimeoutError(MFCException):
     pass
 
-# pylint: disable=too-many-branches, trailing-whitespace
+def _filter_only(cases, skipped_cases):
+    """Filter cases by --only terms using AND for labels, OR for UUIDs.
+
+    Labels (non-UUID terms): case must match ALL labels (AND logic).
+    UUIDs (8-char hex terms): case must match ANY UUID (OR logic).
+    Mixed: keep case if all labels match OR any UUID matches.
+
+    `cases` is filtered in place and rejected cases are appended to
+    `skipped_cases`; returns the tuple (cases, skipped_cases).
+    """
+    def is_uuid(term):
+        return len(term) == 8 and all(c in '0123456789abcdefABCDEF' for c in term)
+
+    terms  = ARG("only")
+    uuids  = {t for t in terms if is_uuid(t)}
+    labels = {t for t in terms if not is_uuid(t)}
+
+    def keep(case):
+        if not terms:  # nothing requested: every case is kept
+            return True
+        check = set(case.trace.split(" -> "))
+        check.add(case.get_uuid())
+        label_ok = bool(labels) and labels <= check
+        uuid_ok  = bool(uuids) and not uuids.isdisjoint(check)
+        return label_ok or uuid_ok
+
+    # Single-pass partition keeps this O(n); input order is preserved.
+    kept = []
+    for case in cases:
+        (kept if keep(case) else skipped_cases).append(case)
+    cases[:] = kept
+    return cases, skipped_cases
+
+
+# pylint: disable=too-many-branches, too-many-statements, trailing-whitespace
 def __filter(cases_) -> typing.List[TestCase]:
     cases = cases_[:]
     selected_cases = []
@@ -66,14 +100,13 @@ def __filter(cases_) -> typing.List[TestCase]:
         raise MFCException("Testing: Your specified range [--from,--to] is incorrect. Please ensure both IDs exist and are in the correct order.")
 
     if len(ARG("only")) > 0:
-        for case in cases[:]:
-            case: TestCase
+        cases, skipped_cases = _filter_only(cases, skipped_cases)
 
-            checkCase = case.trace.split(" -> ")
-            checkCase.append(case.get_uuid())
-            if not set(ARG("only")).issubset(set(checkCase)):
-                cases.remove(case)
-                skipped_cases.append(case)
+        if not cases:
+            raise MFCException(
+                f"--only filter matched zero test cases. "
+                f"Specified: {ARG('only')}. Check that UUIDs/names are valid."
+            )
 
     for case in cases[:]:
         if case.ppn > 1 and not ARG("mpi"):
@@ -99,6 +132,20 @@ def __filter(cases_) -> typing.List[TestCase]:
         skipped_cases += example_cases
         cases = [case for case in cases if case not in example_cases]
 
+    if ARG("shard") is not None:
+        parts = ARG("shard").split("/")  # expected 'i/n'; isdecimal() (not isdigit()) so int() below cannot raise on e.g. '²'
+        if len(parts) != 2 or not all(p.isdecimal() for p in parts) or not 1 <= int(parts[0]) <= int(parts[1]):
+            raise MFCException(f"Invalid --shard '{ARG('shard')}': expected 'i/n' with 1 <= i <= n (e.g., '1/2').")
+        shard_idx, shard_count = int(parts[0]), int(parts[1])  # round-robin: case at index k goes to shard (k % n) + 1
+        skipped_cases += [c for i, c in enumerate(cases) if i % shard_count != shard_idx - 1]
+        cases = cases[shard_idx - 1::shard_count]
+
+        if not cases:
+            raise MFCException(
+                f"--shard {ARG('shard')} matched zero test cases. "
+                "Total cases before sharding may be less than shard count."
+            )
+
     if ARG("percent") == 100:
         return cases, skipped_cases
 
@@ -115,6 +162,7 @@ def test():
     global errors, failed_tests, test_start_time
 
     test_start_time = time.time()  # Start timing
+    failed_uuids_path = os.path.join(common.MFC_TEST_DIR, "failed_uuids.txt")
     cases = list_cases()
 
     # Delete UUIDs that are not in the list of cases from tests/
@@ -182,6 +230,13 @@ def test():
 
     # Check if we aborted due to high failure rate
     if abort_tests.is_set():
+        # Clean up stale failed_uuids.txt so CI doesn't retry wrong tests
+        try:
+            if os.path.exists(failed_uuids_path):
+                os.remove(failed_uuids_path)
+        except OSError:
+            pass
+
         total_completed = nFAIL + nPASS
         cons.print()
         cons.unindent()
@@ -206,6 +261,14 @@ def test():
     # Build the summary report
     _print_test_summary(nPASS, nFAIL, nSKIP, minutes, seconds, failed_tests, skipped_cases)
 
+    # Write failed UUIDs to file for CI retry logic
+    if failed_tests:
+        with open(failed_uuids_path, "w") as f:
+            for test_info in failed_tests:
+                f.write(test_info['uuid'] + "\n")
+    elif os.path.exists(failed_uuids_path):
+        os.remove(failed_uuids_path)
+
     exit(nFAIL)