From 02e65d7c2eb922a0417f82721cbfb29de9f12a99 Mon Sep 17 00:00:00 2001
From: Jose Santos <josantos@amd.com>
Date: Wed, 25 Feb 2026 14:16:37 -0600
Subject: [PATCH 1/6] Add GitHub Actions runner scripts and documentation

- Introduced multiple scripts for managing GitHub Actions runners within an Apptainer container, including build, run, and cleanup functionalities.
- Added comprehensive documentation (README.md, AGENTS.md, skills.md) detailing setup, usage, and environment configuration.
- Included example environment files for both runner and container configurations to guide users in setting up their environments securely and effectively.

This update establishes a robust framework for running GitHub Actions in HPC environments, enhancing usability and maintainability.
---
 .github/scripts/github-runner-files/AGENTS.md |  24 ++
 .github/scripts/github-runner-files/README.md | 214 ++++++++++++++++++
 .../build-github-coding-agent-runner.sh       |  92 ++++++++
 .../cleanup-old-runners.sh                    |  83 +++++++
 .../scripts/github-runner-files/env.example   |  28 +++
 .../run-github-coding-agent-runner.sh         | 198 ++++++++++++++++
 .../runner-container.env.example              |  55 +++++
 .github/scripts/github-runner-files/skills.md |  31 +++
 .github/scripts/github-runner-files/start.sh  | 169 ++++++++++++++
 9 files changed, 894 insertions(+)
 create mode 100644 .github/scripts/github-runner-files/AGENTS.md
 create mode 100644 .github/scripts/github-runner-files/README.md
 create mode 100755 .github/scripts/github-runner-files/build-github-coding-agent-runner.sh
 create mode 100755 .github/scripts/github-runner-files/cleanup-old-runners.sh
 create mode 100644 .github/scripts/github-runner-files/env.example
 create mode 100755 .github/scripts/github-runner-files/run-github-coding-agent-runner.sh
 create mode 100644 .github/scripts/github-runner-files/runner-container.env.example
 create mode 100644 .github/scripts/github-runner-files/skills.md
 create mode 100755 .github/scripts/github-runner-files/start.sh

diff --git a/.github/scripts/github-runner-files/AGENTS.md b/.github/scripts/github-runner-files/AGENTS.md
new file mode 100644
index 000000000..2c91c0a4f
--- /dev/null
+++ b/.github/scripts/github-runner-files/AGENTS.md
@@ -0,0 +1,24 @@
+# Agent instructions
+
+## Workflow
+
+Flow: **run-github-coding-agent-runner.sh** → **container** → **start.sh** → **Actions listener**.
+
+- **Standalone:** run `./run-github-coding-agent-runner.sh` with required flags (`--github-token`, `--github-repository`, `--script-dir`, `--runner-base`). No env needed.
+- **SLURM:** set `GITHUB_TOKEN` and `GITHUB_REPOSITORY`, then `sbatch run-github-coding-agent-runner.sh`. When the script runs under SLURM with no arguments, it uses env and SLURM defaults (`SLURM_SUBMIT_DIR`, `WORK`) for script-dir and runner-base. start.sh installs/configures the runner in `RUNNER_HOME` if needed and starts the Actions listener; workflow jobs run in the container.
+
+## Conventions
+
+When editing scripts or config in this project:
+
+1. **Never add sensitive data to scripts or committed files.**  
+   Do not hardcode tokens, passwords, API keys, or other secrets. Use environment variables or a secure mechanism outside the repo (e.g. `export GITHUB_TOKEN` before running).
+
+2. **Never use host-specific absolute paths.**  
+   Do not add paths like `/work1/amd/josantos/...` or other machine-specific directories. Prefer:
+   - Paths relative to the script (e.g. `SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"` then `cd "${SCRIPT_DIR}"`).
+   - Environment variables (e.g. `$WORK`, `$HOME`) when a base directory is needed.
+   - Relative paths from the project or script location.
+
+3. **Never edit the container definition file (e.g. `iris.def`) unless explicitly asked.**  
+   Prefer changing scripts (e.g. `start.sh`, `run-github-coding-agent-runner.sh`) to install, configure, or run things at runtime. Only modify `.def` files when the user explicitly requests it.
diff --git a/.github/scripts/github-runner-files/README.md b/.github/scripts/github-runner-files/README.md
new file mode 100644
index 000000000..8850fe52d
--- /dev/null
+++ b/.github/scripts/github-runner-files/README.md
@@ -0,0 +1,214 @@
+# Iris + GitHub Actions Self-Hosted Runner (Apptainer)
+
+This setup runs a GitHub Actions self-hosted runner in an Apptainer container with the Iris framework (ROCm, Triton) and the `copilot` label, for HPC environments where Docker is not available.
+
+## Prerequisites
+
+- Apptainer/Singularity installed
+- GitHub Personal Access Token with `repo` scope
+- Access to the repository where you want to register the runner
+- SLURM (for job scheduling)
+- Optional: ROCm/AMD GPU partition for GPU workflows
+
+## Quick Start
+
+### 1. Create GitHub Personal Access Token
+
+1. Go to https://github.com/settings/tokens/new
+2. Name: e.g. `GitHub Actions Runner`
+3. Scopes: Select `repo` (Full control of private repositories)
+4. Click "Generate token" and save it securely
+
+### 2. Prepare token and paths
+
+You will pass the GitHub token and repository as flags (see step 4). Do not commit tokens.
+
+### 3. Build the Container
+
+From this directory:
+
+```bash
+sbatch build-github-coding-agent-runner.sh
+```
+
+This builds `github-copilot-coding-agent-runner.sif` from `iris.def` by default. To use another definition file: `./build-github-coding-agent-runner.sh --def=my.def` or set `DEF_FILE=my.def` before `sbatch`. The job uses partition `mi3001x` and may take a while. See **skills.md** for full build instructions.
+
+### 4. Run the Runner
+
+After the build completes, start the runner from the directory that contains `run-github-coding-agent-runner.sh` and the `.sif`. You can run it in two ways:
+
+**Option A — Standalone with flags (required when not using SLURM):**
+
+```bash
+./run-github-coding-agent-runner.sh \
+  --github-token='YOUR_GITHUB_TOKEN' \
+  --github-repository='owner/repo' \
+  --script-dir="$(pwd)" \
+  --runner-base="$(pwd)/runner-data"
+```
+
+**Option B — Via SLURM with environment variables (when `sbatch run-github-coding-agent-runner.sh` is used, the script uses env and SLURM defaults for any value not passed as a flag):**
+
+```bash
+export GITHUB_TOKEN='YOUR_GITHUB_TOKEN'
+export GITHUB_REPOSITORY='owner/repo'
+sbatch run-github-coding-agent-runner.sh
+```
+
+With Option B, `SCRIPT_DIR` defaults to `SLURM_SUBMIT_DIR` (or the script’s directory), and `RUNNER_BASE` defaults to `$WORK/github-runner-data` if `WORK` is set, otherwise `$SCRIPT_DIR/github-runner-data`. You can override with `export SCRIPT_DIR=... RUNNER_BASE=...` if needed.
+
+Copy-paste and replace:
+- `YOUR_GITHUB_TOKEN` — your GitHub Personal Access Token
+- `owner/repo` — your repository (e.g. `Jose/Iris`)
+- `runner-data` (Option A) — directory for runner state and work (created if missing); use any path you prefer.
+
+Optional flags for Option A; with Option B you can export the matching env vars instead (e.g. `export CLUSTER_NAME=...`, `RUNNER_LABELS=...`, `USE_OVERLAY=...`). Examples:
+
+```bash
+  --cluster-name='vultr-k8' \
+  --runner-labels='copilot,rocm' \
+  --use-overlay=1
+```
+
+### 5. Verify Runner Registration
+
+1. Go to your repository on GitHub
+2. Navigate to: Settings → Actions → Runners
+3. You should see your runner listed with the `copilot` label
+
+## Using the Runner in Workflows
+
+In your `.github/workflows/*.yml` files, use the runner via the `copilot` label (or whatever you passed to `--runner-labels`). Ensure the workflow’s `runs-on` matches: e.g. `runs-on: copilot` or `runs-on: [self-hosted, copilot]`. If a workflow uses a different label (e.g. `apptainer`), either register the runner with that label too or change the workflow to `copilot`.
+
+```yaml
+name: Example Workflow
+on: [push]
+
+jobs:
+  build:
+    runs-on: copilot
+    steps:
+      - uses: actions/checkout@v4
+      - name: Run a test
+        run: echo "Running on Iris + copilot runner in HPC!"
+```
+
+## Workflow
+
+End-to-end flow when you run the runner via SLURM:
+
+1. **One-time setup**  
+   Create a GitHub PAT with `repo` scope. From this directory, run `sbatch build-github-coding-agent-runner.sh` to build `github-copilot-coding-agent-runner.sif` from `iris.def` (Iris + ROCm; the runner is not in the image).
+
+2. **Run the runner**  
+   Either pass required flags to `run-github-coding-agent-runner.sh` (standalone) or set `GITHUB_TOKEN` and `GITHUB_REPOSITORY` and run `sbatch run-github-coding-agent-runner.sh` (SLURM-only env fallback; see step 4). The script runs Apptainer with overlay and bind mounts and executes `/bin/bash -c "/runner-scripts/start.sh"`. So: **run-github-coding-agent-runner.sh** → **container** → **start.sh**.
+
+3. **Inside the container: start.sh**  
+   It receives `GITHUB_TOKEN`, `GITHUB_REPOSITORY`, `RUNNER_HOME`, `RUNNER_NAME`, `RUNNER_LABELS`, and `RUNNER_WORKDIR` from the run script (via `--env`). It checks required vars, sets defaults for any unset, and uses `RUNNER_HOME` (e.g. `/runner-home`). If the runner is not installed in `RUNNER_HOME`, it installs it (from `/opt/actions-runner` or by download). It fetches a registration token from GitHub, runs `config.sh`, then starts the Actions runner listener (`./run.sh`). The runner listens for jobs; when a workflow uses the `copilot` (or your) label, GitHub sends a job and the runner runs the steps in the container.
+
+4. **End-to-end**  
+   You run **run-github-coding-agent-runner.sh** with `--github-token`, `--github-repository`, `--script-dir`, and `--runner-base` (and optionally `--sif`). **run-github-coding-agent-runner.sh** starts the container, binds the script dir and runner dirs, passes env to the container, and runs **start.sh**. **start.sh** installs/configures the runner if needed and starts the listener. So: **run-github-coding-agent-runner.sh** → **container** → **start.sh** (install/configure + listener) → **runner runs workflow jobs**.
+
+## Management Commands
+
+```bash
+# Build container
+sbatch build-github-coding-agent-runner.sh
+
+# Run standalone (required flags)
+./run-github-coding-agent-runner.sh --github-token='...' --github-repository='owner/repo' --script-dir="$(pwd)" --runner-base="$(pwd)/runner-data"
+
+# Run via SLURM with env (set GITHUB_TOKEN and GITHUB_REPOSITORY; SCRIPT_DIR/RUNNER_BASE default from SLURM)
+export GITHUB_TOKEN=... GITHUB_REPOSITORY=owner/repo
+sbatch run-github-coding-agent-runner.sh
+
+# Check SLURM job status
+squeue -u $USER
+
+# View SLURM job logs
+tail -f github-coding-agent-runner-*.out
+
+# Cancel SLURM job
+scancel <job_id>
+```
+
+## Customization
+
+### Runner Name and Labels
+
+Defaults are set in `run-github-coding-agent-runner.sh` (e.g. runner name: `repo-runner-cluster-YYYYMMDD-HHMMSS`; default label: `copilot`). Override with flags:
+
+```bash
+./run-github-coding-agent-runner.sh ... --runner-name='my-runner' --runner-labels='copilot,slurm,apptainer,hpc,iris,rocm,mi300x'
+```
+
+### SLURM Parameters
+
+Edit `run-github-coding-agent-runner.sh` SBATCH directives as needed:
+
+- `#SBATCH --time=8:00:00`
+- `#SBATCH -p mi3008x`  # partition
+- `#SBATCH --nodes=1`
+
+GPU access is enabled via `--rocm` in the container run.
+
+### Kubernetes / no overlay
+
+Overlays are not used in Kubernetes (default `USE_OVERLAY=0` in pods). The script uses **bind mounts only** for writable space:
+
+- **RUNNER_HOME** (runner config) and **RUNNER_WORKDIR** (job work) are bind-mounted from the host/pod.
+- Optional: set **RUNNER_TMP** to a writable directory (e.g. a pod `emptyDir` mounted in the container) and the script will bind it to `/tmp` inside the container so tools (e.g. Triton cache) can write there.
+
+Example in a pod spec: mount an `emptyDir` at `/runner-tmp` and set `RUNNER_TMP=/runner-tmp` in the container env so `/tmp` is writable without an overlay.
+
+## Troubleshooting
+
+### Runner not appearing in GitHub
+
+1. Check logs: `tail -f github-coding-agent-runner-*.out` and `github-coding-agent-runner-*.err`
+2. Verify the token (`--github-token` or `GITHUB_TOKEN`) has `repo` scope
+3. Verify `--github-repository` format is `owner/repo`
+4. Check token has not expired
+
+### Build failures
+
+- Build runs on partition `mi3001x` with fakeroot. See **skills.md** for details.
+- Cache and temp dirs are under the project directory (`.apptainer-cache`, `.apptainer-tmp`). Ensure enough disk space.
+
+### Container not found when running
+
+If the container image is missing (default: `script-dir/github-copilot-coding-agent-runner.sif`), `run-github-coding-agent-runner.sh` will print a message. Run the build and wait for it to complete, or pass `--sif=/path/to/image.sif`.
+
+### Runner offline
+
+```bash
+squeue -u $USER
+tail -50 github-coding-agent-runner-*.err
+scancel <job_id>
+# Resubmit: either same flags (standalone) or same env then sbatch run-github-coding-agent-runner.sh
+```
+
+## Security
+
+- **Tokens**: Never commit tokens. Use `--github-token=TOKEN` when running standalone, or set `GITHUB_TOKEN` when using `sbatch run-github-coding-agent-runner.sh`; do not put secrets in committed files.
+- **Paths**: Do not hardcode host-specific paths in scripts. See **AGENTS.md** for project conventions.
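+- **Container**: Apptainer runs as your user; the container image is read-only. Writable space comes from bind mounts (runner home, work directory, optional `RUNNER_TMP`) and, when `USE_OVERLAY` is enabled, from an overlay directory under the runner base (reused across runs, not per-job).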
+
+## File Structure
+
+```
+github-runner-files/
+├── iris.def                        # Apptainer definition (Iris + ROCm)
+├── build-github-coding-agent-runner.sh   # SLURM build job (--def=FILE for definition file)
+├── run-github-coding-agent-runner.sh   # Run job (flags or sbatch + env)
+├── start.sh                             # Runner startup (inside container; also used as K8s entrypoint)
+├── runner-container.env.example    # Example env file for container (start.sh sources it)
+├── AGENTS.md                       # Agent instructions (no secrets, relative paths)
+├── skills.md                       # Build instructions
+├── README.md                       # This file
+└── github-copilot-coding-agent-runner.sif   # Built image (after build)
+```
+
+## License
+
+MIT License.
diff --git a/.github/scripts/github-runner-files/build-github-coding-agent-runner.sh b/.github/scripts/github-runner-files/build-github-coding-agent-runner.sh
new file mode 100755
index 000000000..25dd97f23
--- /dev/null
+++ b/.github/scripts/github-runner-files/build-github-coding-agent-runner.sh
@@ -0,0 +1,92 @@
+#!/bin/bash
+
+# SLURM job script to build GitHub Coding Agent Runner container
+
+#SBATCH --job-name=build-github-coding-agent-runner
+#SBATCH --output=build-github-coding-agent-runner-%j.out
+#SBATCH --error=build-github-coding-agent-runner-%j.err
+#SBATCH --time=2:00:00
+#SBATCH --nodes=1
+#SBATCH -p mi3001x
+
+set -e
+
+# Parse flags for definition file (and optional output)
+# Usage: ./build-github-coding-agent-runner.sh [--def=FILE] [--output=SIF]
+#   or:  sbatch build-github-coding-agent-runner.sh  (uses DEF_FILE env or default iris.def)
+# A value-taking flag passed last without its value now stops with a message via ${2?...};
+# before, the failing "shift 2" made set -e end the script without any explanation.
+while [[ $# -gt 0 ]]; do
+    case $1 in
+        --def=*|--definition=*)  DEF_FILE="${1#*=}"; shift ;;
+        --def|--definition|-d)   DEF_FILE="${2?$1 requires a value}"; shift 2 ;;
+        --output=*)              OUTPUT_SIF="${1#*=}"; shift ;;
+        --output|-o)             OUTPUT_SIF="${2?$1 requires a value}"; shift 2 ;;
+        -h|--help)
+            echo "Usage: $0 [OPTIONS]"
+            echo "Options:"
+            echo "  --def=FILE, --definition=FILE, -d FILE   Apptainer definition file (default: iris.def)"
+            echo "  --output=FILE, -o FILE                   Output .sif file (default: github-copilot-coding-agent-runner.sif)"
+            exit 0
+            ;;
+        # The first argument that is not a known option ends parsing; it and
+        # everything after it are left untouched and unused.
+        *) break ;;
+    esac
+done
+
+# Defaults: when under SLURM with no args, use env; else use script default
+DEF_FILE="${DEF_FILE:-iris.def}"
+OUTPUT_SIF="${OUTPUT_SIF:-github-copilot-coding-agent-runner.sif}"
+
+echo "=========================================="
+echo "GitHub Coding Agent Runner Container Build"
+echo "=========================================="
+echo "Job ID: $SLURM_JOB_ID"
+echo "Node: $SLURM_NODELIST"
+echo "Start: $(date)"
+echo "=========================================="
+
+# Run from script directory so build and def file are in the right place
+if [ -n "${SLURM_SUBMIT_DIR}" ]; then
+    BUILD_DIR="${SLURM_SUBMIT_DIR}"
+else
+    BUILD_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+fi
+cd "${BUILD_DIR}"
+echo "Build directory: ${BUILD_DIR}"
+
+# Resolve def file path if relative
+[ "${DEF_FILE#/}" = "$DEF_FILE" ] && DEF_FILE="${BUILD_DIR}/${DEF_FILE}"
+[ "${OUTPUT_SIF#/}" = "$OUTPUT_SIF" ] && OUTPUT_SIF="${BUILD_DIR}/${OUTPUT_SIF}"
+
+if [ ! -f "$DEF_FILE" ]; then
+    echo "Error: definition file not found: $DEF_FILE"
+    exit 1
+fi
+
+# Temp and cache under build dir (avoids /tmp filling up)
+export APPTAINER_TMPDIR="${BUILD_DIR}/.apptainer-tmp"
+export APPTAINER_CACHEDIR="${BUILD_DIR}/.apptainer-cache"
+mkdir -p "$APPTAINER_TMPDIR" "$APPTAINER_CACHEDIR"
+
+# Clean build temp to free space (cache is kept for faster rebuilds; remove .apptainer-cache to reclaim that too).
+# Installed as an EXIT trap so a failed build (set -e aborts the script) no longer leaves the temp data behind.
+cleanup_build_tmp() {
+    rm -rf -- "${APPTAINER_TMPDIR:?}"
+    echo "Cleaned temporary directory: $APPTAINER_TMPDIR"
+}
+trap cleanup_build_tmp EXIT
+
+echo ""
+echo "=========================================="
+echo "Building container image..."
+echo "Definition file: $DEF_FILE"
+echo "Output file: $OUTPUT_SIF"
+echo "=========================================="
+
+apptainer build --force --fakeroot "$OUTPUT_SIF" "$DEF_FILE"
+
+# Success: clean up right away (keeps the original message order) and drop the trap.
+cleanup_build_tmp; trap - EXIT
+printf '%s\n' "" "==========================================" "Build completed" "==========================================" "" "Finished: $(date)"
diff --git a/.github/scripts/github-runner-files/cleanup-old-runners.sh b/.github/scripts/github-runner-files/cleanup-old-runners.sh
new file mode 100755
index 000000000..ba079e122
--- /dev/null
+++ b/.github/scripts/github-runner-files/cleanup-old-runners.sh
@@ -0,0 +1,83 @@
+#!/bin/bash
+
+# Cleanup script for old GitHub runner configurations and overlays.
+# Looks under $WORK/github-runner-data, or github-runner-data next to this script when WORK is unset.
+set -e
+
+WORK_DIR="${WORK:-$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)}"
+RUNNER_BASE="${WORK_DIR}/github-runner-data"
+OVERLAY_DIR="${RUNNER_BASE}/overlays"
+
+echo "=========================================="
+echo "GitHub Runner Cleanup Script"
+echo "=========================================="
+echo "Cleaning up directories in: ${RUNNER_BASE}"
+echo ""
+
+# Function to check if SLURM job is still running
+is_job_running() {
+    local job_id=$1
+    squeue -j "$job_id" &>/dev/null
+}
+
+# Cleanup old runner config directories
+echo "Cleaning up old runner configurations..."
+for runner_dir in "${RUNNER_BASE}"/.github-runner-*; do
+    if [ -d "$runner_dir" ]; then
+        # Extract job ID from directory name
+        job_id=$(basename "$runner_dir" | sed 's/.github-runner-//')
+        
+        if [[ "$job_id" =~ ^[0-9]+$ ]]; then
+            # Check if job is still running
+            if is_job_running "$job_id"; then
+                echo "  Skipping $runner_dir (job $job_id is still running)"
+            else
+                echo "  Removing $runner_dir (job $job_id is not running)"
+                rm -rf "$runner_dir"
+            fi
+        else
+            echo "  Skipping $runner_dir (not a job-specific directory)"
+        fi
+    fi
+done
+
+# Cleanup old overlay images
+echo "Cleaning up old overlay images..."
+for overlay_file in "${OVERLAY_DIR}"/overlay-*.img; do
+    if [ -f "$overlay_file" ]; then
+        # Extract job ID from filename
+        job_id=$(basename "$overlay_file" | sed 's/overlay-//' | sed 's/.img$//')
+        
+        if [[ "$job_id" =~ ^[0-9]+$ ]]; then
+            # Check if job is still running
+            if is_job_running "$job_id"; then
+                echo "  Skipping $overlay_file (job $job_id is still running)"
+            else
+                size=$(du -h "$overlay_file" | cut -f1)
+                echo "  Removing $overlay_file (job $job_id is not running, size: $size)"
+                rm -f "$overlay_file"
+            fi
+        else
+            echo "  Skipping $overlay_file (not a job-specific overlay)"
+        fi
+    fi
+done
+
+echo "=========================================="
+echo "Cleanup complete!"
+echo "=========================================="
+
+# Show remaining files
+echo "Remaining runner configurations:"
+ls -lh "${RUNNER_BASE}"/.github-runner-* 2>/dev/null || echo "  None"
+
+echo ""
+echo "Remaining overlay images:"
+ls -lh "${OVERLAY_DIR}"/overlay-*.img 2>/dev/null || echo "  None"
+
+# Show disk usage
+echo ""
+echo "Disk usage:"
+echo "  Runner data directory: $(du -sh "${RUNNER_BASE}" 2>/dev/null | cut -f1)"
+echo "  Overlays directory: $(du -sh "${OVERLAY_DIR}" 2>/dev/null | cut -f1)"
+
diff --git a/.github/scripts/github-runner-files/env.example b/.github/scripts/github-runner-files/env.example
new file mode 100644
index 000000000..76de21881
--- /dev/null
+++ b/.github/scripts/github-runner-files/env.example
@@ -0,0 +1,28 @@
+# GitHub Actions Runner Environment Configuration
+# 
+# SECURITY WARNING: once filled in, your copy of this file contains a secret (the token). Never commit it!
+# 
+# Recommended setup:
+# 1. Copy this file: cp env.example ~/.github-runner-env
+# 2. Edit with your values: nano ~/.github-runner-env
+# 3. Protect the file: chmod 600 ~/.github-runner-env
+# 4. Source before running: source ~/.github-runner-env
+#
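+# For SLURM jobs, source it in the shell you submit from and then run
+# `sbatch run-github-coding-agent-runner.sh`; the script reads these variables from the environment (it does not source this file itself)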
+
+# REQUIRED: GitHub Personal Access Token with 'repo' scope
+# Create at: https://github.com/settings/tokens/new
+export GITHUB_TOKEN=''
+
+# REQUIRED: Repository in format owner/repo
+export GITHUB_REPOSITORY=''
+
+# OPTIONAL: Customize runner name (run-github-coding-agent-runner.sh defaults to <repo>-runner-<cluster name or hostname>-YYYYMMDD-HHMMSS)
+export RUNNER_NAME=''
+
+# OPTIONAL: Comma-separated labels (defaults to 'copilot')
+export RUNNER_LABELS='copilot'
+
+# OPTIONAL: Work directory (defaults to current directory/_work). Note: run-github-coding-agent-runner.sh ignores this value and always uses <runner-base>/_work.
+export RUNNER_WORKDIR=""
diff --git a/.github/scripts/github-runner-files/run-github-coding-agent-runner.sh b/.github/scripts/github-runner-files/run-github-coding-agent-runner.sh
new file mode 100755
index 000000000..291b4d39c
--- /dev/null
+++ b/.github/scripts/github-runner-files/run-github-coding-agent-runner.sh
@@ -0,0 +1,198 @@
+#!/bin/bash
+
+# SLURM job script to run GitHub Coding Agent Runner (Iris + Apptainer)
+
+#SBATCH --job-name=github-coding-agent-runner
+#SBATCH --output=github-coding-agent-runner-%j.out
+#SBATCH --error=github-coding-agent-runner-%j.err
+#SBATCH --time=8:00:00
+#SBATCH --nodes=1
+#SBATCH -p mi3008x  # MI300X partition
+
+# Adjust the above SLURM parameters as needed for your system
+#
+# Two ways to run:
+#   1) Standalone with flags (required):
+#        ./run-github-coding-agent-runner.sh --github-token='...' --github-repository='owner/repo' --script-dir="$(pwd)" --runner-base="$(pwd)/runner-data"
+#   2) Via sbatch with env (SLURM-only fallback): set GITHUB_TOKEN, GITHUB_REPOSITORY; SCRIPT_DIR/RUNNER_BASE default from SLURM_SUBMIT_DIR and WORK
+#        export GITHUB_TOKEN=... GITHUB_REPOSITORY=owner/repo
+#        sbatch run-github-coding-agent-runner.sh
+
+set -e
+
+# Parse input flags first. When running under SLURM with no args, env and SLURM defaults are used for any unset value.
+while [[ $# -gt 0 ]]; do
+    case $1 in
+        --github-token=*)      GITHUB_TOKEN="${1#*=}"; shift ;;
+        --github-token)        GITHUB_TOKEN="${2:-}"; shift 2 ;;
+        --github-repository=*) GITHUB_REPOSITORY="${1#*=}"; shift ;;
+        --github-repository)   GITHUB_REPOSITORY="${2:-}"; shift 2 ;;
+        --runner-name=*)       RUNNER_NAME="${1#*=}"; shift ;;
+        --runner-name)         RUNNER_NAME="${2:-}"; shift 2 ;;
+        --cluster-name=*)      CLUSTER_NAME="${1#*=}"; shift ;;
+        --cluster-name)        CLUSTER_NAME="${2:-}"; shift 2 ;;
+        --runner-labels=*)     RUNNER_LABELS="${1#*=}"; shift ;;
+        --runner-labels)       RUNNER_LABELS="${2:-}"; shift 2 ;;
+        --script-dir=*)        SCRIPT_DIR="${1#*=}"; shift ;;
+        --script-dir)          SCRIPT_DIR="${2:-}"; shift 2 ;;
+        --runner-base=*)       RUNNER_BASE="${1#*=}"; shift ;;
+        --runner-base)         RUNNER_BASE="${2:-}"; shift 2 ;;
+        --sif=*)               SIF_PATH="${1#*=}"; shift ;;
+        --sif)                 SIF_PATH="${2:-}"; shift 2 ;;
+        --runner-tmp=*)        RUNNER_TMP="${1#*=}"; shift ;;
+        --runner-tmp)          RUNNER_TMP="${2:-}"; shift 2 ;;
+        --use-overlay=*)       USE_OVERLAY="${1#*=}"; shift ;;
+        --use-overlay)         USE_OVERLAY="${2:-}"; shift 2 ;;
+        -h|--help)
+            echo "Usage: $0 [OPTIONS]"
+            echo "Options (--option=value or --option value):"
+            echo "  --github-token=TOKEN       GitHub token (required)"
+            echo "  --github-repository=OWNER/REPO   e.g. Jose/Iris (required)"
+            echo "  --script-dir=DIR           Directory with container and scripts (required)"
+            echo "  --runner-base=DIR          Runner data base directory (required)"
+            echo "  --sif=PATH                 Path to .sif container (default: script-dir/github-copilot-coding-agent-runner.sif)"
+            echo "  --runner-name=NAME         Runner name (default: repo-runner-cluster-YYYYMMDD-HHMMSS)"
+            echo "  --cluster-name=NAME        Cluster name for default runner name (default: hostname)"
+            echo "  --runner-labels=LABELS     Comma-separated labels (default: copilot)"
+            echo "  --runner-tmp=DIR           Bind DIR to /tmp in container (e.g. Triton cache)"
+            echo "  --use-overlay=0|1          Use overlay (1) or bind mounts only (0)"
+            exit 0
+            ;;
+        *) break ;;
+    esac
+done
+
+# SLURM-only fallback: inside a SLURM job, fill whatever the flags left empty from SLURM/env defaults.
+if [ -n "${SLURM_JOB_ID}" ]; then
+    # Script dir: the submit directory, else wherever this script lives.
+    SCRIPT_DIR="${SCRIPT_DIR:-${SLURM_SUBMIT_DIR:-}}"
+    SCRIPT_DIR="${SCRIPT_DIR:-$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)}"
+    # Runner base: under $WORK when that is set, else under the script dir.
+    if [ -z "$RUNNER_BASE" ] && [ -n "${WORK}" ]; then
+        RUNNER_BASE="${WORK}/github-runner-data"
+    fi
+    RUNNER_BASE="${RUNNER_BASE:-${SCRIPT_DIR}/github-runner-data}"
+    # Overlay is on by default under SLURM.
+    USE_OVERLAY="${USE_OVERLAY:-1}"
+    # SIF_PATH, RUNNER_NAME, RUNNER_LABELS and RUNNER_TMP need nothing here: they are taken
+    # as-is from flags/environment and defaulted (where applicable) further down.
+fi
+
+# Required: pass as flags when standalone, or set env when using sbatch
+[ -n "$GITHUB_TOKEN" ]     || { echo "Error: pass --github-token=TOKEN or set GITHUB_TOKEN (when using sbatch)"; exit 1; }
+[ -n "$GITHUB_REPOSITORY" ] || { echo "Error: pass --github-repository=owner/repo or set GITHUB_REPOSITORY (when using sbatch)"; exit 1; }
+[ -n "$SCRIPT_DIR" ]       || { echo "Error: pass --script-dir=DIR or set SCRIPT_DIR (when using sbatch)"; exit 1; }
+[ -d "$SCRIPT_DIR" ]      || { echo "Error: SCRIPT_DIR must be an existing directory"; exit 1; }
+[ -n "$RUNNER_BASE" ]      || { echo "Error: pass --runner-base=DIR or set RUNNER_BASE (when using sbatch)"; exit 1; }
+
+# SIF path: default under script-dir if not passed; relative paths under script-dir
+SIF_PATH="${SIF_PATH:-${SCRIPT_DIR}/github-copilot-coding-agent-runner.sif}"
+[ "${SIF_PATH#/}" = "$SIF_PATH" ] && SIF_PATH="${SCRIPT_DIR}/${SIF_PATH}"
+
+# Subdirectories of runner base only (no env or separate flags)
+RUNNER_WORKDIR="${RUNNER_BASE}/_work"
+OVERLAY_DIR="${RUNNER_BASE}/overlay"
+
+# Default runner name: repo-runner-clustername-YYYYMMDD-HHMMSS (e.g. iris-runner-vultr-k8-20260214-025830).
+# The timestamp comes from a single date call so the day and time parts cannot straddle midnight.
+if [ -z "$RUNNER_NAME" ]; then
+    REPO_NAME="$(echo "${GITHUB_REPOSITORY##*/}" | tr '[:upper:]' '[:lower:]')"
+    [ -z "$CLUSTER_NAME" ] && CLUSTER_NAME="$(hostname 2>/dev/null || echo local)"
+    RUNNER_NAME="${REPO_NAME}-runner-${CLUSTER_NAME}-$(date +%Y%m%d-%H%M%S)"
+fi
+RUNNER_LABELS="${RUNNER_LABELS:-copilot}"
+mkdir -p "${RUNNER_WORKDIR}"
+[ -n "${USE_OVERLAY}" ] && [ "${USE_OVERLAY}" != "0" ] && mkdir -p "${OVERLAY_DIR}"
+
+echo "=========================================="
+echo "GitHub Coding Agent Runner - SLURM Job"
+echo "Job ID: $SLURM_JOB_ID"
+echo "Node: $SLURM_NODELIST"
+echo "=========================================="
+echo "Repository: $GITHUB_REPOSITORY"
+echo "Runner Name: $RUNNER_NAME"
+echo "Labels: $RUNNER_LABELS"
+echo "Script/container directory: $SCRIPT_DIR"
+echo "Runner base: $RUNNER_BASE"
+echo "Container SIF: $SIF_PATH"
+echo "Overlay directory: $OVERLAY_DIR"
+echo "Work directory: $RUNNER_WORKDIR"
+echo "TMP bind: ${RUNNER_TMP:-<none>}"
+echo "Overlay: ${USE_OVERLAY:-0} (use USE_OVERLAY=1 to enable in non-SLURM)"
+echo "=========================================="
+
+# Work from the script directory from here on.
+cd "${SCRIPT_DIR}"
+
+# Refuse to continue without the image (build first with: sbatch build-github-coding-agent-runner.sh).
+[ -f "$SIF_PATH" ] || {
+    echo "Error: container not found: $SIF_PATH"
+    echo "Build first: cd ${SCRIPT_DIR} && sbatch build-github-coding-agent-runner.sh"
+    exit 1
+}
+
+# Writable host directory for the runner install (start.sh installs the runner here if missing).
+RUNNER_HOME_HOST="${RUNNER_BASE}/.github-runner"
+mkdir -p "${RUNNER_HOME_HOST}"
+# As root (e.g. in a K8s pod) hand both writable dirs to uid/gid 65534 so start.sh can re-exec as nobody; best effort.
+case "$(id -u)" in
+    0) chown -R 65534:65534 "${RUNNER_HOME_HOST}" "${RUNNER_WORKDIR}" 2>/dev/null || true ;;
+esac
+
+# GPU summary for the job log; a missing or failing rocm-smi only produces a warning.
+echo "GPU Information:"
+rocm-smi --showproductname || echo "Warning: Could not get GPU info"
+echo "=========================================="
+
+# Run github-copilot-coding-agent-runner.sif: mount start.sh and writable dirs.
+# RUNNER_HOME=/runner-home so start.sh installs/runs the runner there (no HOME override).
+#
+# Options (overlay not available in Kubernetes):
+# - USE_OVERLAY=1 (SLURM): use --overlay for a writable layer (needs overlayfs).
+# - USE_OVERLAY=0 (default in K8s/pods): no overlay; only bind mounts. Writable paths:
+#   RUNNER_HOME_HOST (runner config), RUNNER_WORKDIR (job work), and optionally
+#   RUNNER_TMP (bind to /tmp) if set, so /tmp is writable (e.g. Triton cache).
+#
+# The two modes differ only in one extra option, so it is collected in MODE_ARGS and a
+# single apptainer command serves both. Array elements stay separate words: the /tmp
+# bind used to be the string "--bind SRC:/tmp:rw" expanded inside quotes, which reached
+# apptainer as one fused argument instead of a flag followed by its value.
+MODE_ARGS=()
+if [ -n "${USE_OVERLAY}" ] && [ "${USE_OVERLAY}" != "0" ] && [ -d "${OVERLAY_DIR}" ]; then
+    MODE_ARGS=(--overlay "${OVERLAY_DIR}")
+elif [ -n "${RUNNER_TMP:-}" ]; then
+    # No overlay (Kubernetes or USE_OVERLAY=0): bind mounts only.
+    # RUNNER_TMP is a writable dir (e.g. pod emptyDir) bound to /tmp for Triton/cache.
+    if [ -d "${RUNNER_TMP}" ]; then
+        MODE_ARGS=(--bind "${RUNNER_TMP}:/tmp:rw")
+    else
+        echo "Warning: RUNNER_TMP is not a directory, /tmp will not be bound: ${RUNNER_TMP}" >&2
+    fi
+fi
+
+# The token travels through the environment rather than "--env GITHUB_TOKEN=...", so it
+# is not exposed in the host process list. Apptainer sets APPTAINERENV_<NAME> variables
+# inside the container as <NAME>, so start.sh still sees GITHUB_TOKEN.
+export APPTAINERENV_GITHUB_TOKEN="${GITHUB_TOKEN}"
+
+# Binds: SCRIPT_DIR read-only at /runner-scripts, the runner install dir at
+# /runner-home, and the work dir at the same path it has on the host.
+# --rocm enables GPU access inside the container.
+apptainer exec \
+    --no-home \
+    --bind "${SCRIPT_DIR}:/runner-scripts:ro" \
+    --bind "${RUNNER_HOME_HOST}:/runner-home:rw" \
+    --bind "${RUNNER_WORKDIR}:${RUNNER_WORKDIR}" \
+    "${MODE_ARGS[@]}" \
+    --env "RUNNER_HOME=/runner-home" \
+    --env "GITHUB_REPOSITORY=${GITHUB_REPOSITORY}" \
+    --env "RUNNER_NAME=${RUNNER_NAME}" \
+    --env "RUNNER_LABELS=${RUNNER_LABELS}" \
+    --env "RUNNER_WORKDIR=${RUNNER_WORKDIR}" \
+    --rocm \
+    "$SIF_PATH" \
+    /bin/bash -c "/runner-scripts/start.sh"
+
+echo "=========================================="
+echo "GitHub Coding Agent Runner stopped"
+echo "=========================================="
diff --git a/.github/scripts/github-runner-files/runner-container.env.example b/.github/scripts/github-runner-files/runner-container.env.example
new file mode 100644
index 000000000..169f44b15
--- /dev/null
+++ b/.github/scripts/github-runner-files/runner-container.env.example
@@ -0,0 +1,55 @@
+# Environment for the GitHub Actions runner inside the container.
+# start.sh sources this (from RUNNER_ENV_FILE, or runner-container.env in
+# RUNNER_HOME or next to start.sh). Use it to set paths and options for
+# your specific host, device, and container—no hardcoded paths in start.sh.
+#
+# Usage:
+#   cp runner-container.env.example runner-container.env
+#   # Edit runner-container.env for this host/container, then run the runner.
+#   # Or: export RUNNER_ENV_FILE=/path/to/your.env
+#
+# Variables set here are inherited by workflow steps (actions run in the
+# same env). RUNNER_WORKDIR and RUNNER_HOME are already set by start.sh
+# before this file is sourced, so you can use them below.
+
+# -----------------------------------------------------------------------------
+# Runner behavior
+# -----------------------------------------------------------------------------
+
+# Allow runner to run as root (often needed for GPU access in containers)
+export RUNNER_ALLOW_RUNASROOT=1
+
+# Triton kernel cache (must be writable). Default in start.sh is RUNNER_WORKDIR/.triton_cache
+# export TRITON_CACHE_DIR="${RUNNER_WORKDIR}/.triton_cache"
+
+# Git config when running as root (start.sh sets a default; override if needed)
+# export GIT_CONFIG_GLOBAL="${RUNNER_WORKDIR}/.gitconfig"
+
+# -----------------------------------------------------------------------------
+# Paths: set for YOUR container/host. Examples below—uncomment and adjust.
+# -----------------------------------------------------------------------------
+
+# --- Example A: Iris-style image (ROCm + venv + Triton under /opt) ---
+# export ROCM_PATH="/opt/rocm"
+# export PATH="/opt/rocm/bin:/opt/conda/envs/py_3.10/bin:${PATH}"
+# export LD_LIBRARY_PATH="/opt/rocm/lib:${LD_LIBRARY_PATH:-}"
+# export PYTHONPATH="/opt/venv/lib/python3.13/site-packages:/opt/triton/python:${PYTHONPATH:-}"
+
+# --- Example B: Conda-only (no ROCm) ---
+# export PATH="/opt/conda/bin:${PATH}"
+# export PYTHONPATH="/opt/conda/lib/python3.10/site-packages:${PYTHONPATH:-}"
+
+# --- Example C: System Python + ROCm in /usr ---
+# export ROCM_PATH="/usr"
+# export PATH="/usr/lib/rocm/bin:${PATH}"
+# export LD_LIBRARY_PATH="/usr/lib/rocm/lib:${LD_LIBRARY_PATH:-}"
+
+# --- Example D: Custom locations (set your own) ---
+# export ROCM_PATH="${ROCM_PATH:-/path/to/rocm}"
+# export PATH="/path/to/python/bin:/path/to/rocm/bin:${PATH}"
+# export LD_LIBRARY_PATH="/path/to/rocm/lib:${LD_LIBRARY_PATH:-}"
+# export PYTHONPATH="/path/to/site-packages:${PYTHONPATH:-}"
+
+# If your image already sets PATH/PYTHONPATH/ROCM_PATH in its Dockerfile or
+# definition file, you can leave this file empty or only set runner options
+# (RUNNER_ALLOW_RUNASROOT, TRITON_CACHE_DIR, GIT_CONFIG_GLOBAL).
diff --git a/.github/scripts/github-runner-files/skills.md b/.github/scripts/github-runner-files/skills.md
new file mode 100644
index 000000000..bc9f078f7
--- /dev/null
+++ b/.github/scripts/github-runner-files/skills.md
@@ -0,0 +1,31 @@
+# Build instructions
+
+## Container build (SLURM)
+
+From the `.github/scripts/github-runner-files` directory:
+
+```bash
+sbatch build-github-coding-agent-runner.sh
+```
+
+- **Partition:** `mi3001x`
+- **Time limit:** 2 hours
+- **Input:** definition file, default `iris.def` (override with `--def=FILE` or env `DEF_FILE`)
+- **Output:** default `github-copilot-coding-agent-runner.sif` (override with `--output=FILE` or env `OUTPUT_SIF`)
+
+The job uses `SLURM_SUBMIT_DIR` when set, so submit from the directory that contains the scripts (e.g. `cd /path/to/repo/.github/scripts/github-runner-files && sbatch build-github-coding-agent-runner.sh`) so the build runs in the right place.
+
+Temp and cache are under the build directory (`.apptainer-tmp`, `.apptainer-cache`) to avoid filling `/tmp`. The temp dir is removed after a successful build; the cache is kept for faster rebuilds. To reclaim space, remove `.apptainer-cache` as well.
+
+## After build
+
+**Option 1 — Run via SLURM with env (SLURM-only fallback):** set `GITHUB_TOKEN` and `GITHUB_REPOSITORY`, then submit the script. The script uses `SLURM_SUBMIT_DIR` and `WORK` (when set) for script-dir and runner-base.
+
+```bash
+export GITHUB_TOKEN=... GITHUB_REPOSITORY=owner/repo
+sbatch run-github-coding-agent-runner.sh
+```
+
+**Option 2 — Run standalone with flags:** pass all required options on the command line (see README).
+
+See **README.md** for full setup and usage.
diff --git a/.github/scripts/github-runner-files/start.sh b/.github/scripts/github-runner-files/start.sh
new file mode 100755
index 000000000..f7045fd2d
--- /dev/null
+++ b/.github/scripts/github-runner-files/start.sh
@@ -0,0 +1,169 @@
+#!/bin/bash
+
+# GitHub Actions Runner startup script for Apptainer (SLURM, standalone, or Kubernetes)
+#
+# Usage: env only. Required: GITHUB_TOKEN, GITHUB_REPOSITORY, RUNNER_HOME
+# Optional: RUNNER_NAME, RUNNER_LABELS, RUNNER_WORKDIR, RUNNER_ENV_FILE, etc.
+# See runner-container.env.example and README for details.
+
+set -e
+
+# Required: set when launching the runner (e.g. by run-github-coding-agent-runner.sh or pod spec); failures are reported on stderr
+[ -n "$GITHUB_TOKEN" ]     || { echo "Error: GITHUB_TOKEN is required" >&2; exit 1; }
+[ -n "$GITHUB_REPOSITORY" ] || { echo "Error: GITHUB_REPOSITORY is required (owner/repo)" >&2; exit 1; }
+[ -n "${RUNNER_HOME:-}" ]  || { echo "Error: RUNNER_HOME is required" >&2; exit 1; }
+
+# Default values (set early so env file can use RUNNER_WORKDIR / RUNNER_HOME)
+RUNNER_NAME="${RUNNER_NAME:-$(hostname)-$(date +%s)}"
+RUNNER_LABELS="${RUNNER_LABELS:-copilot}"
+RUNNER_WORKDIR="${RUNNER_WORKDIR:-$(dirname "${RUNNER_HOME}")/_work}"
+
+# Source the first env file found, auto-exporting what it sets (set -a): RUNNER_ENV_FILE, then RUNNER_HOME/runner-container.env, then runner-container.env next to this script
+if [ -n "${RUNNER_ENV_FILE:-}" ] && [ -f "${RUNNER_ENV_FILE}" ]; then
+    echo "Sourcing env file: ${RUNNER_ENV_FILE}"
+    set -a
+    # shellcheck source=/dev/null
+    . "${RUNNER_ENV_FILE}"
+    set +a
+elif [ -f "${RUNNER_HOME}/runner-container.env" ]; then
+    echo "Sourcing env file: ${RUNNER_HOME}/runner-container.env"
+    set -a
+    # shellcheck source=/dev/null
+    . "${RUNNER_HOME}/runner-container.env"
+    set +a
+else
+    RUNNER_SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" 2>/dev/null && pwd)"
+    if [ -n "${RUNNER_SCRIPT_DIR:-}" ] && [ -f "${RUNNER_SCRIPT_DIR}/runner-container.env" ]; then
+        echo "Sourcing env file: ${RUNNER_SCRIPT_DIR}/runner-container.env"
+        set -a
+        # shellcheck source=/dev/null
+        . "${RUNNER_SCRIPT_DIR}/runner-container.env"
+        set +a
+    fi
+fi
+
+# Runner-only defaults (use RUNNER_WORKDIR; no host-specific paths here).
+# PATH, PYTHONPATH, ROCM_PATH, LD_LIBRARY_PATH, etc. come from the container
+# image or from runner-container.env (see runner-container.env.example).
+# Copy and edit that file per host/container so workflows see the right tools.
+export RUNNER_ALLOW_RUNASROOT="${RUNNER_ALLOW_RUNASROOT:-1}"
+export TRITON_CACHE_DIR="${TRITON_CACHE_DIR:-${RUNNER_WORKDIR}/.triton_cache}"
+
+mkdir -p "${RUNNER_HOME}"
+
+echo "=========================================="
+echo "GitHub Actions Runner - Apptainer Edition"
+echo "=========================================="
+echo "Repository: $GITHUB_REPOSITORY"
+echo "Runner Name: $RUNNER_NAME"
+echo "Labels: $RUNNER_LABELS"
+echo "Work Directory: $RUNNER_WORKDIR"
+echo "Runner Home: $RUNNER_HOME"
+echo "=========================================="
+
+# Install runner binaries if not already present: copy from /opt/actions-runner in the image, else download RUNNER_VERSION (curl -f so an HTTP error aborts instead of saving an error page)
+if [ ! -f "${RUNNER_HOME}/run.sh" ]; then
+    echo "Setting up runner in ${RUNNER_HOME}..."
+    if [ -d /opt/actions-runner ] && [ -f /opt/actions-runner/run.sh ]; then
+        cp -r /opt/actions-runner/* "${RUNNER_HOME}/"
+        chmod +x "${RUNNER_HOME}"/*.sh
+    else
+        RUNNER_VERSION="${RUNNER_VERSION:-2.313.0}"
+        echo "Downloading Actions runner v${RUNNER_VERSION}..."
+        (cd "${RUNNER_HOME}" && curl -fsSL -o runner.tgz \
+            "https://github.com/actions/runner/releases/download/v${RUNNER_VERSION}/actions-runner-linux-x64-${RUNNER_VERSION}.tar.gz" \
+            && tar xzf runner.tgz && rm -f runner.tgz)
+        chmod +x "${RUNNER_HOME}"/*.sh 2>/dev/null || true
+    fi
+fi
+
+# Change to writable runner directory
+cd "${RUNNER_HOME}"
+
+# Create work directory if it doesn't exist
+mkdir -p "$RUNNER_WORKDIR"
+
+# Get a runner registration token from the GitHub REST API (parsed with jq when available, else grep/cut; the API may pretty-print JSON, so allow whitespace after the colon)
+echo "Getting registration token..."
+REGISTRATION_RESPONSE=$(curl -s -X POST \
+    -H "Authorization: token $GITHUB_TOKEN" \
+    -H "Accept: application/vnd.github.v3+json" \
+    "https://api.github.com/repos/$GITHUB_REPOSITORY/actions/runners/registration-token")
+if command -v jq >/dev/null 2>&1; then
+    REGISTRATION_TOKEN=$(echo "$REGISTRATION_RESPONSE" | jq -r .token)
+else
+    REGISTRATION_TOKEN=$(echo "$REGISTRATION_RESPONSE" | grep -o '"token":[[:space:]]*"[^"]*"' | head -1 | cut -d'"' -f4)
+fi
+
+if [ "$REGISTRATION_TOKEN" == "null" ] || [ -z "$REGISTRATION_TOKEN" ]; then
+    echo "Error: Failed to get registration token." >&2
+    echo "Please check:" >&2
+    echo "  1. GITHUB_TOKEN has 'repo' scope" >&2
+    echo "  2. Token has not expired" >&2
+    echo "  3. GITHUB_REPOSITORY format is correct (owner/repo)" >&2
+    exit 1
+fi
+
+echo "Registration token obtained successfully"
+
+# A leftover .runner file means a previous configuration exists here; try to remove it first (best effort, failures ignored)
+if [ -f ".runner" ]; then
+    echo "Found existing runner configuration, removing..."
+    ./config.sh remove --token "$REGISTRATION_TOKEN" || true
+fi
+
+# Configure the runner non-interactively (--unattended), replacing any registered runner with the same name (--replace)
+echo "Configuring runner..."
+./config.sh \
+    --url "https://github.com/$GITHUB_REPOSITORY" \
+    --token "$REGISTRATION_TOKEN" \
+    --name "$RUNNER_NAME" \
+    --labels "$RUNNER_LABELS" \
+    --work "$RUNNER_WORKDIR" \
+    --unattended \
+    --replace
+
+# Cleanup function
+cleanup() {
+    # Only run removal once; skip if config already removed
+    if [ ! -f "${RUNNER_HOME}/.runner" ]; then
+        echo "Runner config already removed or not configured. Skipping cleanup."
+        return 0
+    fi
+
+    echo ""
+    echo "Shutting down... Removing runner from GitHub..."
+
+    REMOVE_RESPONSE=$(curl -s -X POST \
+        -H "Authorization: token $GITHUB_TOKEN" \
+        -H "Accept: application/vnd.github.v3+json" \
+        "https://api.github.com/repos/$GITHUB_REPOSITORY/actions/runners/remove-token")
+    if command -v jq >/dev/null 2>&1; then
+        REMOVE_TOKEN=$(echo "$REMOVE_RESPONSE" | jq -r .token)
+    else  # no jq: scrape the token; the API may pretty-print JSON, so allow whitespace after the colon
+        REMOVE_TOKEN=$(echo "$REMOVE_RESPONSE" | grep -o '"token":[[:space:]]*"[^"]*"' | head -1 | cut -d'"' -f4)
+    fi
+
+    if [ "$REMOVE_TOKEN" != "null" ] && [ -n "$REMOVE_TOKEN" ]; then
+        ./config.sh remove --token "$REMOVE_TOKEN"
+        echo "Runner removed successfully"
+    else
+        echo "Warning: Could not remove runner automatically"
+    fi
+}
+
+# Run cleanup when the script exits and on SIGINT/SIGTERM
+trap cleanup EXIT INT TERM
+
+# Fix git safe directory issues (common when running as root in containers):
+# keep the global git config in a writable location (env file may override) and mark every directory as safe
+export GIT_CONFIG_GLOBAL="${GIT_CONFIG_GLOBAL:-${RUNNER_WORKDIR}/.gitconfig}"
+mkdir -p "$(dirname "$GIT_CONFIG_GLOBAL")"
+git config --global --add safe.directory '*'
+
+# Start the runner (rocminfo output is printed first when the tool is available; its failure is ignored)
+echo "Starting GitHub Actions Runner..."
+echo "Press Ctrl+C to stop"
+echo "=========================================="
+command -v rocminfo >/dev/null 2>&1 && rocminfo || true
+./run.sh

From aa5a3a137abddc239be8d01eb970d49ae6b3e29b Mon Sep 17 00:00:00 2001
From: "github-actions[bot]" <github-actions[bot]@users.noreply.github.com>
Date: Wed, 25 Feb 2026 20:18:35 +0000
Subject: [PATCH 2/6] Apply Ruff auto-fixes

---
 examples/common/utils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/common/utils.py b/examples/common/utils.py
index 0e6ea9482..f9ebba8d7 100644
--- a/examples/common/utils.py
+++ b/examples/common/utils.py
@@ -86,7 +86,7 @@ def reset(self):
         self.comm_end_timestamp.fill_(self.min_ts)
 
     def to_json(self, filename, gpu_freq):
-        cycles_to_us = lambda cycles: (cycles / gpu_freq)
+        cycles_to_us = lambda cycles: cycles / gpu_freq
 
         gemm_begin_us = cycles_to_us(self.mm_begin_timestamp.cpu().numpy())
         gemm_end_us = cycles_to_us(self.mm_end_timestamp.cpu().numpy())

From 73e2829c11783b50a7f0d9786bb92b2a33b5fbf4 Mon Sep 17 00:00:00 2001
From: Jose Santos <josantos@amd.com>
Date: Wed, 25 Feb 2026 14:45:57 -0600
Subject: [PATCH 3/6] Add GitHub Actions workflow for Copilot setup

- Introduced a new workflow file to automate the setup of a Python virtual environment for Copilot.
- The workflow includes steps for checking out the repository, creating and activating a virtual environment, installing dependencies, and verifying ROCm and GPU visibility.
- This addition enhances the CI/CD process by streamlining the environment setup for Copilot integration.
---
 .github/workflows/copilot-setup-steps.yml | 35 +++++++++++++++++++++++
 1 file changed, 35 insertions(+)
 create mode 100644 .github/workflows/copilot-setup-steps.yml

diff --git a/.github/workflows/copilot-setup-steps.yml b/.github/workflows/copilot-setup-steps.yml
new file mode 100644
index 000000000..363fdbef3
--- /dev/null
+++ b/.github/workflows/copilot-setup-steps.yml
@@ -0,0 +1,35 @@
+name: Copilot Setup Steps
+
+on:
+  workflow_dispatch:
+
+jobs:
+  copilot-setup-steps:
+    runs-on: [self-hosted, copilot]
+
+    permissions:
+      contents: read
+
+    timeout-minutes: 59
+
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+        
+      - name: Create task venv for Copilot
+        run: |
+          python3 -m venv $GITHUB_WORKSPACE/.venv
+          source $GITHUB_WORKSPACE/.venv/bin/activate
+          python -m pip install --upgrade pip
+          python -m pip install -e .
+
+      - name: Make venv default for subsequent steps
+        run: |
+          echo "$GITHUB_WORKSPACE/.venv/bin" >> "$GITHUB_PATH"
+
+      - name: Verify ROCm and GPU visibility
+        run: |
+          echo "=== rocminfo ==="
+          rocminfo | head -50 || true
+          echo "=== rocm-smi ==="
+          rocm-smi || true

From 418a23feaecd04f0c6953d7428bff43e5782d3a7 Mon Sep 17 00:00:00 2001
From: Jose Santos <josantos@amd.com>
Date: Thu, 26 Feb 2026 11:21:57 -0600
Subject: [PATCH 4/6] ci: trigger copilot setup workflow on PR @copilot
 comments

add issue_comment trigger with created, edited
gate job to PR comments containing @copilot
keep manual workflow_dispatch
target self-hosted runner labels: copilot, apptainer, iris
---
 .github/workflows/copilot-setup-steps.yml | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/copilot-setup-steps.yml b/.github/workflows/copilot-setup-steps.yml
index 363fdbef3..e4eed5f11 100644
--- a/.github/workflows/copilot-setup-steps.yml
+++ b/.github/workflows/copilot-setup-steps.yml
@@ -2,20 +2,26 @@ name: Copilot Setup Steps
 
 on:
   workflow_dispatch:
+  issue_comment:
+    types: [created, edited]
 
 jobs:
   copilot-setup-steps:
-    runs-on: [self-hosted, copilot]
+    if: >-
+      github.event_name == 'workflow_dispatch' ||
+      (github.event.issue.pull_request && contains(github.event.comment.body, '@copilot'))
+    runs-on: [self-hosted, copilot, apptainer, iris]
 
     permissions:
       contents: read
+      pull-requests: read
 
     timeout-minutes: 59
 
     steps:
       - name: Checkout repository
         uses: actions/checkout@v4
-        
+
       - name: Create task venv for Copilot
         run: |
           python3 -m venv $GITHUB_WORKSPACE/.venv

From 4d39f2f472fb5bbf149600e0ef55c99fa1850c40 Mon Sep 17 00:00:00 2001
From: Jose Santos <josantos@amd.com>
Date: Tue, 3 Mar 2026 12:46:03 -0600
Subject: [PATCH 5/6] Update Copilot setup workflow to use system site packages
 for virtual environment

---
 .github/workflows/copilot-setup-steps.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/copilot-setup-steps.yml b/.github/workflows/copilot-setup-steps.yml
index e4eed5f11..b3d1b86db 100644
--- a/.github/workflows/copilot-setup-steps.yml
+++ b/.github/workflows/copilot-setup-steps.yml
@@ -24,7 +24,7 @@ jobs:
 
       - name: Create task venv for Copilot
         run: |
-          python3 -m venv $GITHUB_WORKSPACE/.venv
+          python3 -m venv --system-site-packages $GITHUB_WORKSPACE/.venv
           source $GITHUB_WORKSPACE/.venv/bin/activate
           python -m pip install --upgrade pip
           python -m pip install -e .

From 4b55cb77c672444cc047fe7a4e93cd1879283b97 Mon Sep 17 00:00:00 2001
From: Jose Santos <josantos@AUSJOSANTOS.amd.com>
Date: Mon, 16 Mar 2026 14:02:14 -0400
Subject: [PATCH 6/6] Enhance GitHub runner scripts with additional directory
 creation and cleanup processes

- Updated `run-github-coding-agent-runner.sh` to create necessary subdirectories within the runner work directory.
- Modified `start.sh` to ensure writable HOME and TMPDIR directories are set up for job steps.
- Added cleanup functionality to terminate stale MCP processes during runner cleanup.
- Introduced new documentation files outlining build and run instructions, as well as workflow conventions for the GitHub Actions runner.
---
 .../.cursor/rules/github-runner-build-run.mdc | 19 ++++++++++++++
 .../rules/github-runner-conventions.mdc       | 25 +++++++++++++++++++
 .../run-github-coding-agent-runner.sh         |  2 +-
 .github/scripts/github-runner-files/start.sh  | 10 ++++++++
 4 files changed, 55 insertions(+), 1 deletion(-)
 create mode 100644 .github/scripts/github-runner-files/.cursor/rules/github-runner-build-run.mdc
 create mode 100644 .github/scripts/github-runner-files/.cursor/rules/github-runner-conventions.mdc

diff --git a/.github/scripts/github-runner-files/.cursor/rules/github-runner-build-run.mdc b/.github/scripts/github-runner-files/.cursor/rules/github-runner-build-run.mdc
new file mode 100644
index 000000000..69a4cee8c
--- /dev/null
+++ b/.github/scripts/github-runner-files/.cursor/rules/github-runner-build-run.mdc
@@ -0,0 +1,19 @@
+---
+description: Build and run instructions (from skills.md)
+globs: **/build-github-coding-agent-runner.sh,**/run-github-coding-agent-runner.sh,**/start.sh,**/iris.def
+alwaysApply: false
+---
+
+# Build & Run
+
+## Build (SLURM)
+
+From the `.github/scripts/github-runner-files` directory: `sbatch build-github-coding-agent-runner.sh`.
+
+- Partition: `mi3001x`, time limit: 2 hours.
+- Input: `--def=FILE` (default `iris.def`). Output: `--output=FILE` (default `github-copilot-coding-agent-runner.sif`). Same directory.
+- Submit from repo dir so `SLURM_SUBMIT_DIR` is correct. Temp/cache under build dir (`.apptainer-tmp`, `.apptainer-cache`); temp removed after success, cache kept for rebuilds.
+
+## Run
+
+After build: (1) Standalone: `./run-github-coding-agent-runner.sh --github-token=... --github-repository=... --script-dir="$(pwd)" --runner-base="$(pwd)/runner-data"`. (2) SLURM: set `GITHUB_TOKEN` and `GITHUB_REPOSITORY`, then `sbatch run-github-coding-agent-runner.sh` (script uses env and SLURM defaults). See README.md for full setup.
diff --git a/.github/scripts/github-runner-files/.cursor/rules/github-runner-conventions.mdc b/.github/scripts/github-runner-files/.cursor/rules/github-runner-conventions.mdc
new file mode 100644
index 000000000..860cd02b0
--- /dev/null
+++ b/.github/scripts/github-runner-files/.cursor/rules/github-runner-conventions.mdc
@@ -0,0 +1,25 @@
+---
+description: Workflow and conventions for the GitHub Actions runner (from AGENTS.md)
+alwaysApply: true
+---
+
+# GitHub Runner – Workflow and Conventions
+
+## Workflow
+
+Flow: run-github-coding-agent-runner.sh → container → start.sh → Actions listener. Two run modes: (1) Standalone: `./run-github-coding-agent-runner.sh` with required flags (--github-token, --github-repository, --script-dir, --runner-base). (2) SLURM: set GITHUB_TOKEN and GITHUB_REPOSITORY, then `sbatch run-github-coding-agent-runner.sh`; when under SLURM with no args, the script uses env and SLURM defaults. start.sh installs/configures the runner in RUNNER_HOME and starts the Actions listener.
+
+## Conventions
+
+1. **No sensitive data** – Do not hardcode tokens, passwords, or API keys. Use environment variables (e.g. export GITHUB_TOKEN before running).
+2. **No host-specific paths** – Do not add paths like /work1/amd/josantos/... Prefer SCRIPT_DIR with dirname BASH_SOURCE, or GITHUB_WORKSPACE, RUNNER_WORKDIR, RUNNER_BASE, WORK, or relative paths.
+3. **Do not edit iris.def unless the user explicitly asks.** Prefer changing start.sh or run-github-coding-agent-runner.sh for runtime behavior.
+4. **Use known writable directories** – Prefer GITHUB_WORKSPACE, RUNNER_WORKDIR, RUNNER_BASE for installs and cache. Avoid $HOME, ~, /tmp.
+
+## Directory layout (when running in runner)
+
+| Variable | Use for |
+|----------|---------|
+| GITHUB_WORKSPACE | Repo checkout; installs, cache, venv |
+| RUNNER_WORKDIR | Parent of owner/repo; job work |
+| RUNNER_BASE | Runner data root; overlay, .github-runner |
diff --git a/.github/scripts/github-runner-files/run-github-coding-agent-runner.sh b/.github/scripts/github-runner-files/run-github-coding-agent-runner.sh
index 291b4d39c..ad6c0728f 100755
--- a/.github/scripts/github-runner-files/run-github-coding-agent-runner.sh
+++ b/.github/scripts/github-runner-files/run-github-coding-agent-runner.sh
@@ -101,7 +101,7 @@ if [ -z "$RUNNER_NAME" ]; then
     RUNNER_NAME="${REPO_NAME}-runner-${CLUSTER_NAME}-$(date +%Y%m%d)-$(date +%H%M%S)"
 fi
 RUNNER_LABELS="${RUNNER_LABELS:-copilot}"
-mkdir -p "${RUNNER_WORKDIR}"
+mkdir -p "${RUNNER_WORKDIR}" "${RUNNER_WORKDIR}/.home" "${RUNNER_WORKDIR}/.pip-cache" "${RUNNER_WORKDIR}/.tmp" "${RUNNER_WORKDIR}/.cache"
 [ -n "${USE_OVERLAY}" ] && [ "${USE_OVERLAY}" != "0" ] && mkdir -p "${OVERLAY_DIR}"
 
 echo "=========================================="
diff --git a/.github/scripts/github-runner-files/start.sh b/.github/scripts/github-runner-files/start.sh
index f7045fd2d..adc1aea36 100755
--- a/.github/scripts/github-runner-files/start.sh
+++ b/.github/scripts/github-runner-files/start.sh
@@ -49,6 +49,11 @@ fi
 export RUNNER_ALLOW_RUNASROOT="${RUNNER_ALLOW_RUNASROOT:-1}"
 export TRITON_CACHE_DIR="${TRITON_CACHE_DIR:-${RUNNER_WORKDIR}/.triton_cache}"
 
+# Writable HOME/TMPDIR for job steps (run-github-coding-agent-runner.sh may already create dirs on host)
+mkdir -p "${RUNNER_WORKDIR}/.home" "${RUNNER_WORKDIR}/.tmp"
+export HOME="${RUNNER_WORKDIR}/.home"
+export TMPDIR="${RUNNER_WORKDIR}/.tmp"
+
 mkdir -p "${RUNNER_HOME}"
 
 echo "=========================================="
@@ -125,6 +130,11 @@ echo "Configuring runner..."
 
 # Cleanup function
 cleanup() {
+    # Kill any stale MCP processes left over from cancelled jobs
+    pkill -f "mcp/dist/index.js" 2>/dev/null || true
+    pkill -f "mcp-server-playwright" 2>/dev/null || true
+    pkill -f "playwright-mcp" 2>/dev/null || true
+
     # Only run removal once; skip if config already removed
     if [ ! -f "${RUNNER_HOME}/.runner" ]; then
         echo "Runner config already removed or not configured. Skipping cleanup."