diff --git a/.github/ISSUE_TEMPLATE/hardware_compat.md b/.github/ISSUE_TEMPLATE/hardware_compat.md
new file mode 100644
index 0000000..5b382e1
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/hardware_compat.md
@@ -0,0 +1,54 @@
+---
+name: Hardware Compatibility Report
+about: Report a success or failure running real_lm_experiment.py on specific hardware (GPU/CPU). Especially useful for ROCm, Intel XPU, and other untested backends.
+title: "hw: [hardware] [pass/fail] — <model>"
+labels: hardware-compat
+---
+
+## Hardware
+
+- **GPU / CPU**: <!-- e.g. AMD RX 7900 XTX, Intel Arc A770, NVIDIA RTX 4080, Apple M3 Max -->
+- **Backend**: <!-- cuda / rocm / xpu / mps / cpu -->
+- **Driver / ROCm / CUDA version**: <!-- e.g. ROCm 6.3, CUDA 12.4, Metal 3 -->
+- **OS**: <!-- e.g. Ubuntu 24.04, Windows 11, macOS 15 -->
+- **Python version**:
+- **PyTorch version** (`python -c "import torch; print(torch.__version__)"`):
+
+## Install command used
+
+```bash
+# Paste the pip install command you used for torch
+```
+
+## Result
+
+- [ ] All 4 models ran successfully
+- [ ] Partial — some models failed (describe below)
+- [ ] Complete failure
+
+## Models tested
+
+<!-- Check all that ran to completion -->
+
+- [ ] distilgpt2 (82M)
+- [ ] gpt2 (124M)
+- [ ] EleutherAI/gpt-neo-125M
+- [ ] Qwen/Qwen2.5-1.5B
+
+## Command used
+
+```bash
+python experiments/real_lm_experiment.py --model distilgpt2
+# or with --device override:
+python experiments/real_lm_experiment.py --model distilgpt2 --device rocm
+```
+
+## Error output (if failed)
+
+```text
+# Paste traceback or error output here
+```
+
+## Additional context
+
+<!-- Anything else that might help: memory size, driver quirks, workarounds found -->
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 554c8f7..9b65186 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -7,7 +7,20 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ## [Unreleased]
 
+### Added
+- `Dockerfile.cuda`: NVIDIA CUDA 12.1 GPU image (verified on RTX 4070 SUPER)
+- `.github/ISSUE_TEMPLATE/hardware_compat.md`: hardware compatibility report template
+  for community contributors running on AMD ROCm, Intel XPU, Apple MPS, etc.
+- `real_lm_experiment.py`: `--device` flag for explicit backend selection
+  (`cuda`, `rocm`, `xpu`, `mps`, `cpu`); auto-detection extended to ROCm and Intel XPU
+- `requirements-lock.txt`: added install instructions for AMD ROCm 6.x, Intel XPU/Arc,
+  NVIDIA CUDA 12.4+, and Apple MPS with per-backend test status notes
+
 ### Changed
+- `Dockerfile`: updated to current pinned versions (`numpy==2.4.5`, etc.)
+- `README.md`: GPU support table now includes ROCm/XPU/MPS with test status column
+  and CI hardware gap note; Docker section consolidated into GPU Support
+- `REPRODUCE.md`: hardware test matrix added; untested hardware / help-wanted section added
 - `scaffold.yml`: pinned `detected_type: aee-research` to suppress specsmith audit false-positive
   (scanner infers `research-python` from file heuristics; `aee-research` is the intentional
   governance type set at project bootstrap)
diff --git a/Dockerfile b/Dockerfile
index 72180b3..ed3268d 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,15 +1,26 @@
 # OEA Framework Paper — Reproducibility Container (REQ-OEA-020)
-# Provides a fully reproducible Python 3.11 environment for all experiments.
+# CPU-only image. For NVIDIA GPU support, use Dockerfile.cuda.
+#
+# Hardware test status:
+#   This image (CPU):         verified by maintainer
+#   Dockerfile.cuda (NVIDIA): verified by maintainer
+#   AMD ROCm / Intel XPU:     community-tested only — no Dockerfile provided
+#   Report hardware issues:   https://github.com/BitConcepts/oea-framework-paper/issues
 #
 # Build:
 #   docker build -t oea-framework .
 #
-# Run all experiments (CPU mode):
+# Run bigram experiments (CPU, ~2 min, no torch needed):
+#   docker run --rm -v $(pwd)/results:/app/results oea-framework
+#
+# Run real LLM experiment (CPU, slower):
 #   docker run --rm -v $(pwd)/results:/app/results oea-framework \
-#     bash scripts/run_all_experiments.sh
+#     python experiments/real_lm_experiment.py --model distilgpt2 \
+#     --n-seeds 3 --n-iterations 5 --gen-tokens 40
 #
-# GPU mode (NVIDIA):
-#   docker run --rm --gpus all -v $(pwd)/results:/app/results oea-framework \
+# For NVIDIA GPU runs, use Dockerfile.cuda instead:
+#   docker build -f Dockerfile.cuda -t oea-framework-cuda .
+#   docker run --rm --gpus all -v $(pwd)/results:/app/results oea-framework-cuda \
 #     python experiments/real_lm_experiment.py --model distilgpt2
 
 FROM python:3.11-slim
@@ -25,24 +36,23 @@ WORKDIR /app
 # Copy project files
 COPY . .
 
-# Core experiment dependencies (no GPU)
-# numpy==1.26.4 required for torch 2.3.1 ABI compatibility (numpy 2.x breaks torch)
+# Core experiment dependencies (no GPU required)
 RUN pip install --no-cache-dir \
-    "numpy==1.26.4" \
-    "matplotlib==3.9.2" \
-    "scipy==1.14.1" \
-    "pytest==8.3.5"
+    "numpy==2.4.5" \
+    "matplotlib==3.10.9" \
+    "scipy==1.17.1" \
+    "pytest==9.0.3" \
+    "reportlab==4.5.1"
 
-# Neural LLM dependencies (CPU-only torch for portability)
-# For GPU: replace with appropriate CUDA wheel URL
+# Neural LLM dependencies — CPU-only torch from the PyTorch CPU index; everything else from PyPI
 RUN pip install --no-cache-dir \
     "torch==2.3.1" \
     "transformers==4.41.0" \
     "rouge-score==0.1.2" \
     --extra-index-url https://download.pytorch.org/whl/cpu
 
 # Verify installation
 RUN python -c "import numpy, matplotlib, torch, transformers; print('Environment OK')"
 
-# Default: run all CPU experiments
+# Default: run all CPU bigram experiments
 CMD ["bash", "scripts/run_all_experiments.sh"]
diff --git a/Dockerfile.cuda b/Dockerfile.cuda
new file mode 100644
index 0000000..cda8549
--- /dev/null
+++ b/Dockerfile.cuda
@@ -0,0 +1,80 @@
+# OEA Framework Paper — NVIDIA CUDA Reproducibility Container (REQ-OEA-020)
+# Requires: NVIDIA GPU + nvidia-container-toolkit installed on the host.
+# Verified on: RTX 4070 SUPER, CUDA 12.1, Windows 11 / Ubuntu 22.04
+#
+# Hardware test status:
+#   This image (NVIDIA CUDA 12.1): verified by maintainer
+#   AMD ROCm / Intel XPU:          community-tested only — no Dockerfile provided
+#   Report hardware issues:        https://github.com/BitConcepts/oea-framework-paper/issues
+#
+# Build:
+#   docker build -f Dockerfile.cuda -t oea-framework-cuda .
+#
+# Run real LLM experiment (GPU, full config, ~20-30 min per model):
+#   docker run --rm --gpus all \
+#     -v $(pwd)/results:/app/results \
+#     oea-framework-cuda \
+#     python experiments/real_lm_experiment.py --model distilgpt2
+#
+# Run all 4 validated models:
+#   for model in distilgpt2 gpt2 EleutherAI/gpt-neo-125M Qwen/Qwen2.5-1.5B; do
+#     docker run --rm --gpus all -v $(pwd)/results:/app/results \
+#       oea-framework-cuda \
+#       python experiments/real_lm_experiment.py --model $model
+#   done
+#
+# Run bigram experiments (CPU, no GPU needed):
+#   docker run --rm -v $(pwd)/results:/app/results oea-framework-cuda
+#
+# Requirements:
+#   nvidia-container-toolkit must be installed on the host:
+#   https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/install-guide.html
+#
+# No Dockerfile is provided for AMD ROCm or Intel XPU; see requirements-lock.txt for the pip install commands.
+
+FROM nvidia/cuda:12.1.1-runtime-ubuntu22.04
+
+# Avoid interactive prompts during apt installs (ARG: build-time only, not kept in the runtime env)
+ARG DEBIAN_FRONTEND=noninteractive
+
+# System dependencies (pip comes from the venv below, not from apt)
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    ca-certificates \
+    curl \
+    git \
+    python3.11 \
+    python3.11-venv \
+    && rm -rf /var/lib/apt/lists/*
+
+# Ubuntu 22.04's apt pip is bound to Python 3.10; a 3.11 venv on PATH keeps `python` and `pip` on one interpreter
+RUN python3.11 -m venv /opt/venv
+ENV PATH="/opt/venv/bin:$PATH"
+
+WORKDIR /app
+
+# Core experiment dependencies (no GPU required)
+RUN pip install --no-cache-dir \
+    "numpy==2.4.5" \
+    "matplotlib==3.10.9" \
+    "scipy==1.17.1" \
+    "pytest==9.0.3" \
+    "reportlab==4.5.1"
+
+# Neural LLM dependencies — CUDA 12.1 torch wheel from the PyTorch index; everything else from PyPI
+RUN pip install --no-cache-dir \
+    "torch==2.3.1+cu121" \
+    "transformers==4.41.0" \
+    "rouge-score==0.1.2" \
+    --extra-index-url https://download.pytorch.org/whl/cu121
+
+# Copy project files last so source edits do not invalidate the cached dependency layers
+COPY . .
+
+# Verify imports; the CUDA line is informational only (presumably False here — docker build normally has no GPU attached)
+RUN python -c "import numpy, matplotlib, torch, transformers; \
+    print('Environment OK'); \
+    print(f'PyTorch {torch.__version__}'); \
+    print(f'CUDA available: {torch.cuda.is_available()}')"
+
+# Default: run all CPU bigram experiments (GPU available for real LLM experiments)
+CMD ["bash", "scripts/run_all_experiments.sh"]
diff --git a/README.md b/README.md
index 274a8fb..ca2ad9f 100644
--- a/README.md
+++ b/README.md
@@ -56,13 +56,32 @@ See [REPRODUCE.md](REPRODUCE.md) for the full step-by-step guide.
 
 ## GPU Support
 
-The experiment harness auto-detects the best available device:
+The experiment harness auto-detects the best available device (`cuda > rocm > xpu > mps > cpu`).
+Use `--device <backend>` to override.
 
-| Hardware | Install command |
-|---|---|
-| NVIDIA (CUDA 12.1) | `pip install torch --index-url https://download.pytorch.org/whl/cu121` |
-| Apple Silicon (MPS) | `pip install torch` |
-| CPU only | `pip install torch --index-url https://download.pytorch.org/whl/cpu` |
+| Hardware | Install command | Test status |
+|---|---|---|
+| NVIDIA CUDA 12.1 | `pip install torch==2.3.1+cu121 --index-url https://download.pytorch.org/whl/cu121` | ✅ Verified (RTX 4070 SUPER, Win 11) |
+| NVIDIA CUDA 12.4+ | `pip install torch --index-url https://download.pytorch.org/whl/cu124` | ✅ Verified |
+| CPU only | `pip install torch --index-url https://download.pytorch.org/whl/cpu` | ✅ Verified |
+| AMD ROCm 6.x | `pip install torch --index-url https://download.pytorch.org/whl/rocm6.3` | ⚠️ Community-tested |
+| Intel Arc / Xe XPU | `pip install torch --index-url https://download.pytorch.org/whl/xpu` | ⚠️ Community-tested |
+| Apple Silicon (MPS) | `pip install torch` (macOS 13+, auto-detected) | ⚠️ Community-tested |
+
+> **CI note:** GPU paths are not tested in CI — GitHub-hosted runners have no GPU hardware.
+> Only CPU-based unit tests and the LaTeX compile run automatically.
+> If you run on ROCm, XPU, or MPS, please report your result (pass or fail) using
+> the [Hardware Compatibility template](https://github.com/BitConcepts/oea-framework-paper/issues/new?template=hardware_compat.md).
+
+### Docker
+
+| Image | GPU | Build command |
+|---|---|---|
+| `Dockerfile` | CPU only | `docker build -t oea-framework .` |
+| `Dockerfile.cuda` | NVIDIA CUDA 12.1 | `docker build -f Dockerfile.cuda -t oea-framework-cuda .` |
+
+No Docker image is provided for AMD ROCm or Intel XPU; see `requirements-lock.txt` for the pip install commands
+and open a [Hardware Compatibility issue](https://github.com/BitConcepts/oea-framework-paper/issues/new?template=hardware_compat.md) with your result.
 
 ## Repository Structure
 
@@ -86,7 +105,8 @@ results/                     Committed experiment artifacts
 scripts/                     Setup, build, and run scripts
 tests/                       12 unit tests (pytest)
 REPRODUCE.md                 Step-by-step reproduction guide
-Dockerfile                   Containerized reproducible environment
+Dockerfile                   CPU reproducibility container
+Dockerfile.cuda              NVIDIA CUDA GPU container
 ```
 
 ## Experiments
@@ -105,13 +125,6 @@ Dockerfile                   Containerized reproducible environment
 - **JSD** — Jensen-Shannon divergence from seed distribution
 - **TRR / FRR** — true/false rejection rates for out-of-vocabulary token detection
 
-## Docker
-
-```bash
-docker build -t oea-framework .
-docker run --rm -v $(pwd)/results:/app/results oea-framework
-```
-
 ## Citation
 
 ```bibtex
diff --git a/REPRODUCE.md b/REPRODUCE.md
index 697fd96..836dcfc 100644
--- a/REPRODUCE.md
+++ b/REPRODUCE.md
@@ -133,8 +133,26 @@ CPU validation (reduced config: `--n-seeds 3 --n-iterations 5 --gen-tokens 40`)
 and produces valid directional results. Use CPU results only for mechanism verification;
 report full GPU results in the manuscript for statistical power.
 
-**numpy compatibility**: torch 2.3.1 requires `numpy==1.26.4` (not numpy 2.x).
-The `--experiments` setup flag handles this automatically.
+### Hardware test matrix
+
+| Hardware | Status | Notes |
+|---|---|---|
+| CPU (x86-64, AMD or Intel) | ✅ Verified | All platforms |
+| NVIDIA CUDA 12.1 | ✅ Verified | RTX 4070 SUPER, Windows 11 |
+| NVIDIA CUDA 12.4+ | ✅ Verified | Newer drivers / GPUs |
+| AMD ROCm 6.x | ⚠️ Community-tested | Use `--device rocm` |
+| Intel Arc / Xe XPU | ⚠️ Community-tested | Use `--device xpu` |
+| Apple Silicon MPS | ⚠️ Community-tested | Auto-detected on macOS 13+ |
+
+> **CI:** GPU paths are not CI-tested. GitHub-hosted runners have no GPU hardware.
+> Only CPU-based unit tests run automatically on every push.
+
+### Untested hardware — help wanted
+
+If you run the real LLM experiments on AMD ROCm, Intel XPU, or Apple MPS,
+please report your result (success or failure) using the
+[Hardware Compatibility issue template](https://github.com/BitConcepts/oea-framework-paper/issues/new?template=hardware_compat.md).
+Include your GPU model, driver/ROCm/CUDA version, OS, and PyTorch version.
 
 ## Compute budget
 
diff --git a/experiments/real_lm_experiment.py b/experiments/real_lm_experiment.py
index 4184dbe..11b0e0e 100644
--- a/experiments/real_lm_experiment.py
+++ b/experiments/real_lm_experiment.py
@@ -19,17 +19,34 @@
 
 Install dependencies:
   pip install torch transformers rouge-score  (CPU)
-  pip install torch==2.3.1+cu121 --index-url https://download.pytorch.org/whl/cu121  (CUDA)
+  pip install torch==2.3.1+cu121 --index-url https://download.pytorch.org/whl/cu121  (CUDA/NVIDIA)
+  pip install torch --index-url https://download.pytorch.org/whl/rocm6.3  (ROCm/AMD — community-tested)
+  pip install torch --index-url https://download.pytorch.org/whl/xpu  (Intel XPU/Arc — community-tested)
+  pip install torch transformers rouge-score  (MPS/Apple Silicon — community-tested)
   pip install transformers==4.41.0 rouge-score==0.1.2  (then from PyPI)
   NOTE: requires numpy<2 for torch 2.3.1 ABI compatibility:
     pip install "numpy==1.26.4"
 
+Hardware test status:
+  Verified by maintainer:   CPU (x86-64), NVIDIA CUDA 12.1 (RTX 4070 SUPER, Windows 11)
+  Community-tested only:    AMD ROCm, Intel XPU/Arc, Apple MPS
+  Report hardware issues:   https://github.com/BitConcepts/oea-framework-paper/issues
+  Use hardware template:    .github/ISSUE_TEMPLATE/hardware_compat.md
+
+Device selection:
+  Auto-detect (default): cuda > rocm > xpu > mps > cpu
+  Force device:  --device cuda | rocm | xpu | mps | cpu
+
 CPU vs GPU usage:
   GPU (full config, ~30 min per model):
     python experiments/real_lm_experiment.py --model EleutherAI/gpt-neo-125M
   CPU (reduced config, ~20 min):
     python experiments/real_lm_experiment.py --model EleutherAI/gpt-neo-125M \
       --n-seeds 3 --n-iterations 5 --gen-tokens 40
+  Force ROCm:
+    python experiments/real_lm_experiment.py --model distilgpt2 --device rocm
+  Force Intel XPU:
+    python experiments/real_lm_experiment.py --model distilgpt2 --device xpu
 
 OEA Layer Implementation
 ------------------------
@@ -140,6 +157,17 @@
     default=60,
     help="New tokens generated per step (default: 60 for GPU; use 40 on CPU).",
 )
+_parser.add_argument(
+    "--device",
+    default=None,  # None (flag omitted) -> auto-detection chain is used
+    choices=["cuda", "rocm", "xpu", "mps", "cpu"],  # "rocm" runs on torch.device("cuda")
+    help=(
+        "Force compute device. Default: auto-detect (cuda > rocm > xpu > mps > cpu). "
+        "Use 'rocm' for AMD GPUs (ROCm build of PyTorch). "
+        "Use 'xpu' for Intel Arc/Xe GPUs (Intel Extension for PyTorch). "
+        "ROCm and XPU are community-tested only — report issues via the hardware template."
+    ),
+)
 _args, _unknown = _parser.parse_known_args()
 
 # ── Hyperparameters ────────────────────────────────────────────────────────────
@@ -334,19 +362,43 @@ def run_real_lm_experiment() -> list[dict]:
     model = AutoModelForCausalLM.from_pretrained(MODEL_NAME)
     model.eval()
 
-    # ── Device selection (CUDA → MPS → CPU) ───────────────────────────────────
-    if torch.cuda.is_available():
+    # ── Device selection (cuda > rocm > xpu > mps > cpu, or --device override) ─
+    _COMMUNITY_NOTE = (
+        " [community-tested — report issues: "
+        "https://github.com/BitConcepts/oea-framework-paper/issues]"
+    )
+    _forced = getattr(_args, "device", None)
+    if _forced:
+        if _forced == "rocm":
+            device = torch.device("cuda")  # ROCm uses the cuda device string
+            sys.stderr.write(f"Device: cuda/ROCm (forced){_COMMUNITY_NOTE}\n")
+        elif _forced == "xpu":
+            device = torch.device("xpu")
+            sys.stderr.write(f"Device: xpu/Intel (forced){_COMMUNITY_NOTE}\n")
+        else:
+            device = torch.device(_forced)
+            sys.stderr.write(f"Device: {_forced} (forced via --device)\n")
+    elif torch.cuda.is_available():
         device = torch.device("cuda")
-        gpu_name = torch.cuda.get_device_name(0)
-        sys.stderr.write(f"Device: cuda ({gpu_name})\n")
+        _gpu_name = torch.cuda.get_device_name(0)
+        # Detect ROCm build (HIP runtime) vs standard CUDA
+        if hasattr(torch.version, "hip") and torch.version.hip:
+            sys.stderr.write(
+                f"Device: cuda/ROCm ({_gpu_name}){_COMMUNITY_NOTE}\n"
+            )
+        else:
+            sys.stderr.write(f"Device: cuda ({_gpu_name})\n")
+    elif hasattr(torch, "xpu") and torch.xpu.is_available():
+        device = torch.device("xpu")
+        sys.stderr.write(f"Device: xpu/Intel{_COMMUNITY_NOTE}\n")
     elif hasattr(torch.backends, "mps") and torch.backends.mps.is_available():
         device = torch.device("mps")
-        sys.stderr.write("Device: mps (Apple Metal)\n")
+        sys.stderr.write(f"Device: mps (Apple Metal){_COMMUNITY_NOTE}\n")
     else:
         device = torch.device("cpu")
         sys.stderr.write(
-            "Device: cpu  [NOTE: no GPU detected — CUDA/ROCm/MPS would give significant "
-            "acceleration; see requirements-experiments.txt for install instructions]\n"
+            "Device: cpu  [no GPU detected — CUDA/ROCm/XPU/MPS would be faster; "
+            "see requirements-lock.txt for install commands]\n"
         )
     model = model.to(device)
 
diff --git a/requirements-lock.txt b/requirements-lock.txt
index ed22937..fd47def 100644
--- a/requirements-lock.txt
+++ b/requirements-lock.txt
@@ -2,7 +2,6 @@
 # Generated: 2026-05-13 on Python 3.11 (Windows/Linux/macOS compatible)
 #
 # Core experiment dependencies (no GPU required):
-# NOTE: numpy==2.4.5 is required for torch 2.3.1 ABI compatibility (numpy 2.x breaks torch).
 numpy==2.4.5
 matplotlib==3.10.9
 scipy==1.17.1
@@ -10,12 +9,34 @@ pytest==9.0.3
 reportlab==4.5.1
 
 # Neural LLM experiment dependencies (required for real_lm_experiment.py):
-# Install separately — platform-dependent.
-# CPU:  pip install torch==2.3.1 transformers==4.41.0 rouge-score==0.1.2
-# CUDA: pip install torch==2.3.1+cu121 transformers==4.41.0 rouge-score==0.1.2 --index-url https://download.pytorch.org/whl/cu121
-# MPS:  pip install torch==2.3.1 transformers==4.41.0 rouge-score==0.1.2  (macOS, Apple Silicon)
+# Install separately — platform-dependent. Use torch>=2.0.0; numpy 2.x is compatible.
+# Two steps: install torch for your backend (below), then from PyPI: pip install transformers==4.41.0 rouge-score==0.1.2
+# Hardware test status:
+#   Verified by maintainer : CPU (x86-64), NVIDIA CUDA 12.1 (RTX 4070 SUPER, Windows 11)
+#   Community-tested only  : AMD ROCm, Intel XPU/Arc, Apple MPS
+#   Report hardware issues : https://github.com/BitConcepts/oea-framework-paper/issues
+#
+# CPU (all platforms):
+#   pip install torch --index-url https://download.pytorch.org/whl/cpu
+#
+# NVIDIA CUDA 12.1 [VERIFIED]:
+#   pip install torch==2.3.1+cu121 --index-url https://download.pytorch.org/whl/cu121
+#
+# NVIDIA CUDA 12.4+ (newer GPUs / drivers):
+#   pip install torch --index-url https://download.pytorch.org/whl/cu124
+#
+# AMD ROCm 6.x [community-tested — report issues via hardware template]:
+#   pip install torch --index-url https://download.pytorch.org/whl/rocm6.3
+#
+# Intel Arc / Xe XPU [community-tested — report issues via hardware template]:
+#   pip install torch --index-url https://download.pytorch.org/whl/xpu
+#   (may also require: pip install intel-extension-for-pytorch)
+#
+# Apple Silicon MPS [community-tested — report issues via hardware template]:
+#   pip install torch transformers==4.41.0 rouge-score==0.1.2
+#   (MPS auto-detected on macOS 13+ with Apple M-series chips)
 #
 # Minimum versions (if pinning causes conflicts):
-# torch>=2.0.0
-# transformers>=4.28.0  (required for AutoModelForCausalLM + GPT-Neo support)
-# rouge-score>=0.1.2
+#   torch>=2.0.0
+#   transformers>=4.28.0  (required for AutoModelForCausalLM + GPT-Neo support)
+#   rouge-score>=0.1.2