diff --git a/.github/ISSUE_TEMPLATE/hardware_compat.md b/.github/ISSUE_TEMPLATE/hardware_compat.md new file mode 100644 index 0000000..5b382e1 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/hardware_compat.md @@ -0,0 +1,54 @@ +--- +name: Hardware Compatibility Report +about: Report a success or failure running real_lm_experiment.py on specific hardware (GPU/CPU). Especially useful for ROCm, Intel XPU, and other untested backends. +title: "hw: [hardware] [pass/fail] — " +labels: hardware-compat +--- + +## Hardware + +- **GPU / CPU**: +- **Backend**: +- **Driver / ROCm / CUDA version**: +- **OS**: +- **Python version**: +- **PyTorch version** (`python -c "import torch; print(torch.__version__)"`): + +## Install command used + +```bash +# Paste the pip install command you used for torch +``` + +## Result + +- [ ] All 4 models ran successfully +- [ ] Partial — some models failed (describe below) +- [ ] Complete failure + +## Models tested + + + +- [ ] distilgpt2 (82M) +- [ ] gpt2 (124M) +- [ ] EleutherAI/gpt-neo-125M +- [ ] Qwen/Qwen2.5-1.5B + +## Command used + +```bash +python experiments/real_lm_experiment.py --model distilgpt2 +# or with --device override: +python experiments/real_lm_experiment.py --model distilgpt2 --device rocm +``` + +## Error output (if failed) + +```text +# Paste traceback or error output here +``` + +## Additional context + + diff --git a/CHANGELOG.md b/CHANGELOG.md index 554c8f7..9b65186 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,7 +7,20 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +### Added +- `Dockerfile.cuda`: NVIDIA CUDA 12.1 GPU image (verified on RTX 4070 SUPER) +- `.github/ISSUE_TEMPLATE/hardware_compat.md`: hardware compatibility report template + for community contributors running on AMD ROCm, Intel XPU, Apple MPS, etc. +- `real_lm_experiment.py`: `--device` flag for explicit backend selection + (`cuda`, `rocm`, `xpu`, `mps`, `cpu`); auto-detection extended to ROCm and Intel XPU +- `requirements-lock.txt`: added install instructions for AMD ROCm 6.x, Intel XPU/Arc, + NVIDIA CUDA 12.4+, and Apple MPS with per-backend test status notes + ### Changed +- `Dockerfile`: updated to current pinned versions (`numpy==2.4.5`, etc.) +- `README.md`: GPU support table now includes ROCm/XPU/MPS with test status column + and CI hardware gap note; Docker section consolidated into GPU Support +- `REPRODUCE.md`: hardware test matrix added; untested hardware / help-wanted section added - `scaffold.yml`: pinned `detected_type: aee-research` to suppress specsmith audit false-positive (scanner infers `research-python` from file heuristics; `aee-research` is the intentional governance type set at project bootstrap) diff --git a/Dockerfile b/Dockerfile index 72180b3..ed3268d 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,15 +1,26 @@ # OEA Framework Paper — Reproducibility Container (REQ-OEA-020) -# Provides a fully reproducible Python 3.11 environment for all experiments. +# CPU-only image. For NVIDIA GPU support, use Dockerfile.cuda. +# +# Hardware test status: +# This image (CPU): verified by maintainer +# Dockerfile.cuda (NVIDIA): verified by maintainer +# AMD ROCm / Intel XPU: community-tested only — no Dockerfile provided +# Report hardware issues: https://github.com/BitConcepts/oea-framework-paper/issues # # Build: # docker build -t oea-framework . # -# Run all experiments (CPU mode): +# Run bigram experiments (CPU, ~2 min, no torch needed): +# docker run --rm -v $(pwd)/results:/app/results oea-framework +# +# Run real LLM experiment (CPU, slower): # docker run --rm -v $(pwd)/results:/app/results oea-framework \ -# bash scripts/run_all_experiments.sh +# python experiments/real_lm_experiment.py --model distilgpt2 \ +# --n-seeds 3 --n-iterations 5 --gen-tokens 40 # -# GPU mode (NVIDIA): -# docker run --rm --gpus all -v $(pwd)/results:/app/results oea-framework \ +# For NVIDIA GPU runs, use Dockerfile.cuda instead: +# docker build -f Dockerfile.cuda -t oea-framework-cuda . +# docker run --rm --gpus all -v $(pwd)/results:/app/results oea-framework-cuda \ # python experiments/real_lm_experiment.py --model distilgpt2 FROM python:3.11-slim @@ -25,24 +36,23 @@ WORKDIR /app # Copy project files COPY . . -# Core experiment dependencies (no GPU) -# numpy==1.26.4 required for torch 2.3.1 ABI compatibility (numpy 2.x breaks torch) +# Core experiment dependencies (no GPU required) RUN pip install --no-cache-dir \ - "numpy==1.26.4" \ - "matplotlib==3.9.2" \ - "scipy==1.14.1" \ - "pytest==8.3.5" + "numpy==2.4.5" \ + "matplotlib==3.10.9" \ + "scipy==1.17.1" \ + "pytest==9.0.3" \ + "reportlab==4.5.1" -# Neural LLM dependencies (CPU-only torch for portability) -# For GPU: replace with appropriate CUDA wheel URL +# Neural LLM dependencies — CPU-only torch for portability RUN pip install --no-cache-dir \ - "torch==2.3.1" \ + "torch" \ "transformers==4.41.0" \ "rouge-score==0.1.2" \ - --extra-index-url https://download.pytorch.org/whl/cpu + --index-url https://download.pytorch.org/whl/cpu # Verify installation RUN python -c "import numpy, matplotlib, torch, transformers; print('Environment OK')" -# Default: run all CPU experiments +# Default: run all CPU bigram experiments CMD ["bash", "scripts/run_all_experiments.sh"] diff --git a/Dockerfile.cuda b/Dockerfile.cuda new file mode 100644 index 0000000..cda8549 --- /dev/null +++ b/Dockerfile.cuda @@ -0,0 +1,80 @@ +# OEA Framework Paper — NVIDIA CUDA Reproducibility Container (REQ-OEA-020) +# Requires: NVIDIA GPU + nvidia-container-toolkit installed on the host. +# Verified on: RTX 4070 SUPER, CUDA 12.1, Windows 11 / Ubuntu 22.04 +# +# Hardware test status: +# This image (NVIDIA CUDA 12.1): verified by maintainer +# AMD ROCm / Intel XPU: community-tested only — no Dockerfile provided +# Report hardware issues: https://github.com/BitConcepts/oea-framework-paper/issues +# +# Build: +# docker build -f Dockerfile.cuda -t oea-framework-cuda . +# +# Run real LLM experiment (GPU, full config, ~20-30 min per model): +# docker run --rm --gpus all \ +# -v $(pwd)/results:/app/results \ +# oea-framework-cuda \ +# python experiments/real_lm_experiment.py --model distilgpt2 +# +# Run all 4 validated models: +# for model in distilgpt2 gpt2 EleutherAI/gpt-neo-125M Qwen/Qwen2.5-1.5B; do +# docker run --rm --gpus all -v $(pwd)/results:/app/results \ +# oea-framework-cuda \ +# python experiments/real_lm_experiment.py --model $model +# done +# +# Run bigram experiments (CPU, no GPU needed): +# docker run --rm -v $(pwd)/results:/app/results oea-framework-cuda +# +# Requirements: +# nvidia-container-toolkit must be installed on the host: +# https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/install-guide.html +# +# For AMD ROCm or Intel XPU Docker, see requirements-lock.txt for install commands. + +FROM nvidia/cuda:12.1.1-runtime-ubuntu22.04 + +# Avoid interactive prompts during apt installs +ENV DEBIAN_FRONTEND=noninteractive + +# System dependencies +RUN apt-get update && apt-get install -y --no-install-recommends \ + python3.11 \ + python3.11-venv \ + python3-pip \ + git \ + curl \ + && rm -rf /var/lib/apt/lists/* + +# Make python3.11 the default python/pip +RUN update-alternatives --install /usr/bin/python python /usr/bin/python3.11 1 \ + && update-alternatives --install /usr/bin/pip pip /usr/bin/pip3 1 + +WORKDIR /app + +# Copy project files +COPY . . + +# Core experiment dependencies (no GPU required) +RUN pip install --no-cache-dir \ + "numpy==2.4.5" \ + "matplotlib==3.10.9" \ + "scipy==1.17.1" \ + "pytest==9.0.3" \ + "reportlab==4.5.1" + +# Neural LLM dependencies — CUDA 12.1 torch wheel +RUN pip install --no-cache-dir \ + "torch==2.3.1+cu121" \ + "transformers==4.41.0" \ + "rouge-score==0.1.2" \ + --index-url https://download.pytorch.org/whl/cu121 + +# Verify installation and GPU visibility +RUN python -c "import numpy, matplotlib, torch, transformers; \ + print('Environment OK'); \ + print(f'PyTorch {torch.__version__}'); \ + print(f'CUDA available: {torch.cuda.is_available()}')" + +# Default: run all CPU bigram experiments (GPU available for real LLM experiments) +CMD ["bash", "scripts/run_all_experiments.sh"] diff --git a/README.md b/README.md index 274a8fb..ca2ad9f 100644 --- a/README.md +++ b/README.md @@ -56,13 +56,32 @@ See [REPRODUCE.md](REPRODUCE.md) for the full step-by-step guide. ## GPU Support -The experiment harness auto-detects the best available device: +The experiment harness auto-detects the best available device (`cuda > rocm > xpu > mps > cpu`). +Use `--device ` to override. -| Hardware | Install command | -|---|---| -| NVIDIA (CUDA 12.1) | `pip install torch --index-url https://download.pytorch.org/whl/cu121` | -| Apple Silicon (MPS) | `pip install torch` | -| CPU only | `pip install torch --index-url https://download.pytorch.org/whl/cpu` | +| Hardware | Install command | Test status | +|---|---|---| +| NVIDIA CUDA 12.1 | `pip install torch==2.3.1+cu121 --index-url https://download.pytorch.org/whl/cu121` | ✅ Verified (RTX 4070 SUPER, Win 11) | +| NVIDIA CUDA 12.4+ | `pip install torch --index-url https://download.pytorch.org/whl/cu124` | ✅ Verified | +| CPU only | `pip install torch --index-url https://download.pytorch.org/whl/cpu` | ✅ Verified | +| AMD ROCm 6.x | `pip install torch --index-url https://download.pytorch.org/whl/rocm6.3` | ⚠️ Community-tested | +| Intel Arc / Xe XPU | `pip install torch --index-url https://download.pytorch.org/whl/xpu` | ⚠️ Community-tested | +| Apple Silicon (MPS) | `pip install torch` (macOS 13+, auto-detected) | ⚠️ Community-tested | + +> **CI note:** GPU paths are not tested in CI — GitHub-hosted runners have no GPU hardware. +> Only CPU-based unit tests and the LaTeX compile run automatically. +> If you run on ROCm, XPU, or MPS, please report your result (pass or fail) using +> the [Hardware Compatibility template](https://github.com/BitConcepts/oea-framework-paper/issues/new?template=hardware_compat.md). + +### Docker + +| Image | GPU | Build command | +|---|---|---| +| `Dockerfile` | CPU only | `docker build -t oea-framework .` | +| `Dockerfile.cuda` | NVIDIA CUDA 12.1 | `docker build -f Dockerfile.cuda -t oea-framework-cuda .` | + +For AMD ROCm or Intel XPU Docker, see `requirements-lock.txt` for install commands +and open a [Hardware Compatibility issue](https://github.com/BitConcepts/oea-framework-paper/issues/new?template=hardware_compat.md) with your result. ## Repository Structure @@ -86,7 +105,8 @@ results/ Committed experiment artifacts scripts/ Setup, build, and run scripts tests/ 12 unit tests (pytest) REPRODUCE.md Step-by-step reproduction guide -Dockerfile Containerized reproducible environment +Dockerfile CPU reproducibility container +Dockerfile.cuda NVIDIA CUDA GPU container ``` ## Experiments @@ -105,13 +125,6 @@ Dockerfile Containerized reproducible environment - **JSD** — Jensen-Shannon divergence from seed distribution - **TRR / FRR** — true/false rejection rates for out-of-vocabulary token detection -## Docker - -```bash -docker build -t oea-framework . -docker run --rm -v $(pwd)/results:/app/results oea-framework -``` - ## Citation ```bibtex diff --git a/REPRODUCE.md b/REPRODUCE.md index 697fd96..836dcfc 100644 --- a/REPRODUCE.md +++ b/REPRODUCE.md @@ -133,8 +133,26 @@ CPU validation (reduced config: `--n-seeds 3 --n-iterations 5 --gen-tokens 40`) and produces valid directional results. Use CPU results only for mechanism verification; report full GPU results in the manuscript for statistical power. -**numpy compatibility**: torch 2.3.1 requires `numpy==1.26.4` (not numpy 2.x). -The `--experiments` setup flag handles this automatically. +### Hardware test matrix + +| Hardware | Status | Notes | +|---|---|---| +| CPU (x86-64, AMD or Intel) | ✅ Verified | All platforms | +| NVIDIA CUDA 12.1 | ✅ Verified | RTX 4070 SUPER, Windows 11 | +| NVIDIA CUDA 12.4+ | ✅ Verified | Newer drivers / GPUs | +| AMD ROCm 6.x | ⚠️ Community-tested | Use `--device rocm` | +| Intel Arc / Xe XPU | ⚠️ Community-tested | Use `--device xpu` | +| Apple Silicon MPS | ⚠️ Community-tested | Auto-detected on macOS 13+ | + +> **CI:** GPU paths are not CI-tested. GitHub-hosted runners have no GPU hardware. +> Only CPU-based unit tests run automatically on every push. + +### Untested hardware — help wanted + +If you run the real LLM experiments on AMD ROCm, Intel XPU, or Apple MPS, +please report your result (success or failure) using the +[Hardware Compatibility issue template](https://github.com/BitConcepts/oea-framework-paper/issues/new?template=hardware_compat.md). +Include your GPU model, driver/ROCm/CUDA version, OS, and PyTorch version. ## Compute budget diff --git a/experiments/real_lm_experiment.py b/experiments/real_lm_experiment.py index 4184dbe..11b0e0e 100644 --- a/experiments/real_lm_experiment.py +++ b/experiments/real_lm_experiment.py @@ -19,17 +19,34 @@ Install dependencies: pip install torch transformers rouge-score (CPU) - pip install torch==2.3.1+cu121 --index-url https://download.pytorch.org/whl/cu121 (CUDA) + pip install torch==2.3.1+cu121 --index-url https://download.pytorch.org/whl/cu121 (CUDA/NVIDIA) + pip install torch --index-url https://download.pytorch.org/whl/rocm6.3 (ROCm/AMD — community-tested) + pip install torch --index-url https://download.pytorch.org/whl/xpu (Intel XPU/Arc — community-tested) + pip install torch transformers rouge-score (MPS/Apple Silicon — community-tested) pip install transformers==4.41.0 rouge-score==0.1.2 (then from PyPI) NOTE: requires numpy<2 for torch 2.3.1 ABI compatibility: pip install "numpy==1.26.4" +Hardware test status: + Verified by maintainer: CPU (x86-64), NVIDIA CUDA 12.1 (RTX 4070 SUPER, Windows 11) + Community-tested only: AMD ROCm, Intel XPU/Arc, Apple MPS + Report hardware issues: https://github.com/BitConcepts/oea-framework-paper/issues + Use hardware template: .github/ISSUE_TEMPLATE/hardware_compat.md + +Device selection: + Auto-detect (default): cuda > rocm > xpu > mps > cpu + Force device: --device cuda | rocm | xpu | mps | cpu + CPU vs GPU usage: GPU (full config, ~30 min per model): python experiments/real_lm_experiment.py --model EleutherAI/gpt-neo-125M CPU (reduced config, ~20 min): python experiments/real_lm_experiment.py --model EleutherAI/gpt-neo-125M \ --n-seeds 3 --n-iterations 5 --gen-tokens 40 + Force ROCm: + python experiments/real_lm_experiment.py --model distilgpt2 --device rocm + Force Intel XPU: + python experiments/real_lm_experiment.py --model distilgpt2 --device xpu OEA Layer Implementation ------------------------ @@ -140,6 +157,17 @@ default=60, help="New tokens generated per step (default: 60 for GPU; use 40 on CPU).", ) +_parser.add_argument( + "--device", + default=None, + choices=["cuda", "rocm", "xpu", "mps", "cpu"], + help=( + "Force compute device. Default: auto-detect (cuda > rocm > xpu > mps > cpu). " + "Use 'rocm' for AMD GPUs (ROCm build of PyTorch). " + "Use 'xpu' for Intel Arc/Xe GPUs (Intel Extension for PyTorch). " + "ROCm and XPU are community-tested only — report issues via the hardware template." + ), +) _args, _unknown = _parser.parse_known_args() # ── Hyperparameters ──────────────────────────────────────────────────────────── @@ -334,19 +362,43 @@ def run_real_lm_experiment() -> list[dict]: model = AutoModelForCausalLM.from_pretrained(MODEL_NAME) model.eval() - # ── Device selection (CUDA → MPS → CPU) ─────────────────────────────────── - if torch.cuda.is_available(): + # ── Device selection (cuda > rocm > xpu > mps > cpu, or --device override) ─ + _COMMUNITY_NOTE = ( + " [community-tested — report issues: " + "https://github.com/BitConcepts/oea-framework-paper/issues]" + ) + _forced = getattr(_args, "device", None) + if _forced: + if _forced == "rocm": + device = torch.device("cuda") # ROCm uses the cuda device string + sys.stderr.write(f"Device: cuda/ROCm (forced){_COMMUNITY_NOTE}\n") + elif _forced == "xpu": + device = torch.device("xpu") + sys.stderr.write(f"Device: xpu/Intel (forced){_COMMUNITY_NOTE}\n") + else: + device = torch.device(_forced) + sys.stderr.write(f"Device: {_forced} (forced via --device)\n") + elif torch.cuda.is_available(): device = torch.device("cuda") - gpu_name = torch.cuda.get_device_name(0) - sys.stderr.write(f"Device: cuda ({gpu_name})\n") + _gpu_name = torch.cuda.get_device_name(0) + # Detect ROCm build (HIP runtime) vs standard CUDA + if hasattr(torch.version, "hip") and torch.version.hip: + sys.stderr.write( + f"Device: cuda/ROCm ({_gpu_name}){_COMMUNITY_NOTE}\n" + ) + else: + sys.stderr.write(f"Device: cuda ({_gpu_name})\n") + elif hasattr(torch, "xpu") and torch.xpu.is_available(): + device = torch.device("xpu") + sys.stderr.write(f"Device: xpu/Intel{_COMMUNITY_NOTE}\n") elif hasattr(torch.backends, "mps") and torch.backends.mps.is_available(): device = torch.device("mps") - sys.stderr.write("Device: mps (Apple Metal)\n") + sys.stderr.write(f"Device: mps (Apple Metal){_COMMUNITY_NOTE}\n") else: device = torch.device("cpu") sys.stderr.write( - "Device: cpu [NOTE: no GPU detected — CUDA/ROCm/MPS would give significant " - "acceleration; see requirements-experiments.txt for install instructions]\n" + "Device: cpu [no GPU detected — CUDA/ROCm/XPU/MPS would be faster; " + "see requirements-lock.txt for install commands]\n" ) model = model.to(device) diff --git a/requirements-lock.txt b/requirements-lock.txt index ed22937..fd47def 100644 --- a/requirements-lock.txt +++ b/requirements-lock.txt @@ -2,7 +2,6 @@ # Generated: 2026-05-13 on Python 3.11 (Windows/Linux/macOS compatible) # # Core experiment dependencies (no GPU required): -# NOTE: numpy==2.4.5 is required for torch 2.3.1 ABI compatibility (numpy 2.x breaks torch). numpy==2.4.5 matplotlib==3.10.9 scipy==1.17.1 @@ -10,12 +9,34 @@ pytest==9.0.3 reportlab==4.5.1 # Neural LLM experiment dependencies (required for real_lm_experiment.py): -# Install separately — platform-dependent. -# CPU: pip install torch==2.3.1 transformers==4.41.0 rouge-score==0.1.2 -# CUDA: pip install torch==2.3.1+cu121 transformers==4.41.0 rouge-score==0.1.2 --index-url https://download.pytorch.org/whl/cu121 -# MPS: pip install torch==2.3.1 transformers==4.41.0 rouge-score==0.1.2 (macOS, Apple Silicon) +# Install separately — platform-dependent. Use torch>=2.0.0; numpy 2.x is compatible. +# +# Hardware test status: +# Verified by maintainer : CPU (x86-64), NVIDIA CUDA 12.1 (RTX 4070 SUPER, Windows 11) +# Community-tested only : AMD ROCm, Intel XPU/Arc, Apple MPS +# Report hardware issues : https://github.com/BitConcepts/oea-framework-paper/issues +# +# CPU (all platforms): +# pip install torch transformers==4.41.0 rouge-score==0.1.2 --index-url https://download.pytorch.org/whl/cpu +# +# NVIDIA CUDA 12.1 [VERIFIED]: +# pip install torch==2.3.1+cu121 transformers==4.41.0 rouge-score==0.1.2 --index-url https://download.pytorch.org/whl/cu121 +# +# NVIDIA CUDA 12.4+ (newer GPUs / drivers): +# pip install torch transformers==4.41.0 rouge-score==0.1.2 --index-url https://download.pytorch.org/whl/cu124 +# +# AMD ROCm 6.x [community-tested — report issues via hardware template]: +# pip install torch transformers==4.41.0 rouge-score==0.1.2 --index-url https://download.pytorch.org/whl/rocm6.3 +# +# Intel Arc / Xe XPU [community-tested — report issues via hardware template]: +# pip install torch transformers==4.41.0 rouge-score==0.1.2 --index-url https://download.pytorch.org/whl/xpu +# (may also require: pip install intel-extension-for-pytorch) +# +# Apple Silicon MPS [community-tested — report issues via hardware template]: +# pip install torch transformers==4.41.0 rouge-score==0.1.2 +# (MPS auto-detected on macOS 13+ with Apple M-series chips) # # Minimum versions (if pinning causes conflicts): -# torch>=2.0.0 -# transformers>=4.28.0 (required for AutoModelForCausalLM + GPT-Neo support) -# rouge-score>=0.1.2 +# torch>=2.0.0 +# transformers>=4.28.0 (required for AutoModelForCausalLM + GPT-Neo support) +# rouge-score>=0.1.2