diff --git a/README.md b/README.md
index 799a0af..6214e30 100644
--- a/README.md
+++ b/README.md
@@ -9,6 +9,7 @@ A multimodal evaluation framework for scheduling LLM and VLM evaluations across
 - **Task groups** for pre-defined evaluation suites with automatic dataset pre-downloading
 - **Multi-cluster support** with auto-detection (Leonardo, LUMI, JURECA, Snellius)
 - **Image evaluation** via lmms-eval (VQAv2, MMBench, MMMU, ChartQA, DocVQA, TextVQA, OCRBench, MathVista)
+- **Video evaluation** via lmms-eval (VideoMMMU, EgoSchema, VideoMME, ActivityNet-QA, LongVideoBench)
 - **Plugin system** for contributing custom benchmarks without touching core code
 - **Automatic building and deployment of containers**

@@ -75,7 +76,18 @@ Super groups: `oellm-multilingual` (all multilingual benchmarks combined)
 | `image-ocrbench` | OCRBench | lmms-eval |
 | `image-mathvista` | MathVista | lmms-eval |

-The lmms-eval adapter class (`llava_hf`, `qwen2_5_vl`, etc.) is auto-detected from the model name.
+### Video
+
+| Group | Benchmark | Engine |
+|---|---|---|
+| `video-understanding` | All 5 benchmarks combined | lmms-eval |
+| `video-videommmu` | VideoMMMU (multi-discipline video QA) | lmms-eval |
+| `video-egoschema` | EgoSchema (long-form egocentric QA) | lmms-eval |
+| `video-videomme` | Video-MME (11s-1h clips) | lmms-eval |
+| `video-activitynet-qa` | ActivityNet-QA (requires GPT API) | lmms-eval |
+| `video-longvideobench` | LongVideoBench (cross-segment reasoning) | lmms-eval |
+
+The lmms-eval adapter class (`llava_hf`, `llava_onevision`, `qwen2_5_vl`, etc.) is auto-detected from the model name. Install with `pip install oellm[video]` (or use a venv with lmms-eval).

 ### Custom Benchmarks (contrib)

@@ -88,6 +100,12 @@ oellm schedule-eval \
   --task-groups "image-vqa" \
   --venv-path ~/elliot-venv

+# Run all 5 video benchmarks
+oellm schedule-eval \
+  --models "lmms-lab/llava-onevision-7b" \
+  --task-groups "video-understanding" \
+  --venv-path ~/elliot-venv
+
 # Mix image and text benchmarks in one submission
 oellm schedule-eval \
   --models "llava-hf/llava-1.5-7b-hf" \
diff --git a/docs/VENV.md b/docs/VENV.md
index bce8ac4..553500f 100644
--- a/docs/VENV.md
+++ b/docs/VENV.md
@@ -95,7 +95,7 @@ We use [Ali's fork](https://github.com/Ali-Elganzory/evalchemy) which includes a
 3. 
Run with `EVALCHEMY_DIR` pointing to the cloned repo: ```bash - export HF_ALLOW_CODE_EVAL=1 # required by MBPP + export HF_ALLOW_CODE_EVAL=1 # required by MBPP EVALCHEMY_DIR=$(pwd)/evalchemy oellm schedule-eval \ --models HuggingFaceTB/SmolLM2-135M \ --task-groups reasoning \ diff --git a/oellm/constants.py b/oellm/constants.py index 15915a5..c8c7334 100644 --- a/oellm/constants.py +++ b/oellm/constants.py @@ -18,10 +18,17 @@ class EvaluationJob: LMMS_MODEL_ADAPTERS: list[tuple[list[str], str]] = [ (["qwen2.5-vl", "qwen2_5_vl", "qwen2.5vl"], "qwen2_5_vl"), (["qwen2-vl", "qwen2_vl"], "qwen2_vl"), + (["llava-hf"], "llava_hf"), + (["llava-onevision", "llava_onevision"], "llava_onevision"), + (["llava-vid", "llava_vid", "llava-video"], "llava_vid"), + (["video-llava", "video_llava"], "video_llava"), (["llava"], "llava_hf"), + (["internvideo"], "internvideo2"), (["internvl"], "internvl2"), (["idefics"], "idefics3"), (["minicpm"], "minicpm_v"), + (["longva"], "longva"), + (["videochat2"], "videochat2"), (["qwen"], "qwen_vl"), ] diff --git a/oellm/contrib/regiondial_bench/__init__.py b/oellm/contrib/regiondial_bench/__init__.py index 8b13789..e69de29 100644 --- a/oellm/contrib/regiondial_bench/__init__.py +++ b/oellm/contrib/regiondial_bench/__init__.py @@ -1 +0,0 @@ - diff --git a/oellm/resources/task-groups.yaml b/oellm/resources/task-groups.yaml index 6f1d757..ba602e2 100644 --- a/oellm/resources/task-groups.yaml +++ b/oellm/resources/task-groups.yaml @@ -35,6 +35,13 @@ task_metrics: mathvista_testmini_cot: llm_as_judge_eval mathvista_testmini_format: llm_as_judge_eval mathvista_testmini_solution: llm_as_judge_eval + # lmms-eval video benchmark metrics + video_mmmu: mmmu_acc + egoschema: submission + videomme: videomme_perception_score + # ActivityNet-QA requires GPT API access for evaluation (LLM-as-judge) + activitynetqa: gpt_eval_accuracy + longvideobench_val_v: lvb_acc task_groups: open-sci-0.01: @@ -416,6 +423,64 @@ task_groups: - task: mathvista_testmini dataset: AI4Math/MathVista + # ── Video Modality (lmms-eval) ──────────────────────────────────────────── + video-understanding: + description: "Video understanding benchmarks via lmms-eval (VideoMMMU, EgoSchema, VideoMME, ActivityNet-QA, LongVideoBench)" + suite: lmms_eval + n_shots: [0] + tasks: + - task: video_mmmu + dataset: lmms-lab/VideoMMMU + - task: egoschema + dataset: lmms-lab/egoschema + - task: videomme + dataset: lmms-lab/Video-MME + - task: activitynetqa + dataset: lmms-lab/ActivityNetQA + - task: longvideobench_val_v + dataset: longvideobench/LongVideoBench + + # ── Individual Video Benchmarks (single-task groups for targeted runs) ──── + video-videommmu: + description: "VideoMMMU multi-discipline video understanding via lmms-eval" + suite: lmms_eval + n_shots: [0] + tasks: + - task: video_mmmu + dataset: lmms-lab/VideoMMMU + + video-egoschema: + description: "EgoSchema long-form egocentric video QA via lmms-eval" + suite: lmms_eval + n_shots: [0] + tasks: + - task: egoschema + dataset: lmms-lab/egoschema + + video-videomme: + description: "Video-MME full-spectrum video understanding (11s-1h) via lmms-eval" + suite: lmms_eval + n_shots: [0] + tasks: + - task: videomme + dataset: lmms-lab/Video-MME + + video-activitynet-qa: + description: "ActivityNet-QA open-ended activity video QA via lmms-eval (requires GPT API for scoring)" + suite: lmms_eval + n_shots: [0] + tasks: + - task: activitynetqa + dataset: lmms-lab/ActivityNetQA + + video-longvideobench: + description: "LongVideoBench long-video cross-segment reasoning via 
lmms-eval" + suite: lmms_eval + n_shots: [0] + tasks: + - task: longvideobench_val_v + dataset: longvideobench/LongVideoBench + dclm-core-22: description: "DCLM core 22 evaluation tasks (lm-eval-harness, matching LLM Foundry task types)" suite: lm-eval-harness diff --git a/oellm/resources/template.sbatch b/oellm/resources/template.sbatch index 17c374b..d67ab94 100644 --- a/oellm/resources/template.sbatch +++ b/oellm/resources/template.sbatch @@ -190,11 +190,17 @@ do _lmms_adapter="${{eval_suite#*:}}" OUTPUT_JSON="{evals_dir}/$(openssl rand -hex 5).json" + # LLaVA adapters need model_name to avoid a missing-import bug in lmms-eval + _lmms_extra_args="" + if [[ "$_lmms_adapter" == "llava_onevision" || "$_lmms_adapter" == "llava_vid" || "$_lmms_adapter" == "video_llava" ]]; then + _lmms_extra_args=",model_name=$(basename "$model_path")" + fi + if [ -n "$VENV_PATH" ]; then source "$VENV_PATH/bin/activate" python -m lmms_eval \ --model "$_lmms_adapter" \ - --model_args "pretrained=$model_path,device_map=auto" \ + --model_args "pretrained=$model_path,device_map=auto$_lmms_extra_args" \ --tasks "$task_path" \ --num_fewshot "$n_shot" \ --output_path "$OUTPUT_JSON" \ @@ -205,7 +211,7 @@ do $EVAL_SIF_PATH \ python -m lmms_eval \ --model "$_lmms_adapter" \ - --model_args "pretrained=$model_path,device_map=auto" \ + --model_args "pretrained=$model_path,device_map=auto$_lmms_extra_args" \ --tasks "$task_path" \ --num_fewshot "$n_shot" \ --output_path "$OUTPUT_JSON" \ diff --git a/oellm/task_groups.py b/oellm/task_groups.py index 7291420..e925273 100644 --- a/oellm/task_groups.py +++ b/oellm/task_groups.py @@ -9,6 +9,7 @@ class DatasetSpec: repo_id: str subset: str | None = None + video: bool = False @dataclass @@ -154,16 +155,16 @@ class TaskGroupResult: def _iter_all_tasks( parsed: dict[str, TaskSuperGroup | TaskGroup], -) -> Iterable[tuple[_Task, str]]: - """Yield ``(task, suite)`` pairs from a parsed group dict, flattening super groups.""" - for group in parsed.values(): +) -> Iterable[tuple[_Task, str, str]]: + """Yield ``(task, suite, group_name)`` triples from a parsed group dict, flattening super groups.""" + for group_name, group in parsed.items(): if isinstance(group, TaskGroup): for t in group.tasks: - yield t, t.suite or group.suite + yield t, t.suite or group.suite, group_name else: for g in group.task_groups: for t in g.tasks: - yield t, t.suite or g.suite + yield t, t.suite or g.suite, g.name def _expand_task_groups(group_names: Iterable[str]) -> list[TaskGroupResult]: @@ -173,7 +174,7 @@ def _expand_task_groups(group_names: Iterable[str]) -> list[TaskGroupResult]: raise ValueError(f"Unknown task group(s): {', '.join(sorted(missing))}") results: list[TaskGroupResult] = [] - for t, suite in _iter_all_tasks(parsed): + for t, suite, _gname in _iter_all_tasks(parsed): for shot in (int(s) for s in (t.n_shots or [])): results.append(TaskGroupResult(task=t.name, n_shot=shot, suite=suite)) @@ -198,22 +199,28 @@ def _collect_dataset_specs(group_names: Iterable[str]) -> list[DatasetSpec]: parsed = _parse_task_groups([str(n).strip() for n in group_names if str(n).strip()]) specs: list[DatasetSpec] = [] - seen: set[tuple[str, str | None]] = set() + seen: set[tuple[str, str | None, str | None]] = set() - def add_spec(dataset: str | None, subset: str | None): + def add_spec( + dataset: str | None, + subset: str | None, + video: bool = False, + ): if dataset is None: return key = (dataset, subset) if key not in seen: seen.add(key) - specs.append(DatasetSpec(repo_id=dataset, subset=subset)) + 
specs.append(DatasetSpec(repo_id=dataset, subset=subset, video=video)) + + for t, _, group_name in _iter_all_tasks(parsed): + is_video = group_name.startswith("video-") - for t, _ in _iter_all_tasks(parsed): if t.dataset == "facebook/flores" and not t.subset: for lang in _extract_flores_subsets(t.name): add_spec(t.dataset, lang) else: - add_spec(t.dataset, t.subset) + add_spec(t.dataset, t.subset, video=is_video) return specs @@ -225,7 +232,7 @@ def _collect_hf_model_repos(group_names: Iterable[str]) -> list[str]: repos: list[str] = [] seen: set[str] = set() - for t, _ in _iter_all_tasks(parsed): + for t, _, _gname in _iter_all_tasks(parsed): for repo_id in t.hf_models or []: if repo_id not in seen: seen.add(repo_id) @@ -238,24 +245,32 @@ def _collect_hf_dataset_files(group_names: Iterable[str]) -> list[dict]: """Return deduplicated HF dataset file specs declared in task ``hf_dataset_files`` fields.""" parsed = _parse_task_groups([str(n).strip() for n in group_names if str(n).strip()]) - # Merge patterns from all tasks that share the same repo_id so that - # a single snapshot_download fetches everything needed. - merged: dict[str, list[str]] = {} + # Merge patterns from all tasks that share the same (repo_id, revision) + # so that a single snapshot_download fetches everything needed. + merged: dict[tuple[str, str | None], list[str]] = {} - for t, _ in _iter_all_tasks(parsed): + for t, _, _gname in _iter_all_tasks(parsed): for spec in t.hf_dataset_files or []: repo_id = spec.get("repo_id", "") if not repo_id: continue + revision = spec.get("revision") patterns = spec.get("patterns") or [] - if repo_id not in merged: - merged[repo_id] = list(patterns) + key = (repo_id, revision) + if key not in merged: + merged[key] = list(patterns) else: for p in patterns: - if p not in merged[repo_id]: - merged[repo_id].append(p) + if p not in merged[key]: + merged[key].append(p) - return [{"repo_id": rid, "patterns": pats} for rid, pats in merged.items()] + result = [] + for (rid, rev), pats in merged.items(): + entry: dict = {"repo_id": rid, "patterns": pats} + if rev: + entry["revision"] = rev + result.append(entry) + return result def _build_task_dataset_map() -> dict[str, list[DatasetSpec]]: @@ -268,7 +283,7 @@ def _build_task_dataset_map() -> dict[str, list[DatasetSpec]]: task_map: dict[str, list[DatasetSpec]] = {} - for t, _ in _iter_all_tasks(parsed): + for t, _, _gname in _iter_all_tasks(parsed): if t.dataset and t.name not in task_map: if t.dataset == "facebook/flores" and not t.subset: task_map[t.name] = [ diff --git a/oellm/utils.py b/oellm/utils.py index 27bde7d..0c381da 100644 --- a/oellm/utils.py +++ b/oellm/utils.py @@ -314,21 +314,9 @@ def _process_model_paths(models: Iterable[str]): if "HF_HOME" in os.environ else None ) - try: - from huggingface_hub import try_to_load_from_cache - - cached = try_to_load_from_cache( - model, "config.json", cache_dir=cache_dir - ) - if isinstance(cached, str): - logging.info( - f"Model '{model}' already cached, skipping download" - ) - per_model_paths.append(model) - continue - except Exception: - pass status.update(f"Downloading '{model}' ({idx}/{len(models_list)})") + # snapshot_download is idempotent — it skips files that + # are already cached and only fetches missing ones. 
snapshot_download( repo_id=model, cache_dir=cache_dir, @@ -373,16 +361,20 @@ def _pre_download_hf_dataset_files(dataset_files: list[dict]) -> None: for idx, spec in enumerate(dataset_files, 1): repo_id = spec.get("repo_id", "") patterns = spec.get("patterns") + revision = spec.get("revision") status.update(f"Downloading '{repo_id}' ({idx}/{len(dataset_files)})") try: - snapshot_download( - repo_id=repo_id, - repo_type="dataset", - allow_patterns=patterns, - cache_dir=Path(os.getenv("HF_HOME")) / "hub" + kwargs = { + "repo_id": repo_id, + "repo_type": "dataset", + "allow_patterns": patterns, + "cache_dir": Path(os.getenv("HF_HOME")) / "hub" if "HF_HOME" in os.environ else None, - ) + } + if revision: + kwargs["revision"] = revision + snapshot_download(**kwargs) except Exception as e: logging.warning(f"Failed to download dataset files from '{repo_id}': {e}") @@ -391,6 +383,7 @@ def _pre_download_datasets_from_specs( specs: Iterable, trust_remote_code: bool = True ) -> None: from datasets import get_dataset_config_names, load_dataset + from huggingface_hub import snapshot_download specs_list = list(specs) if not specs_list: @@ -406,6 +399,18 @@ def _pre_download_datasets_from_specs( label = f"{spec.repo_id}" + (f"/{spec.subset}" if spec.subset else "") status.update(f"Downloading '{label}' ({idx}/{len(specs_list)})") + # Video datasets: lmms-eval calls snapshot_download at runtime + # to get raw video files, then symlinks them into $HF_HOME. + # Pre-download so offline compute nodes find everything cached. + if spec.video: + try: + snapshot_download( + repo_id=spec.repo_id, + repo_type="dataset", + ) + except Exception as e: + logging.warning(f"Failed to snapshot_download '{spec.repo_id}': {e}") + try: load_dataset( spec.repo_id, diff --git a/pyproject.toml b/pyproject.toml index 242f8ea..37a2f8f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -23,6 +23,9 @@ dev = [ image = [ "lmms-eval @ git+https://github.com/EvolvingLMMs-Lab/lmms-eval.git", ] +video = [ + "lmms-eval @ git+https://github.com/EvolvingLMMs-Lab/lmms-eval.git", +] [project.scripts] oellm = "oellm.main:main" diff --git a/requirements-venv-evalchemy.txt b/requirements-venv-evalchemy.txt index 9ec7d3d..c63c3fb 100644 --- a/requirements-venv-evalchemy.txt +++ b/requirements-venv-evalchemy.txt @@ -1,6 +1,6 @@ # Dependencies for evalchemy evaluation -# lm-eval fork used by evalchemy +# lm-eval fork used by evalchemy lm-eval @ git+https://github.com/EtashGuha/lm-evaluation-harness@etashg/tokenize_fix scipy==1.17.0 diff --git a/tests/test_video_task_groups.py b/tests/test_video_task_groups.py new file mode 100644 index 0000000..f5d7274 --- /dev/null +++ b/tests/test_video_task_groups.py @@ -0,0 +1,205 @@ +import os +import sys +from importlib.resources import files +from pathlib import Path +from unittest.mock import patch + +import yaml + +from oellm.task_groups import ( + _collect_dataset_specs, + _expand_task_groups, + get_all_task_group_names, +) + +VIDEO_TASK_GROUP = "video-understanding" + +EXPECTED_TASKS = { + "video_mmmu", + "egoschema", + "videomme", + "activitynetqa", + "longvideobench_val_v", +} + +EXPECTED_DATASETS = { + "lmms-lab/VideoMMMU", + "lmms-lab/egoschema", + "lmms-lab/Video-MME", + "lmms-lab/ActivityNetQA", + "longvideobench/LongVideoBench", +} + + +class TestVideoTaskGroupInRegistry: + def test_video_understanding_present_in_yaml(self): + all_groups = get_all_task_group_names() + assert VIDEO_TASK_GROUP in all_groups + + def test_video_understanding_suite_is_lmms_eval(self): + data = 
yaml.safe_load((files("oellm.resources") / "task-groups.yaml").read_text()) + suite = data["task_groups"][VIDEO_TASK_GROUP]["suite"] + assert suite == "lmms_eval" + + def test_video_understanding_has_five_tasks(self): + data = yaml.safe_load((files("oellm.resources") / "task-groups.yaml").read_text()) + tasks = data["task_groups"][VIDEO_TASK_GROUP]["tasks"] + assert len(tasks) == 5 + + def test_individual_video_groups_present(self): + all_groups = get_all_task_group_names() + for name in [ + "video-videommmu", + "video-egoschema", + "video-videomme", + "video-activitynet-qa", + "video-longvideobench", + ]: + assert name in all_groups, f"{name} not in task group registry" + + +class TestVideoTaskGroupExpansion: + def test_expands_to_correct_task_names(self): + results = _expand_task_groups([VIDEO_TASK_GROUP]) + task_names = {r.task for r in results} + assert task_names == EXPECTED_TASKS + + def test_all_tasks_have_zero_shot(self): + results = _expand_task_groups([VIDEO_TASK_GROUP]) + for r in results: + assert r.n_shot == 0, f"{r.task} has n_shot={r.n_shot}, expected 0" + + def test_all_tasks_route_to_lmms_eval(self): + results = _expand_task_groups([VIDEO_TASK_GROUP]) + for r in results: + assert r.suite == "lmms_eval", ( + f"{r.task} has suite='{r.suite}', expected 'lmms_eval'" + ) + + def test_expand_individual_video_group(self): + results = _expand_task_groups(["video-videommmu"]) + assert len(results) == 1 + assert results[0].task == "video_mmmu" + assert results[0].suite == "lmms_eval" + + +class TestVideoTaskGroupDatasetSpecs: + def test_all_expected_datasets_present(self): + specs = _collect_dataset_specs([VIDEO_TASK_GROUP]) + repo_ids = {s.repo_id for s in specs} + assert repo_ids == EXPECTED_DATASETS + + def test_no_duplicate_dataset_specs(self): + specs = _collect_dataset_specs([VIDEO_TASK_GROUP]) + keys = [(s.repo_id, s.subset) for s in specs] + assert len(keys) == len(set(keys)), "Duplicate dataset specs found" + + def test_videomme_dataset_included(self): + specs = _collect_dataset_specs([VIDEO_TASK_GROUP]) + repo_ids = {s.repo_id for s in specs} + assert "lmms-lab/Video-MME" in repo_ids + + +class TestVideoTaskGroupScheduleEvals: + """Verify video-understanding integrates with the schedule_evals dry-run path.""" + + def test_schedule_evals_dry_run_video(self, tmp_path): + from oellm.main import schedule_evals + + with ( + patch("oellm.scheduler._load_cluster_env"), + patch("oellm.scheduler._num_jobs_in_queue", return_value=0), + patch( + "oellm.runner.detect_lmms_model_type", + return_value="llava_onevision", + ), + patch.dict(os.environ, {"EVAL_OUTPUT_DIR": str(tmp_path)}), + ): + schedule_evals( + models="lmms-lab/llava-onevision-7b", + task_groups=VIDEO_TASK_GROUP, + skip_checks=True, + venv_path=str(Path(sys.prefix)), + dry_run=True, + ) + + sbatch_files = list(tmp_path.glob("**/submit_evals.sbatch")) + assert len(sbatch_files) == 1 + sbatch_content = sbatch_files[0].read_text() + assert "lmms_eval" in sbatch_content + + def test_schedule_evals_jobs_csv_has_lmms_eval_suite(self, tmp_path): + import pandas as pd + + from oellm.main import schedule_evals + + with ( + patch("oellm.scheduler._load_cluster_env"), + patch("oellm.scheduler._num_jobs_in_queue", return_value=0), + patch( + "oellm.runner.detect_lmms_model_type", + return_value="llava_onevision", + ), + patch.dict(os.environ, {"EVAL_OUTPUT_DIR": str(tmp_path)}), + ): + schedule_evals( + models="lmms-lab/llava-onevision-7b", + task_groups=VIDEO_TASK_GROUP, + skip_checks=True, + venv_path=str(Path(sys.prefix)), + 
dry_run=True, + ) + + csv_files = list(tmp_path.glob("**/jobs.csv")) + assert len(csv_files) == 1 + df = pd.read_csv(csv_files[0]) + assert all(s.startswith("lmms_eval") for s in df["eval_suite"].unique()) + assert set(df["task_path"].unique()) == EXPECTED_TASKS + + +class TestVideoModelAdapters: + """Verify video-specific model adapter detection.""" + + def test_llava_onevision_detected(self): + from oellm.constants import detect_lmms_model_type + + assert detect_lmms_model_type("lmms-lab/llava-onevision-7b") == "llava_onevision" + + def test_llava_vid_detected(self): + from oellm.constants import detect_lmms_model_type + + assert detect_lmms_model_type("llava-vid-7b") == "llava_vid" + + def test_video_llava_detected(self): + from oellm.constants import detect_lmms_model_type + + assert detect_lmms_model_type("video-llava-7b") == "video_llava" + + def test_longva_detected(self): + from oellm.constants import detect_lmms_model_type + + assert detect_lmms_model_type("longva-7b") == "longva" + + def test_internvideo_detected(self): + from oellm.constants import detect_lmms_model_type + + assert detect_lmms_model_type("internvideo2-chat") == "internvideo2" + + def test_llava_hf_onevision_routes_to_llava_hf(self): + """HuggingFace-format llava-onevision models must use llava_hf, not llava_onevision.""" + from oellm.constants import detect_lmms_model_type + + assert ( + detect_lmms_model_type("llava-hf/llava-onevision-qwen2-0.5b-ov-hf") + == "llava_hf" + ) + + def test_generic_llava_still_works(self): + from oellm.constants import detect_lmms_model_type + + assert detect_lmms_model_type("llava-hf/llava-1.5-7b-hf") == "llava_hf" + + def test_qwen25_vl_still_works(self): + from oellm.constants import detect_lmms_model_type + + assert detect_lmms_model_type("Qwen/Qwen2.5-VL-7B-Instruct") == "qwen2_5_vl"
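
Note on adapter ordering in `oellm/constants.py`: detection is first-substring-match over `LMMS_MODEL_ADAPTERS`, so the `llava-hf` entry must stay ahead of `llava-onevision` for HuggingFace-format OneVision checkpoints to route to `llava_hf`. A minimal sketch of the assumed matching logic (the shipped `detect_lmms_model_type` may differ in details such as the fallback adapter):

```python
# Sketch of first-match adapter detection; assumes case-insensitive
# substring matching and a llava_hf fallback, neither shown in the diff.
LMMS_MODEL_ADAPTERS: list[tuple[list[str], str]] = [
    (["qwen2.5-vl", "qwen2_5_vl", "qwen2.5vl"], "qwen2_5_vl"),
    (["qwen2-vl", "qwen2_vl"], "qwen2_vl"),
    # Must precede llava-onevision: "llava-hf/llava-onevision-*-hf"
    # checkpoints need the llava_hf adapter, not llava_onevision.
    (["llava-hf"], "llava_hf"),
    (["llava-onevision", "llava_onevision"], "llava_onevision"),
    (["llava-vid", "llava_vid", "llava-video"], "llava_vid"),
    (["video-llava", "video_llava"], "video_llava"),
    (["llava"], "llava_hf"),
    (["internvideo"], "internvideo2"),
    (["internvl"], "internvl2"),
    (["idefics"], "idefics3"),
    (["minicpm"], "minicpm_v"),
    (["longva"], "longva"),
    (["videochat2"], "videochat2"),
    (["qwen"], "qwen_vl"),
]


def detect_lmms_model_type(model_name: str) -> str:
    """Return the lmms-eval adapter class for a model name (first match wins)."""
    lowered = model_name.lower()
    for substrings, adapter in LMMS_MODEL_ADAPTERS:
        if any(s in lowered for s in substrings):
            return adapter
    return "llava_hf"  # assumed default


assert detect_lmms_model_type("lmms-lab/llava-onevision-7b") == "llava_onevision"
assert detect_lmms_model_type("llava-hf/llava-onevision-qwen2-0.5b-ov-hf") == "llava_hf"
```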
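
Worked example of the new `(repo_id, revision)` merge key in `_collect_hf_dataset_files`: two tasks pinning different revisions of the same repo no longer collapse into one download, while tasks sharing a revision still merge their patterns. A condensed equivalent of the merge loop, with hypothetical repo names and patterns:

```python
# Hypothetical specs, in the shape the diff reads from t.hf_dataset_files.
specs = [
    {"repo_id": "org/data", "patterns": ["a/*"], "revision": "v1"},
    {"repo_id": "org/data", "patterns": ["b/*"], "revision": "v1"},
    {"repo_id": "org/data", "patterns": ["a/*"], "revision": "v2"},
]

merged: dict[tuple[str, str | None], list[str]] = {}
for spec in specs:
    key = (spec["repo_id"], spec.get("revision"))
    for p in spec.get("patterns") or []:
        if p not in merged.setdefault(key, []):
            merged[key].append(p)

# One snapshot_download per (repo, revision); patterns merged within each:
# {("org/data", "v1"): ["a/*", "b/*"], ("org/data", "v2"): ["a/*"]}
```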
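
End-to-end shape of the video pre-download path: `_collect_dataset_specs` marks specs coming from `video-*` groups with `video=True`, and the pre-download step then snapshots the whole dataset repo so that lmms-eval's runtime `snapshot_download` hits a warm cache on offline compute nodes. A standalone sketch under those assumptions:

```python
import logging

from huggingface_hub import snapshot_download

from oellm.task_groups import _collect_dataset_specs

# Warm the HF cache for every video benchmark before the job is submitted.
for spec in _collect_dataset_specs(["video-understanding"]):
    if spec.video:
        try:
            # Full-repo snapshot: no allow_patterns, because lmms-eval
            # symlinks raw video files out of this cache at runtime.
            snapshot_download(repo_id=spec.repo_id, repo_type="dataset")
        except Exception as e:
            logging.warning(f"Failed to snapshot_download '{spec.repo_id}': {e}")
```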