33 changes: 22 additions & 11 deletions src/inference_endpoint/commands/benchmark/execute.py
@@ -179,27 +179,38 @@ def enable_streaming(self) -> bool:
 
 
 def _check_tokenizer_exists(model_name: str) -> bool:
-    """Check if a HuggingFace tokenizer exists for the model (API only, no download).
-
-    Returns True if the model repo exists and has tokenizer files, False otherwise.
-    This function is a probe — it never loads or downloads the tokenizer itself.
-    Downstream consumers that need tokenization (e.g. the MetricsAggregator
-    subprocess for ISL/OSL/TPOT, Harmony transforms for prompt preprocessing,
-    and any future plugin with its own tokenization need) each load their own
-    instance as required.
+    """Check if a tokenizer exists for the model (local dir or HF repo, no download).
+
+    Returns True if a tokenizer is available, False otherwise. This function is
+    a probe — it never loads or downloads the tokenizer itself. Downstream
+    consumers that need tokenization (e.g. the MetricsAggregator subprocess
+    for ISL/OSL/TPOT, Harmony transforms for prompt preprocessing, and any
+    future plugin with its own tokenization need) each load their own instance
+    as required.
+
+    ``model_name`` may be a local checkpoint directory (e.g. an NVFP4 snapshot
+    cached under ``/root/.cache/huggingface/hub/...``) or an HF repo ID. Local
+    directories are probed directly; otherwise we ask the HF Hub for the file
+    listing.
     """
     try:
-        info = model_info(model_name)
-        # Check for tokenizer files in the repo
-        siblings = {s.rfilename for s in (info.siblings or [])}
+        local_path = Path(model_name)
+        if local_path.is_dir():
+            siblings = {p.name for p in local_path.iterdir() if p.is_file()}
+        else:
+            info = model_info(model_name)
+            siblings = {s.rfilename for s in (info.siblings or [])}
 
         has_tokenizer = (
             "tokenizer_config.json" in siblings or "tokenizer.json" in siblings
         )
 
         if has_tokenizer:
             logger.info(f"Tokenizer available for model: {model_name}")
         else:
             logger.warning(f"Model {model_name} found but has no tokenizer files")
         return has_tokenizer
 
     except ImportError:
         # huggingface_hub not installed — fall back to assuming it works
         logger.info(
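For context on what the revised docstring means by consumers loading their own tokenizer instance, here is a minimal consumer-side sketch. It is illustrative only: it assumes the transformers package is available and uses a made-up local checkpoint path. AutoTokenizer.from_pretrained accepts both forms the probe handles, a local directory or an HF repo ID.

# Consumer-side sketch (illustrative only): the probe answers "is a tokenizer
# there?" without downloading anything; each consumer then loads its own
# instance, as the docstring describes.
from transformers import AutoTokenizer

model_name = "/models/nvfp4-snapshot"  # hypothetical local checkpoint dir

if _check_tokenizer_exists(model_name):
    # e.g. inside the MetricsAggregator subprocess, for ISL/OSL/TPOT counting
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    isl = len(tokenizer.encode("An example prompt"))
else:
    # No tokenizer files found; callers decide how to degrade gracefully.
    tokenizer = None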
9 changes: 8 additions & 1 deletion src/inference_endpoint/load_generator/session.py
@@ -191,8 +191,15 @@ def issue(self, sample_index: int) -> str | None:
         prompt_data: PromptData
         if isinstance(data, dict):
             token_ids = data.get("input_tokens") or data.get("token_ids")
+            # Multimodal datasets store ``prompt`` as a list of OpenAI content
+            # parts (e.g. [{"type": "text", ...}, {"type": "image_url", ...}])
+            # which the HTTP adapter handles directly. `PromptData.text` is only
+            # meaningful for ISL reporting on text-only prompts.
+            # Therefore, setting `text=None` for non-string prompts
+            # means that ISL reporting will be unavailable for multimodal samples.
+            prompt = data.get("prompt")
             prompt_data = PromptData(
-                text=data.get("prompt"),
+                text=prompt if isinstance(prompt, str) else None,
                 token_ids=tuple(token_ids) if token_ids is not None else None,
             )
         else:
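To make the new comment concrete, below is a hypothetical multimodal sample of the shape it describes; the dict keys mirror the snippet above, the values are invented.

# Illustrative only: a multimodal dataset row whose "prompt" is a list of
# OpenAI content parts rather than a plain string.
data = {
    "prompt": [
        {"type": "text", "text": "What is shown in this image?"},
        {"type": "image_url", "image_url": {"url": "https://example.com/cat.png"}},
    ],
    "input_tokens": None,
}

prompt = data.get("prompt")
# Mirrors the branch above: only plain-string prompts populate `text`, so ISL
# reporting is skipped for this sample while the content parts are passed
# through to the HTTP adapter untouched.
text = prompt if isinstance(prompt, str) else None
assert text is None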
36 changes: 26 additions & 10 deletions src/inference_endpoint/openai/types.py
@@ -108,21 +108,30 @@ class ChatCompletionRequest(
 class ChatCompletionResponseMessage(
     msgspec.Struct, frozen=True, kw_only=True, omit_defaults=True, gc=False
 ): # type: ignore[call-arg]
-    """Response message from OpenAI."""
+    """Response message from OpenAI.
+
+    ``content`` and ``refusal`` are nullable per the OpenAI spec and vLLM
+    routinely omits them (e.g. when the model returns no text or no refusal
+    block), so they default to ``None`` to allow successful decoding.
+    """
 
     role: str
-    content: str | None
-    refusal: str | None
+    content: str | None = None
+    refusal: str | None = None
 
 
 class ChatCompletionChoice(
     msgspec.Struct, frozen=True, kw_only=True, omit_defaults=True, gc=False
 ): # type: ignore[call-arg]
-    """A single choice in the completion response."""
+    """A single choice in the completion response.
+
+    ``finish_reason`` may be omitted in non-final SSE chunks; default to
+    ``None`` so decoding intermediate frames does not fail.
+    """
 
     index: int
     message: ChatCompletionResponseMessage
-    finish_reason: str | None
+    finish_reason: str | None = None
 
 
 class CompletionUsage(
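A quick illustration of why the new defaults matter: per the docstrings above, a choice from a non-final SSE frame can arrive without finish_reason, content, or refusal. A minimal sketch, assuming these structs are importable from inference_endpoint.openai.types (inferred from the file path):

# Sketch (import path is an assumption): a payload that omits finish_reason,
# content, and refusal still decodes because each of those fields now
# defaults to None.
import msgspec

from inference_endpoint.openai.types import ChatCompletionChoice

raw = b'{"index": 0, "message": {"role": "assistant"}}'
choice = msgspec.json.decode(raw, type=ChatCompletionChoice)
print(choice.message.content, choice.finish_reason)  # None None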
Expand All @@ -142,15 +151,22 @@ class ChatCompletionResponse(
omit_defaults=False,
gc=False,
): # type: ignore[call-arg]
"""OpenAI chat completion response."""
"""OpenAI chat completion response.

Most servers (vLLM, Dynamo, etc.) legitimately omit a number of these
fields — e.g. ``usage`` is only emitted on the final SSE chunk,
``system_fingerprint`` is rarely populated, and ``created``/``model``
can be missing in some response variants. All of these get safe
defaults so the decoder accepts whatever the server sends.
"""

id: str
object: str = "chat.completion"
created: int
model: str
created: int = 0
model: str = ""
choices: list[ChatCompletionChoice]
usage: CompletionUsage | None
system_fingerprint: str | None
usage: CompletionUsage | None = None
system_fingerprint: str | None = None


# ============================================================================
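At the response level, a stripped-down payload of the kind the docstring describes, with usage, system_fingerprint, created, and model all absent, decodes cleanly as well. Again a minimal sketch under the same import assumption:

# Sketch (same import assumption as above): a minimal vLLM-style response
# missing usage, system_fingerprint, created, and model.
import msgspec

from inference_endpoint.openai.types import ChatCompletionResponse

raw = b"""
{
  "id": "chatcmpl-123",
  "object": "chat.completion",
  "choices": [
    {"index": 0, "message": {"role": "assistant", "content": "Hello!"}}
  ]
}
"""

resp = msgspec.json.decode(raw, type=ChatCompletionResponse)
print(resp.created, repr(resp.model), resp.usage)  # 0 '' None
print(resp.choices[0].finish_reason)               # None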