From 4691744fb809e043b6d3a45bb0cdd04eba05bc09 Mon Sep 17 00:00:00 2001
From: mingyuanm <mingyuanm@nvidia.com>
Date: Thu, 7 May 2026 11:30:21 -0700
Subject: [PATCH 1/8] Add 8k subset to predefined dataset list

Signed-off-by: Mingyuan Ma <mingyuanm@nvidia.com>
---
 docs/dataset_manager/DESIGN.md                |   5 +-
 .../dataset_manager/__init__.py               |   3 +-
 .../shopify_product_catalogue/__init__.py     |  58 +++++--
 .../test_shopify_product_catalogue.py         | 149 +++++++++++++++---
 4 files changed, 178 insertions(+), 37 deletions(-)

diff --git a/docs/dataset_manager/DESIGN.md b/docs/dataset_manager/DESIGN.md
index 5650e601..024950e2 100644
--- a/docs/dataset_manager/DESIGN.md
+++ b/docs/dataset_manager/DESIGN.md
@@ -110,8 +110,9 @@ configs. Each predefined dataset ships with default transforms for supported mod
 | `cnndailymail`              | CNN/DailyMail | Summarization                              |
 | `open_orca`                 | OpenOrca      | General instruction                        |
 | `livecodebench`             | LiveCodeBench | Code generation; requires additional setup |
-| `shopify_product_catalogue` | Shopify       | E-commerce Q&A (q3vl)                      |
-| `random`                    | Synthetic     | Generated prompts for throughput testing   |
+| `shopify_product_catalogue`    | Shopify       | E-commerce Q&A (q3vl)                      |
+| `shopify_product_catalogue_8k` | Shopify       | 8k sample variant of Shopify product catalogue (q3vl) |
+| `random`                       | Synthetic     | Generated prompts for throughput testing   |
 
 ## Preset System
 
diff --git a/src/inference_endpoint/dataset_manager/__init__.py b/src/inference_endpoint/dataset_manager/__init__.py
index 4bb6c575..cf675bea 100644
--- a/src/inference_endpoint/dataset_manager/__init__.py
+++ b/src/inference_endpoint/dataset_manager/__init__.py
@@ -27,7 +27,7 @@
 from .predefined.livecodebench import LiveCodeBench
 from .predefined.open_orca import OpenOrca
 from .predefined.random import RandomDataset
-from .predefined.shopify_product_catalogue import ShopifyProductCatalogue
+from .predefined.shopify_product_catalogue import ShopifyProductCatalogue, ShopifyProductCatalogue8k
 from .transforms import (
     AddStaticColumns,
     ColumnFilter,
@@ -58,4 +58,5 @@
     "CNNDailyMail",
     "RandomDataset",
     "ShopifyProductCatalogue",
+    "ShopifyProductCatalogue8k",
 ]
diff --git a/src/inference_endpoint/dataset_manager/predefined/shopify_product_catalogue/__init__.py b/src/inference_endpoint/dataset_manager/predefined/shopify_product_catalogue/__init__.py
index 6bae9f43..9051d9a5 100644
--- a/src/inference_endpoint/dataset_manager/predefined/shopify_product_catalogue/__init__.py
+++ b/src/inference_endpoint/dataset_manager/predefined/shopify_product_catalogue/__init__.py
@@ -13,10 +13,11 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-"""Shopify product catalogue dataset for multimodal product taxonomy classification."""
+"""Shopify product catalogue datasets for multimodal product taxonomy classification."""
 
 import base64
 import json
+from abc import ABC
 from io import BytesIO
 from logging import getLogger
 from pathlib import Path
@@ -66,16 +67,12 @@ def _process_sample_to_row(sample: dict[str, Any]) -> dict[str, Any]:
     }
 
 
-class ShopifyProductCatalogue(
-    Dataset,
-    dataset_id="shopify_product_catalogue",
-):
-    """Shopify product catalogue: multimodal benchmark for product taxonomy classification.
+class BaseShopifyProductCatalogue(Dataset, ABC):
+    """Abstract base class for Shopify product catalogue datasets.
 
-    Reference: https://huggingface.co/datasets/Shopify/product-catalogue
-
-    Each sample includes product image, title, description, and candidate categories.
-    Compatible with OpenAI multimodal adapter (prompt/system with vision content).
+    Contains shared logic for downloading and processing product catalogue
+    data from HuggingFace. Subclasses only need to define REPO_ID and
+    dataset_id.
     """
 
     COLUMN_NAMES = [
@@ -91,7 +88,8 @@ class ShopifyProductCatalogue(
 
     PRESETS = presets
 
-    REPO_ID = "Shopify/product-catalogue"
+    REPO_ID: str
+    """HuggingFace dataset repository ID. Must be set by subclass."""
 
     @classmethod
     def generate(
@@ -139,7 +137,7 @@ def generate(
 
         ds = load_dataset(cls.REPO_ID, split=split_key, **load_options)
         logger.info(
-            f"Loaded {len(ds)} samples from Shopify product catalogue ({split_key})"
+            f"Loaded {len(ds)} samples from {cls.REPO_ID} ({split_key})"
         )
 
         all_rows: list[dict[str, Any]] = []
@@ -158,4 +156,38 @@ def generate(
         return df
 
 
-__all__ = ["ProductMetadata", "ShopifyProductCatalogue"]
+class ShopifyProductCatalogue(
+    BaseShopifyProductCatalogue,
+    dataset_id="shopify_product_catalogue",
+):
+    """Shopify product catalogue: multimodal benchmark for product taxonomy classification.
+
+    Reference: https://huggingface.co/datasets/Shopify/product-catalogue
+
+    Each sample includes product image, title, description, and candidate categories.
+    Compatible with OpenAI multimodal adapter (prompt/system with vision content).
+    """
+
+    REPO_ID = "Shopify/product-catalogue"
+
+
+class ShopifyProductCatalogue8k(
+    BaseShopifyProductCatalogue,
+    dataset_id="shopify_product_catalogue_8k",
+):
+    """Shopify product catalogue 8k: 8,000 sample variant for product taxonomy classification.
+
+    Reference: https://huggingface.co/datasets/nvidia/Shopify-product-catalogue-8k
+
+    Each sample includes product image, title, description, and candidate categories.
+    Compatible with OpenAI multimodal adapter (prompt/system with vision content).
+    """
+
+    REPO_ID = "nvidia/Shopify-product-catalogue-8k"
+
+
+__all__ = [
+    "ProductMetadata",
+    "ShopifyProductCatalogue",
+    "ShopifyProductCatalogue8k",
+]
diff --git a/tests/unit/dataset_manager/test_shopify_product_catalogue.py b/tests/unit/dataset_manager/test_shopify_product_catalogue.py
index a837970f..97f36ace 100644
--- a/tests/unit/dataset_manager/test_shopify_product_catalogue.py
+++ b/tests/unit/dataset_manager/test_shopify_product_catalogue.py
@@ -15,6 +15,10 @@
 
 """Unit tests for Shopify product catalogue dataset initialization and transforms."""
 
+import pytest
+
+pytestmark = pytest.mark.unit
+
 import base64
 import json
 from io import BytesIO
@@ -23,10 +27,12 @@
 from unittest.mock import patch
 
 import pandas as pd
-import pytest
 from inference_endpoint.dataset_manager.predefined.shopify_product_catalogue import (
+    BaseShopifyProductCatalogue,
     ShopifyProductCatalogue,
+    ShopifyProductCatalogue8k,
 )
+from inference_endpoint.dataset_manager.dataset import Dataset
 from inference_endpoint.dataset_manager.predefined.shopify_product_catalogue.presets import (
     ShopifyMultimodalFormatter,
     q3vl,
@@ -41,6 +47,27 @@ def _make_pil_image(image_format: str = "JPEG") -> Image.Image:
     return img
 
 
+@pytest.fixture
+def mock_hf_dataset() -> list[dict]:
+    """Synthetic HuggingFace-style dataset used as the return value when mocking load_dataset.
+
+    Returns a list of sample dicts (indexable like HF Dataset via ds[i]).
+    """
+    return [
+        _make_mock_hf_row(
+            product_title="Shirt A",
+            product_description="Blue cotton shirt",
+            ground_truth_category="Clothing > Shirts > Polo",
+        ),
+        _make_mock_hf_row(
+            product_title="Shirt B",
+            product_description="Red silk shirt",
+            product_image=_make_pil_image("PNG"),
+            ground_truth_category="Clothing > Shirts > Dress",
+        ),
+    ]
+
+
 def _make_mock_hf_row(
     *,
     product_title: str = "Test Product",
@@ -74,26 +101,6 @@ def _make_mock_hf_row(
 class TestShopifyProductCatalogueGenerate:
     """Tests for ShopifyProductCatalogue.generate()."""
 
-    @pytest.fixture
-    def mock_hf_dataset(self) -> list[dict]:
-        """Synthetic HuggingFace-style dataset for mocking load_from_huggingface.
-
-        Returns a list of sample dicts (indexable like HF Dataset via ds[i]).
-        """
-        return [
-            _make_mock_hf_row(
-                product_title="Shirt A",
-                product_description="Blue cotton shirt",
-                ground_truth_category="Clothing > Shirts > Polo",
-            ),
-            _make_mock_hf_row(
-                product_title="Shirt B",
-                product_description="Red silk shirt",
-                product_image=_make_pil_image("PNG"),
-                ground_truth_category="Clothing > Shirts > Dress",
-            ),
-        ]
-
     def test_generate_produces_expected_columns(
         self, tmp_path: Path, mock_hf_dataset: list[dict]
     ) -> None:
@@ -354,3 +361,103 @@ def test_get_dataloader_with_q3vl_preset(self, tmp_path: Path) -> None:
         assert "system" in sample
         assert "prompt" in sample
         assert isinstance(sample["prompt"], list)
+
+
+class TestShopifyProductCatalogue8k:
+    """Tests for ShopifyProductCatalogue8k class."""
+
+    def test_class_inherits_from_base(self) -> None:
+        """ShopifyProductCatalogue8k inherits from BaseShopifyProductCatalogue."""
+        assert issubclass(ShopifyProductCatalogue8k, BaseShopifyProductCatalogue)
+
+    def test_has_correct_repo_id(self) -> None:
+        """REPO_ID points to nvidia/Shopify-product-catalogue-8k."""
+        assert ShopifyProductCatalogue8k.REPO_ID == "nvidia/Shopify-product-catalogue-8k"
+
+    def test_has_correct_dataset_id(self) -> None:
+        """DATASET_ID is shopify_product_catalogue_8k."""
+        assert ShopifyProductCatalogue8k.DATASET_ID == "shopify_product_catalogue_8k"
+
+    def test_registered_in_dataset_predefined(self) -> None:
+        """Class is auto-registered in Dataset.PREDEFINED."""
+        assert "shopify_product_catalogue_8k" in Dataset.PREDEFINED
+        assert Dataset.PREDEFINED["shopify_product_catalogue_8k"] is ShopifyProductCatalogue8k
+
+    def test_shares_column_names_with_base(self) -> None:
+        """Column names are identical to ShopifyProductCatalogue."""
+        assert ShopifyProductCatalogue8k.COLUMN_NAMES == ShopifyProductCatalogue.COLUMN_NAMES
+
+    def test_shares_presets_with_base(self) -> None:
+        """Presets are shared with base class (q3vl works)."""
+        assert ShopifyProductCatalogue8k.PRESETS is ShopifyProductCatalogue.PRESETS
+        assert hasattr(ShopifyProductCatalogue8k.PRESETS, "q3vl")
+
+    def test_generate_uses_correct_repo_id(
+        self, tmp_path: Path, mock_hf_dataset: list[dict]
+    ) -> None:
+        """Generate uses the correct REPO_ID for 8k variant."""
+        with patch(
+            "inference_endpoint.dataset_manager.predefined.shopify_product_catalogue.load_dataset",
+            return_value=mock_hf_dataset,
+        ) as mock_load:
+            df = ShopifyProductCatalogue8k.generate(
+                datasets_dir=tmp_path,
+                split=["train"],
+                force=True,
+            )
+        # Verify load_dataset was called with the 8k repo ID
+        mock_load.assert_called_once()
+        assert mock_load.call_args.args[0] == "nvidia/Shopify-product-catalogue-8k"
+        # Verify output has correct columns
+        assert list(df.columns) == ShopifyProductCatalogue8k.COLUMN_NAMES
+
+    def test_generate_uses_correct_dataset_id_for_paths(
+        self, tmp_path: Path, mock_hf_dataset: list[dict]
+    ) -> None:
+        """Generated files use the 8k dataset_id in paths."""
+        with patch(
+            "inference_endpoint.dataset_manager.predefined.shopify_product_catalogue.load_dataset",
+            return_value=mock_hf_dataset,
+        ):
+            ShopifyProductCatalogue8k.generate(
+                datasets_dir=tmp_path,
+                split=["train"],
+                force=True,
+            )
+        # Verify cache path uses shopify_product_catalogue_8k
+        expected_path = tmp_path / "shopify_product_catalogue_8k" / "train" / "shopify_product_catalogue_8k_train.parquet"
+        assert expected_path.exists()
+
+    def test_get_dataloader_with_q3vl_preset(self, tmp_path: Path) -> None:
+        """get_dataloader with q3vl preset works for 8k variant."""
+        mock_df = pd.DataFrame(
+            [
+                {
+                    "product_title": "T1",
+                    "product_description": "D1",
+                    "product_image_base64": "YQ==",
+                    "product_image_format": "JPEG",
+                    "potential_product_categories": "[]",
+                    "ground_truth_category": "A > B",
+                    "ground_truth_brand": "Brand",
+                    "ground_truth_is_secondhand": "false",
+                }
+            ]
+        )
+        with patch.object(
+            ShopifyProductCatalogue8k,
+            "generate",
+            return_value=mock_df,
+        ):
+            loader = ShopifyProductCatalogue8k.get_dataloader(
+                datasets_dir=tmp_path,
+                transforms=q3vl(),
+                num_repeats=1,
+                force_regenerate=True,
+            )
+        loader.load()
+        assert loader.num_samples() == 1
+        sample = loader.load_sample(0)
+        assert "system" in sample
+        assert "prompt" in sample
+        assert isinstance(sample["prompt"], list)

From e0fbc01c792d268b8d0f13545ae54cc60fc6aeb9 Mon Sep 17 00:00:00 2001
From: mingyuanm <mingyuanm@nvidia.com>
Date: Thu, 7 May 2026 11:35:08 -0700
Subject: [PATCH 2/8] Update yaml configs

Signed-off-by: Mingyuan Ma <mingyuanm@nvidia.com>
---
 ...ractive_qwen3_vl_235b_a22b_shopify_8k.yaml | 45 +++++++++++++++++++
 .../offline_qwen3_vl_235b_a22b_shopify.yaml   |  4 +-
 .../online_qwen3_vl_235b_a22b_shopify.yaml    |  2 +-
 3 files changed, 48 insertions(+), 3 deletions(-)
 create mode 100644 examples/08_Qwen3-VL-235B-A22B_Example/interactive_qwen3_vl_235b_a22b_shopify_8k.yaml

diff --git a/examples/08_Qwen3-VL-235B-A22B_Example/interactive_qwen3_vl_235b_a22b_shopify_8k.yaml b/examples/08_Qwen3-VL-235B-A22B_Example/interactive_qwen3_vl_235b_a22b_shopify_8k.yaml
new file mode 100644
index 00000000..35180dee
--- /dev/null
+++ b/examples/08_Qwen3-VL-235B-A22B_Example/interactive_qwen3_vl_235b_a22b_shopify_8k.yaml
@@ -0,0 +1,45 @@
+# Interactive Benchmark - Qwen3-VL-235B-A22B on Shopify Product Catalogue 8k
+# Use this for interactive testing and development with the smaller 8k dataset
+name: "interactive-qwen3-vl-235b-a22b-shopify-8k-benchmark"
+version: "1.0"
+type: "online"
+timeout: 1800 # 30 minutes for quick interactive runs
+
+model_params:
+  name: "Qwen/Qwen3-VL-235B-A22B-Instruct"
+  # tokenizer_name: "Qwen/Qwen3-VL-235B-A22B-Instruct"  # Set this if model name is a local/container path
+  temperature: 0
+  top_p: 1
+  max_new_tokens: 150
+  streaming: "on" # Required for TTFT/TPOT measurement
+
+datasets:
+  - name: shopify_product_catalogue_8k::q3vl
+    type: "performance"
+
+settings:
+  runtime:
+    min_duration_ms: 60000 # 1 minute for quick tests
+    n_samples_to_issue: 100 # Small batch for interactive testing (increase to 8000 for full run)
+    scheduler_random_seed: 42
+    dataloader_random_seed: 42
+
+  load_pattern:
+    type: "poisson"
+    target_qps: 6.5
+
+  client:
+    num_workers: 2
+    transport:
+      type: zmq
+      recv_buffer_size: 16777216
+      send_buffer_size: 16777216
+    max_connections: 1000
+    worker_initialization_timeout: 120
+
+endpoint_config:
+  endpoints:
+    - "http://localhost:8000"
+  api_key: null
+
+report_dir: results/qwen3_vl_235b_a22b_shopify_8k_benchmark_interactive/
diff --git a/examples/08_Qwen3-VL-235B-A22B_Example/offline_qwen3_vl_235b_a22b_shopify.yaml b/examples/08_Qwen3-VL-235B-A22B_Example/offline_qwen3_vl_235b_a22b_shopify.yaml
index 95445781..2cb3d669 100644
--- a/examples/08_Qwen3-VL-235B-A22B_Example/offline_qwen3_vl_235b_a22b_shopify.yaml
+++ b/examples/08_Qwen3-VL-235B-A22B_Example/offline_qwen3_vl_235b_a22b_shopify.yaml
@@ -25,7 +25,7 @@ datasets:
 settings:
   runtime:
     min_duration_ms: 600000 # 10 minute
-    n_samples_to_issue: 100 # Limit queries for testing (remove or increase for full run)
+    n_samples_to_issue: 48289 # Full dataset size for this benchmark; lower this (e.g., 1000 or 100) for quick tests, or remove for an unconstrained full run
     scheduler_random_seed: 42 # For Poisson/distribution sampling
     dataloader_random_seed: 42 # For dataset shuffling
 
@@ -33,7 +33,7 @@ settings:
     type: "max_throughput"
 
   client:
-    num_workers: 2
+    num_workers: 5
     transport:
       type: zmq
       recv_buffer_size: 16777216
diff --git a/examples/08_Qwen3-VL-235B-A22B_Example/online_qwen3_vl_235b_a22b_shopify.yaml b/examples/08_Qwen3-VL-235B-A22B_Example/online_qwen3_vl_235b_a22b_shopify.yaml
index db23f163..badf8fb8 100644
--- a/examples/08_Qwen3-VL-235B-A22B_Example/online_qwen3_vl_235b_a22b_shopify.yaml
+++ b/examples/08_Qwen3-VL-235B-A22B_Example/online_qwen3_vl_235b_a22b_shopify.yaml
@@ -27,7 +27,7 @@ settings:
     target_qps: 6.5
 
   client:
-    num_workers: 2
+    num_workers: 5
     transport:
       type: zmq
       recv_buffer_size: 16777216

From cfa4de2462cfcf70e3eaf23f0a777930e6e36303 Mon Sep 17 00:00:00 2001
From: mingyuanm <mingyuanm@nvidia.com>
Date: Thu, 7 May 2026 11:51:16 -0700
Subject: [PATCH 3/8] Train split only for the subset

Signed-off-by: Mingyuan Ma <mingyuanm@nvidia.com>
---
 .../shopify_product_catalogue/__init__.py       | 17 ++++++++++++-----
 .../test_shopify_product_catalogue.py           |  5 +++++
 2 files changed, 17 insertions(+), 5 deletions(-)

diff --git a/src/inference_endpoint/dataset_manager/predefined/shopify_product_catalogue/__init__.py b/src/inference_endpoint/dataset_manager/predefined/shopify_product_catalogue/__init__.py
index 9051d9a5..7b8b6723 100644
--- a/src/inference_endpoint/dataset_manager/predefined/shopify_product_catalogue/__init__.py
+++ b/src/inference_endpoint/dataset_manager/predefined/shopify_product_catalogue/__init__.py
@@ -21,7 +21,7 @@
 from io import BytesIO
 from logging import getLogger
 from pathlib import Path
-from typing import Any
+from typing import Any, ClassVar
 
 import pandas as pd
 from datasets import load_dataset
@@ -71,8 +71,8 @@ class BaseShopifyProductCatalogue(Dataset, ABC):
     """Abstract base class for Shopify product catalogue datasets.
 
     Contains shared logic for downloading and processing product catalogue
-    data from HuggingFace. Subclasses only need to define REPO_ID and
-    dataset_id.
+    data from HuggingFace. Subclasses only need to define REPO_ID,
+    dataset_id, and optionally DEFAULT_SPLITS.
     """
 
     COLUMN_NAMES = [
@@ -91,6 +91,9 @@ class BaseShopifyProductCatalogue(Dataset, ABC):
     REPO_ID: str
     """HuggingFace dataset repository ID. Must be set by subclass."""
 
+    DEFAULT_SPLITS: ClassVar[list[str]] = ["train", "test"]
+    """Default splits to load when split is not specified. Override in subclass if needed."""
+
     @classmethod
     def generate(
         cls,
@@ -108,7 +111,7 @@ def generate(
 
         Args:
             datasets_dir: Directory to save transformed dataset.
-            split: Splits to load (e.g. ["train", "test"]). Defaults to ["train", "test"].
+            split: Splits to load (e.g. ["train", "test"]). Defaults to DEFAULT_SPLITS.
             force: Regenerate even if file exists.
             token: HuggingFace token for gated datasets.
             revision: Dataset revision/branch. Defaults to "main".
@@ -118,7 +121,7 @@ def generate(
             product_image_format, potential_product_categories.
         """
         if split is None:
-            split = ["train", "test"]
+            split = cls.DEFAULT_SPLITS
         split_key = "+".join(split)
         filename = f"{cls.DATASET_ID}_{split_key}.parquet"
         dst_path = datasets_dir / cls.DATASET_ID / split_key / filename
@@ -181,10 +184,14 @@ class ShopifyProductCatalogue8k(
 
     Each sample includes product image, title, description, and candidate categories.
     Compatible with OpenAI multimodal adapter (prompt/system with vision content).
+
+    Note: This dataset only has a train split (no test split).
     """
 
     REPO_ID = "nvidia/Shopify-product-catalogue-8k"
 
+    DEFAULT_SPLITS = ["train"]
+
 
 __all__ = [
     "ProductMetadata",
diff --git a/tests/unit/dataset_manager/test_shopify_product_catalogue.py b/tests/unit/dataset_manager/test_shopify_product_catalogue.py
index 97f36ace..8b7c0831 100644
--- a/tests/unit/dataset_manager/test_shopify_product_catalogue.py
+++ b/tests/unit/dataset_manager/test_shopify_product_catalogue.py
@@ -392,6 +392,11 @@ def test_shares_presets_with_base(self) -> None:
         assert ShopifyProductCatalogue8k.PRESETS is ShopifyProductCatalogue.PRESETS
         assert hasattr(ShopifyProductCatalogue8k.PRESETS, "q3vl")
 
+    def test_default_splits_is_train_only(self) -> None:
+        """8k variant defaults to train split only (no test split available)."""
+        assert ShopifyProductCatalogue8k.DEFAULT_SPLITS == ["train"]
+        assert ShopifyProductCatalogue.DEFAULT_SPLITS == ["train", "test"]
+
     def test_generate_uses_correct_repo_id(
         self, tmp_path: Path, mock_hf_dataset: list[dict]
     ) -> None:

From a9ad840eb3b099372e490be0a51135ab0fce9fcf Mon Sep 17 00:00:00 2001
From: mingyuanm <mingyuanm@nvidia.com>
Date: Mon, 11 May 2026 11:15:50 -0700
Subject: [PATCH 4/8] Update default configs

Signed-off-by: Mingyuan Ma <mingyuanm@nvidia.com>
---
 .../interactive_qwen3_vl_235b_a22b_shopify_8k.yaml | 14 ++++++++++----
 .../online_qwen3_vl_235b_a22b_shopify.yaml         |  9 ++++++++-
 2 files changed, 18 insertions(+), 5 deletions(-)

diff --git a/examples/08_Qwen3-VL-235B-A22B_Example/interactive_qwen3_vl_235b_a22b_shopify_8k.yaml b/examples/08_Qwen3-VL-235B-A22B_Example/interactive_qwen3_vl_235b_a22b_shopify_8k.yaml
index 35180dee..3160dab4 100644
--- a/examples/08_Qwen3-VL-235B-A22B_Example/interactive_qwen3_vl_235b_a22b_shopify_8k.yaml
+++ b/examples/08_Qwen3-VL-235B-A22B_Example/interactive_qwen3_vl_235b_a22b_shopify_8k.yaml
@@ -16,20 +16,26 @@ model_params:
 datasets:
   - name: shopify_product_catalogue_8k::q3vl
     type: "performance"
+  - name: shopify_product_catalogue_8k::q3vl
+    type: "accuracy"
+    accuracy_config:
+      eval_method: "shopify_category_f1"
+      ground_truth: "ground_truth_category"
+      extractor: "identity_extractor"
+      num_repeats: 1
 
 settings:
   runtime:
-    min_duration_ms: 60000 # 1 minute for quick tests
-    n_samples_to_issue: 100 # Small batch for interactive testing (increase to 8000 for full run)
+    min_duration_ms: 600000 # 10 minutes
     scheduler_random_seed: 42
     dataloader_random_seed: 42
 
   load_pattern:
     type: "poisson"
-    target_qps: 6.5
+    target_qps: 5
 
   client:
-    num_workers: 2
+    num_workers: 5
     transport:
       type: zmq
       recv_buffer_size: 16777216
diff --git a/examples/08_Qwen3-VL-235B-A22B_Example/online_qwen3_vl_235b_a22b_shopify.yaml b/examples/08_Qwen3-VL-235B-A22B_Example/online_qwen3_vl_235b_a22b_shopify.yaml
index badf8fb8..0d8f8c6a 100644
--- a/examples/08_Qwen3-VL-235B-A22B_Example/online_qwen3_vl_235b_a22b_shopify.yaml
+++ b/examples/08_Qwen3-VL-235B-A22B_Example/online_qwen3_vl_235b_a22b_shopify.yaml
@@ -14,6 +14,13 @@ model_params:
 datasets:
   - name: shopify_product_catalogue::q3vl
     type: "performance"
+  - name: shopify_product_catalogue::q3vl
+    type: "accuracy"
+    accuracy_config:
+      eval_method: "shopify_category_f1"
+      ground_truth: "ground_truth_category"
+      extractor: "identity_extractor"
+      num_repeats: 1
 
 settings:
   runtime:
@@ -24,7 +31,7 @@ settings:
 
   load_pattern:
     type: "poisson"
-    target_qps: 6.5
+    target_qps: 6
 
   client:
     num_workers: 5

From 22541d67f93dcc877d1d17750a40a4712dc2013f Mon Sep 17 00:00:00 2001
From: Mingyuan Ma <mingyuanm@nvidia.com>
Date: Wed, 13 May 2026 09:37:38 -0700
Subject: [PATCH 5/8] Apply pre-commit formatting fixes

Signed-off-by: Mingyuan Ma <mingyuanm@nvidia.com>
---
 docs/dataset_manager/DESIGN.md                | 18 +++++++--------
 .../dataset_manager/__init__.py               |  5 +++-
 .../shopify_product_catalogue/__init__.py     |  7 +++---
 .../test_shopify_product_catalogue.py         | 23 +++++++++++++++----
 4 files changed, 34 insertions(+), 19 deletions(-)

diff --git a/docs/dataset_manager/DESIGN.md b/docs/dataset_manager/DESIGN.md
index 024950e2..e1c8c6d1 100644
--- a/docs/dataset_manager/DESIGN.md
+++ b/docs/dataset_manager/DESIGN.md
@@ -103,16 +103,16 @@ Transforms are composed in order; each receives the output of the previous.
 Registered in `dataset.py` under `Dataset.PREDEFINED`. Referenced by name in rulesets and YAML
 configs. Each predefined dataset ships with default transforms for supported model families.
 
-| Name                        | Source        | Notes                                      |
-| --------------------------- | ------------- | ------------------------------------------ |
-| `aime25`                    | AIME 2025     | Math reasoning                             |
-| `gpqa`                      | GPQA Diamond  | Science QA                                 |
-| `cnndailymail`              | CNN/DailyMail | Summarization                              |
-| `open_orca`                 | OpenOrca      | General instruction                        |
-| `livecodebench`             | LiveCodeBench | Code generation; requires additional setup |
-| `shopify_product_catalogue`    | Shopify       | E-commerce Q&A (q3vl)                      |
+| Name                           | Source        | Notes                                                 |
+| ------------------------------ | ------------- | ----------------------------------------------------- |
+| `aime25`                       | AIME 2025     | Math reasoning                                        |
+| `gpqa`                         | GPQA Diamond  | Science QA                                            |
+| `cnndailymail`                 | CNN/DailyMail | Summarization                                         |
+| `open_orca`                    | OpenOrca      | General instruction                                   |
+| `livecodebench`                | LiveCodeBench | Code generation; requires additional setup            |
+| `shopify_product_catalogue`    | Shopify       | E-commerce Q&A (q3vl)                                 |
 | `shopify_product_catalogue_8k` | Shopify       | 8k sample variant of Shopify product catalogue (q3vl) |
-| `random`                       | Synthetic     | Generated prompts for throughput testing   |
+| `random`                       | Synthetic     | Generated prompts for throughput testing              |
 
 ## Preset System
 
diff --git a/src/inference_endpoint/dataset_manager/__init__.py b/src/inference_endpoint/dataset_manager/__init__.py
index cf675bea..6a78dae2 100644
--- a/src/inference_endpoint/dataset_manager/__init__.py
+++ b/src/inference_endpoint/dataset_manager/__init__.py
@@ -27,7 +27,10 @@
 from .predefined.livecodebench import LiveCodeBench
 from .predefined.open_orca import OpenOrca
 from .predefined.random import RandomDataset
-from .predefined.shopify_product_catalogue import ShopifyProductCatalogue, ShopifyProductCatalogue8k
+from .predefined.shopify_product_catalogue import (
+    ShopifyProductCatalogue,
+    ShopifyProductCatalogue8k,
+)
 from .transforms import (
     AddStaticColumns,
     ColumnFilter,
diff --git a/src/inference_endpoint/dataset_manager/predefined/shopify_product_catalogue/__init__.py b/src/inference_endpoint/dataset_manager/predefined/shopify_product_catalogue/__init__.py
index 7b8b6723..a8981198 100644
--- a/src/inference_endpoint/dataset_manager/predefined/shopify_product_catalogue/__init__.py
+++ b/src/inference_endpoint/dataset_manager/predefined/shopify_product_catalogue/__init__.py
@@ -24,9 +24,10 @@
 from typing import Any, ClassVar
 
 import pandas as pd
-from datasets import load_dataset
 from tqdm import tqdm
 
+from datasets import load_dataset
+
 from ...dataset import Dataset
 from . import presets
 from .metadata import ProductMetadata
@@ -139,9 +140,7 @@ def generate(
             load_options["revision"] = revision
 
         ds = load_dataset(cls.REPO_ID, split=split_key, **load_options)
-        logger.info(
-            f"Loaded {len(ds)} samples from {cls.REPO_ID} ({split_key})"
-        )
+        logger.info(f"Loaded {len(ds)} samples from {cls.REPO_ID} ({split_key})")
 
         all_rows: list[dict[str, Any]] = []
         for i in tqdm(
diff --git a/tests/unit/dataset_manager/test_shopify_product_catalogue.py b/tests/unit/dataset_manager/test_shopify_product_catalogue.py
index 8b7c0831..ac4b1f7f 100644
--- a/tests/unit/dataset_manager/test_shopify_product_catalogue.py
+++ b/tests/unit/dataset_manager/test_shopify_product_catalogue.py
@@ -27,12 +27,12 @@
 from unittest.mock import patch
 
 import pandas as pd
+from inference_endpoint.dataset_manager.dataset import Dataset
 from inference_endpoint.dataset_manager.predefined.shopify_product_catalogue import (
     BaseShopifyProductCatalogue,
     ShopifyProductCatalogue,
     ShopifyProductCatalogue8k,
 )
-from inference_endpoint.dataset_manager.dataset import Dataset
 from inference_endpoint.dataset_manager.predefined.shopify_product_catalogue.presets import (
     ShopifyMultimodalFormatter,
     q3vl,
@@ -372,7 +372,9 @@ def test_class_inherits_from_base(self) -> None:
 
     def test_has_correct_repo_id(self) -> None:
         """REPO_ID points to nvidia/Shopify-product-catalogue-8k."""
-        assert ShopifyProductCatalogue8k.REPO_ID == "nvidia/Shopify-product-catalogue-8k"
+        assert (
+            ShopifyProductCatalogue8k.REPO_ID == "nvidia/Shopify-product-catalogue-8k"
+        )
 
     def test_has_correct_dataset_id(self) -> None:
         """DATASET_ID is shopify_product_catalogue_8k."""
@@ -381,11 +383,17 @@ def test_has_correct_dataset_id(self) -> None:
     def test_registered_in_dataset_predefined(self) -> None:
         """Class is auto-registered in Dataset.PREDEFINED."""
         assert "shopify_product_catalogue_8k" in Dataset.PREDEFINED
-        assert Dataset.PREDEFINED["shopify_product_catalogue_8k"] is ShopifyProductCatalogue8k
+        assert (
+            Dataset.PREDEFINED["shopify_product_catalogue_8k"]
+            is ShopifyProductCatalogue8k
+        )
 
     def test_shares_column_names_with_base(self) -> None:
         """Column names are identical to ShopifyProductCatalogue."""
-        assert ShopifyProductCatalogue8k.COLUMN_NAMES == ShopifyProductCatalogue.COLUMN_NAMES
+        assert (
+            ShopifyProductCatalogue8k.COLUMN_NAMES
+            == ShopifyProductCatalogue.COLUMN_NAMES
+        )
 
     def test_shares_presets_with_base(self) -> None:
         """Presets are shared with base class (q3vl works)."""
@@ -430,7 +438,12 @@ def test_generate_uses_correct_dataset_id_for_paths(
                 force=True,
             )
         # Verify cache path uses shopify_product_catalogue_8k
-        expected_path = tmp_path / "shopify_product_catalogue_8k" / "train" / "shopify_product_catalogue_8k_train.parquet"
+        expected_path = (
+            tmp_path
+            / "shopify_product_catalogue_8k"
+            / "train"
+            / "shopify_product_catalogue_8k_train.parquet"
+        )
         assert expected_path.exists()
 
     def test_get_dataloader_with_q3vl_preset(self, tmp_path: Path) -> None:

From 2221e2f0e850d7b01df7dae668513093e0276078 Mon Sep 17 00:00:00 2001
From: Mingyuan Ma <mingyuanm@nvidia.com>
Date: Wed, 13 May 2026 09:49:25 -0700
Subject: [PATCH 6/8] fix import lines

Signed-off-by: Mingyuan Ma <mingyuanm@nvidia.com>
---
 .../unit/dataset_manager/test_shopify_product_catalogue.py | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/tests/unit/dataset_manager/test_shopify_product_catalogue.py b/tests/unit/dataset_manager/test_shopify_product_catalogue.py
index ac4b1f7f..865efac1 100644
--- a/tests/unit/dataset_manager/test_shopify_product_catalogue.py
+++ b/tests/unit/dataset_manager/test_shopify_product_catalogue.py
@@ -15,10 +15,6 @@
 
 """Unit tests for Shopify product catalogue dataset initialization and transforms."""
 
-import pytest
-
-pytestmark = pytest.mark.unit
-
 import base64
 import json
 from io import BytesIO
@@ -27,6 +23,7 @@
 from unittest.mock import patch
 
 import pandas as pd
+import pytest
 from inference_endpoint.dataset_manager.dataset import Dataset
 from inference_endpoint.dataset_manager.predefined.shopify_product_catalogue import (
     BaseShopifyProductCatalogue,
@@ -39,6 +36,8 @@
 )
 from PIL import Image
 
+pytestmark = pytest.mark.unit
+
 
 def _make_pil_image(image_format: str = "JPEG") -> Image.Image:
     """Create a minimal 1x1 PIL Image with the given format attribute set."""

From 0239d029c8ee709b94d518ac48d2e6b333ce31f5 Mon Sep 17 00:00:00 2001
From: Mingyuan Ma <mingyuanm@nvidia.com>
Date: Wed, 13 May 2026 09:55:52 -0700
Subject: [PATCH 7/8] Apply pre-commit import sorting to shopify_product_catalogue

Signed-off-by: Mingyuan Ma <mingyuanm@nvidia.com>
---
 .../predefined/shopify_product_catalogue/__init__.py           | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/src/inference_endpoint/dataset_manager/predefined/shopify_product_catalogue/__init__.py b/src/inference_endpoint/dataset_manager/predefined/shopify_product_catalogue/__init__.py
index a8981198..9461b4e5 100644
--- a/src/inference_endpoint/dataset_manager/predefined/shopify_product_catalogue/__init__.py
+++ b/src/inference_endpoint/dataset_manager/predefined/shopify_product_catalogue/__init__.py
@@ -24,9 +24,8 @@
 from typing import Any, ClassVar
 
 import pandas as pd
-from tqdm import tqdm
-
 from datasets import load_dataset
+from tqdm import tqdm
 
 from ...dataset import Dataset
 from . import presets

From fc297130ca316ea125c4f241501c5f7f71b39590 Mon Sep 17 00:00:00 2001
From: Mingyuan Ma <mingyuanm@nvidia.com>
Date: Wed, 13 May 2026 15:08:28 -0700
Subject: [PATCH 8/8] Drop explicit temperature in Qwen3-VL examples (use default None)

Signed-off-by: Mingyuan Ma <mingyuanm@nvidia.com>
---
 .../interactive_qwen3_vl_235b_a22b_shopify_8k.yaml              | 2 --
 .../offline_qwen3_vl_235b_a22b_shopify.yaml                     | 1 -
 .../online_qwen3_vl_235b_a22b_shopify.yaml                      | 1 -
 3 files changed, 4 deletions(-)

diff --git a/examples/08_Qwen3-VL-235B-A22B_Example/interactive_qwen3_vl_235b_a22b_shopify_8k.yaml b/examples/08_Qwen3-VL-235B-A22B_Example/interactive_qwen3_vl_235b_a22b_shopify_8k.yaml
index 3160dab4..a311a21c 100644
--- a/examples/08_Qwen3-VL-235B-A22B_Example/interactive_qwen3_vl_235b_a22b_shopify_8k.yaml
+++ b/examples/08_Qwen3-VL-235B-A22B_Example/interactive_qwen3_vl_235b_a22b_shopify_8k.yaml
@@ -7,8 +7,6 @@ timeout: 1800 # 30 minutes for quick interactive runs
 
 model_params:
   name: "Qwen/Qwen3-VL-235B-A22B-Instruct"
-  # tokenizer_name: "Qwen/Qwen3-VL-235B-A22B-Instruct"  # Set this if model name is a local/container path
-  temperature: 0
   top_p: 1
   max_new_tokens: 150
   streaming: "on" # Required for TTFT/TPOT measurement
diff --git a/examples/08_Qwen3-VL-235B-A22B_Example/offline_qwen3_vl_235b_a22b_shopify.yaml b/examples/08_Qwen3-VL-235B-A22B_Example/offline_qwen3_vl_235b_a22b_shopify.yaml
index 2cb3d669..a9e1c606 100644
--- a/examples/08_Qwen3-VL-235B-A22B_Example/offline_qwen3_vl_235b_a22b_shopify.yaml
+++ b/examples/08_Qwen3-VL-235B-A22B_Example/offline_qwen3_vl_235b_a22b_shopify.yaml
@@ -7,7 +7,6 @@ timeout: 14400 # Perf + acc run takes over 3 hours, consider limit n_samples_to_
 
 model_params:
   name: "Qwen/Qwen3-VL-235B-A22B-Instruct"
-  temperature: 0
   top_p: 1
   max_new_tokens: 150
 
diff --git a/examples/08_Qwen3-VL-235B-A22B_Example/online_qwen3_vl_235b_a22b_shopify.yaml b/examples/08_Qwen3-VL-235B-A22B_Example/online_qwen3_vl_235b_a22b_shopify.yaml
index 0d8f8c6a..f2c4b4b7 100644
--- a/examples/08_Qwen3-VL-235B-A22B_Example/online_qwen3_vl_235b_a22b_shopify.yaml
+++ b/examples/08_Qwen3-VL-235B-A22B_Example/online_qwen3_vl_235b_a22b_shopify.yaml
@@ -6,7 +6,6 @@ timeout: 14400
 
 model_params:
   name: "Qwen/Qwen3-VL-235B-A22B-Instruct"
-  temperature: 0
   top_p: 1
   max_new_tokens: 150
   streaming: "on" # Required for TTFT/TPOT measurement