From 4691744fb809e043b6d3a45bb0cdd04eba05bc09 Mon Sep 17 00:00:00 2001 From: mingyuanm Date: Thu, 7 May 2026 11:30:21 -0700 Subject: [PATCH 1/8] Add 8k subset to predefined dataset list Signed-off-by: Mingyuan Ma --- docs/dataset_manager/DESIGN.md | 5 +- .../dataset_manager/__init__.py | 3 +- .../shopify_product_catalogue/__init__.py | 58 +++++-- .../test_shopify_product_catalogue.py | 149 +++++++++++++++--- 4 files changed, 178 insertions(+), 37 deletions(-) diff --git a/docs/dataset_manager/DESIGN.md b/docs/dataset_manager/DESIGN.md index 5650e601..024950e2 100644 --- a/docs/dataset_manager/DESIGN.md +++ b/docs/dataset_manager/DESIGN.md @@ -110,8 +110,9 @@ configs. Each predefined dataset ships with default transforms for supported mod | `cnndailymail` | CNN/DailyMail | Summarization | | `open_orca` | OpenOrca | General instruction | | `livecodebench` | LiveCodeBench | Code generation; requires additional setup | -| `shopify_product_catalogue` | Shopify | E-commerce Q&A (q3vl) | -| `random` | Synthetic | Generated prompts for throughput testing | +| `shopify_product_catalogue` | Shopify | E-commerce Q&A (q3vl) | +| `shopify_product_catalogue_8k` | Shopify | 8k sample variant of Shopify product catalogue (q3vl) | +| `random` | Synthetic | Generated prompts for throughput testing | ## Preset System diff --git a/src/inference_endpoint/dataset_manager/__init__.py b/src/inference_endpoint/dataset_manager/__init__.py index 4bb6c575..cf675bea 100644 --- a/src/inference_endpoint/dataset_manager/__init__.py +++ b/src/inference_endpoint/dataset_manager/__init__.py @@ -27,7 +27,7 @@ from .predefined.livecodebench import LiveCodeBench from .predefined.open_orca import OpenOrca from .predefined.random import RandomDataset -from .predefined.shopify_product_catalogue import ShopifyProductCatalogue +from .predefined.shopify_product_catalogue import ShopifyProductCatalogue, ShopifyProductCatalogue8k from .transforms import ( AddStaticColumns, ColumnFilter, @@ -58,4 +58,5 @@ "CNNDailyMail", "RandomDataset", "ShopifyProductCatalogue", + "ShopifyProductCatalogue8k", ] diff --git a/src/inference_endpoint/dataset_manager/predefined/shopify_product_catalogue/__init__.py b/src/inference_endpoint/dataset_manager/predefined/shopify_product_catalogue/__init__.py index 6bae9f43..9051d9a5 100644 --- a/src/inference_endpoint/dataset_manager/predefined/shopify_product_catalogue/__init__.py +++ b/src/inference_endpoint/dataset_manager/predefined/shopify_product_catalogue/__init__.py @@ -13,10 +13,11 @@ # See the License for the specific language governing permissions and # limitations under the License. -"""Shopify product catalogue dataset for multimodal product taxonomy classification.""" +"""Shopify product catalogue datasets for multimodal product taxonomy classification.""" import base64 import json +from abc import ABC from io import BytesIO from logging import getLogger from pathlib import Path @@ -66,16 +67,12 @@ def _process_sample_to_row(sample: dict[str, Any]) -> dict[str, Any]: } -class ShopifyProductCatalogue( - Dataset, - dataset_id="shopify_product_catalogue", -): - """Shopify product catalogue: multimodal benchmark for product taxonomy classification. +class BaseShopifyProductCatalogue(Dataset, ABC): + """Abstract base class for Shopify product catalogue datasets. - Reference: https://huggingface.co/datasets/Shopify/product-catalogue - - Each sample includes product image, title, description, and candidate categories. - Compatible with OpenAI multimodal adapter (prompt/system with vision content). + Contains shared logic for downloading and processing product catalogue + data from HuggingFace. Subclasses only need to define REPO_ID and + dataset_id. """ COLUMN_NAMES = [ @@ -91,7 +88,8 @@ class ShopifyProductCatalogue( PRESETS = presets - REPO_ID = "Shopify/product-catalogue" + REPO_ID: str + """HuggingFace dataset repository ID. Must be set by subclass.""" @classmethod def generate( @@ -139,7 +137,7 @@ def generate( ds = load_dataset(cls.REPO_ID, split=split_key, **load_options) logger.info( - f"Loaded {len(ds)} samples from Shopify product catalogue ({split_key})" + f"Loaded {len(ds)} samples from {cls.REPO_ID} ({split_key})" ) all_rows: list[dict[str, Any]] = [] @@ -158,4 +156,38 @@ def generate( return df -__all__ = ["ProductMetadata", "ShopifyProductCatalogue"] +class ShopifyProductCatalogue( + BaseShopifyProductCatalogue, + dataset_id="shopify_product_catalogue", +): + """Shopify product catalogue: multimodal benchmark for product taxonomy classification. + + Reference: https://huggingface.co/datasets/Shopify/product-catalogue + + Each sample includes product image, title, description, and candidate categories. + Compatible with OpenAI multimodal adapter (prompt/system with vision content). + """ + + REPO_ID = "Shopify/product-catalogue" + + +class ShopifyProductCatalogue8k( + BaseShopifyProductCatalogue, + dataset_id="shopify_product_catalogue_8k", +): + """Shopify product catalogue 8k: 8,000 sample variant for product taxonomy classification. + + Reference: https://huggingface.co/datasets/nvidia/Shopify-product-catalogue-8k + + Each sample includes product image, title, description, and candidate categories. + Compatible with OpenAI multimodal adapter (prompt/system with vision content). + """ + + REPO_ID = "nvidia/Shopify-product-catalogue-8k" + + +__all__ = [ + "ProductMetadata", + "ShopifyProductCatalogue", + "ShopifyProductCatalogue8k", +] diff --git a/tests/unit/dataset_manager/test_shopify_product_catalogue.py b/tests/unit/dataset_manager/test_shopify_product_catalogue.py index a837970f..97f36ace 100644 --- a/tests/unit/dataset_manager/test_shopify_product_catalogue.py +++ b/tests/unit/dataset_manager/test_shopify_product_catalogue.py @@ -15,6 +15,10 @@ """Unit tests for Shopify product catalogue dataset initialization and transforms.""" +import pytest + +pytestmark = pytest.mark.unit + import base64 import json from io import BytesIO @@ -23,10 +27,12 @@ from unittest.mock import patch import pandas as pd -import pytest from inference_endpoint.dataset_manager.predefined.shopify_product_catalogue import ( + BaseShopifyProductCatalogue, ShopifyProductCatalogue, + ShopifyProductCatalogue8k, ) +from inference_endpoint.dataset_manager.dataset import Dataset from inference_endpoint.dataset_manager.predefined.shopify_product_catalogue.presets import ( ShopifyMultimodalFormatter, q3vl, @@ -41,6 +47,27 @@ def _make_pil_image(image_format: str = "JPEG") -> Image.Image: return img +@pytest.fixture +def mock_hf_dataset() -> list[dict]: + """Synthetic HuggingFace-style dataset for mocking load_from_huggingface. + + Returns a list of sample dicts (indexable like HF Dataset via ds[i]). + """ + return [ + _make_mock_hf_row( + product_title="Shirt A", + product_description="Blue cotton shirt", + ground_truth_category="Clothing > Shirts > Polo", + ), + _make_mock_hf_row( + product_title="Shirt B", + product_description="Red silk shirt", + product_image=_make_pil_image("PNG"), + ground_truth_category="Clothing > Shirts > Dress", + ), + ] + + def _make_mock_hf_row( *, product_title: str = "Test Product", @@ -74,26 +101,6 @@ def _make_mock_hf_row( class TestShopifyProductCatalogueGenerate: """Tests for ShopifyProductCatalogue.generate().""" - @pytest.fixture - def mock_hf_dataset(self) -> list[dict]: - """Synthetic HuggingFace-style dataset for mocking load_from_huggingface. - - Returns a list of sample dicts (indexable like HF Dataset via ds[i]). - """ - return [ - _make_mock_hf_row( - product_title="Shirt A", - product_description="Blue cotton shirt", - ground_truth_category="Clothing > Shirts > Polo", - ), - _make_mock_hf_row( - product_title="Shirt B", - product_description="Red silk shirt", - product_image=_make_pil_image("PNG"), - ground_truth_category="Clothing > Shirts > Dress", - ), - ] - def test_generate_produces_expected_columns( self, tmp_path: Path, mock_hf_dataset: list[dict] ) -> None: @@ -354,3 +361,103 @@ def test_get_dataloader_with_q3vl_preset(self, tmp_path: Path) -> None: assert "system" in sample assert "prompt" in sample assert isinstance(sample["prompt"], list) + + +class TestShopifyProductCatalogue8k: + """Tests for ShopifyProductCatalogue8k class.""" + + def test_class_inherits_from_base(self) -> None: + """ShopifyProductCatalogue8k inherits from BaseShopifyProductCatalogue.""" + assert issubclass(ShopifyProductCatalogue8k, BaseShopifyProductCatalogue) + + def test_has_correct_repo_id(self) -> None: + """REPO_ID points to nvidia/Shopify-product-catalogue-8k.""" + assert ShopifyProductCatalogue8k.REPO_ID == "nvidia/Shopify-product-catalogue-8k" + + def test_has_correct_dataset_id(self) -> None: + """DATASET_ID is shopify_product_catalogue_8k.""" + assert ShopifyProductCatalogue8k.DATASET_ID == "shopify_product_catalogue_8k" + + def test_registered_in_dataset_predefined(self) -> None: + """Class is auto-registered in Dataset.PREDEFINED.""" + assert "shopify_product_catalogue_8k" in Dataset.PREDEFINED + assert Dataset.PREDEFINED["shopify_product_catalogue_8k"] is ShopifyProductCatalogue8k + + def test_shares_column_names_with_base(self) -> None: + """Column names are identical to ShopifyProductCatalogue.""" + assert ShopifyProductCatalogue8k.COLUMN_NAMES == ShopifyProductCatalogue.COLUMN_NAMES + + def test_shares_presets_with_base(self) -> None: + """Presets are shared with base class (q3vl works).""" + assert ShopifyProductCatalogue8k.PRESETS is ShopifyProductCatalogue.PRESETS + assert hasattr(ShopifyProductCatalogue8k.PRESETS, "q3vl") + + def test_generate_uses_correct_repo_id( + self, tmp_path: Path, mock_hf_dataset: list[dict] + ) -> None: + """Generate uses the correct REPO_ID for 8k variant.""" + with patch( + "inference_endpoint.dataset_manager.predefined.shopify_product_catalogue.load_dataset", + return_value=mock_hf_dataset, + ) as mock_load: + df = ShopifyProductCatalogue8k.generate( + datasets_dir=tmp_path, + split=["train"], + force=True, + ) + # Verify load_dataset was called with the 8k repo ID + mock_load.assert_called_once() + assert mock_load.call_args.args[0] == "nvidia/Shopify-product-catalogue-8k" + # Verify output has correct columns + assert list(df.columns) == ShopifyProductCatalogue8k.COLUMN_NAMES + + def test_generate_uses_correct_dataset_id_for_paths( + self, tmp_path: Path, mock_hf_dataset: list[dict] + ) -> None: + """Generated files use the 8k dataset_id in paths.""" + with patch( + "inference_endpoint.dataset_manager.predefined.shopify_product_catalogue.load_dataset", + return_value=mock_hf_dataset, + ): + ShopifyProductCatalogue8k.generate( + datasets_dir=tmp_path, + split=["train"], + force=True, + ) + # Verify cache path uses shopify_product_catalogue_8k + expected_path = tmp_path / "shopify_product_catalogue_8k" / "train" / "shopify_product_catalogue_8k_train.parquet" + assert expected_path.exists() + + def test_get_dataloader_with_q3vl_preset(self, tmp_path: Path) -> None: + """get_dataloader with q3vl preset works for 8k variant.""" + mock_df = pd.DataFrame( + [ + { + "product_title": "T1", + "product_description": "D1", + "product_image_base64": "YQ==", + "product_image_format": "JPEG", + "potential_product_categories": "[]", + "ground_truth_category": "A > B", + "ground_truth_brand": "Brand", + "ground_truth_is_secondhand": "false", + } + ] + ) + with patch.object( + ShopifyProductCatalogue8k, + "generate", + return_value=mock_df, + ): + loader = ShopifyProductCatalogue8k.get_dataloader( + datasets_dir=tmp_path, + transforms=q3vl(), + num_repeats=1, + force_regenerate=True, + ) + loader.load() + assert loader.num_samples() == 1 + sample = loader.load_sample(0) + assert "system" in sample + assert "prompt" in sample + assert isinstance(sample["prompt"], list) From e0fbc01c792d268b8d0f13545ae54cc60fc6aeb9 Mon Sep 17 00:00:00 2001 From: mingyuanm Date: Thu, 7 May 2026 11:35:08 -0700 Subject: [PATCH 2/8] Update yaml configs Signed-off-by: Mingyuan Ma --- ...ractive_qwen3_vl_235b_a22b_shopify_8k.yaml | 45 +++++++++++++++++++ .../offline_qwen3_vl_235b_a22b_shopify.yaml | 4 +- .../online_qwen3_vl_235b_a22b_shopify.yaml | 2 +- 3 files changed, 48 insertions(+), 3 deletions(-) create mode 100644 examples/08_Qwen3-VL-235B-A22B_Example/interactive_qwen3_vl_235b_a22b_shopify_8k.yaml diff --git a/examples/08_Qwen3-VL-235B-A22B_Example/interactive_qwen3_vl_235b_a22b_shopify_8k.yaml b/examples/08_Qwen3-VL-235B-A22B_Example/interactive_qwen3_vl_235b_a22b_shopify_8k.yaml new file mode 100644 index 00000000..35180dee --- /dev/null +++ b/examples/08_Qwen3-VL-235B-A22B_Example/interactive_qwen3_vl_235b_a22b_shopify_8k.yaml @@ -0,0 +1,45 @@ +# Interactive Benchmark - Qwen3-VL-235B-A22B on Shopify Product Catalogue 8k +# Use this for interactive testing and development with the smaller 8k dataset +name: "interactive-qwen3-vl-235b-a22b-shopify-8k-benchmark" +version: "1.0" +type: "online" +timeout: 1800 # 30 minutes for quick interactive runs + +model_params: + name: "Qwen/Qwen3-VL-235B-A22B-Instruct" + # tokenizer_name: "Qwen/Qwen3-VL-235B-A22B-Instruct" # Set this if model name is a local/container path + temperature: 0 + top_p: 1 + max_new_tokens: 150 + streaming: "on" # Required for TTFT/TPOT measurement + +datasets: + - name: shopify_product_catalogue_8k::q3vl + type: "performance" + +settings: + runtime: + min_duration_ms: 60000 # 1 minute for quick tests + n_samples_to_issue: 100 # Small batch for interactive testing (increase to 8000 for full run) + scheduler_random_seed: 42 + dataloader_random_seed: 42 + + load_pattern: + type: "poisson" + target_qps: 6.5 + + client: + num_workers: 2 + transport: + type: zmq + recv_buffer_size: 16777216 + send_buffer_size: 16777216 + max_connections: 1000 + worker_initialization_timeout: 120 + +endpoint_config: + endpoints: + - "http://localhost:8000" + api_key: null + +report_dir: results/qwen3_vl_235b_a22b_shopify_8k_benchmark_interactive/ diff --git a/examples/08_Qwen3-VL-235B-A22B_Example/offline_qwen3_vl_235b_a22b_shopify.yaml b/examples/08_Qwen3-VL-235B-A22B_Example/offline_qwen3_vl_235b_a22b_shopify.yaml index 95445781..2cb3d669 100644 --- a/examples/08_Qwen3-VL-235B-A22B_Example/offline_qwen3_vl_235b_a22b_shopify.yaml +++ b/examples/08_Qwen3-VL-235B-A22B_Example/offline_qwen3_vl_235b_a22b_shopify.yaml @@ -25,7 +25,7 @@ datasets: settings: runtime: min_duration_ms: 600000 # 10 minute - n_samples_to_issue: 100 # Limit queries for testing (remove or increase for full run) + n_samples_to_issue: 48289 # Full dataset size for this benchmark; lower this (e.g., 1000 or 100) for quick tests, or remove for an unconstrained full run scheduler_random_seed: 42 # For Poisson/distribution sampling dataloader_random_seed: 42 # For dataset shuffling @@ -33,7 +33,7 @@ settings: type: "max_throughput" client: - num_workers: 2 + num_workers: 5 transport: type: zmq recv_buffer_size: 16777216 diff --git a/examples/08_Qwen3-VL-235B-A22B_Example/online_qwen3_vl_235b_a22b_shopify.yaml b/examples/08_Qwen3-VL-235B-A22B_Example/online_qwen3_vl_235b_a22b_shopify.yaml index db23f163..badf8fb8 100644 --- a/examples/08_Qwen3-VL-235B-A22B_Example/online_qwen3_vl_235b_a22b_shopify.yaml +++ b/examples/08_Qwen3-VL-235B-A22B_Example/online_qwen3_vl_235b_a22b_shopify.yaml @@ -27,7 +27,7 @@ settings: target_qps: 6.5 client: - num_workers: 2 + num_workers: 5 transport: type: zmq recv_buffer_size: 16777216 From cfa4de2462cfcf70e3eaf23f0a777930e6e36303 Mon Sep 17 00:00:00 2001 From: mingyuanm Date: Thu, 7 May 2026 11:51:16 -0700 Subject: [PATCH 3/8] Train split only for the subset Signed-off-by: Mingyuan Ma --- .../shopify_product_catalogue/__init__.py | 17 ++++++++++++----- .../test_shopify_product_catalogue.py | 5 +++++ 2 files changed, 17 insertions(+), 5 deletions(-) diff --git a/src/inference_endpoint/dataset_manager/predefined/shopify_product_catalogue/__init__.py b/src/inference_endpoint/dataset_manager/predefined/shopify_product_catalogue/__init__.py index 9051d9a5..7b8b6723 100644 --- a/src/inference_endpoint/dataset_manager/predefined/shopify_product_catalogue/__init__.py +++ b/src/inference_endpoint/dataset_manager/predefined/shopify_product_catalogue/__init__.py @@ -21,7 +21,7 @@ from io import BytesIO from logging import getLogger from pathlib import Path -from typing import Any +from typing import Any, ClassVar import pandas as pd from datasets import load_dataset @@ -71,8 +71,8 @@ class BaseShopifyProductCatalogue(Dataset, ABC): """Abstract base class for Shopify product catalogue datasets. Contains shared logic for downloading and processing product catalogue - data from HuggingFace. Subclasses only need to define REPO_ID and - dataset_id. + data from HuggingFace. Subclasses only need to define REPO_ID, + dataset_id, and optionally DEFAULT_SPLITS. """ COLUMN_NAMES = [ @@ -91,6 +91,9 @@ class BaseShopifyProductCatalogue(Dataset, ABC): REPO_ID: str """HuggingFace dataset repository ID. Must be set by subclass.""" + DEFAULT_SPLITS: ClassVar[list[str]] = ["train", "test"] + """Default splits to load when split is not specified. Override in subclass if needed.""" + @classmethod def generate( cls, @@ -108,7 +111,7 @@ def generate( Args: datasets_dir: Directory to save transformed dataset. - split: Splits to load (e.g. ["train", "test"]). Defaults to ["train", "test"]. + split: Splits to load (e.g. ["train", "test"]). Defaults to DEFAULT_SPLITS. force: Regenerate even if file exists. token: HuggingFace token for gated datasets. revision: Dataset revision/branch. Defaults to "main". @@ -118,7 +121,7 @@ def generate( product_image_format, potential_product_categories. """ if split is None: - split = ["train", "test"] + split = cls.DEFAULT_SPLITS split_key = "+".join(split) filename = f"{cls.DATASET_ID}_{split_key}.parquet" dst_path = datasets_dir / cls.DATASET_ID / split_key / filename @@ -181,10 +184,14 @@ class ShopifyProductCatalogue8k( Each sample includes product image, title, description, and candidate categories. Compatible with OpenAI multimodal adapter (prompt/system with vision content). + + Note: This dataset only has a train split (no test split). """ REPO_ID = "nvidia/Shopify-product-catalogue-8k" + DEFAULT_SPLITS = ["train"] + __all__ = [ "ProductMetadata", diff --git a/tests/unit/dataset_manager/test_shopify_product_catalogue.py b/tests/unit/dataset_manager/test_shopify_product_catalogue.py index 97f36ace..8b7c0831 100644 --- a/tests/unit/dataset_manager/test_shopify_product_catalogue.py +++ b/tests/unit/dataset_manager/test_shopify_product_catalogue.py @@ -392,6 +392,11 @@ def test_shares_presets_with_base(self) -> None: assert ShopifyProductCatalogue8k.PRESETS is ShopifyProductCatalogue.PRESETS assert hasattr(ShopifyProductCatalogue8k.PRESETS, "q3vl") + def test_default_splits_is_train_only(self) -> None: + """8k variant defaults to train split only (no test split available).""" + assert ShopifyProductCatalogue8k.DEFAULT_SPLITS == ["train"] + assert ShopifyProductCatalogue.DEFAULT_SPLITS == ["train", "test"] + def test_generate_uses_correct_repo_id( self, tmp_path: Path, mock_hf_dataset: list[dict] ) -> None: From a9ad840eb3b099372e490be0a51135ab0fce9fcf Mon Sep 17 00:00:00 2001 From: mingyuanm Date: Mon, 11 May 2026 11:15:50 -0700 Subject: [PATCH 4/8] Update default configs Signed-off-by: Mingyuan Ma --- .../interactive_qwen3_vl_235b_a22b_shopify_8k.yaml | 14 ++++++++++---- .../online_qwen3_vl_235b_a22b_shopify.yaml | 9 ++++++++- 2 files changed, 18 insertions(+), 5 deletions(-) diff --git a/examples/08_Qwen3-VL-235B-A22B_Example/interactive_qwen3_vl_235b_a22b_shopify_8k.yaml b/examples/08_Qwen3-VL-235B-A22B_Example/interactive_qwen3_vl_235b_a22b_shopify_8k.yaml index 35180dee..3160dab4 100644 --- a/examples/08_Qwen3-VL-235B-A22B_Example/interactive_qwen3_vl_235b_a22b_shopify_8k.yaml +++ b/examples/08_Qwen3-VL-235B-A22B_Example/interactive_qwen3_vl_235b_a22b_shopify_8k.yaml @@ -16,20 +16,26 @@ model_params: datasets: - name: shopify_product_catalogue_8k::q3vl type: "performance" + - name: shopify_product_catalogue_8k::q3vl + type: "accuracy" + accuracy_config: + eval_method: "shopify_category_f1" + ground_truth: "ground_truth_category" + extractor: "identity_extractor" + num_repeats: 1 settings: runtime: - min_duration_ms: 60000 # 1 minute for quick tests - n_samples_to_issue: 100 # Small batch for interactive testing (increase to 8000 for full run) + min_duration_ms: 600000 # 10 minute scheduler_random_seed: 42 dataloader_random_seed: 42 load_pattern: type: "poisson" - target_qps: 6.5 + target_qps: 5 client: - num_workers: 2 + num_workers: 5 transport: type: zmq recv_buffer_size: 16777216 diff --git a/examples/08_Qwen3-VL-235B-A22B_Example/online_qwen3_vl_235b_a22b_shopify.yaml b/examples/08_Qwen3-VL-235B-A22B_Example/online_qwen3_vl_235b_a22b_shopify.yaml index badf8fb8..0d8f8c6a 100644 --- a/examples/08_Qwen3-VL-235B-A22B_Example/online_qwen3_vl_235b_a22b_shopify.yaml +++ b/examples/08_Qwen3-VL-235B-A22B_Example/online_qwen3_vl_235b_a22b_shopify.yaml @@ -14,6 +14,13 @@ model_params: datasets: - name: shopify_product_catalogue::q3vl type: "performance" + - name: shopify_product_catalogue::q3vl + type: "accuracy" + accuracy_config: + eval_method: "shopify_category_f1" + ground_truth: "ground_truth_category" + extractor: "identity_extractor" + num_repeats: 1 settings: runtime: @@ -24,7 +31,7 @@ settings: load_pattern: type: "poisson" - target_qps: 6.5 + target_qps: 6 client: num_workers: 5 From 22541d67f93dcc877d1d17750a40a4712dc2013f Mon Sep 17 00:00:00 2001 From: Mingyuan Ma Date: Wed, 13 May 2026 09:37:38 -0700 Subject: [PATCH 5/8] precommit Signed-off-by: Mingyuan Ma --- docs/dataset_manager/DESIGN.md | 18 +++++++-------- .../dataset_manager/__init__.py | 5 +++- .../shopify_product_catalogue/__init__.py | 7 +++--- .../test_shopify_product_catalogue.py | 23 +++++++++++++++---- 4 files changed, 34 insertions(+), 19 deletions(-) diff --git a/docs/dataset_manager/DESIGN.md b/docs/dataset_manager/DESIGN.md index 024950e2..e1c8c6d1 100644 --- a/docs/dataset_manager/DESIGN.md +++ b/docs/dataset_manager/DESIGN.md @@ -103,16 +103,16 @@ Transforms are composed in order; each receives the output of the previous. Registered in `dataset.py` under `Dataset.PREDEFINED`. Referenced by name in rulesets and YAML configs. Each predefined dataset ships with default transforms for supported model families. -| Name | Source | Notes | -| --------------------------- | ------------- | ------------------------------------------ | -| `aime25` | AIME 2025 | Math reasoning | -| `gpqa` | GPQA Diamond | Science QA | -| `cnndailymail` | CNN/DailyMail | Summarization | -| `open_orca` | OpenOrca | General instruction | -| `livecodebench` | LiveCodeBench | Code generation; requires additional setup | -| `shopify_product_catalogue` | Shopify | E-commerce Q&A (q3vl) | +| Name | Source | Notes | +| ------------------------------ | ------------- | ----------------------------------------------------- | +| `aime25` | AIME 2025 | Math reasoning | +| `gpqa` | GPQA Diamond | Science QA | +| `cnndailymail` | CNN/DailyMail | Summarization | +| `open_orca` | OpenOrca | General instruction | +| `livecodebench` | LiveCodeBench | Code generation; requires additional setup | +| `shopify_product_catalogue` | Shopify | E-commerce Q&A (q3vl) | | `shopify_product_catalogue_8k` | Shopify | 8k sample variant of Shopify product catalogue (q3vl) | -| `random` | Synthetic | Generated prompts for throughput testing | +| `random` | Synthetic | Generated prompts for throughput testing | ## Preset System diff --git a/src/inference_endpoint/dataset_manager/__init__.py b/src/inference_endpoint/dataset_manager/__init__.py index cf675bea..6a78dae2 100644 --- a/src/inference_endpoint/dataset_manager/__init__.py +++ b/src/inference_endpoint/dataset_manager/__init__.py @@ -27,7 +27,10 @@ from .predefined.livecodebench import LiveCodeBench from .predefined.open_orca import OpenOrca from .predefined.random import RandomDataset -from .predefined.shopify_product_catalogue import ShopifyProductCatalogue, ShopifyProductCatalogue8k +from .predefined.shopify_product_catalogue import ( + ShopifyProductCatalogue, + ShopifyProductCatalogue8k, +) from .transforms import ( AddStaticColumns, ColumnFilter, diff --git a/src/inference_endpoint/dataset_manager/predefined/shopify_product_catalogue/__init__.py b/src/inference_endpoint/dataset_manager/predefined/shopify_product_catalogue/__init__.py index 7b8b6723..a8981198 100644 --- a/src/inference_endpoint/dataset_manager/predefined/shopify_product_catalogue/__init__.py +++ b/src/inference_endpoint/dataset_manager/predefined/shopify_product_catalogue/__init__.py @@ -24,9 +24,10 @@ from typing import Any, ClassVar import pandas as pd -from datasets import load_dataset from tqdm import tqdm +from datasets import load_dataset + from ...dataset import Dataset from . import presets from .metadata import ProductMetadata @@ -139,9 +140,7 @@ def generate( load_options["revision"] = revision ds = load_dataset(cls.REPO_ID, split=split_key, **load_options) - logger.info( - f"Loaded {len(ds)} samples from {cls.REPO_ID} ({split_key})" - ) + logger.info(f"Loaded {len(ds)} samples from {cls.REPO_ID} ({split_key})") all_rows: list[dict[str, Any]] = [] for i in tqdm( diff --git a/tests/unit/dataset_manager/test_shopify_product_catalogue.py b/tests/unit/dataset_manager/test_shopify_product_catalogue.py index 8b7c0831..ac4b1f7f 100644 --- a/tests/unit/dataset_manager/test_shopify_product_catalogue.py +++ b/tests/unit/dataset_manager/test_shopify_product_catalogue.py @@ -27,12 +27,12 @@ from unittest.mock import patch import pandas as pd +from inference_endpoint.dataset_manager.dataset import Dataset from inference_endpoint.dataset_manager.predefined.shopify_product_catalogue import ( BaseShopifyProductCatalogue, ShopifyProductCatalogue, ShopifyProductCatalogue8k, ) -from inference_endpoint.dataset_manager.dataset import Dataset from inference_endpoint.dataset_manager.predefined.shopify_product_catalogue.presets import ( ShopifyMultimodalFormatter, q3vl, @@ -372,7 +372,9 @@ def test_class_inherits_from_base(self) -> None: def test_has_correct_repo_id(self) -> None: """REPO_ID points to nvidia/Shopify-product-catalogue-8k.""" - assert ShopifyProductCatalogue8k.REPO_ID == "nvidia/Shopify-product-catalogue-8k" + assert ( + ShopifyProductCatalogue8k.REPO_ID == "nvidia/Shopify-product-catalogue-8k" + ) def test_has_correct_dataset_id(self) -> None: """DATASET_ID is shopify_product_catalogue_8k.""" @@ -381,11 +383,17 @@ def test_has_correct_dataset_id(self) -> None: def test_registered_in_dataset_predefined(self) -> None: """Class is auto-registered in Dataset.PREDEFINED.""" assert "shopify_product_catalogue_8k" in Dataset.PREDEFINED - assert Dataset.PREDEFINED["shopify_product_catalogue_8k"] is ShopifyProductCatalogue8k + assert ( + Dataset.PREDEFINED["shopify_product_catalogue_8k"] + is ShopifyProductCatalogue8k + ) def test_shares_column_names_with_base(self) -> None: """Column names are identical to ShopifyProductCatalogue.""" - assert ShopifyProductCatalogue8k.COLUMN_NAMES == ShopifyProductCatalogue.COLUMN_NAMES + assert ( + ShopifyProductCatalogue8k.COLUMN_NAMES + == ShopifyProductCatalogue.COLUMN_NAMES + ) def test_shares_presets_with_base(self) -> None: """Presets are shared with base class (q3vl works).""" @@ -430,7 +438,12 @@ def test_generate_uses_correct_dataset_id_for_paths( force=True, ) # Verify cache path uses shopify_product_catalogue_8k - expected_path = tmp_path / "shopify_product_catalogue_8k" / "train" / "shopify_product_catalogue_8k_train.parquet" + expected_path = ( + tmp_path + / "shopify_product_catalogue_8k" + / "train" + / "shopify_product_catalogue_8k_train.parquet" + ) assert expected_path.exists() def test_get_dataloader_with_q3vl_preset(self, tmp_path: Path) -> None: From 2221e2f0e850d7b01df7dae668513093e0276078 Mon Sep 17 00:00:00 2001 From: Mingyuan Ma Date: Wed, 13 May 2026 09:49:25 -0700 Subject: [PATCH 6/8] fix import lines Signed-off-by: Mingyuan Ma --- .../unit/dataset_manager/test_shopify_product_catalogue.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/tests/unit/dataset_manager/test_shopify_product_catalogue.py b/tests/unit/dataset_manager/test_shopify_product_catalogue.py index ac4b1f7f..865efac1 100644 --- a/tests/unit/dataset_manager/test_shopify_product_catalogue.py +++ b/tests/unit/dataset_manager/test_shopify_product_catalogue.py @@ -15,10 +15,6 @@ """Unit tests for Shopify product catalogue dataset initialization and transforms.""" -import pytest - -pytestmark = pytest.mark.unit - import base64 import json from io import BytesIO @@ -27,6 +23,7 @@ from unittest.mock import patch import pandas as pd +import pytest from inference_endpoint.dataset_manager.dataset import Dataset from inference_endpoint.dataset_manager.predefined.shopify_product_catalogue import ( BaseShopifyProductCatalogue, @@ -39,6 +36,8 @@ ) from PIL import Image +pytestmark = pytest.mark.unit + def _make_pil_image(image_format: str = "JPEG") -> Image.Image: """Create a minimal 1x1 PIL Image with the given format attribute set.""" From 0239d029c8ee709b94d518ac48d2e6b333ce31f5 Mon Sep 17 00:00:00 2001 From: Mingyuan Ma Date: Wed, 13 May 2026 09:55:52 -0700 Subject: [PATCH 7/8] precommit Signed-off-by: Mingyuan Ma --- .../predefined/shopify_product_catalogue/__init__.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/inference_endpoint/dataset_manager/predefined/shopify_product_catalogue/__init__.py b/src/inference_endpoint/dataset_manager/predefined/shopify_product_catalogue/__init__.py index a8981198..9461b4e5 100644 --- a/src/inference_endpoint/dataset_manager/predefined/shopify_product_catalogue/__init__.py +++ b/src/inference_endpoint/dataset_manager/predefined/shopify_product_catalogue/__init__.py @@ -24,9 +24,8 @@ from typing import Any, ClassVar import pandas as pd -from tqdm import tqdm - from datasets import load_dataset +from tqdm import tqdm from ...dataset import Dataset from . import presets From fc297130ca316ea125c4f241501c5f7f71b39590 Mon Sep 17 00:00:00 2001 From: Mingyuan Ma Date: Wed, 13 May 2026 15:08:28 -0700 Subject: [PATCH 8/8] Use default temperature None Signed-off-by: Mingyuan Ma --- .../interactive_qwen3_vl_235b_a22b_shopify_8k.yaml | 2 -- .../offline_qwen3_vl_235b_a22b_shopify.yaml | 1 - .../online_qwen3_vl_235b_a22b_shopify.yaml | 1 - 3 files changed, 4 deletions(-) diff --git a/examples/08_Qwen3-VL-235B-A22B_Example/interactive_qwen3_vl_235b_a22b_shopify_8k.yaml b/examples/08_Qwen3-VL-235B-A22B_Example/interactive_qwen3_vl_235b_a22b_shopify_8k.yaml index 3160dab4..a311a21c 100644 --- a/examples/08_Qwen3-VL-235B-A22B_Example/interactive_qwen3_vl_235b_a22b_shopify_8k.yaml +++ b/examples/08_Qwen3-VL-235B-A22B_Example/interactive_qwen3_vl_235b_a22b_shopify_8k.yaml @@ -7,8 +7,6 @@ timeout: 1800 # 30 minutes for quick interactive runs model_params: name: "Qwen/Qwen3-VL-235B-A22B-Instruct" - # tokenizer_name: "Qwen/Qwen3-VL-235B-A22B-Instruct" # Set this if model name is a local/container path - temperature: 0 top_p: 1 max_new_tokens: 150 streaming: "on" # Required for TTFT/TPOT measurement diff --git a/examples/08_Qwen3-VL-235B-A22B_Example/offline_qwen3_vl_235b_a22b_shopify.yaml b/examples/08_Qwen3-VL-235B-A22B_Example/offline_qwen3_vl_235b_a22b_shopify.yaml index 2cb3d669..a9e1c606 100644 --- a/examples/08_Qwen3-VL-235B-A22B_Example/offline_qwen3_vl_235b_a22b_shopify.yaml +++ b/examples/08_Qwen3-VL-235B-A22B_Example/offline_qwen3_vl_235b_a22b_shopify.yaml @@ -7,7 +7,6 @@ timeout: 14400 # Perf + acc run takes over 3 hours, consider limit n_samples_to_ model_params: name: "Qwen/Qwen3-VL-235B-A22B-Instruct" - temperature: 0 top_p: 1 max_new_tokens: 150 diff --git a/examples/08_Qwen3-VL-235B-A22B_Example/online_qwen3_vl_235b_a22b_shopify.yaml b/examples/08_Qwen3-VL-235B-A22B_Example/online_qwen3_vl_235b_a22b_shopify.yaml index 0d8f8c6a..f2c4b4b7 100644 --- a/examples/08_Qwen3-VL-235B-A22B_Example/online_qwen3_vl_235b_a22b_shopify.yaml +++ b/examples/08_Qwen3-VL-235B-A22B_Example/online_qwen3_vl_235b_a22b_shopify.yaml @@ -6,7 +6,6 @@ timeout: 14400 model_params: name: "Qwen/Qwen3-VL-235B-A22B-Instruct" - temperature: 0 top_p: 1 max_new_tokens: 150 streaming: "on" # Required for TTFT/TPOT measurement