From ff9c62ef24359c3cc5fe91b684fb6828772f623f Mon Sep 17 00:00:00 2001 From: Jingyue Wu Date: Thu, 6 Nov 2025 00:01:58 -0800 Subject: [PATCH 1/2] Use torch._grouped_mm in eager mode This gives a fair comparison between eager and other modes. The constraints mentioned in the comment seem to have been fixed at least for Blackwell. --- thunder/benchmarks/layers_for_inference_benchmark.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/thunder/benchmarks/layers_for_inference_benchmark.py b/thunder/benchmarks/layers_for_inference_benchmark.py index 32ea0a574c..7103d0432a 100644 --- a/thunder/benchmarks/layers_for_inference_benchmark.py +++ b/thunder/benchmarks/layers_for_inference_benchmark.py @@ -348,16 +348,14 @@ def _group_sizes_from_offsets(offsets: torch.Tensor) -> list[int]: if LooseVersion(torch.__version__) >= LooseVersion("2.8.0"): - # Required otherwise, there is a graph-break. + # Required -- otherwise there is a graph-break. _grouped_mm = torch.compiler.allow_in_graph(torch._grouped_mm) +else: + _grouped_mm = None -# This function should be replaced with torch._grouped_mm. However, -# torch._grouped_mm is yet to be usable because it requires offsets being -# multiples of 16. def grouped_mm(a: torch.Tensor, b: torch.Tensor, offsets: torch.Tensor) -> torch.Tensor: - if torch.compiler.is_compiling(): - # NOTE: This path also works for `thunder.jit` as it has a lookaside for `torch.compiler.is_compiling`. + if _grouped_mm: return _grouped_mm(a, b, offsets) group_sizes = _group_sizes_from_offsets(offsets) From cbd61d91740d034999ad96f6f1921ecdcf2c6bb2 Mon Sep 17 00:00:00 2001 From: Jingyue Wu Date: Mon, 10 Nov 2025 21:31:25 -0800 Subject: [PATCH 2/2] Update layers_for_inference_benchmark.py Co-authored-by: Masaki --- thunder/benchmarks/layers_for_inference_benchmark.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/thunder/benchmarks/layers_for_inference_benchmark.py b/thunder/benchmarks/layers_for_inference_benchmark.py index 7103d0432a..66f896c903 100644 --- a/thunder/benchmarks/layers_for_inference_benchmark.py +++ b/thunder/benchmarks/layers_for_inference_benchmark.py @@ -355,7 +355,7 @@ def _group_sizes_from_offsets(offsets: torch.Tensor) -> list[int]: def grouped_mm(a: torch.Tensor, b: torch.Tensor, offsets: torch.Tensor) -> torch.Tensor: - if _grouped_mm: + if _grouped_mm is not None: return _grouped_mm(a, b, offsets) group_sizes = _group_sizes_from_offsets(offsets)