From dd8ffaf2bb395dad70f615f76ead3553fb1690f1 Mon Sep 17 00:00:00 2001 From: Diptorup Deb Date: Wed, 1 Apr 2026 14:34:39 +0000 Subject: [PATCH 1/2] Add lds stall preset to profiler --- .../rocm_benchmarks/bench_fa2_prefill.py | 23 ++- rocm_profiler/rocm_profiler.py | 155 ++++++++++++++++++ 2 files changed, 170 insertions(+), 8 deletions(-) diff --git a/benchmarks/rocm_benchmarks/bench_fa2_prefill.py b/benchmarks/rocm_benchmarks/bench_fa2_prefill.py index b541100d0b..40ad9c6b80 100644 --- a/benchmarks/rocm_benchmarks/bench_fa2_prefill.py +++ b/benchmarks/rocm_benchmarks/bench_fa2_prefill.py @@ -24,6 +24,7 @@ python benchmarks/rocm_benchmarks/bench_fa2_prefill.py --counters occupancy python benchmarks/rocm_benchmarks/bench_fa2_prefill.py --counters stall python benchmarks/rocm_benchmarks/bench_fa2_prefill.py --counters compute + python benchmarks/rocm_benchmarks/bench_fa2_prefill.py --counters lds_stall # Override the output file label prefix: python benchmarks/rocm_benchmarks/bench_fa2_prefill.py --counters occupancy --label fa2_occ @@ -47,14 +48,20 @@ benchmarks/rocm_benchmarks/