diff --git a/.gitignore b/.gitignore
index 0ae1fb6a..06784f4f 100644
--- a/.gitignore
+++ b/.gitignore
@@ -7,3 +7,6 @@ build-*/
 benchmark_results.csv
 __pycache__/
 *.pyc
+
+# Tuner artifacts (run_tuner.py)
+fusilli_tuning_*/
diff --git a/README.md b/README.md
index d0e46383..131ae3c0 100644
--- a/README.md
+++ b/README.md
@@ -278,6 +278,53 @@ python benchmarks/run_benchmark.py \
   -f commands.txt -o results.csv
 ```
 
+### Tuner
+
+The Fusilli tuner (`benchmarks/run_tuner.py`) generates optimized IREE tuning
+specs for Fusilli operations. It wraps the
+[IREE tuner](https://github.com/nod-ai/amd-shark-ai/tree/main/amdsharktuner) to automatically generate,
+compile, and benchmark tuning candidates.
+
+**AMDGPU only.** The tuner targets ROCm dispatches and requires a build
+configured with `-DFUSILLI_SYSTEMS_AMDGPU=ON`, an AMD GPU at runtime, and
+amdsharktuner installed from source. The PyPI release lags and isn't compatible
+with the IREE RC pinned in `version.json`, so install from GitHub directly:
+
+```shell
+pip install --pre \
+  "amdsharktuner @ git+https://github.com/nod-ai/amd-shark-ai.git@main#subdirectory=amdsharktuner" \
+  --find-links https://iree.dev/pip-release-links.html
+```
+
+**Single operation:**
+```shell
+python benchmarks/run_tuner.py \
+  --devices hip://0 \
+  --num-candidates 30 \
+  --output-td-spec tuning_spec.mlir \
+  --fusilli-args "matmul -M 1024 -N 1024 -K 1024 --a_type bf16 --b_type bf16 --out_type bf16"
+```
+
+**Multiple operations from file:**
+```shell
+python benchmarks/run_tuner.py \
+  --devices hip://0 \
+  --num-candidates 30 \
+  --output-td-spec tuning_spec.mlir \
+  --commands-file commands.txt
+```
+
+When tuning multiple commands, the best spec from each command is automatically
+chained as the starting spec for the next command. To start from an existing
+spec, use `--starter-td-spec <path>`.
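+
+For example, a `commands.txt` lists one Fusilli operation per line; blank
+lines and `#` comments are skipped (shapes below are illustrative):
+
+```
+# Operations to tune, one per line.
+conv -F 1 --bf16
+matmul -M 1024 -N 1024 -K 1024 --a_type bf16 --b_type bf16 --out_type bf16
+```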
+
+The generated tuning spec can then be used with the benchmark driver:
+```shell
+FUSILLI_EXTRA_COMPILER_FLAGS="--iree-codegen-tuning-spec-path=tuning_spec.mlir" \
+  build/bin/benchmarks/fusilli_benchmark_driver --iter 100 \
+  matmul -M 1024 -N 1024 -K 1024 --a_type bf16 --b_type bf16 --out_type bf16
+```
+
 ### Sanitizers
 
 Fusilli supports building with the following sanitizers:
diff --git a/benchmarks/CMakeLists.txt b/benchmarks/CMakeLists.txt
index fafd7ce0..401b9b1c 100644
--- a/benchmarks/CMakeLists.txt
+++ b/benchmarks/CMakeLists.txt
@@ -42,6 +42,33 @@ if(FUSILLI_SYSTEMS_AMDGPU)
       ENVIRONMENT "${FUSILLI_SANITIZER_TEST_ENV_VARS}"
     )
   endif()
+
+  # Test tuner runner (GPU integration tests)
+  add_test(
+    NAME fusilli_tuner_runner_tests
+    COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/test_tuner_runner.sh
+      ${CMAKE_CURRENT_SOURCE_DIR}/run_tuner.py
+      $<TARGET_FILE:fusilli_benchmark_driver>
+    WORKING_DIRECTORY ${CMAKE_BINARY_DIR}
+  )
+
+  # Configure sanitizer options
+  if(FUSILLI_SANITIZER_TEST_ENV_VARS)
+    set_tests_properties(
+      fusilli_tuner_runner_tests PROPERTIES
+      ENVIRONMENT "${FUSILLI_SANITIZER_TEST_ENV_VARS}"
+    )
+  endif()
+endif()
+
+# Tuner cache extraction unit tests (CPU-only, no GPU or amdsharktuner needed)
+if(FUSILLI_BUILD_TESTS)
+  add_test(
+    NAME fusilli_tuner_cache_tests
+    COMMAND python3 -m unittest
+      ${CMAKE_CURRENT_SOURCE_DIR}/test_tuner_cache.py -v
+    WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
+  )
 endif()
 
 # Add some benchmark configurations for CI coverage.
diff --git a/benchmarks/run_tuner.py b/benchmarks/run_tuner.py
new file mode 100644
index 00000000..874bd4ce
--- /dev/null
+++ b/benchmarks/run_tuner.py
@@ -0,0 +1,610 @@
+#!/usr/bin/env python3
+# Copyright 2026 Advanced Micro Devices, Inc.
+#
+# Licensed under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+"""Fusilli Tuner — tune IREE kernels generated by Fusilli operations.
+
+AMDGPU only. Wraps amdsharktuner.libtuner to generate, compile, and benchmark
+tuning candidates for Fusilli operations on ROCm. Produces an MLIR tuning spec
+that can be passed to Fusilli via FUSILLI_EXTRA_COMPILER_FLAGS or
+--Xiree-compile.
+
+Requires:
+  - Fusilli built with -DFUSILLI_SYSTEMS_AMDGPU=ON
+  - An AMD GPU + ROCm runtime
+  - amdsharktuner from source (PyPI lags; install from GitHub):
+      pip install --pre \\
+        "amdsharktuner @ git+https://github.com/nod-ai/amd-shark-ai.git@main#subdirectory=amdsharktuner" \\
+        --find-links https://iree.dev/pip-release-links.html
+
+Usage:
+  # Single command:
+  python benchmarks/run_tuner.py \\
+    --devices hip://0 --num-candidates 30 \\
+    --fusilli-args "matmul -M 1024 -N 1024 -K 1024 --a_type bf16 --b_type bf16 --out_type bf16"
+
+  # Multiple commands from file:
+  python benchmarks/run_tuner.py \\
+    --devices hip://0 --num-candidates 30 \\
+    --commands-file commands.txt --output-td-spec tuning_spec.mlir
+"""
+
+import argparse
+import logging
+import os
+import shlex
+import shutil
+import subprocess
+import sys
+import tempfile
+from datetime import datetime
+from pathlib import Path
+from typing import Iterator, Optional
+
+_HAS_LIBTUNER = True
+_LIBTUNER_IMPORT_ERROR: Optional[Exception] = None
+try:
+    from amdsharktuner import common, libtuner
+    from typing_extensions import override
+except Exception as exc:  # noqa: BLE001 - amdsharktuner can raise RuntimeError on version mismatch
+    _HAS_LIBTUNER = False
+    _LIBTUNER_IMPORT_ERROR = exc
+
+
+def _require_libtuner():
+    """Exit with a helpful message if amdsharktuner is not installed."""
+    if not _HAS_LIBTUNER:
+        print(
+            "ERROR: amdsharktuner is required but not installed.\n"
+            f"Import failed with: {_LIBTUNER_IMPORT_ERROR}\n"
+            "Install from GitHub (PyPI release lags):\n"
+            "  pip install --pre \\\n"
+            '    "amdsharktuner @ git+https://github.com/nod-ai/'
+            'amd-shark-ai.git@main#subdirectory=amdsharktuner" \\\n'
+            "    --find-links https://iree.dev/pip-release-links.html\n"
+            "See https://github.com/nod-ai/amd-shark-ai/tree/main/amdsharktuner for details.",
+            file=sys.stderr,
+        )
+        sys.exit(1)
+
+
+# ===----------------------------------------------------------------------=== #
+# Tuner classes (only defined when amdsharktuner is available)
+# ===----------------------------------------------------------------------=== #
+
+if _HAS_LIBTUNER:
+
+    class FusilliPathConfig(libtuner.PathConfig):
+        """Path configuration with timestamped Fusilli tuning directories."""
+
+        def _name_base_dir(self) -> Path:
+            timestamp = datetime.now().strftime("%Y_%m_%d_%H_%M")
+            return Path(f"./fusilli_tuning_{timestamp}")
+
+        def create_benchmark_path_config(
+            self, benchmark_name: str
+        ) -> libtuner.PathConfig:
+            """Create a PathConfig for a specific benchmark under the main directory."""
+            base_dir = self.base_dir
+
+            class BenchmarkPathConfig(libtuner.PathConfig):
+                def _name_base_dir(self) -> Path:
+                    return base_dir / benchmark_name
+
+            return BenchmarkPathConfig()
+
+    class FusilliTuner(libtuner.TuningClient):
+        """Tuning client for IREE kernels generated by Fusilli."""
+
+        def __init__(self, tuner_context: common.TunerContext):
+            super().__init__(tuner_context)
+            self.compile_flags: list[str] = []
+            self.benchmark_flags: list[str] = []
+            # Per-candidate compile budget; covers the bulk of dispatch
+            # compiles while terminating runaway pathological candidates.
+            self.compile_timeout: Optional[float] = 16
+            self.benchmark_timeout: Optional[float] = None
+            self.auto_benchmark_timeout: bool = True
+
+        @override
+        def get_iree_compile_flags(self) -> list[str]:
+            return self.compile_flags
+
+        @override
+        def get_iree_compile_timeout_s(self) -> Optional[float]:
+            return self.compile_timeout
+
+        @override
+        def get_iree_benchmark_module_flags(self) -> list[str]:
+            return self.benchmark_flags
+
+        @override
+        def get_iree_benchmark_timeout_s(self) -> Optional[float]:
+            return self.benchmark_timeout
+
+        @override
+        def is_auto_iree_benchmark_timeout(self) -> bool:
+            return self.auto_benchmark_timeout
+
+        @override
+        def should_prune_slower_candidates(self) -> bool:
+            return True
+
+
+# ===----------------------------------------------------------------------=== #
+# Cache and command utilities
+# ===----------------------------------------------------------------------=== #
+
+
+def find_cached_artifacts(base_dir: Path) -> tuple[Path, Path]:
+    """Find source MLIR and compile command from Fusilli cache.
+
+    Fusilli cache structure (controlled by FUSILLI_CACHE_DIR):
+        base_dir/.cache/fusilli/<graph_hash>/*.mlir
+        base_dir/.cache/fusilli/<graph_hash>/*.txt
+
+    Returns:
+        Tuple of (source_mlir_path, compile_command_path).
+
+    Raises:
+        FileNotFoundError: If cache structure is missing or unexpected.
+    """
+    fusilli_cache = base_dir / ".cache" / "fusilli"
+
+    if not fusilli_cache.exists():
+        raise FileNotFoundError(f"Fusilli cache not found at {fusilli_cache}")
+
+    graph_dirs = list(fusilli_cache.iterdir())
+    if len(graph_dirs) != 1:
+        raise FileNotFoundError(
+            f"Expected exactly one graph directory in {fusilli_cache}, "
+            f"found {len(graph_dirs)}"
+        )
+
+    graph_dir = graph_dirs[0]
+
+    mlir_files = list(graph_dir.glob("*.mlir"))
+    txt_files = list(graph_dir.glob("*.txt"))
+
+    if len(mlir_files) != 1:
+        raise FileNotFoundError(
+            f"Expected exactly one .mlir file in {graph_dir}, "
+            f"found {len(mlir_files)}"
+        )
+    if len(txt_files) != 1:
+        raise FileNotFoundError(
+            f"Expected exactly one .txt file in {graph_dir}, found {len(txt_files)}"
+        )
+
+    source_mlir = mlir_files[0]
+    compile_cmd = txt_files[0]
+
+    # Symlink-escape guard: relative_to() raises ValueError if either
+    # resolved path lies outside base_dir.
+    source_mlir.resolve().relative_to(base_dir.resolve())
+    compile_cmd.resolve().relative_to(base_dir.resolve())
+
+    return source_mlir, compile_cmd
+
+
+def build_compile_args(compile_command: str, benchmarks_dir: Path) -> list[str]:
+    """Transform Fusilli's compile command into tuner-compatible iree-compile args.
+
+    Strips output flags and scheduling statistics flags, then appends
+    tuner-specific flags for dumping executable benchmarks.
+    """
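+    # For example (flags illustrative), a cached command such as
+    #   iree-compile graph.mlir --iree-hal-target-backends=rocm -o graph.vmfb
+    # becomes
+    #   iree-compile graph.mlir --iree-hal-target-backends=rocm
+    #     --iree-config-add-tuner-attributes
+    #     --iree-hal-dump-executable-benchmarks-to <benchmarks_dir> -o <devnull>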
+    tokens = shlex.split(compile_command)
+
+    compile_args: list[str] = ["iree-compile"]
+    args_iter: Iterator[str] = iter(tokens[1:])
+    for arg in args_iter:
+        # Skip "-o <path>" (Fusilli generates it as a separate arg + path).
+        if arg == "-o":
+            next(args_iter, None)
+            continue
+        # Skip scheduling statistics flags (Fusilli uses "=" syntax).
+        if arg.startswith(
+            (
+                "--iree-scheduling-dump-statistics-format=",
+                "--iree-scheduling-dump-statistics-file=",
+            )
+        ):
+            continue
+        compile_args.append(arg)
+
+    compile_args += [
+        "--iree-config-add-tuner-attributes",
+        "--iree-hal-dump-executable-benchmarks-to",
+        str(benchmarks_dir),
+        "-o",
+        os.devnull,
+    ]
+
+    return compile_args
+
+
+def load_commands(
+    commands_file: Optional[str], fusilli_op_args: list[str]
+) -> list[list[str]]:
+    """Load Fusilli commands from file or CLI args.
+
+    Caller must ensure exactly one of commands_file / fusilli_op_args is set;
+    main() validates this before invoking.
+
+    Returns a list of commands, each as a list of string tokens.
+    """
+    if not commands_file:
+        return [fusilli_op_args]
+
+    with open(commands_file) as f:
+        return [
+            shlex.split(line)
+            for line in f
+            if line.strip() and not line.strip().startswith("#")
+        ]
+ """ + args.input_file = benchmark_path + + if starter_td_spec and starter_td_spec.exists(): + args.starter_td_spec = starter_td_spec + else: + args.starter_td_spec = None + + logging.info("Generating candidate tuning specs...") + with common.TunerContext(logger=root_logger) as tuner_context: + tuner_context.logger.addHandler(summary_handler) + tuner = FusilliTuner(tuner_context) + + candidates = libtuner.generate_candidate_specs(args, path_config, tuner) + logging.info(f"Stored candidate specs in {path_config.specs_dir}") + + logging.info("Compiling dispatch candidates...") + tuner.compile_flags = ["--compile-from=executable-sources"] + compiled = libtuner.compile(args, path_config, candidates, tuner) + + logging.info("Benchmarking compiled dispatch candidates...") + tuner.benchmark_flags = ["--input=1", "--benchmark_repetitions=3"] + top_candidates = libtuner.benchmark( + args, + path_config, + compiled, + tuner, + args.fusilli_num_dispatch_candidates, + args.fusilli_dispatch_benchmark_timeout_mins, + ) + + if not top_candidates: + logging.warning("No candidates performed better than baseline.") + return None + + logging.info(f"Top dispatch candidates: {top_candidates}") + for cid in top_candidates: + logging.info(f" {tuner.candidate_trackers[cid].spec_path.resolve()}") + + best_id = top_candidates[0] + best_spec = tuner.candidate_trackers[best_id].spec_path + shutil.copy(best_spec, args.output_td_spec) + logging.info(f"Saved best tuning spec to: {args.output_td_spec}") + print(f"Saved best tuning spec to: {args.output_td_spec}") + + return args.output_td_spec + + +def process_fusilli_command( + cli_args: list[str], + args: argparse.Namespace, + fusilli_path_config: "FusilliPathConfig", + root_logger: logging.Logger, + starter_td_spec: Optional[Path], + command_idx: int, +) -> Optional[Path]: + """Process a single Fusilli command: dump MLIR, extract benchmarks, tune. + + Returns: + Path to the best tuning spec if tuning succeeded, None otherwise. + """ + # Create isolated temp directory for this command's cache. + if args.tmp_dir: + base_tmp = Path(args.tmp_dir) + base_tmp.mkdir(parents=True, exist_ok=True) + tmp_dir = Path(tempfile.mkdtemp(dir=base_tmp, prefix="fusilli_cache_")) + else: + Path("fusilli_tuner").mkdir(exist_ok=True) + tmp_dir = Path(tempfile.mkdtemp(dir="fusilli_tuner", prefix="fusilli_cache_")) + logging.info(f"Using temporary directory: {tmp_dir}") + + # Step 1: Generate MLIR artifacts via --dump. + run_fusilli_benchmark_driver(args.fusilli_driver, cli_args, tmp_dir) + + # Step 2: Extract cached artifacts. + source_mlir, compile_cmd_path = find_cached_artifacts(tmp_dir) + logging.debug(f"source_mlir: {source_mlir}") + + compile_command = compile_cmd_path.read_text().strip() + + # Step 3: Compile with tuner flags to extract executable benchmarks. 
+
+    # Step 3: Compile with tuner flags to extract executable benchmarks.
+    benchmarks_dir = tmp_dir / "benchmarks"
+    compile_args = build_compile_args(compile_command, benchmarks_dir)
+
+    logging.info(f"> {shlex.join(compile_args)}")
+    compile_result = subprocess.run(compile_args, capture_output=True, text=True)
+
+    if compile_result.returncode != 0:
+        logging.error(f"iree-compile failed with code {compile_result.returncode}")
+        if compile_result.stdout:
+            logging.error(f"stdout: {compile_result.stdout}")
+        if compile_result.stderr:
+            logging.error(f"stderr: {compile_result.stderr}")
+        raise RuntimeError(f"iree-compile failed with code {compile_result.returncode}")
+
+    if not benchmarks_dir.exists():
+        logging.warning(f"No benchmarks directory found at {benchmarks_dir}")
+        return None
+
+    # Step 4: Tune each generated benchmark dispatch.
+    # Sort for deterministic spec-chaining order across runs (os.listdir
+    # is filesystem-defined).
+    best_spec_path: Optional[Path] = None
+    dispatch_starter_td_spec = starter_td_spec
+    benchmark_files = sorted(os.listdir(benchmarks_dir))
+
+    for benchmark_file in benchmark_files:
+        benchmark_path = benchmarks_dir / benchmark_file
+        logging.info(f"Tuning benchmark: {benchmark_path}")
+
+        benchmark_name = benchmark_file.replace("_benchmark.mlir", "")
+        op_type = cli_args[0] if cli_args else "unknown"
+        unique_name = f"{benchmark_name}_{op_type}_{command_idx}"
+
+        benchmark_path_config = fusilli_path_config.create_benchmark_path_config(
+            unique_name
+        )
+        benchmark_path_config.base_dir.mkdir(parents=True, exist_ok=True)
+
+        summary_log = benchmark_path_config.base_dir / "summary.log"
+        summary_handler = logging.FileHandler(summary_log)
+        summary_handler.setLevel(logging.INFO)
+        summary_handler.setFormatter(
+            logging.Formatter("%(asctime)s - %(levelname)s - %(message)s")
+        )
+
+        try:
+            result = tune_fusilli_dispatch(
+                benchmark_path,
+                args,
+                benchmark_path_config,
+                root_logger,
+                summary_handler,
+                dispatch_starter_td_spec,
+            )
+            if result:
+                best_spec_path = result
+                dispatch_starter_td_spec = result
+
+            if benchmark_path_config.run_log is not None:
+                print(f"\nDetailed logs: {benchmark_path_config.run_log.resolve()}")
+            print(f"Summary: {summary_log.resolve()}")
+        except Exception:
+            logging.exception(f"Error tuning benchmark {benchmark_path}")
+            raise
+        finally:
+            root_logger.removeHandler(summary_handler)
+            summary_handler.close()
+
+    return args.output_td_spec if best_spec_path else None
+
+
+# ===----------------------------------------------------------------------=== #
+# CLI and entry point
+# ===----------------------------------------------------------------------=== #
+
+
+def insert_placeholder_input_file(argv: list[str]) -> list[str]:
+    """Insert a placeholder to satisfy libtuner's required input_file argument.
+
+    Fusilli generates files internally rather than from a pre-existing input
+    file. This placeholder will be overridden per-dispatch in tune_fusilli_dispatch.
+    """
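+    # For example (argv illustrative):
+    #   ["run_tuner.py", "--devices", "hip://0"]
+    #     -> ["run_tuner.py", "fusilli.mlir", "--devices", "hip://0"]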
+ """ + parser = argparse.ArgumentParser( + description=__doc__, + formatter_class=argparse.RawDescriptionHelpFormatter, + ) + + group = parser.add_argument_group("Fusilli Tuner Options") + group.add_argument( + "--fusilli-args", + type=str, + help='Fusilli operation command, e.g.: --fusilli-args="conv -F 1 --bf16 ..."', + ) + group.add_argument( + "--commands-file", + type=str, + help="File with Fusilli commands (one per line).", + ) + group.add_argument( + "--output-td-spec", + type=Path, + default=Path("tuning-spec.mlir"), + help="Output tuning spec file (default: tuning-spec.mlir).", + ) + group.add_argument( + "--tmp-dir", + type=str, + default="", + help="Temp directory for Fusilli cache. Auto-created if not specified.", + ) + + script_dir = Path(__file__).parent.absolute() + default_driver = ( + script_dir.parent / "build" / "bin" / "benchmarks" / "fusilli_benchmark_driver" + ) + group.add_argument( + "--fusilli-driver", + type=str, + default=str(default_driver), + help=f"Path to fusilli_benchmark_driver (default: {default_driver}).", + ) + group.add_argument( + "--fusilli-num-dispatch-candidates", + type=int, + default=None, + help="Limit top dispatch candidates to benchmark.", + ) + group.add_argument( + "--fusilli-dispatch-benchmark-timeout-mins", + type=float, + default=None, + help="Time budget in minutes per dispatch for benchmarking.", + ) + + if _HAS_LIBTUNER: + # Insert placeholder for libtuner's required input_file positional arg. + argv_with_placeholder = insert_placeholder_input_file(argv) + + original_argv = sys.argv + sys.argv = argv_with_placeholder + try: + args = libtuner.parse_arguments(parser) + finally: + sys.argv = original_argv + + if "--codegen-pipeline" not in argv_with_placeholder: + args.codegen_pipeline = libtuner.CodegenPipelines.llvmgpu_tile_and_fuse + else: + # Fallback: parse only fusilli-specific args (enough for --help). + args, _ = parser.parse_known_args(argv[1:]) + + fusilli_op_args = shlex.split(args.fusilli_args) if args.fusilli_args else [] + + return args, fusilli_op_args + + +def main() -> int: + """Main entry point for the Fusilli tuner.""" + args, fusilli_op_args = parse_args(sys.argv) + _require_libtuner() + + if args.commands_file and fusilli_op_args: + print( + "ERROR: Cannot specify both --commands-file and --fusilli-args", + file=sys.stderr, + ) + return 1 + if not args.commands_file and not fusilli_op_args: + print( + "ERROR: Must specify either --commands-file or --fusilli-args", + file=sys.stderr, + ) + return 1 + + fusilli_path_config = FusilliPathConfig() + fusilli_path_config.base_dir.mkdir(parents=True, exist_ok=True) + + root_logger = libtuner.setup_logging(args, fusilli_path_config) + print(fusilli_path_config.run_log) + + logging.warning("Fusilli Tuner is still experimental") + + if not args.dry_run: + logging.info("Validating devices") + libtuner.validate_devices(args.devices) + logging.info("Validation successful!") + + commands = load_commands(args.commands_file, fusilli_op_args) + + starter_td_spec: Optional[Path] = args.starter_td_spec + for idx, cli_args in enumerate(commands): + msg = f">>> ({idx + 1}/{len(commands)}) {shlex.join(cli_args)}" + logging.info(msg) + + result_spec = process_fusilli_command( + cli_args, + args, + fusilli_path_config, + root_logger, + starter_td_spec, + idx + 1, + ) + + # Chain: best spec from this command becomes starter for next. 
+        # Chain: best spec from this command becomes starter for next.
+        if result_spec:
+            starter_td_spec = result_spec
+
+    return 0
+
+
+if __name__ == "__main__":
+    sys.exit(main())
diff --git a/benchmarks/test_tuner_cache.py b/benchmarks/test_tuner_cache.py
new file mode 100644
index 00000000..755e05c9
--- /dev/null
+++ b/benchmarks/test_tuner_cache.py
@@ -0,0 +1,138 @@
+#!/usr/bin/env python3
+# Copyright 2026 Advanced Micro Devices, Inc.
+#
+# Licensed under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+import os
+import sys
+import tempfile
+import unittest
+from pathlib import Path
+
+# Add benchmarks/ to path so we can import run_tuner.
+# The utility functions under test don't depend on amdsharktuner, and the
+# import guard in run_tuner.py defers the sys.exit to main().
+sys.path.insert(0, str(Path(__file__).parent))
+
+from run_tuner import build_compile_args, find_cached_artifacts, load_commands
+
+
+class TestFindCachedArtifacts(unittest.TestCase):
+    def test_finds_mlir_and_txt(self):
+        """Given a valid cache structure, returns paths to .mlir and .txt files."""
+        with tempfile.TemporaryDirectory() as tmp:
+            base = Path(tmp)
+            graph_dir = base / ".cache" / "fusilli" / "abc123"
+            graph_dir.mkdir(parents=True)
+            mlir_file = graph_dir / "iree-compile-input.mlir"
+            txt_file = graph_dir / "iree-compile-command.txt"
+            mlir_file.write_text("module {}")
+            txt_file.write_text("iree-compile input.mlir -o out.vmfb")
+
+            mlir_path, txt_path = find_cached_artifacts(base)
+            self.assertEqual(mlir_path, mlir_file)
+            self.assertEqual(txt_path, txt_file)
+
+    def test_raises_when_no_cache_dir(self):
+        """Raises FileNotFoundError when .cache/fusilli doesn't exist."""
+        with tempfile.TemporaryDirectory() as tmp:
+            with self.assertRaises(FileNotFoundError):
+                find_cached_artifacts(Path(tmp))
+
+    def test_raises_when_multiple_graph_dirs(self):
+        """Raises FileNotFoundError when multiple graph directories exist."""
+        with tempfile.TemporaryDirectory() as tmp:
+            base = Path(tmp)
+            cache = base / ".cache" / "fusilli"
+            (cache / "hash1").mkdir(parents=True)
+            (cache / "hash2").mkdir(parents=True)
+            with self.assertRaises(FileNotFoundError):
+                find_cached_artifacts(base)
+
+    def test_raises_when_no_mlir_file(self):
+        """Raises FileNotFoundError when no .mlir file exists."""
+        with tempfile.TemporaryDirectory() as tmp:
+            base = Path(tmp)
+            graph_dir = base / ".cache" / "fusilli" / "abc123"
+            graph_dir.mkdir(parents=True)
+            (graph_dir / "cmd.txt").write_text("iree-compile ...")
+            with self.assertRaises(FileNotFoundError):
+                find_cached_artifacts(base)
+
+
+class TestBuildCompileArgs(unittest.TestCase):
+    def test_strips_output_and_stats_flags(self):
+        """Filters -o, scheduling stats flags, and adds tuner flags."""
+        cmd = (
+            "iree-compile input.mlir "
+            "--iree-hal-target-backends=rocm "
+            "--iree-scheduling-dump-statistics-format=json "
+            "--iree-scheduling-dump-statistics-file=stats.json "
+            "-o output.vmfb"
+        )
+        result = build_compile_args(cmd, Path("/tmp/benchmarks"))
+
+        self.assertEqual(result[0], "iree-compile")
+        self.assertIn("--iree-hal-target-backends=rocm", result)
+        # Original "-o output.vmfb" should be stripped.
+        self.assertNotIn("output.vmfb", result)
+        self.assertNotIn("--iree-scheduling-dump-statistics-format=json", result)
+        # Tuner-specific flags should be appended.
+        self.assertIn("--iree-config-add-tuner-attributes", result)
+        self.assertIn("--iree-hal-dump-executable-benchmarks-to", result)
+        # Output redirected to platform null device.
+        idx = result.index("-o")
+        self.assertEqual(result[idx + 1], os.devnull)
+
+    def test_preserves_input_mlir(self):
+        """Keeps the input MLIR path from the original command."""
+        cmd = "iree-compile my_model.mlir --iree-hal-target-backends=rocm -o out.vmfb"
+        result = build_compile_args(cmd, Path("/tmp/bench"))
+        self.assertIn("my_model.mlir", result)
+
+
+class TestLoadCommands(unittest.TestCase):
+    def test_loads_from_args(self):
+        """When no file given, returns fusilli_op_args as single command."""
+        result = load_commands(None, ["conv", "-F", "1", "--bf16"])
+        self.assertEqual(result, [["conv", "-F", "1", "--bf16"]])
+
+    def test_loads_from_file(self):
+        """Reads commands from file, skipping comments and blank lines."""
+        with tempfile.NamedTemporaryFile(mode="w", suffix=".txt", delete=False) as f:
+            f.write("# comment\n")
+            f.write("conv -F 1 --bf16\n")
+            f.write("\n")
+            f.write("matmul -M 1024 -N 1024 -K 1024\n")
+            f.flush()
+            tmp_path = f.name
+
+        try:
+            result = load_commands(tmp_path, [])
+            self.assertEqual(len(result), 2)
+            self.assertEqual(result[0], ["conv", "-F", "1", "--bf16"])
+            self.assertEqual(
+                result[1], ["matmul", "-M", "1024", "-N", "1024", "-K", "1024"]
+            )
+        finally:
+            os.unlink(tmp_path)
+
+    def test_prefers_file_when_both_given(self):
+        """When a file is given, fusilli_op_args is ignored (gating is in main)."""
+        with tempfile.NamedTemporaryFile(mode="w", suffix=".txt", delete=False) as f:
+            f.write("conv -F 1 --bf16\n")
+            f.flush()
+            tmp_path = f.name
+
+        try:
+            result = load_commands(tmp_path, ["matmul", "-M", "16"])
+            self.assertEqual(result, [["conv", "-F", "1", "--bf16"]])
+        finally:
+            os.unlink(tmp_path)
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/benchmarks/test_tuner_runner.sh b/benchmarks/test_tuner_runner.sh
new file mode 100755
index 00000000..02b52ed3
--- /dev/null
+++ b/benchmarks/test_tuner_runner.sh
@@ -0,0 +1,75 @@
+#!/bin/bash
+# Copyright 2026 Advanced Micro Devices, Inc.
+#
+# Licensed under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+set -euo pipefail
+set -x
+
+# Arguments from CMake
+TUNER_SCRIPT="$1"
+BENCHMARK_DRIVER="$2"
+TMP_FILES=()
+
+cleanup() {
+  rm -f "${TMP_FILES[@]}"
+}
+trap cleanup EXIT
+
+# This test is registered only under FUSILLI_SYSTEMS_AMDGPU; libtuner must be
+# importable. If it isn't, the environment is misconfigured (e.g., test.sh's
+# pip install failed) and we fail loudly rather than silently degrading.
+# Mirror the import path run_tuner.py uses, since the top-level package can
+# load successfully while these submodules raise on version mismatch with
+# iree-compiler.
+if ! python3 -c "from amdsharktuner import common, libtuner" >/dev/null 2>&1; then
+  echo "ERROR: amdsharktuner is not importable; cannot run tuner integration tests."
+  echo "       This test is gated on FUSILLI_SYSTEMS_AMDGPU=ON, which implies a"
+  echo "       fully configured tuner environment. Check that build_tools/scripts/test.sh"
+  echo "       ran the amdsharktuner install successfully."
+  python3 -c "from amdsharktuner import common, libtuner" || true
+  exit 1
+fi
+
+# Test 1: Verify --help works and reports the expected libtuner option groups.
+HELP_OUTPUT="$(mktemp)"
+TMP_FILES+=("${HELP_OUTPUT}")
+python3 "${TUNER_SCRIPT}" --help > "${HELP_OUTPUT}" 2>&1
+grep -q "Fusilli Tuner Options" "${HELP_OUTPUT}"
+grep -q "General Options" "${HELP_OUTPUT}"
+grep -q "Candidate Generation Options" "${HELP_OUTPUT}"
+echo "PASSED: run_tuner.py --help"
+
+# Cache extraction unit tests (pure-Python wrapper logic).
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+(cd "${SCRIPT_DIR}" && python3 -m unittest test_tuner_cache -v)
+echo "PASSED: cache extraction unit tests"
+
+# Test 2: Verify error on missing args
+MISSING_ARGS_OUTPUT="$(mktemp)"
+TMP_FILES+=("${MISSING_ARGS_OUTPUT}")
+if python3 "${TUNER_SCRIPT}" --devices hip://0 \
+    --fusilli-driver "${BENCHMARK_DRIVER}" >"${MISSING_ARGS_OUTPUT}" 2>&1; then
+  echo "ERROR: Expected failure when no --fusilli-args or --commands-file given"
+  exit 1
+fi
+grep -q "Must specify either --commands-file or --fusilli-args" "${MISSING_ARGS_OUTPUT}"
+echo "PASSED: run_tuner.py rejects missing args"
+
+# Test 3: Verify error on both args
+CONFLICTING_ARGS_OUTPUT="$(mktemp)"
+TMP_FILES+=("${CONFLICTING_ARGS_OUTPUT}")
+if python3 "${TUNER_SCRIPT}" \
+    --devices hip://0 \
+    --fusilli-driver "${BENCHMARK_DRIVER}" \
+    --fusilli-args "matmul -M 16 -N 16 -K 16 --a_type f32 --b_type f32 --out_type f32" \
+    --commands-file /dev/null >"${CONFLICTING_ARGS_OUTPUT}" 2>&1; then
+  echo "ERROR: Expected failure when both --fusilli-args and --commands-file given"
+  exit 1
+fi
+grep -q "Cannot specify both --commands-file and --fusilli-args" "${CONFLICTING_ARGS_OUTPUT}"
+echo "PASSED: run_tuner.py rejects conflicting args"
+
+echo "ALL TESTS PASSED"
diff --git a/build_tools/scripts/test.sh b/build_tools/scripts/test.sh
index 36e36a19..4244ad2b 100755
--- a/build_tools/scripts/test.sh
+++ b/build_tools/scripts/test.sh
@@ -84,6 +84,16 @@ while [[ $# -gt 0 ]]; do
   esac
 done
 
+# Install amdsharktuner for the AMDGPU-only tuner tests. PyPI lags the IREE
+# RC pinned in version.json, so install from GitHub. Tracks @main deliberately
+# (no manual pin-bump); pin a SHA here, in README.md, and in run_tuner.py if
+# upstream breaks CI.
+if grep -q "^FUSILLI_SYSTEMS_AMDGPU:BOOL=ON$" "${BUILD_DIR}/CMakeCache.txt" 2>/dev/null; then
+  pip install --pre \
+    "amdsharktuner @ git+https://github.com/nod-ai/amd-shark-ai.git@main#subdirectory=amdsharktuner" \
+    --find-links https://iree.dev/pip-release-links.html
+fi
+
 if [[ "${BACKEND}" == "cli" ]]; then
   export FUSILLI_COMPILE_BACKEND_USE_CLI=1
   echo "=== Fusilli test: backend=cli ==="