|
| 1 | +import argparse |
| 2 | +from dataclasses import dataclass |
| 3 | +from pathlib import Path |
| 4 | +from typing import Any |
| 5 | + |
| 6 | +import pyperf |
| 7 | + |
| 8 | + |
| 9 | +@dataclass(frozen=True) |
| 10 | +class BenchmarkStats: |
| 11 | + name: str |
| 12 | + min: float |
| 13 | + median: float |
| 14 | + max: float |
| 15 | + |
| 16 | + |
def extract_stats(bench: Any) -> BenchmarkStats:
    """Extract min, median, and max from a pyperf Benchmark object."""
    import statistics

    try:
        # Preferred path: pyperf's built-in aggregate methods, if present.
        lo, mid, hi = float(bench.min()), float(bench.median()), float(bench.max())
    except (AttributeError, TypeError):
        # Fallback: pool the raw values of every run and aggregate ourselves.
        raw: list[float] = []
        for run in bench.get_runs():
            # Calibration runs only carry 'warmups', not 'values' — skip them.
            if hasattr(run, "values") and run.values:
                raw.extend(run.values)

        if raw:
            lo = float(min(raw))
            mid = float(statistics.median(raw))
            hi = float(max(raw))
        else:
            # No usable values at all: collapse every stat to the mean.
            lo = mid = hi = float(bench.mean())

    return BenchmarkStats(
        name=bench.get_name(),
        min=lo,
        median=mid,
        max=hi,
    )
| 48 | + |
| 49 | + |
def load_benchmark_stats(path: Path) -> dict[str, BenchmarkStats]:
    """Load a pyperf JSON file and extract statistics for each benchmark."""
    suite = pyperf.BenchmarkSuite.load(str(path))
    # Key each benchmark's stats by its reported name.
    return {bench.get_name(): extract_stats(bench) for bench in suite}
| 57 | + |
| 58 | + |
def format_time(seconds: float) -> str:
    """Format time in seconds to a readable string."""
    # Scale into the first unit whose threshold the value falls under.
    for threshold, scale, unit in ((1e-6, 1e9, "ns"), (1e-3, 1e6, "μs"), (1.0, 1e3, "ms")):
        if seconds < threshold:
            return f"{seconds * scale:.3f} {unit}"
    return f"{seconds:.3f} s"
| 69 | + |
| 70 | + |
def get_time_unit(seconds: float) -> str:
    """Get the appropriate unit for a time value in seconds."""
    # Same thresholds as format_time; first matching bound wins.
    bounds = ((1e-6, "ns"), (1e-3, "μs"), (1.0, "ms"))
    for limit, unit in bounds:
        if seconds < limit:
            return unit
    return "s"
| 81 | + |
| 82 | + |
def print_table(
    a_stats: dict[str, BenchmarkStats],
    b_stats: dict[str, BenchmarkStats],
    a_label: str,
    b_label: str,
    csv: bool = False,
) -> None:
    """Print a formatted table (or CSV) comparing two benchmark result sets.

    Only benchmarks present in both result sets are compared.  The ratio
    column reports B_median / A_median, i.e. how many times faster A is;
    values below 1.0 are inverted and shown as "<n>x slower".
    """
    # Find common benchmarks
    common_names = sorted(set(a_stats.keys()) & set(b_stats.keys()))
    if not common_names:
        print("ERROR: No common benchmark names found between the two files.")
        return

    if csv:
        # CSV output: raw values in seconds, scientific notation.
        print("Benchmark Name,"
              f"{a_label}_min (s),{a_label}_median (s),{a_label}_max (s),"
              f"{b_label}_min (s),{b_label}_median (s),{b_label}_max (s),"
              f"B/A_ratio")
        for name in common_names:
            a = a_stats[name]
            b = b_stats[name]
            ratio = b.median / a.median if a.median > 0 else float("inf")
            print(f'"{name}",{a.min:.9e},{a.median:.9e},{a.max:.9e},'
                  f'{b.min:.9e},{b.median:.9e},{b.max:.9e},'
                  f'{ratio:.6e}')
        return

    # Pick one display unit for every value, based on the median of all
    # median timings, so a typical row has a readable magnitude.
    all_medians = [a_stats[name].median for name in common_names] + \
                  [b_stats[name].median for name in common_names]
    sample_median = sorted(all_medians)[len(all_medians) // 2]  # median of medians
    common_unit = get_time_unit(sample_median)

    # Convert seconds to the chosen display unit.
    def to_common_unit(seconds: float) -> float:
        if common_unit == "ns":
            return seconds * 1e9
        elif common_unit == "μs":
            return seconds * 1e6
        elif common_unit == "ms":
            return seconds * 1e3
        else:
            return seconds

    # Value-column width: widest formatted median across both result sets.
    all_med_values = [f"{to_common_unit(a_stats[name].median):.3f}" for name in common_names] + \
                     [f"{to_common_unit(b_stats[name].median):.3f}" for name in common_names]
    med_width = max(len(v) for v in all_med_values)

    # Benchmark-name column: at least 25 chars, grown to fit the longest name.
    max_name_len = max(len(name) for name in common_names)
    name_col_width = max(25, max_name_len + 1)

    stats_col_width = med_width

    # Ratio column: sized for values like "2.10x"; longer strings such as
    # "0.50x slower" simply overflow their column.
    ratio_header = f"{a_label} faster by"
    ratio_col_width = 10

    # Build the header first so the separator lines match its actual length.
    header_line = (
        f"{'Benchmark Name':<{name_col_width}} | "
        f"{'A':>{stats_col_width}} | "
        f"{'B':>{stats_col_width}} | "
        f"{ratio_header:>{ratio_col_width}}"
    )
    separator_width = len(header_line)

    print(f"\nBenchmark Comparison (median {common_unit}): {a_label} vs {b_label}")
    print("=" * separator_width)

    # Simple column headers: just "A" and "B"
    print(header_line)
    print("-" * separator_width)

    # One row per benchmark: median values only (numbers only, no units).
    for name in common_names:
        a = a_stats[name]
        b = b_stats[name]

        a_med_str = f"{to_common_unit(a.median):.3f}".rjust(stats_col_width)
        b_med_str = f"{to_common_unit(b.median):.3f}".rjust(stats_col_width)

        # Ratio B/A: > 1 means A is faster, < 1 means A is slower (B faster).
        ratio = b.median / a.median if a.median > 0 else float("inf")
        if ratio == float("inf"):
            ratio_str = "inf"
        elif ratio >= 1.0:
            ratio_str = f"{ratio:.2f}x"
        elif b.median > 0:
            # A is slower: invert so the column reads "<n>x slower".
            ratio_str = f"{a.median / b.median:.2f}x slower"
        else:
            # b.median == 0 while a.median > 0: inverting would divide by
            # zero (latent ZeroDivisionError in the original); show 0.00x.
            ratio_str = "0.00x"

        print(f"{name:<{name_col_width}} | {a_med_str} | {b_med_str} | {ratio_str:>{ratio_col_width}}")

    print("=" * separator_width)
| 192 | + |
| 193 | + |
def main(argv: list[str] | None = None) -> int:
    """CLI entry point: parse arguments, load both files, print the comparison."""
    parser = argparse.ArgumentParser(
        description="Compare two pyperf JSON files and print a statistics table"
    )
    parser.add_argument("a", type=Path, help="First pyperf JSON (e.g. arm64.json)")
    parser.add_argument("b", type=Path, help="Second pyperf JSON (e.g. x86_64.json)")
    parser.add_argument("--a-label", default=None, help="Label for first file (default: filename stem)")
    parser.add_argument("--b-label", default=None, help="Label for second file (default: filename stem)")
    parser.add_argument("--csv", action="store_true", help="Output as CSV instead of formatted table")
    ns = parser.parse_args(argv)

    stats_a = load_benchmark_stats(ns.a)
    stats_b = load_benchmark_stats(ns.b)

    # Fall back to the filename stem when no explicit label was given.
    label_a = ns.a_label if ns.a_label else ns.a.stem
    label_b = ns.b_label if ns.b_label else ns.b.stem

    print_table(stats_a, stats_b, label_a, label_b, csv=ns.csv)
    return 0
| 213 | + |
| 214 | + |
| 215 | +if __name__ == "__main__": |
| 216 | + raise SystemExit(main()) |
| 217 | + |
0 commit comments