-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathplot_runtimes.py
More file actions
116 lines (100 loc) · 3.95 KB
/
plot_runtimes.py
File metadata and controls
116 lines (100 loc) · 3.95 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
import os
import subprocess
import re
import numpy as np
import matplotlib.pyplot as plt
# Problem sizes to benchmark, one (rowsA, colsA, colsB) triple per run.
matrix_sizes = [
    (2000, 1500, 1000),
    (4000, 3000, 2000),
    (6000, 4000, 3000),
    (8000, 5000, 3000),
    (10000, 5000, 5000),
    (10000, 5000, 10000),
]

# CUDA source file behind each implementation label.
source_files = {
    "cuBLAS": "float_cublas.cu",
    "Simple": "float_cuda_simple_matrix_multiplication.cu",
    "Tiled": "float_cuda_tiled_matrix_multiplication.cu",
    "Optimized": "float_cuda_2d_tiled_register_cache_matrix_multiplication.cu",
}

# Path of the binary built for each label (lower-cased label + fixed suffix).
executables = {name: f"./{name.lower()}_matrix_multiply" for name in source_files}

# Regex whose first group captures the runtime printed by each executable.
runtime_pattern = r"Matrix multiplication completed in ([0-9.]+) seconds"
# Build every CUDA source into its benchmark binary
def compile_executables():
    """Compile each entry of ``source_files`` with ``nvcc``.

    The output path for every source is taken from ``executables``; all
    builds are linked with ``-lcudart`` and ``-lcublas``.

    Raises:
        RuntimeError: If ``nvcc`` returns a non-zero exit status for a source.
            The captured compiler stderr is printed before raising.
    """
    for name, source in source_files.items():
        print(f"Compiling {name}...")
        nvcc_command = ["nvcc", source, "-o", executables[name], "-lcudart", "-lcublas"]
        build = subprocess.run(nvcc_command, capture_output=True, text=True)
        if build.returncode == 0:
            continue
        # First failing build aborts the whole run.
        print(f"Compilation failed for {name}: {build.stderr}")
        raise RuntimeError(f"Failed to compile {source}")
# Function to generate matrices
def generate_matrices(rowsA, colsA, colsB, matrixA, matrixB):
    """Run ``generate.py`` to produce the input matrix files for one benchmark.

    The arguments are forwarded to ``python3 generate.py`` on the command
    line in the order given (the three dimensions converted to strings).

    Args:
        rowsA: First dimension argument passed to the generator.
        colsA: Second dimension argument passed to the generator.
        colsB: Third dimension argument passed to the generator.
        matrixA: File name passed as the fourth argument.
        matrixB: File name passed as the fifth argument.

    Raises:
        RuntimeError: If the generator exits with a non-zero status, so the
            benchmark never runs against stale or missing matrix files.
    """
    command = [
        "python3", "generate.py",
        str(rowsA), str(colsA), str(colsB),
        matrixA, matrixB,
    ]
    result = subprocess.run(command)
    if result.returncode != 0:
        raise RuntimeError(
            f"generate.py failed with exit code {result.returncode} "
            f"for sizes {rowsA}x{colsA} * {colsA}x{colsB}"
        )
# Function to measure runtime
def measure_runtime(executable, matrixA, matrixB, result_matrix, num_iterations=10):
    """Run ``executable`` repeatedly and summarise the runtimes it reports.

    Each iteration invokes ``executable matrixA matrixB result_matrix``,
    captures its stdout and searches it with ``runtime_pattern``. Runs whose
    output does not match are reported on stdout and left out of the
    statistics.

    Args:
        executable: Path of the program to run.
        matrixA: First file-name argument passed to the program.
        matrixB: Second file-name argument passed to the program.
        result_matrix: Third file-name argument passed to the program.
        num_iterations: Number of times the program is run.

    Returns:
        A ``(mean, half_range)`` tuple, where ``half_range`` is
        ``(max - min) / 2`` of the parsed runtimes, or ``(None, None)`` when
        no run produced a parsable runtime.
    """
    runtimes = []
    for _ in range(num_iterations):
        result = subprocess.run(
            [executable, matrixA, matrixB, result_matrix],
            capture_output=True,
            text=True,
        )
        match = re.search(runtime_pattern, result.stdout)
        if match:
            runtimes.append(float(match.group(1)))
        else:
            print(f"Failed to extract runtime for {executable}: {result.stdout}")

    # Guard explicitly: in `a, b if cond else c` the conditional applies only
    # to `b`, so a one-line form would return (nan, (None, None)) here.
    if not runtimes:
        return None, None
    return np.mean(runtimes), (np.max(runtimes) - np.min(runtimes)) / 2
# Run every executable on every problem size
def benchmark():
    """Time each executable for each entry of ``matrix_sizes``.

    For every size the input matrices are regenerated, then each executable
    is measured with ``measure_runtime``.

    Returns:
        A ``(results, total_operations)`` tuple. ``results`` maps each
        executable label to a list with one mean runtime (or ``None`` on
        failure) per size; ``total_operations`` holds
        ``rowsA * colsA * colsB`` for each size, in the same order.
    """
    # The same scratch files are reused for every size.
    matrixA = "matrixA.txt"
    matrixB = "matrixB.txt"
    result_matrix = "result_matrix.txt"

    results = {key: [] for key in executables}
    total_operations = []

    for rowsA, colsA, colsB in matrix_sizes:
        ops = rowsA * colsA * colsB  # Total operations for matrix multiplication
        total_operations.append(ops)
        print(f"\nBenchmarking for {ops} operations ({rowsA}x{colsA} * {colsA}x{colsB})")

        generate_matrices(rowsA, colsA, colsB, matrixA, matrixB)

        for name, executable in executables.items():
            print(f"Running {name}...")
            avg_runtime, uncertainty = measure_runtime(executable, matrixA, matrixB, result_matrix)
            if avg_runtime is None:
                # Keep a placeholder so every series stays aligned with total_operations.
                print(f"{name} failed to produce a valid runtime.")
                results[name].append(None)
                continue
            print(f"{name}: {avg_runtime:.6f} ± {uncertainty:.6f} seconds")
            results[name].append(avg_runtime)

    return results, total_operations
# Draw runtime curves for all implementations
def plot_results(results, total_operations):
    """Plot runtime against operation count on log-log axes.

    One line is drawn per entry of ``results`` that contains at least one
    truthy runtime. The figure is written to ``benchmark_results.png`` and
    then shown.

    Args:
        results: Mapping of label to a list of runtimes (entries may be ``None``).
        total_operations: X values shared by every series.
    """
    plt.figure(figsize=(10, 6))

    for name, runtimes in results.items():
        if not any(runtimes):
            # Nothing usable was measured for this implementation.
            continue
        plt.plot(total_operations, runtimes, label=name, marker='o')

    plt.xscale("log")
    plt.yscale("log")
    plt.xlabel("Total Operations (FLOPs)")
    plt.ylabel("Runtime (seconds)")
    plt.title("Matrix Multiplication Runtimes vs Total Operations")
    plt.legend()
    plt.grid()
    plt.tight_layout()

    plt.savefig("benchmark_results.png")
    plt.show()
# Main function
if __name__ == "__main__":
    # Pipeline: build the executables, benchmark them, then plot the results.
    try:
        compile_executables()
        results, total_operations = benchmark()
        plot_results(results, total_operations)
    except Exception as e:
        print(f"Error: {e}")
        # Exit non-zero so shells and CI can tell the run failed; previously
        # the script printed the error but still exited with status 0.
        raise SystemExit(1)