-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathtest_MMult.c
More file actions
97 lines (71 loc) · 2.34 KB
/
test_MMult.c
File metadata and controls
97 lines (71 loc) · 2.34 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
#include <stdio.h>
// #include <malloc.h>
#include <stdlib.h>
#include "parameters.h"
/* Routines implemented in other translation units.  The notes below only
   describe how main() in this file uses them; see their definitions for the
   authoritative contracts.  Every matrix is passed as a pointer plus a
   leading dimension (lda/ldb/ldc) -- presumably column-major storage, since
   main() allocates (leading dimension) x (number of columns) doubles. */

/* Reference multiply.  main() passes A (m x k), B (k x n) and a copy of the
   initial C (m x n); the contents of c afterwards are treated as the
   expected answer. */
void REF_MMult( int m, int n, int k, double *a, int lda,
                double *b, int ldb, double *c, int ldc );

/* Implementation under test; called with the same argument layout as
   REF_MMult and timed by main(). */
void MY_MMult( int m, int n, int k, double *a, int lda,
               double *b, int ldb, double *c, int ldc );

/* main() uses this as copy_matrix( m, n, src, ld_src, dst, ld_dst ) to
   duplicate the initial C before each run -- i.e. a is the source and b the
   destination (inferred from the call sites; confirm against the
   definition). */
void copy_matrix( int m, int n, double *a, int lda, double *b, int ldb );

/* Fills the m x n matrix at a (leading dimension lda) with random values,
   per the "Generate random matrices" step in main(). */
void random_matrix( int m, int n, double *a, int lda );

/* Returns a scalar measure of the difference between two m x n matrices;
   main() prints it as the last column of each output row.  The exact norm
   is defined by the implementation. */
double compare_matrices( int m, int n, double *a, int lda, double *b, int ldb );

/* Timestamp as a double; main() subtracts two readings to get the elapsed
   time (presumably in seconds, given how the rate is computed). */
double dclock( void );
/*
 * Test driver for MY_MMult.
 *
 * For every problem size p from PFIRST to PLAST in steps of PINC:
 *   - choose m, n, k and the leading dimensions (a parameter equal to -1
 *     in parameters.h means "derive it from p" / "use the matrix height");
 *   - allocate and randomly fill A (m x k), B (k x n) and the initial C;
 *   - run REF_MMult once to obtain the expected result;
 *   - run MY_MMult NREPEATS times on a fresh copy of the initial C and keep
 *     the fastest time;
 *   - print one row "p  rate  diff", where rate = 2*m*n*k*1e-9 / best time
 *     (GFLOPS if dclock() reports seconds) and diff is the value returned
 *     by compare_matrices.
 *
 * The rows are wrapped in "MY_MMult = [ ... ];" on stdout.
 *
 * Returns 0 on success.  If any matrix allocation fails, a message is
 * written to stderr and the process exits with EXIT_FAILURE.
 */
int main( void )
{
  int p, m, n, k, lda, ldb, ldc, rep;
  double dtime, gflops, diff;
  double dtime_best = 0.0;   /* defined even if NREPEATS < 1 */
  double *a, *b, *c, *cref, *cold;
  size_t a_elems, b_elems, c_elems;

  printf( "MY_MMult = [\n" );

  for ( p = PFIRST; p <= PLAST; p += PINC ) {
    m = ( M == -1 ? p : M );
    n = ( N == -1 ? p : N );
    k = ( K == -1 ? p : K );

    gflops = 2.0 * m * n * k * 1.0e-09;

    lda = ( LDA == -1 ? m : LDA );
    ldb = ( LDB == -1 ? k : LDB );
    ldc = ( LDC == -1 ? m : LDC );

    /* Element counts are computed in size_t: the products lda*(k+1) and
       ldb*n can exceed INT_MAX for large problem sizes, and signed
       overflow is undefined behaviour. */
    /* Note: A gets one extra column so that prefetching beyond the matrix
       does not cause a segfault. */
    a_elems = (size_t) lda * ( (size_t) k + 1 );
    b_elems = (size_t) ldb * (size_t) n;
    c_elems = (size_t) ldc * (size_t) n;

    /* Allocate space for the matrices */
    a    = malloc( a_elems * sizeof *a );
    b    = malloc( b_elems * sizeof *b );
    c    = malloc( c_elems * sizeof *c );
    cold = malloc( c_elems * sizeof *cold );
    cref = malloc( c_elems * sizeof *cref );

    if ( a == NULL || b == NULL || c == NULL || cold == NULL || cref == NULL ) {
      fprintf( stderr, "test_MMult: out of memory for problem size %d\n", p );
      /* free( NULL ) is a no-op, so release whatever did succeed. */
      free( a );
      free( b );
      free( c );
      free( cold );
      free( cref );
      exit( EXIT_FAILURE );
    }

    /* Generate random matrices A, B, Cold */
    random_matrix( m, k, a, lda );
    random_matrix( k, n, b, ldb );
    random_matrix( m, n, cold, ldc );

    /* The reference run starts from the same initial C as the timed runs. */
    copy_matrix( m, n, cold, ldc, cref, ldc );

    /* Run the reference implementation so the answers can be compared */
    REF_MMult( m, n, k, a, lda, b, ldb, cref, ldc );

    /* Time the "optimized" implementation, keeping the fastest repeat */
    for ( rep = 0; rep < NREPEATS; rep++ ) {
      /* Restore C so every repeat performs the same computation. */
      copy_matrix( m, n, cold, ldc, c, ldc );

      dtime = dclock();
      MY_MMult( m, n, k, a, lda, b, ldb, c, ldc );
      dtime = dclock() - dtime;

      if ( rep == 0 || dtime < dtime_best ) {
        dtime_best = dtime;
      }
    }

    diff = compare_matrices( m, n, c, ldc, cref, ldc );

    printf( "%d %le %le \n", p, gflops / dtime_best, diff );
    fflush( stdout );   /* make each row visible as soon as it is ready */

    free( a );
    free( b );
    free( c );
    free( cold );
    free( cref );
  }

  printf( "];\n" );

  return 0;
}