-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathinitializeGPUsBindWithCPU.cpp
More file actions
134 lines (101 loc) · 2.89 KB
/
initializeGPUsBindWithCPU.cpp
File metadata and controls
134 lines (101 loc) · 2.89 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
#include <sstream>
#include <iostream>
#include <mpi.h>
#include "openacc.h"
#include <string>
#define RED "\033[01;31m"
#define RESET "\033[22;0m"
#define OPENACC 1
using namespace std;
// Jaber Hasbestan, PhD
// compile withe mpi enabled openacc
// alse used c++11 standard
/*! \brief
* code to extract node number from processor name
* in other words extracts integers from string
*/
int extractIntegerWords(string str) {
std::string temp;
int number = 0;
for (unsigned int i = 0; i < str.size(); i++) {
if (isdigit(str[i])) {
for (unsigned int a = i; a < str.size(); a++) {
temp += str[a];
}
break;
}
}
std::istringstream stream(temp);
stream >> number;
return (number);
}
/*! \brief Helper Function to
* check one-on-one mapping between CPU-GPU.
*
* 1. Processor name is obtained using MPI_Get_processor_name()
* 2. Node numbers are extracted from the string
* 3. Local nodal communicator is constricted using MPI_Comm_split()
* 4. The size of this new communicator is returned to OpenACCINIT
* 5. The size of the nodl communicator should be equal to number of GPU's
* 6. If not, error code 1 is thrown
*/
int createNodalCommunicator() {
MPI_Comm nodalComm;
int my_rank, np, my_new_rank, new_np;
int color, key, len;
MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);
MPI_Comm_size(MPI_COMM_WORLD, &np);
char pname[256];
MPI_Get_processor_name(pname, &len);
// some stream wizardry to extract node number from the proc name
stringstream ss;
string s;
ss << pname;
ss >> s;
// cout<<s<<endl;
color = extractIntegerWords(s);
key = my_rank;
MPI_Comm_split(MPI_COMM_WORLD, color, key, &nodalComm);
MPI_Comm_rank(nodalComm, &my_new_rank);
MPI_Comm_size(nodalComm, &new_np);
cout << "Process" << my_rank << " (in COMM_WORLD): split communicator has "
<< new_np << " processe " << endl;
MPI_Comm_free(&nodalComm);
return (new_np);
}
/*! \brief
*
* Perform CPU-GPU Binding
*
*/
int OPENACC_Init(int &my_rank, int &com_size)
{
int result = 1;
#if (OPENACC)
int nodalComSize;
acc_init(acc_device_nvidia); // OpenACC call
const int num_dev = acc_get_num_devices(acc_device_nvidia); // #GPUs
nodalComSize = createNodalCommunicator();
cout << nodalComSize << " " << num_dev << endl;
if ((nodalComSize == num_dev) && (num_dev != 0)) {
const int dev_id = my_rank % num_dev;
acc_set_device_num(dev_id, acc_device_nvidia); // assign GPU to one MPI process
cout << "MPI process " << my_rank << " is assigned to GPU " << dev_id << "\n";
}
else
{
cout << RED << " unsuccessful mapping " << RESET << endl;
exit(1);
}
#endif
return (result);
}
// driver
int main(int argc, char *argv[]) {
int my_rank, np;
MPI_Init(&argc, &argv);
MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);
MPI_Comm_size(MPI_COMM_WORLD, &np);
OPENACC_Init(my_rank, np);
MPI_Finalize();
}