-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathVectorAddition.cu
More file actions
38 lines (36 loc) · 1.07 KB
/
VectorAddition.cu
File metadata and controls
38 lines (36 loc) · 1.07 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
%%cu
#include <stdio.h>
#include <stdlib.h>
#define BS 8
#define N 10
/*
 * Writes the first n elements of A to stdout as decimal integers,
 * each one followed by a single space. No newline is emitted.
 * Nothing is printed when n <= 0.
 */
void print(int *A, int n)
{
    int idx = 0;
    while (idx < n)
    {
        printf("%d ", A[idx]);
        ++idx;
    }
}
/*
 * Element-wise vector sum: C[i] = A[i] + B[i] for every i in [0, n).
 *
 * Parameters:
 *   A, B - input arrays, each holding at least n ints
 *   C    - output array with room for at least n ints
 *   n    - number of elements to process; n <= 0 is a no-op
 *
 * The pointers are dereferenced inside the kernel, so they must refer to
 * memory the device can access.
 *
 * Launch layout: only the x dimension of the grid and of the block is used.
 * The kernel is written as a grid-stride loop, so any launch configuration
 * with at least one thread produces the full result; the grid no longer has
 * to supply one thread per element. No shared memory is used.
 */
__global__ void addition( int *A,int *B,int *C, int n)
{
    /* Guard first: casting a negative n to size_t would yield a huge bound. */
    if (n <= 0) return;

    /* Index arithmetic is done in size_t so the product of block index and
       block size cannot wrap before it is compared against n. */
    const size_t count  = (size_t)n;
    const size_t stride = (size_t)gridDim.x * blockDim.x;

    for (size_t i = (size_t)blockIdx.x * blockDim.x + threadIdx.x;
         i < count;
         i += stride)
    {
        C[i] = A[i] + B[i];
    }
}
/*
 * Reports a failed CUDA runtime call on stderr.
 * Returns 1 when status is an error, 0 when it is cudaSuccess.
 */
static int reportCudaFailure(cudaError_t status, const char *what)
{
    if (status == cudaSuccess) return 0;
    fprintf(stderr, "CUDA error during %s: %s\n", what, cudaGetErrorString(status));
    return 1;
}

/*
 * Adds two N-element integer vectors on the GPU and prints the result.
 *
 * Steps: fill host arrays A (i * 10) and B (i * 20), copy them to device
 * buffers, launch the addition kernel with BS threads per block, copy the
 * sum back into C, and print it.
 *
 * Returns 0 on success and 1 if a host allocation or any CUDA call fails;
 * every buffer is released on both paths.
 */
int main(void)
{
    /* All locals are declared (and initialised) up front so that the
       `goto cleanup` jumps below never cross an initialisation. */
    int n = N;
    int threadsPerBlock = BS;
    int blocksPerGrid = (n + threadsPerBlock - 1) / threadsPerBlock; /* ceil(n / BS) */
    size_t bytes = (size_t)n * sizeof(int);
    int *A = NULL, *B = NULL, *C = NULL;
    int *dA = NULL, *dB = NULL, *dC = NULL;
    int exitCode = 1;

    A = (int *)malloc(bytes);
    B = (int *)malloc(bytes);
    C = (int *)malloc(bytes);
    if (A == NULL || B == NULL || C == NULL)
    {
        fprintf(stderr, "host allocation of %d ints failed\n", n);
        goto cleanup;
    }

    for (int i = 0; i < n; i++) A[i] = i * 10;
    for (int i = 0; i < n; i++) B[i] = i * 20;

    if (reportCudaFailure(cudaMalloc((void **)&dA, bytes), "cudaMalloc(dA)")) goto cleanup;
    if (reportCudaFailure(cudaMalloc((void **)&dB, bytes), "cudaMalloc(dB)")) goto cleanup;
    if (reportCudaFailure(cudaMalloc((void **)&dC, bytes), "cudaMalloc(dC)")) goto cleanup;

    if (reportCudaFailure(cudaMemcpy(dA, A, bytes, cudaMemcpyHostToDevice), "copy A to device")) goto cleanup;
    if (reportCudaFailure(cudaMemcpy(dB, B, bytes, cudaMemcpyHostToDevice), "copy B to device")) goto cleanup;

    addition<<<blocksPerGrid, threadsPerBlock>>>(dA,dB,dC,n);
    /* A launch does not return a status; an invalid configuration is only
       visible through cudaGetLastError(). */
    if (reportCudaFailure(cudaGetLastError(), "addition kernel launch")) goto cleanup;

    /* The blocking copy below is also where a fault raised while the kernel
       was running gets reported. */
    if (reportCudaFailure(cudaMemcpy(C, dC, bytes, cudaMemcpyDeviceToHost), "copy C to host")) goto cleanup;

    print(C,n);
    exitCode = 0;

cleanup:
    /* cudaFree and free both accept NULL, so this is safe after a partial setup. */
    cudaFree(dA); free(A);
    cudaFree(dB); free(B);
    cudaFree(dC); free(C);
    return exitCode;
}