-
Notifications
You must be signed in to change notification settings - Fork 0
/
01-add-error-handling-solution.cu
85 lines (66 loc) · 1.67 KB
/
01-add-error-handling-solution.cu
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
#include <stdio.h>
/*
 * Host-side initializer: stores each element's own index in it,
 * so that a[k] == k for every k in [0, N).
 *
 *   a - array with room for at least N ints (must be host-accessible)
 *   N - number of elements to write; nothing is written when N <= 0
 */
void init(int *a, int N)
{
    int pos = 0;
    while (pos < N)
    {
        a[pos] = pos;
        ++pos;
    }
}
/*
 * Kernel: doubles, in place, each of the first N elements of `a`.
 *
 * Each thread starts at its global index and advances by the total
 * number of launched threads (gridDim.x * blockDim.x), so every
 * element is visited exactly once for any 1D launch configuration.
 *
 *   a - array of at least N ints, accessible from the device
 *   N - number of elements to double; the kernel does nothing when N <= 0
 *
 * Index arithmetic is done in size_t: blockIdx.x * blockDim.x and
 * gridDim.x * blockDim.x are 32-bit products that could wrap, and an
 * `int` loop counter could overflow on `i += stride` when N is close
 * to INT_MAX.
 *
 * An earlier version of this loop used the bound `N + stride`, which
 * accessed elements outside the range of `a`; the bound must be N.
 */
__global__
void doubleElements(int *a, int N)
{
    // A non-positive N must be rejected before it is converted to an
    // unsigned type, where it would become a huge element count.
    if (N <= 0) return;

    const size_t count  = (size_t)N;
    const size_t stride = (size_t)gridDim.x * blockDim.x;
    const size_t first  = (size_t)blockIdx.x * blockDim.x + threadIdx.x;

    for (size_t i = first; i < count; i += stride)
    {
        a[i] *= 2;
    }
}
/*
 * Host-side verification: reports whether a[k] == 2 * k holds for
 * every k in [0, N).
 *
 *   a - array of at least N ints (must be host-accessible)
 *   N - number of elements to inspect
 *
 * Returns true when all inspected elements match (including the
 * case where there is nothing to inspect), false at the first mismatch.
 */
bool checkElementsAreDoubled(int *a, int N)
{
    int pos = 0;

    // Advance while elements match; stop at the first mismatch or at N.
    while (pos < N && a[pos] == pos * 2)
    {
        ++pos;
    }

    // The scan reached the end only if no mismatch stopped it early.
    return pos >= N;
}
/*
 * Allocates a managed array of N ints, fills it with 0..N-1 on the
 * host, doubles every element on the GPU, verifies the result on the
 * host, and reports any CUDA errors encountered along the way.
 *
 * Returns 0 only when every CUDA call succeeded and all elements were
 * doubled; returns 1 otherwise.
 */
int main()
{
    const int N = 10000;
    const size_t size = N * sizeof(int);

    int *a = NULL;

    /*
     * Without this check a failed allocation would leave `a` unusable
     * and `init` would dereference it.
     */
    cudaError_t allocErr = cudaMallocManaged(&a, size);
    if (allocErr != cudaSuccess)
    {
        printf("Error: %s\n", cudaGetErrorString(allocErr));
        return 1;
    }

    init(a, N);

    /*
     * An earlier version launched with 2048 threads per block, which
     * is more than the maximum number of threads per block (1024).
     */
    size_t threads_per_block = 1024;
    size_t number_of_blocks = 32;

    doubleElements<<<number_of_blocks, threads_per_block>>>(a, N);

    /*
     * cudaGetLastError() reports problems with the launch itself;
     * cudaDeviceSynchronize() waits for the kernel and reports errors
     * that occurred while it was executing asynchronously.
     */
    cudaError_t syncErr = cudaGetLastError();
    cudaError_t asyncErr = cudaDeviceSynchronize();

    /*
     * Print errors should they exist.
     */
    if (syncErr != cudaSuccess) printf("Error: %s\n", cudaGetErrorString(syncErr));
    if (asyncErr != cudaSuccess) printf("Error: %s\n", cudaGetErrorString(asyncErr));

    bool areDoubled = checkElementsAreDoubled(a, N);
    printf("All elements were doubled? %s\n", areDoubled ? "TRUE" : "FALSE");

    /*
     * Report a failed free only when the synchronize call succeeded,
     * so an execution error already printed above is not repeated.
     */
    cudaError_t freeErr = cudaFree(a);
    if (freeErr != cudaSuccess && asyncErr == cudaSuccess)
    {
        printf("Error: %s\n", cudaGetErrorString(freeErr));
    }

    bool ok = syncErr == cudaSuccess
           && asyncErr == cudaSuccess
           && freeErr == cudaSuccess
           && areDoubled;
    return ok ? 0 : 1;
}