// mandelbrot-dyn.cpp
// Host driver: compiles mandelbrot-dyn.cu at run time with NVRTC, links the
// resulting PTX through the CUDA driver API and launches mandelbrot_block_k.
#include <cuda.h>
#include <nvrtc.h>
#include <vector_types.h>
#include <vector_functions.h>
#include <omp.h>
#include <string>
#include <cstring>
#include <fstream>
#include <sstream>
#include <cstdio>
#include <iomanip>
#include <iostream>
/* Runs the NVRTC call `x` once. On any status other than NVRTC_SUCCESS it
 * prints the stringified call plus nvrtcGetErrorString() to std::cerr and
 * terminates the process with exit(1). Wrapped in do/while(0) so it behaves
 * like a single statement. */
#define NVRTC_SAFE_CALL(x)                                           \
    do {                                                             \
        const nvrtcResult nvrtcStatus = (x);                         \
        if (nvrtcStatus == NVRTC_SUCCESS)                            \
            break;                                                   \
        std::cerr << "\nerror: " #x " failed with error "            \
                  << nvrtcGetErrorString(nvrtcStatus) << '\n';       \
        exit(1);                                                     \
    } while(0)
/* Runs the CUDA driver API call `x` once. On any status other than
 * CUDA_SUCCESS it prints the stringified call plus the error name to
 * std::cerr and terminates the process with exit(1).
 *
 * cuGetErrorName() leaves the name pointer NULL for codes it does not
 * recognise; streaming a NULL const char* is undefined behaviour, so a fixed
 * fallback string is substituted in that case. */
#define CUDA_SAFE_CALL(x)                                                  \
    do {                                                                   \
        CUresult result = (x);                                             \
        if (result != CUDA_SUCCESS) {                                      \
            const char *msg = NULL;                                        \
            if (cuGetErrorName(result, &msg) != CUDA_SUCCESS || msg == NULL) \
                msg = "unknown CUDA error";                                \
            std::cerr << "\nerror: " #x " failed with error "              \
                      << msg << '\n';                                      \
            exit(1);                                                       \
        }                                                                  \
    } while(0)
/** Wall-clock time, in seconds, that main() measures with omp_get_wtime()
 *  around the kernel launch and the context synchronize that follows it. */
double gpu_time = 0;
/** Maps a dwell value to an RGB color written through r, g and b.
 *  Forward declaration; the definition appears later in this file. */
void dwell_color(int *r, int *g, int *b, int dwell);
/**
 * Save an 8-bit image to disk as a binary PGM (P5) or PPM (P6) file.
 *
 * @param name         output path without extension; ".pgm" is appended for
 *                     1 channel, ".ppm" for 3 or 4 channels
 * @param src          row-major pixel data, numChannels interleaved bytes per pixel
 * @param width        image width in pixels
 * @param height       image height in pixels
 * @param numChannels  1 (gray), 3 (RGB) or 4 (the first three bytes of every
 *                     pixel are written, the fourth is dropped)
 *
 * An unsupported channel count, a null src with a non-empty image, or a file
 * that cannot be opened is reported on std::cerr and nothing is written.
 */
void savePPM(const std::string& name, unsigned char* src, int width, int height, int numChannels)
{
    const bool isGray = (numChannels == 1);
    const bool isColor = (numChannels == 3 || numChannels == 4);
    if (!isGray && !isColor)
    {
        std::cerr << "savePPM: unsupported channel count " << numChannels << '\n';
        return;
    }

    // All offsets are computed in size_t: the int expression
    // numChannels * (i + j * width) overflows for very large images.
    const size_t cols = width > 0 ? static_cast<size_t>(width) : 0;
    const size_t rows = height > 0 ? static_cast<size_t>(height) : 0;
    const size_t inStride = static_cast<size_t>(numChannels);
    const size_t outStride = isGray ? 1 : 3;

    if (src == NULL && cols != 0 && rows != 0)
    {
        std::cerr << "savePPM: null pixel buffer\n";
        return;
    }

    const std::string path = name + (isGray ? ".pgm" : ".ppm");
    std::ofstream fh(path.c_str(), std::ios::out | std::ios::binary);
    if (!fh)
    {
        std::cerr << "savePPM: cannot open " << path << " for writing\n";
        return;
    }

    // Header: magic number, dimensions, maximum sample value (255).
    fh << (isGray ? "P5\n" : "P6\n");
    fh << width << " " << height << "\n" << 0xff << "\n";

    if (inStride == outStride)
    {
        // Rows are already laid out exactly as the file wants them.
        for (size_t j = 0; j < rows; ++j)
        {
            const unsigned char* rowIn = src + j * cols * inStride;
            fh.write(reinterpret_cast<const char*>(rowIn),
                     static_cast<std::streamsize>(cols * outStride));
        }
    }
    else
    {
        // 4-channel input: repack each row to 3 bytes per pixel before writing.
        std::string rowOut(cols * outStride, '\0');
        for (size_t j = 0; j < rows; ++j)
        {
            const unsigned char* rowIn = src + j * cols * inStride;
            for (size_t i = 0; i < cols; ++i)
            {
                rowOut[outStride * i + 0] = static_cast<char>(rowIn[inStride * i + 0]);
                rowOut[outStride * i + 1] = static_cast<char>(rowIn[inStride * i + 1]);
                rowOut[outStride * i + 2] = static_cast<char>(rowIn[inStride * i + 2]);
            }
            fh.write(rowOut.data(), static_cast<std::streamsize>(rowOut.size()));
        }
    }

    fh.flush();
    fh.close();
}
/** dwell value at and above which a point is colored as part of the set */
#define MAX_DWELL 256
/** thread-block size along x and y used for the host-side launch */
#define BSX 64
#define BSY 4
/** grid subdivision per axis when launched from the host */
#define INIT_SUBDIV 32
/** dwell at which the palette switches from the blue ramp to the white ramp */
#define CUT_DWELL (MAX_DWELL / 4)
/**
 * Host-side palette lookup: converts a dwell count into an RGB triple.
 *
 *  - dwell >= MAX_DWELL        : black (0, 0, 0)
 *  - dwell in [0, CUT_DWELL]   : r = g = 0, blue ramps from 128 up to 255
 *  - dwell above CUT_DWELL     : blue stays 255, r = g ramp from 0 towards 255
 *
 * Negative dwell values are treated as 0.
 *
 * @param r, g, b  output color components
 * @param dwell    dwell value to colorize
 */
void dwell_color(int *r, int *g, int *b, int dwell) {
    // Points inside the Mandelbrot set are drawn black.
    if (dwell >= MAX_DWELL) {
        *b = 0;
        *g = 0;
        *r = 0;
        return;
    }

    const int level = (dwell < 0) ? 0 : dwell;

    if (level > CUT_DWELL) {
        // Upper range: saturated blue, fading towards white.
        const int shade = (level - CUT_DWELL) * 255 / (MAX_DWELL - CUT_DWELL);
        *b = 255;
        *g = shade;
        *r = shade;
        return;
    }

    // Lower range: pure blue of increasing intensity.
    *g = 0;
    *r = 0;
    *b = 128 + level * 127 / (CUT_DWELL);
} // dwell_color
/**
 * Searches for a file by walking up the directory tree.
 *
 * filePath is first prefixed with root. The resulting relative path is then
 * probed up to 16 times; after every failed probe one more "../" is
 * prepended.
 *
 * @param root      directory prefix put in front of filePath
 * @param filePath  in: file name relative to root;
 *                  out: the path that could be opened on success, or the
 *                  path carrying all 16 "../" prefixes on failure
 * @return true if some candidate path could be opened for reading
 */
bool findFullPath(const std::string& root, std::string& filePath)
{
    filePath = root + filePath;

    for (int attempt = 0; attempt < 16; ++attempt)
    {
        std::ifstream probe(filePath.c_str());
        if (probe.is_open())
        {
            return true;
        }
        // Not here: retry one directory level higher.
        filePath.insert(0, "../");
    }

    return false;
}
/** Image width (W) and height (H) in pixels; main() uses them to size the
 *  dwell buffers and to derive the kernel launch parameters. */
#define W (16 * 1024)
#define H (16 * 1024)
/**
 * Program entry point.
 *
 * Steps:
 *  1. locate mandelbrot-dyn.cu and read it into memory,
 *  2. compile it to PTX with NVRTC (relocatable device code enabled),
 *  3. link the PTX with the library named by CUDADEVRTLIB and load the result,
 *  4. launch mandelbrot_block_k on a W x H image and time it,
 *  5. copy the dwell buffer back, print the throughput and release resources.
 *
 * NOTE(review): CUDADEVRTLIB is not defined in this file; presumably the
 * build supplies it as the path to the CUDA device runtime library - confirm.
 *
 * @return 0 on success, -1 if the kernel source cannot be found or opened,
 *         1 on compilation or allocation failure (the *_SAFE_CALL macros
 *         also terminate the process with status 1).
 */
int main(int argc, char **argv)
{
    (void)argc;
    (void)argv;

    // Load CUDA C++ source code in character string.
    std::string rootStr = "cuda-nvrtc-cdp/";
    std::string filePath = "mandelbrot-dyn.cu";
    bool fileFound = findFullPath(rootStr, filePath);
    if (fileFound == false)
    {
        std::cout << "CUDA kernel source not found! Exiting ..." << std::endl;
        std::cout << std::endl;
        return -1;
    }
    std::fstream kernelFile(filePath.c_str(), std::ios::in);
    if (!kernelFile.is_open())
    {
        // findFullPath() opened it a moment ago, but do not rely on that.
        std::cerr << "error: cannot open " << filePath << '\n';
        return -1;
    }
    std::stringstream buffer;
    buffer << kernelFile.rdbuf();
    std::string mandelDynStr = buffer.str() + "\n";

    // Create an instance of nvrtcProgram with the MANDEL-DYN code string.
    nvrtcProgram prog;
    NVRTC_SAFE_CALL(
        nvrtcCreateProgram(&prog,                 // prog
                           mandelDynStr.c_str(),  // buffer
                           "mandelDyn.cu",        // name
                           0,                     // numHeaders
                           NULL,                  // headers
                           NULL));                // includeNames

    // Compile the program for compute_35 with relocatable-device-code enabled.
    const char *opts[] =
    {
        "--gpu-architecture=compute_35",
        "--relocatable-device-code=true"
    };
    nvrtcResult compileResult = nvrtcCompileProgram(prog,  // prog
                                                    2,     // numOptions
                                                    opts); // options

    // Obtain compilation log from the program. The buffer is a std::string so
    // it is released automatically on every path.
    size_t logSize = 0;
    NVRTC_SAFE_CALL(nvrtcGetProgramLogSize(prog, &logSize));
    std::string log(logSize, '\0');
    NVRTC_SAFE_CALL(nvrtcGetProgramLog(prog, &log[0]));
    if (logSize > 1)
        std::cout << log.c_str() << '\n';
    if (compileResult != NVRTC_SUCCESS) {
        nvrtcDestroyProgram(&prog);
        return 1;
    }

    // Obtain PTX from the program. Previously this buffer came from new[] and
    // was never freed; std::string owns it now.
    size_t ptxSize = 0;
    NVRTC_SAFE_CALL(nvrtcGetPTXSize(prog, &ptxSize));
    std::string ptx(ptxSize, '\0');
    NVRTC_SAFE_CALL(nvrtcGetPTX(prog, &ptx[0]));

    // Destroy the program.
    NVRTC_SAFE_CALL(nvrtcDestroyProgram(&prog));

    // Link the generated PTX, load it and get a handle to the
    // mandelbrot_block_k kernel.
    CUcontext cuContext;
    CUdevice cuDevice;
    CUmodule cuModule;
    CUfunction cuKernel;
    CUDA_SAFE_CALL(cuInit(0));
    CUDA_SAFE_CALL(cuDeviceGet(&cuDevice, 0));
    CUDA_SAFE_CALL(cuCtxCreate(&cuContext, CU_CTX_SCHED_BLOCKING_SYNC | CU_CTX_MAP_HOST, cuDevice));
    CUlinkState linkState;
    void *cubin;
    size_t cubinSize;
    CUDA_SAFE_CALL(cuLinkCreate(0, NULL, NULL, &linkState));
    CUDA_SAFE_CALL(cuLinkAddFile(linkState, CU_JIT_INPUT_LIBRARY, CUDADEVRTLIB, 0, NULL, NULL));
    CUDA_SAFE_CALL(cuLinkAddData(linkState, CU_JIT_INPUT_PTX, (void*)&ptx[0], strlen(ptx.c_str()), 0, 0, 0, 0));
    CUDA_SAFE_CALL(cuLinkComplete(linkState, &cubin, &cubinSize));
    CUDA_SAFE_CALL(cuModuleLoadData(&cuModule, cubin));
    CUDA_SAFE_CALL(cuModuleGetFunction(&cuKernel, cuModule, "mandelbrot_block_k"));

    // Generate input for execution, and create output buffers.
    int w = W, h = H;
    // Byte count kept in size_t so that larger W/H cannot overflow an int.
    const size_t dwell_sz = static_cast<size_t>(w) * static_cast<size_t>(h) * sizeof(int);
    int *h_dwells = (int*)malloc(dwell_sz);
    if (h_dwells == NULL)
    {
        std::cerr << "error: failed to allocate " << dwell_sz
                  << " bytes of host memory\n";
        return 1;
    }
    CUdeviceptr dOut;
    CUDA_SAFE_CALL(cuMemAlloc(&dOut, dwell_sz));
    int blockX = BSX;
    int blockY = BSY;
    int gridX = INIT_SUBDIV;
    int gridY = INIT_SUBDIV;
    float2 cmin = make_float2(-1.5f, -1.0f);
    float2 cmax = make_float2(0.5f, 1.0f);
    int x0 = 0;
    int y0 = 0;
    int d = W / INIT_SUBDIV;
    int depth = 0;

    // Execute MANDELBROT-DYN.
    std::cout << "Running mandelbrot-dyn on " << W << " x " << H << " elements" << std::endl;
    double t1 = omp_get_wtime();
    void *args[] = { &dOut, &w, &h, &cmin, &cmax, &x0, &y0, &d, &depth };
    CUDA_SAFE_CALL(
        cuLaunchKernel(cuKernel,
                       gridX, gridY, 1,    // grid dim
                       blockX, blockY, 1,  // block dim
                       0, NULL,            // shared mem and stream
                       args, 0));          // arguments
    CUDA_SAFE_CALL(cuCtxSynchronize());
    double t2 = omp_get_wtime();
    gpu_time = t2 - t1;

    // Retrieve and save output.
    CUDA_SAFE_CALL(cuMemcpyDtoH(h_dwells, dOut, dwell_sz));
    //for (int i = 0; i < w*h; ++i)
    //{
    //    int curr = h_dwells[i];
    //
    //    int r, g, b;
    //    dwell_color(&r, &g, &b, curr);
    //
    //    int pixel =
    //        (((unsigned int)b) << 0 |
    //        (((unsigned int)g) << 8) |
    //        (((unsigned int)r) << 16));
    //
    //    h_dwells[i] = pixel;
    //}
    //savePPM("mandel-dyn", (unsigned char*)h_dwells, w, h, 4);

    // print performance
    printf("Mandelbrot set computed in %.3lf s, at %.3lf Mpix/s\n",
           gpu_time, static_cast<double>(h) * w * 1e-6 / gpu_time);

    // Release resources.
    CUDA_SAFE_CALL(cuMemFree(dOut));
    CUDA_SAFE_CALL(cuModuleUnload(cuModule));
    CUDA_SAFE_CALL(cuLinkDestroy(linkState));
    CUDA_SAFE_CALL(cuCtxDestroy(cuContext));
    free(h_dwells);
    std::cout << std::endl;
    return 0;
}