-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathraytracer_cuda.cu
403 lines (371 loc) · 15.6 KB
/
raytracer_cuda.cu
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
// [header]
// A very basic raytracer example.
// [/header]
// [compile]
// nvcc -o raytracer_cuda raytracer_cuda.cu
// [/compile]
// [ignore]
// Copyright (C) 2012 www.scratchapixel.com
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
// [/ignore]
#include <cstdlib>
#include <cstdio>
#include <cmath>
#include <fstream>
#include <vector>
#include <iostream>
#include <cassert>
#include <time.h>
#define GIG 1000000000
#define CPG 2.9 // Cycles per GHz -- Adjust to your computer
// Assertion to check for errors
#define CUDA_SAFE_CALL(ans) { gpuAssert((ans), __FILE__, __LINE__); }
inline void gpuAssert(cudaError_t code, char *file, int line, bool abort=true)
{
if (code != cudaSuccess)
{
fprintf(stderr,"CUDA_SAFE_CALL: %s %s %d\n", cudaGetErrorString(code), file, line);
if (abort) exit(code);
}
}
#if defined __linux__ || defined __APPLE__
// "Compiled for Linux
#else
// Windows doesn't define these values by default, Linux does
#define M_PI 3.141592653589793
#define INFINITY 1e8
#endif
#ifdef __CUDACC__
#define CUDA_CALLABLE_MEMBER __host__ __device__
#else
#define CUDA_CALLABLE_MEMBER
#endif
template<typename T>
class Vec3
{
public:
T x, y, z;
CUDA_CALLABLE_MEMBER Vec3() : x(T(0)), y(T(0)), z(T(0)) {}
CUDA_CALLABLE_MEMBER Vec3(T xx) : x(xx), y(xx), z(xx) {}
CUDA_CALLABLE_MEMBER Vec3(T xx, T yy, T zz) : x(xx), y(yy), z(zz) {}
CUDA_CALLABLE_MEMBER Vec3& normalize()
{
T nor2 = length2();
if (nor2 > 0) {
T invNor = 1 / sqrt(nor2);
x *= invNor, y *= invNor, z *= invNor;
}
return *this;
}
CUDA_CALLABLE_MEMBER Vec3<T> operator * (const T &f) const { return Vec3<T>(x * f, y * f, z * f); }
CUDA_CALLABLE_MEMBER Vec3<T> operator * (const Vec3<T> &v) const { return Vec3<T>(x * v.x, y * v.y, z * v.z); }
CUDA_CALLABLE_MEMBER T dot(const Vec3<T> &v) const { return x * v.x + y * v.y + z * v.z; }
CUDA_CALLABLE_MEMBER Vec3<T> operator - (const Vec3<T> &v) const { return Vec3<T>(x - v.x, y - v.y, z - v.z); }
CUDA_CALLABLE_MEMBER Vec3<T> operator + (const Vec3<T> &v) const { return Vec3<T>(x + v.x, y + v.y, z + v.z); }
CUDA_CALLABLE_MEMBER Vec3<T>& operator += (const Vec3<T> &v) { x += v.x, y += v.y, z += v.z; return *this; }
CUDA_CALLABLE_MEMBER Vec3<T>& operator *= (const Vec3<T> &v) { x *= v.x, y *= v.y, z *= v.z; return *this; }
CUDA_CALLABLE_MEMBER Vec3<T> operator - () const { return Vec3<T>(-x, -y, -z); }
CUDA_CALLABLE_MEMBER T length2() const { return x * x + y * y + z * z; }
CUDA_CALLABLE_MEMBER T length() const { return sqrt(length2()); }
CUDA_CALLABLE_MEMBER friend std::ostream & operator << (std::ostream &os, const Vec3<T> &v)
{
os << "[" << v.x << " " << v.y << " " << v.z << "]";
return os;
}
};
typedef Vec3<float> Vec3f;
class Sphere
{
public:
Vec3f center; /// position of the sphere
float radius, radius2; /// sphere radius and radius^2
Vec3f surfaceColor, emissionColor; /// surface color and emission (light)
float transparency, reflection; /// surface transparency and reflectivity
CUDA_CALLABLE_MEMBER Sphere(
const Vec3f &c, // consts here
const float &r,
const Vec3f &sc,
const float &refl = 0,
const float &transp = 0,
const Vec3f &ec = 0) :
center(c), radius(r), radius2(r * r), surfaceColor(sc), emissionColor(ec),
transparency(transp), reflection(refl)
{ /* empty */ }
//[comment]
// Compute a ray-sphere intersection using the geometric solution
//[/comment]
CUDA_CALLABLE_MEMBER bool intersect(const Vec3f &rayorig, const Vec3f &raydir, float &t0, float &t1) const
{
Vec3f l = center - rayorig;
float tca = l.dot(raydir);
if (tca < 0) return false;
float d2 = l.dot(l) - tca * tca;
if (d2 > radius2) return false;
float thc = sqrt(radius2 - d2);
t0 = tca - thc;
t1 = tca + thc;
return true;
}
};
//[comment]
// This variable controls the maximum recursion depth
//[/comment]
#define MAX_RAY_DEPTH 5
//cpu time calculation
struct timespec diff(struct timespec start, struct timespec end)
{
struct timespec temp;
if ((end.tv_nsec-start.tv_nsec)<0) {
temp.tv_sec = end.tv_sec-start.tv_sec-1;
temp.tv_nsec = 1000000000+end.tv_nsec-start.tv_nsec;
} else {
temp.tv_sec = end.tv_sec-start.tv_sec;
temp.tv_nsec = end.tv_nsec-start.tv_nsec;
}
return temp;
}
float mix(const float &a, const float &b, const float &mix)
{
return b * mix + a * (1 - mix);
}
//[comment]
// This is the main trace function. It takes a ray as argument (defined by its origin
// and direction). We test if this ray intersects any of the geometry in the scene.
// If the ray intersects an object, we compute the intersection point, the normal
// at the intersection point, and shade this point using this information.
// Shading depends on the surface property (is it transparent, reflective, diffuse).
// The function returns a color for the ray. If the ray intersects an object that
// is the color of the object at the intersection point, otherwise it returns
// the background color.
//[/comment]
Vec3f trace(
const Vec3f &rayorig,
const Vec3f &raydir,
//const std::vector<Sphere> &spheres,
Sphere &sphere_arr,
const int &depth)
{
//if (raydir.length() != 1) std::cerr << "Error " << raydir << std::endl;
float tnear = INFINITY;
std::vector<Sphere> spheres;
const Sphere* sphere = NULL;
// find intersection of this ray with the sphere in the scene
/// tried to convert array back to vector
for(int ii = 0; ii < 6; ii++){
spheres.push_back(*sphere_arr[ii]);
}
int len = spheres.size();
for (unsigned i = 0; i < len; ++i) {
float t0 = INFINITY, t1 = INFINITY;
if (spheres[i].intersect(rayorig, raydir, t0, t1)) {
if (t0 < 0) t0 = t1;
if (t0 < tnear) {
tnear = t0;
sphere = &spheres[i];
}
}
}
///
// if there's no intersection return black or background color
if (!sphere) return Vec3f(2);
Vec3f surfaceColor = 0; // color of the ray/surfaceof the object intersected by the ray
Vec3f phit = rayorig + raydir * tnear; // point of intersection
Vec3f nhit = phit - sphere->center; // normal at the intersection point
nhit.normalize(); // normalize normal direction
// If the normal and the view direction are not opposite to each other
// reverse the normal direction. That also means we are inside the sphere so set
// the inside bool to true. Finally reverse the sign of IdotN which we want
// positive.
float bias = 1e-4; // add some bias to the point from which we will be tracing
bool inside = false;
if (raydir.dot(nhit) > 0) nhit = -nhit, inside = true;
if ((sphere->transparency > 0 || sphere->reflection > 0) && depth < MAX_RAY_DEPTH) {
float facingratio = -raydir.dot(nhit);
// change the mix value to tweak the effect
float fresneleffect = mix(pow(1 - facingratio, 3), 1, 0.1);
// compute reflection direction (not need to normalize because all vectors
// are already normalized)
Vec3f refldir = raydir - nhit * 2 * raydir.dot(nhit);
refldir.normalize();
Vec3f reflection = trace(phit + nhit * bias, refldir, spheres, depth + 1);
Vec3f refraction = 0;
// if the sphere is also transparent compute refraction ray (transmission)
if (sphere->transparency) {
float ior = 1.1, eta = (inside) ? ior : 1 / ior; // are we inside or outside the surface?
float cosi = -nhit.dot(raydir);
float k = 1 - eta * eta * (1 - cosi * cosi);
Vec3f refrdir = raydir * eta + nhit * (eta * cosi - sqrt(k));
refrdir.normalize();
refraction = trace(phit - nhit * bias, refrdir, spheres, depth + 1);
}
// the result is a mix of reflection and refraction (if the sphere is transparent)
surfaceColor = (
reflection * fresneleffect +
refraction * (1 - fresneleffect) * sphere->transparency) * sphere->surfaceColor;
}
else {
// it's a diffuse object, no need to raytrace any further
for (unsigned i = 0; i < len; ++i) {
if (spheres[i].emissionColor.x > 0) {
// this is a light
Vec3f transmission = 1;
Vec3f lightDirection = spheres[i].center - phit;
lightDirection.normalize();
for (unsigned j = 0; j < len; ++j) {
if (i != j) {
float t0, t1;
if (spheres[j].intersect(phit + nhit * bias, lightDirection, t0, t1)) {
transmission = 0;
break;
}
}
}
surfaceColor += sphere->surfaceColor * transmission *
std::max(float(0), nhit.dot(lightDirection)) * spheres[i].emissionColor;
}
}
}
return surfaceColor + sphere->emissionColor;
}
//[comment]
// Main rendering function. We compute a camera ray for each pixel of the image
// trace it and return a color. If the ray hits a sphere, we return the color of the
// sphere at the intersection point, else we return the background color.
//[/comment]
/*
void render(const std::vector<Sphere> &spheres)
{
unsigned width = 1920, height = 1080;
Vec3f *image = new Vec3f[width * height], *pixel = image;
float invWidth = 1 / float(width), invHeight = 1 / float(height);
float fov = 30, aspectratio = width / float(height);
float angle = tan(M_PI * 0.5 * fov / 180.);
// Trace rays
for (unsigned y = 0; y < height; ++y) {
for (unsigned x = 0; x < width; ++x, ++pixel) {
float xx = (2 * ((x + 0.5) * invWidth) - 1) * angle * aspectratio;
float yy = (1 - 2 * ((y + 0.5) * invHeight)) * angle;
Vec3f raydir(xx, yy, -1);
raydir.normalize();
*pixel = trace(Vec3f(0), raydir, spheres, 0);
}
}
// Save result to a PPM image (keep these flags if you compile under Windows)
std::ofstream ofs("./untitled.ppm", std::ios::out | std::ios::binary);
ofs << "P6\n" << width << " " << height << "\n255\n";
for (unsigned i = 0; i < width * height; ++i) {
ofs << (unsigned char)(std::min(float(1), image[i].x) * 255) <<
(unsigned char)(std::min(float(1), image[i].y) * 255) <<
(unsigned char)(std::min(float(1), image[i].z) * 255);
}
ofs.close();
delete [] image;
}*/
__global__ void render_cuda(Sphere *d_spheres, int height, int width, Vec3f *pixel)
{
float invWidth = 1 / float(width), invHeight = 1 / float(height);
float fov = 30, aspectratio = width / float(height);
float angle = tan(M_PI * 0.5 * fov / 180.);
unsigned y = blockIdx.y * blockDim.y + threadIdx.y;
unsigned x = blockIdx.x * blockDim.x + threadIdx.x;
int size = width * height;
for(int k = 0; k < size; k++){
float xx = (2 * ((x + 0.5) * invWidth) - 1) * angle * aspectratio;
float yy = (1 - 2 * ((y + 0.5) * invHeight)) * angle;
Vec3f raydir(xx, yy, -1);
raydir.normalize();
pixel[y*width+x] = trace(Vec3f(0), raydir, *d_spheres, 0);
}
}
//[comment]
// In the main function, we will create the scene which is composed of 5 spheres
// and 1 light (which is also a sphere). Then, once the scene description is complete
// we render that scene, by calling the render() function.
//[/comment]
int main(int argc, char **argv)
{
// dimensions of image
unsigned width = 1920;
unsigned height = 1080;
// GPU Timing variables
cudaEvent_t start, stop;
float elapsed_gpu;
//image array
Vec3f *image = new Vec3f[width * height];
srand48(13);
std::vector<Sphere> spheres;
// position, radius, surface color, reflectivity, transparency, emission color
spheres.push_back(Sphere(Vec3f( 0.0, -10004, -20), 10000, Vec3f(0.20, 0.20, 0.20), 0, 0.0));
spheres.push_back(Sphere(Vec3f( 0.0, 0, -20), 4, Vec3f(1.00, 0.32, 0.36), 1, 0.5));
spheres.push_back(Sphere(Vec3f( 5.0, -1, -15), 2, Vec3f(0.90, 0.76, 0.46), 1, 0.0));
spheres.push_back(Sphere(Vec3f( 5.0, 0, -25), 3, Vec3f(0.65, 0.77, 0.97), 1, 0.0));
spheres.push_back(Sphere(Vec3f(-5.5, 0, -15), 3, Vec3f(0.90, 0.90, 0.90), 1, 0.0));
// light
spheres.push_back(Sphere(Vec3f( 0.0, 20, -30), 3, Vec3f(0.00, 0.00, 0.00), 0, 0.0, Vec3f(3)));
//get size of spheres vector
int size = spheres.size() * sizeof(Sphere);
int flatArraySize = width * height * sizeof(Vec3f);
Sphere *h_spheres[6];
*h_spheres[0] = Sphere(Vec3f( 0.0, -10004, -20), 10000, Vec3f(0.20, 0.20, 0.20), 0, 0.0);
*h_spheres[1] = Sphere(Vec3f( 0.0, 0, -20), 4, Vec3f(1.00, 0.32, 0.36), 1, 0.5);
*h_spheres[2] = Sphere(Vec3f( 5.0, -1, -15), 2, Vec3f(0.90, 0.76, 0.46), 1, 0.0);
*h_spheres[3] = Sphere(Vec3f( 5.0, 0, -25), 3, Vec3f(0.65, 0.77, 0.97), 1, 0.0);
*h_spheres[4] = Sphere(Vec3f(-5.5, 0, -15), 3, Vec3f(0.90, 0.90, 0.90), 1, 0.0);
*h_spheres[5] = Sphere(Vec3f( 0.0, 20, -30), 3, Vec3f(0.00, 0.00, 0.00), 0, 0.0, Vec3f(3));
// arrays on the GPU
Sphere *d_spheres;
Vec3f *pixel;
// allocate GPU memory
CUDA_SAFE_CALL(cudaMalloc(&d_spheres, size));
CUDA_SAFE_CALL(cudaMalloc(&pixel, flatArraySize));
// Transfer the arrays to the GPU memory
CUDA_SAFE_CALL(cudaMemcpy(d_spheres, h_spheres, size, cudaMemcpyHostToDevice));
CUDA_SAFE_CALL(cudaMemcpy(pixel, image, size, cudaMemcpyHostToDevice));
//call kernel function and choose dimension of the problem
dim3 dimGrid(128,72);
dim3 dimBlock(16, 16);
//the rendering computation
#if PRINT_TIME
// Create the cuda events
cudaEventCreate(&start);
cudaEventCreate(&stop);
// Record event on the default stream
cudaEventRecord(start, 0);
#endif
render_cuda<<<dimGrid, dimBlock>>>(d_spheres, height, width, pixel);
//render(spheres);
#if PRINT_TIME
// Stop and destroy the timer
cudaEventRecord(stop,0);
cudaEventSynchronize(stop);
cudaEventElapsedTime(&elapsed_gpu, start, stop);
printf("\nGPU calculation time: %f (msec)\n", elapsed_gpu);
cudaEventDestroy(start);
cudaEventDestroy(stop);
#endif
// Transfer the results back to the host
CUDA_SAFE_CALL(cudaMemcpy(image, pixel, size, cudaMemcpyDeviceToHost));
// Save result to a PPM image (keep these flags if you compile under Windows)
std::ofstream ofs("./untitled.ppm", std::ios::out | std::ios::binary);
ofs << "P6\n" << width << " " << height << "\n255\n";
for (unsigned i = 0; i < width * height; ++i) {
ofs << (unsigned char)(std::min(float(1), image[i].x) * 255) <<
(unsigned char)(std::min(float(1), image[i].y) * 255) <<
(unsigned char)(std::min(float(1), image[i].z) * 255);
}
ofs.close();
delete [] image;
return 0;
}