diff --git a/CMakeLists.txt b/CMakeLists.txt index 0856984..c84cfed 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -4,6 +4,15 @@ project(adaboost CXX) # First, define all the compilation options. option(BUILD_TESTS "Build tests." OFF) option(INSTALL_GOOGLETEST "For installing GoogleTest on your system along with the build." OFF) +option(BUILD_CUDA "Build with CUDA support." OFF) + +include(CheckLanguage) +check_language(CUDA) +if(CMAKE_CUDA_COMPILER) + enable_language(CUDA) +else() + message(STATUS "Building without CUDA support as no CUDA compiler is found on this system.") +endif() # Set required standard to C++11. @@ -37,4 +46,6 @@ install(DIRECTORY adaboost DESTINATION ${CMAKE_INSTALL_PREFIX}/include install(FILES ${CMAKE_BINARY_DIR}/libs/libadaboost_core.so ${CMAKE_BINARY_DIR}/libs/libadaboost_utils.so + ${CMAKE_BINARY_DIR}/libs/libadaboost_cuda.so + ${CMAKE_BINARY_DIR}/libs/libadaboost_cuda_wrappers.so DESTINATION ${CMAKE_INSTALL_PREFIX}/lib) diff --git a/adaboost/CMakeLists.txt b/adaboost/CMakeLists.txt index c55f6f4..3a96aab 100644 --- a/adaboost/CMakeLists.txt +++ b/adaboost/CMakeLists.txt @@ -3,10 +3,22 @@ add_library(adaboost_utils SHARED utils/utils_impl.cpp) add_library(adaboost_core SHARED core/data_structures_impl.cpp - core/operations_impl.cpp) + core/operations_impl.cpp + utils/utils_impl.cpp) +if(BUILD_CUDA) + add_library(adaboost_cuda_wrappers SHARED + utils/cuda_wrappers_impl.cu) + add_library(adaboost_cuda SHARED + cuda/cuda_data_structures_impl.cu + utils/cuda_wrappers_impl.cu) +endif() if(BUILD_TESTS) set(EXECUTABLE_OUTPUT_PATH ${CMAKE_BINARY_DIR}/bin) - set(LINK_LIBRARIES gtest gtest_main pthread adaboost_core adaboost_utils) + set(LINK_LIBRARIES gtest gtest_main pthread adaboost_core) add_executable(test_core tests/test_core.cpp) target_link_libraries(test_core ${LINK_LIBRARIES}) + if(BUILD_CUDA) + add_executable(test_cuda tests/test_cuda.cpp) + target_link_libraries(test_cuda ${LINK_LIBRARIES} adaboost_cuda) + endif() endif() diff --git a/adaboost/core/data_structures.hpp b/adaboost/core/data_structures.hpp index 64d0585..a7c67be 100644 --- a/adaboost/core/data_structures.hpp +++ b/adaboost/core/data_structures.hpp @@ -79,6 +79,8 @@ namespace adaboost */ unsigned int get_size() const; + data_type_vector* get_data_pointer() const; + /* * Used for freeing memory. */ @@ -102,7 +104,7 @@ namespace adaboost unsigned int rows, cols; //! Array for storing data internally in matrices. - data_type_matrix **data; + data_type_matrix* data; /* * For reserving space in memory accoring to a given rows and columns. @@ -112,7 +114,7 @@ namespace adaboost * @param _rows Number of rows for which the space is to be reserved. * @param _cols Number of columns for which the space is to be reserved. */ - static data_type_matrix** + static data_type_matrix* _reserve_space(unsigned int _rows, unsigned int _cols); @@ -174,6 +176,8 @@ namespace adaboost */ unsigned int get_cols() const; + data_type_matrix* get_data_pointer() const; + /* * Used for freeing memory. */ diff --git a/adaboost/core/data_structures_impl.cpp b/adaboost/core/data_structures_impl.cpp index 692ce78..37bf6f5 100644 --- a/adaboost/core/data_structures_impl.cpp +++ b/adaboost/core/data_structures_impl.cpp @@ -3,6 +3,7 @@ #include #include +#include namespace adaboost { @@ -66,6 +67,12 @@ namespace adaboost return this->size; } + template + data_type_vector* Vector::get_data_pointer() const + { + return this->data; + } + template Vector:: ~Vector() @@ -84,16 +91,12 @@ namespace adaboost } template - data_type_matrix** Matrix:: + data_type_matrix* Matrix:: _reserve_space(unsigned int _rows, unsigned int _cols) { adaboost::utils::check(_rows > 0, "Number of rows should be positive."); adaboost::utils::check(_cols > 0, "Number of cols should be positive."); - data_type_matrix** new_data = new data_type_matrix*[_rows]; - for(unsigned int i = 0; i < _rows; i++) - { - new_data[i] = new data_type_matrix[_cols]; - } + data_type_matrix* new_data = new data_type_matrix[_rows*_cols]; return new_data; } @@ -114,7 +117,7 @@ namespace adaboost "Row index out of range."); adaboost::utils::check(y >= 0 && y < this->get_cols(), "Column index out of range."); - return this->data[x][y]; + return this->data[x*this->cols + y]; } template @@ -127,7 +130,7 @@ namespace adaboost "Row index out of range."); adaboost::utils::check(y >= 0 && y < this->get_cols(), "Column index out of range."); - this->data[x][y] = value; + this->data[x*this->cols + y] = value; } template @@ -138,7 +141,7 @@ namespace adaboost { for(unsigned int j = 0; j < this->cols; j++) { - this->data[i][j] = value; + this->data[i*this->cols + j] = value; } } } @@ -157,16 +160,19 @@ namespace adaboost return this->cols; } + template + data_type_matrix* Matrix::get_data_pointer() const + { + return this->data; + } + template Matrix:: ~Matrix() { if(this->data != NULL) { - for(unsigned int i = 0; i < this->rows; i++) - { - delete [] this->data[i]; - } + delete [] this->data; } } diff --git a/adaboost/cuda/cuda_data_structures.hpp b/adaboost/cuda/cuda_data_structures.hpp new file mode 100644 index 0000000..b6f67c9 --- /dev/null +++ b/adaboost/cuda/cuda_data_structures.hpp @@ -0,0 +1,158 @@ +#ifndef CUDA_ADABOOST_CORE_DATA_STRUCTURES_HPP +#define CUDA_ADABOOST_CORE_DATA_STRUCTURES_HPP + +#include + +namespace adaboost +{ + namespace cuda + { + namespace core + { + /* + * This class represents the GPU version + * of adaboost::core::Vector. + * + * @tparam data_type_vector Data type of the elements + * supported by C++. + */ + template + class VectorGPU: public adaboost::core::Vector + { + private: + + //! Array for storing data on GPU. + data_type_vector* data_gpu; + + //! The size of the vector stored on GPU. + unsigned size_gpu; + + /* + * For reserving space in GPU memory accoring to a given size. + * Used in initializer list of parameterized constructors. + * Returns a new pointer. + * + * @param _size The size for which the space is to be reserved + * on GPU. + */ + static data_type_vector* + _reserve_space_gpu(unsigned _size_gpu); + + public: + + /* + * Default constructor. + * Sets VectorGPU::data_gpu to NULL and size_gpu to 0. + */ + VectorGPU(); + + /* + * Prameterized constructor. + * + * @param _size The size of the vector on GPU. + * Must be positive. + */ + VectorGPU(unsigned _size); + + /* + * Used for filling the vector with a given value. + * If block size is passed 0 then the values are + * filled on the CPU otherwise they are filled on + * GPU. + * + * @param value The value with which the vector is + * to be populated. + * @param block_size The number of threads to be + * launched per block on GPU. + */ + void fill(data_type_vector value, + unsigned block_size=0); + + /* + * Copies the data from GPU to CPU. + */ + void copy_to_host(); + + /* + * Copies the data from CPU to GPU. + */ + void copy_to_device(); + + /* + * Returns the size of the vector. + * By default returns the size of the + * vector on GPU. + * + * @param gpu If true then size of the + * vector on GPU otherwise size of the + * vector on CPU is returned. + */ + unsigned get_size(bool gpu=true) const; + + /* + * Returns the data pointer, by default, of + * the vector on GPU. + * + * @param gpu If true then data pointer on GPU + * is returned otherwise the one on CPU + * is returned. + */ + data_type_vector* get_data_pointer(bool gpu=true) const; + + /* + * Frees the memory from both CPU and GPU. + */ + ~VectorGPU(); + }; + + template + void product_gpu(const VectorGPU& vec1, + const VectorGPU& vec2, + data_type_vector& result, + unsigned block_size=0); + + template + class MatrixGPU: public adaboost::core::Matrix + { + private: + + data_type_matrix* data_gpu; + + unsigned rows_gpu, cols_gpu; + + static data_type_matrix* _reserve_space_gpu + (unsigned _rows_gpu, unsigned _cols_gpu); + + public: + + MatrixGPU(); + + MatrixGPU(unsigned _rows, unsigned _cols); + + void fill(data_type_matrix value, + unsigned block_size_x=0, + unsigned block_size_y=0); + + void copy_to_host(); + + void copy_to_device(); + + unsigned get_rows(bool gpu=true) const; + + unsigned get_cols(bool gpu=true) const; + + data_type_matrix* get_data_pointer(bool gpu=true) const; + + ~MatrixGPU(); + }; + + template + void multiply_gpu(const MatrixGPU& mat1, + const MatrixGPU& mat2, + MatrixGPU& result); + + } // namespace core + } // namespace cuda +} // namespace adaboost + +#endif diff --git a/adaboost/cuda/cuda_data_structures_impl.cu b/adaboost/cuda/cuda_data_structures_impl.cu new file mode 100644 index 0000000..a884b8c --- /dev/null +++ b/adaboost/cuda/cuda_data_structures_impl.cu @@ -0,0 +1,6 @@ +#ifndef CUDA_ADABOOST_CORE_DATA_STRUCTURES_IMPL_CU +#define CUDA_ADABOOST_CORE_DATA_STRUCTURES_IMPL_CU + +#include "cuda_data_structures_impl.hpp" + +#endif diff --git a/adaboost/cuda/cuda_data_structures_impl.hpp b/adaboost/cuda/cuda_data_structures_impl.hpp new file mode 100644 index 0000000..e48f3ce --- /dev/null +++ b/adaboost/cuda/cuda_data_structures_impl.hpp @@ -0,0 +1,397 @@ +#ifndef CUDA_ADABOOST_CORE_DATA_STRUCTURES_IMPL_HPP +#define CUDA_ADABOOST_CORE_DATA_STRUCTURES_IMPL_HPP + +#include +#include +#include +#include +#include + +#define MAX_BLOCK_SIZE 1024 +#define BLOCK_SIZE 16 + +namespace adaboost +{ + namespace cuda + { + namespace core + { + template + data_type_vector* + VectorGPU:: + _reserve_space_gpu(unsigned _size_gpu) + { + adaboost::utils::check(_size_gpu > 0, + "The size of the vector should be positive."); + unsigned bytes = _size_gpu*sizeof(data_type_vector); + data_type_vector* new_pointer; + adaboost::utils::cuda::cuda_malloc((void**)&new_pointer, bytes); + return new_pointer; + } + + template + VectorGPU:: + VectorGPU(): + adaboost::core::Vector(), + size_gpu(0), + data_gpu(NULL) + { + } + + template + VectorGPU:: + VectorGPU(unsigned _size): + adaboost::core::Vector(_size), + data_gpu(_reserve_space_gpu(_size)), + size_gpu(_size) + { + } + + template + __global__ void fill_vector_kernel + (data_type_vector* data, + unsigned size, + data_type_vector value) + { + unsigned index = threadIdx.x; + unsigned stride = blockDim.x; + for(unsigned i = index; i < size; i += stride) + { + data[i] = value; + } + } + + template + void VectorGPU:: + fill(data_type_vector value, + unsigned block_size) + { + if(block_size == 0) + { + this->adaboost::core::Vector::fill(value); + } + else + { + fill_vector_kernel + <<< + (this->size_gpu + block_size - 1)/block_size, + block_size + >>>(this->data_gpu, this->size_gpu, value); + } + } + + template + void + VectorGPU::copy_to_host() + { + adaboost::utils::cuda::cuda_memcpy( + this->get_data_pointer(false), + this->data_gpu, + this->size_gpu*sizeof(data_type_vector), + adaboost::utils::cuda::DeviceToHost); + } + + template + void + VectorGPU::copy_to_device() + { + adaboost::utils::cuda::cuda_memcpy( + this->data_gpu, + this->get_data_pointer(false), + this->size_gpu*sizeof(data_type_vector), + adaboost::utils::cuda::HostToDevice); + } + + template + unsigned VectorGPU:: + get_size(bool gpu) const + { + if(gpu) + { + return this->size_gpu; + } + else + { + return this->adaboost::core::Vector::get_size(); + } + } + + template + data_type_vector* VectorGPU:: + get_data_pointer(bool gpu) const + { + if(gpu) + { + return this->data_gpu; + } + else + { + return this->adaboost::core::Vector::get_data_pointer(); + } + } + + template + VectorGPU:: + ~VectorGPU() + { + adaboost::utils::cuda::cuda_free(this->data_gpu); + } + + template + __global__ + void product_kernel + (data_type_vector* v1, data_type_vector* v2, data_type_vector* v3, + unsigned size) + { + __shared__ data_type_vector cache[MAX_BLOCK_SIZE]; + data_type_vector temp = 0; + unsigned thread_i = threadIdx.x + blockDim.x*blockIdx.x; + unsigned cache_i = threadIdx.x; + while(thread_i < size) + { + temp += v1[thread_i]*v2[thread_i]; + thread_i = blockDim.x*gridDim.x; + } + cache[cache_i] = temp; + __syncthreads(); + + unsigned i = blockDim.x/2; + while(i != 0) + { + if(cache_i < i) + { + cache[cache_i] += cache[cache_i + i]; + } + __syncthreads(); + i /= 2; + } + + if(cache_i == 0) + v3[blockIdx.x] = cache[0]; + } + + template + void product_gpu(const VectorGPU& vec1, + const VectorGPU& vec2, + data_type_vector& result, + unsigned block_size) + { + if(block_size == 0) + { + return adaboost::core::product(vec1, vec2, result); + } + else + { + adaboost::utils::check(vec1.get_size() == vec2.get_size(), + "Size of vectors don't match."); + adaboost::utils::check(block_size > 0, + "Size of the block should be a positive multiple of 32."); + unsigned num_blocks = (vec1.get_size() + block_size - 1)/block_size; + VectorGPU temp_result(num_blocks); + product_kernel + <<< + num_blocks, + block_size + >>>(vec1.get_data_pointer(), vec2.get_data_pointer(), + temp_result.get_data_pointer(), vec1.get_size()); + temp_result.copy_to_host(); + result = 0; + for(unsigned i = 0; i < num_blocks; i++) + { + result += temp_result.at(i); + } + } + } + + template + data_type_matrix* + MatrixGPU:: + _reserve_space_gpu + (unsigned _rows_gpu, unsigned _cols_gpu) + { + adaboost::utils::check(_rows_gpu > 0, + "The number of rows in matrix should be positive."); + adaboost::utils::check(_cols_gpu > 0, + "The number of cols in matrix should be positive."); + unsigned bytes = _rows_gpu*_cols_gpu*sizeof(data_type_matrix); + data_type_matrix* new_pointer; + adaboost::utils::cuda::cuda_malloc((void**)&new_pointer, bytes); + return new_pointer; + } + + template + MatrixGPU:: + MatrixGPU(): + adaboost::core::Matrix(), + rows_gpu(0), + cols_gpu(0) + { + } + + template + MatrixGPU:: + MatrixGPU(unsigned _rows, unsigned _cols): + adaboost::core::Matrix(_rows, _cols), + data_gpu(_reserve_space_gpu(_rows, _cols)), + rows_gpu(_rows), + cols_gpu(_cols) + { + } + + template + __global__ + void fill_matrix_kernel + (data_type_matrix* data, + unsigned cols, + data_type_matrix value) + { + unsigned row = blockDim.y*blockIdx.y + threadIdx.y; + unsigned col = blockDim.x*blockIdx.x + threadIdx.x; + data[row*cols + col] = value; + } + + template + void MatrixGPU:: + fill(data_type_matrix value, + unsigned block_size_x, + unsigned block_size_y) + { + if(block_size_x == 0 || block_size_y == 0) + { + this->adaboost::core::Matrix::fill(value); + } + else + { + dim3 gridDim((this->cols_gpu + block_size_x - 1)/block_size_x, + (this->rows_gpu + block_size_y - 1)/block_size_y); + dim3 blockDim(block_size_x, block_size_y); + fill_matrix_kernel + <<>> + (this->data_gpu, + this->cols_gpu, + value); + } + } + + template + void MatrixGPU:: + copy_to_host() + { + adaboost::utils::cuda::cuda_memcpy( + this->get_data_pointer(false), + this->data_gpu, + this->rows_gpu*this->cols_gpu*sizeof(data_type_matrix), + adaboost::utils::cuda::DeviceToHost); + } + + template + void MatrixGPU:: + copy_to_device() + { + adaboost::utils::cuda::cuda_memcpy( + this->data_gpu, + this->get_data_pointer(false), + this->rows_gpu*this->cols_gpu*sizeof(data_type_matrix), + adaboost::utils::cuda::HostToDevice); + } + + template + unsigned MatrixGPU:: + get_rows(bool gpu) const + { + if(gpu) + { + return this->rows_gpu; + } + else + { + return this->adaboost::core:: + Matrix::get_rows(); + } + } + + template + unsigned MatrixGPU:: + get_cols(bool gpu) const + { + if(gpu) + { + return this->cols_gpu; + } + else + { + return this->adaboost::core:: + Matrix::get_cols(); + } + } + + template + data_type_matrix* MatrixGPU:: + get_data_pointer(bool gpu) const + { + if(gpu) + { + return this->data_gpu; + } + else + { + return this->adaboost::core::Matrix::get_data_pointer(); + } + } + + template + MatrixGPU:: + ~MatrixGPU() + { + adaboost::utils::cuda::cuda_free(this->data_gpu); + } + + template + __global__ + void multiply_kernel( + data_type_matrix* mat1, + data_type_matrix* mat2, + data_type_matrix* result, + unsigned mat1_rows, + unsigned mat1_cols, + unsigned mat2_rows, + unsigned mat2_cols) + { + data_type_matrix cvalue = 0.0; + unsigned row = blockIdx.y*blockDim.y + threadIdx.y; + unsigned col = blockIdx.x*blockDim.x + threadIdx.x; + if(row > mat1_rows || col > mat2_cols) + return ; + for(unsigned e = 0; e < mat1_cols; e++) + cvalue += mat1[row*mat1_cols+e] * mat2[e*mat2_cols+col]; + result[row*mat2_cols+col] = cvalue; + } + + template + void multiply_gpu(const MatrixGPU& mat1, + const MatrixGPU& mat2, + MatrixGPU& result) + { + adaboost::utils::check(mat1.get_cols() == mat2.get_rows(), + "Order of matrices don't match."); + dim3 gridDim((mat2.get_cols() + BLOCK_SIZE - 1)/BLOCK_SIZE, + (mat1.get_rows() + BLOCK_SIZE - 1)/BLOCK_SIZE); + dim3 blockDim(BLOCK_SIZE, BLOCK_SIZE); + multiply_kernel + <<>> + (mat1.get_data_pointer(), + mat2.get_data_pointer(), + result.get_data_pointer(), + mat1.get_rows(), + mat1.get_cols(), + mat2.get_rows(), + mat2.get_cols()); + } + + #include "instantiated_templates_cuda_data_structures.hpp" + + } // namespace core + } // namespace cuda +} // namespace adaboost + +#endif diff --git a/adaboost/cuda/instantiated_templates_cuda_data_structures.hpp b/adaboost/cuda/instantiated_templates_cuda_data_structures.hpp new file mode 100644 index 0000000..0819923 --- /dev/null +++ b/adaboost/cuda/instantiated_templates_cuda_data_structures.hpp @@ -0,0 +1,96 @@ +template class VectorGPU; +template class VectorGPU; +template class VectorGPU; +template class VectorGPU; +template class VectorGPU; +template class VectorGPU; +template class VectorGPU; +template class VectorGPU; +template class VectorGPU; +template class VectorGPU; +template class VectorGPU; +template class VectorGPU; +template void product_gpu( +const VectorGPU&, const VectorGPU&, bool& result, unsigned); +template void product_gpu( +const VectorGPU&, const VectorGPU&, short& result, unsigned); +template void product_gpu( +const VectorGPU&, const VectorGPU&, unsigned short& result, unsigned); +template void product_gpu( +const VectorGPU&, const VectorGPU&, int& result, unsigned); +template void product_gpu( +const VectorGPU&, const VectorGPU&, unsigned int& result, unsigned); +template void product_gpu( +const VectorGPU&, const VectorGPU&, long& result, unsigned); +template void product_gpu( +const VectorGPU&, const VectorGPU&, unsigned long& result, unsigned); +template void product_gpu( +const VectorGPU&, const VectorGPU&, long long& result, unsigned); +template void product_gpu( +const VectorGPU&, const VectorGPU&, unsigned long long& result, unsigned); +template void product_gpu( +const VectorGPU&, const VectorGPU&, float& result, unsigned); +template void product_gpu( +const VectorGPU&, const VectorGPU&, double& result, unsigned); +template void product_gpu( +const VectorGPU&, const VectorGPU&, long double& result, unsigned); +template class MatrixGPU; +template class MatrixGPU; +template class MatrixGPU; +template class MatrixGPU; +template class MatrixGPU; +template class MatrixGPU; +template class MatrixGPU; +template class MatrixGPU; +template class MatrixGPU; +template class MatrixGPU; +template class MatrixGPU; +template class MatrixGPU; +template void multiply_gpu +(const MatrixGPU& mat1, +const MatrixGPU& mat2, +MatrixGPU& result); +template void multiply_gpu +(const MatrixGPU& mat1, +const MatrixGPU& mat2, +MatrixGPU& result); +template void multiply_gpu +(const MatrixGPU& mat1, +const MatrixGPU& mat2, +MatrixGPU& result); +template void multiply_gpu +(const MatrixGPU& mat1, +const MatrixGPU& mat2, +MatrixGPU& result); +template void multiply_gpu +(const MatrixGPU& mat1, +const MatrixGPU& mat2, +MatrixGPU& result); +template void multiply_gpu +(const MatrixGPU& mat1, +const MatrixGPU& mat2, +MatrixGPU& result); +template void multiply_gpu +(const MatrixGPU& mat1, +const MatrixGPU& mat2, +MatrixGPU& result); +template void multiply_gpu +(const MatrixGPU& mat1, +const MatrixGPU& mat2, +MatrixGPU& result); +template void multiply_gpu +(const MatrixGPU& mat1, +const MatrixGPU& mat2, +MatrixGPU& result); +template void multiply_gpu +(const MatrixGPU& mat1, +const MatrixGPU& mat2, +MatrixGPU& result); +template void multiply_gpu +(const MatrixGPU& mat1, +const MatrixGPU& mat2, +MatrixGPU& result); +template void multiply_gpu +(const MatrixGPU& mat1, +const MatrixGPU& mat2, +MatrixGPU& result); diff --git a/adaboost/tests/test_cuda.cpp b/adaboost/tests/test_cuda.cpp new file mode 100644 index 0000000..9e23610 --- /dev/null +++ b/adaboost/tests/test_cuda.cpp @@ -0,0 +1,7 @@ +#include + +int main(int ac, char* av[]) +{ + testing::InitGoogleTest(&ac, av); + return RUN_ALL_TESTS(); +} diff --git a/adaboost/tests/test_cuda_data_structures.hpp b/adaboost/tests/test_cuda_data_structures.hpp new file mode 100644 index 0000000..0b3903c --- /dev/null +++ b/adaboost/tests/test_cuda_data_structures.hpp @@ -0,0 +1,77 @@ +#include +#include +#include +#include +#include + +TEST(Cuda, VectorGPU) +{ + adaboost::utils::cuda::cuda_event_t has_happened; + adaboost::utils::cuda::cuda_event_create(&has_happened); + adaboost::cuda::core::VectorGPU vec1(1000); + adaboost::cuda::core::VectorGPU vec2(1000); + unsigned block_size = 32; + vec1.fill(1.0, block_size); + vec2.fill(1.0, block_size); + adaboost::utils::cuda::cuda_event_record(has_happened); + adaboost::utils::cuda::cuda_event_synchronize(has_happened); + float result_gpu; + product_gpu(vec1, vec2, result_gpu, block_size); + adaboost::utils::cuda::cuda_event_record(has_happened); + adaboost::utils::cuda::cuda_event_synchronize(has_happened); + EXPECT_EQ(result_gpu, 1000.0)<<"Result from product on GPU should be 1000.0"; + vec1.copy_to_host(); + vec2.copy_to_host(); + adaboost::utils::cuda::cuda_event_record(has_happened); + adaboost::utils::cuda::cuda_event_synchronize(has_happened); + for(unsigned i = 0; i < 1000; i++) + { + std::string msg1 = "All entries of VectorGPU should be 1"; + EXPECT_EQ(1, vec1.at(i))< mat_f; + EXPECT_EQ(0, mat_f.get_cols())<<"Number of columns should be 0"; + EXPECT_EQ(0, mat_f.get_rows())<<"Number of rows should be 0."; + adaboost::cuda::core::MatrixGPU mat1(3, 3), mat2(3, 3), mat3(2, 1); + mat1.fill(4.0); + mat2.fill(5.0); + mat1.copy_to_device(); + mat2.copy_to_device(); + adaboost::utils::cuda::cuda_event_record(has_happened); + adaboost::utils::cuda::cuda_event_synchronize(has_happened); + adaboost::cuda::core::MatrixGPU result1(3, 3); + adaboost::cuda::core::multiply_gpu(mat1, mat2, result1); + adaboost::utils::cuda::cuda_event_record(has_happened); + adaboost::utils::cuda::cuda_event_synchronize(has_happened); + result1.copy_to_host(); + for(unsigned int i = 0; i < 3; i++) + { + for(unsigned int j = 0; j < 3; j++) + { + EXPECT_EQ(60.0, result1.at(i, j)); + } + } + mat3.set(0, 0, 6.0); + mat3.set(1, 0, 6.0); + EXPECT_THROW({ + try + { + adaboost::cuda::core::multiply_gpu(mat1, mat3, result1); + } + catch(const std::logic_error& e) + { + EXPECT_STREQ("Order of matrices don't match.", e.what()); + throw; + } + }, std::logic_error); +} diff --git a/adaboost/utils/cuda_wrappers.hpp b/adaboost/utils/cuda_wrappers.hpp new file mode 100644 index 0000000..5e1227e --- /dev/null +++ b/adaboost/utils/cuda_wrappers.hpp @@ -0,0 +1,33 @@ +#ifndef ADABOOST_UTILS_CUDA_WRAPPERS_HPP +#define ADABOOST_UTILS_CUDA_WRAPPERS_HPP + +#include +#include + +namespace adaboost +{ + namespace utils + { + namespace cuda + { + enum direction {HostToDevice, DeviceToHost}; + typedef cudaEvent_t cuda_event_t; + + void cuda_malloc(void** ptr, unsigned num_bytes); + + void cuda_memcpy + (void* ptr_1, void* ptr_2, unsigned num_bytes, direction d); + + void cuda_event_create(cuda_event_t* event_ptr); + + void cuda_event_record(cuda_event_t event); + + void cuda_event_synchronize(cuda_event_t event); + + void cuda_free(void* ptr); + + } // namspace cuda + } // namespace utils +} // namespace adaboost + +#endif diff --git a/adaboost/utils/cuda_wrappers_impl.cu b/adaboost/utils/cuda_wrappers_impl.cu new file mode 100644 index 0000000..f4a855f --- /dev/null +++ b/adaboost/utils/cuda_wrappers_impl.cu @@ -0,0 +1,6 @@ +#ifndef ADABOOST_UTILS_CUDA_WRAPPERS_IMPL_CU +#define ADABOOST_UTILS_CUDA_WRAPPERS_IMPL_CU + +#include "cuda_wrappers_impl.hpp" + +#endif diff --git a/adaboost/utils/cuda_wrappers_impl.hpp b/adaboost/utils/cuda_wrappers_impl.hpp new file mode 100644 index 0000000..5787fbe --- /dev/null +++ b/adaboost/utils/cuda_wrappers_impl.hpp @@ -0,0 +1,54 @@ +#ifndef ADABOOST_UTILS_CUDA_WRAPPERS_IMPL_HPP +#define ADABOOST_UTILS_CUDA_WRAPPERS_IMPL_HPP + +#include + +namespace adaboost +{ + namespace utils + { + namespace cuda + { + void cuda_malloc(void** ptr, unsigned num_bytes) + { + cudaMalloc(ptr, num_bytes); + } + + void cuda_memcpy + (void* ptr_1, void* ptr_2, unsigned num_bytes, direction d) + { + if(d == HostToDevice) + { + cudaMemcpy(ptr_1, ptr_2, num_bytes, cudaMemcpyHostToDevice); + } + else if(d == DeviceToHost) + { + cudaMemcpy(ptr_1, ptr_2, num_bytes, cudaMemcpyDeviceToHost); + } + } + + void cuda_event_create(cuda_event_t* event_ptr) + { + cudaEventCreate(event_ptr); + } + + void cuda_event_record(cuda_event_t event) + { + cudaEventRecord(event); + } + + void cuda_event_synchronize(cuda_event_t event) + { + cudaEventSynchronize(event); + } + + void cuda_free(void* ptr) + { + cudaFree(ptr); + } + + } // namespace cuda + } // namespace utils +} // namespace adaboost + +#endif