Skip to content

Commit

Permalink
Add more c++ for opencl kernels (atleast_{1,2,3}d, expand_dims, tile,…
Browse files Browse the repository at this point in the history
… where) (#261)

* add atleast_1d c++ for opencl kernels

* add atleast_2d c++ for opencl kernels

* add atleast_3d c++ for opencl kernels

* add expand_dims c++ for opencl kernels

* add flatten c++ for opencl kernels

* add flip c++ for opencl kernels

* add pooling c++ for opencl kernels

* add reshape c++ for opencl kernels

* add squeeze c++ for opencl kernels

* add tile c++ for opencl kernels

* add where c++ for opencl kernels

* fix atleast_nd indexing for opencl

* use nm_size_t for cast indexing

* add nmtools_index_attribute for compute_offset and compute_strides

* use nm_index_t for pooling indexing metafunction

* make sure to remove address space when inferring type for tile indexing

* add nmtools_index_attribute for view decorator

* try to fix for opencl kernel compilation

* try to remove address space for get_common_type metafunction

* add nmtools_address_private and nmtools_address_generic macro

* add pooling and slice indexing test data

* add nm_utl_size_t and nm_utl_index_t macro alias

* update tests and kernel_spv

* try to fix ci

* try to fix ci
  • Loading branch information
alifahrri authored Dec 10, 2023
1 parent 20877d0 commit 61d19b1
Show file tree
Hide file tree
Showing 53 changed files with 86,374 additions and 52,968 deletions.
129 changes: 129 additions & 0 deletions include/nmtools/array/eval/opencl/kernels/atleast_1d.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,129 @@
#ifndef NMTOOLS_ARRAY_EVAL_OPENCL_KERNELS_ATLEAST_1D_HPP
#define NMTOOLS_ARRAY_EVAL_OPENCL_KERNELS_ATLEAST_1D_HPP

#include "nmtools/array/ndarray.hpp"
#include "nmtools/array/view/atleast_1d.hpp"
#include "nmtools/array/eval/opencl/kernel_helper.hpp"
#include "nmtools/array/index/cast.hpp"

// Kernel naming helpers for atleast_1d.
//   nmtools_cl_kernel_name(out,inp)     -> identifier      atleast_1d_<out>_<inp>
//   nmtools_cl_kernel_name_str(out,inp) -> nm_stringify of that same identifier
//     (nm_stringify is defined elsewhere; presumably yields a string literal).
// The same macro names are defined, with different bodies, by the sibling
// atleast_2d/atleast_3d kernel headers. Redefining a function-like macro with
// a different replacement list is ill-formed, so drop any earlier definition
// first; #undef of a name that is not defined is a no-op.
#undef nmtools_cl_kernel_name
#undef nmtools_cl_kernel_name_str
#define nmtools_cl_kernel_name(out_type,inp_type) atleast_1d_##out_type##_##inp_type
#define nmtools_cl_kernel_name_str(out_type,inp_type) nm_stringify(atleast_1d_##out_type##_##inp_type)

#ifdef NMTOOLS_OPENCL_BUILD_KERNELS

namespace nm = nmtools;
namespace na = nmtools::array;
namespace view = nmtools::view;
namespace meta = nmtools::meta;
namespace opencl = nmtools::array::opencl;

/*
 * Device-side kernel generator for atleast_1d.
 *
 * nmtools_cl_kernel(out_type,inp_type) expands to an OpenCL `kernel void`
 * function named by nmtools_cl_kernel_name(out_type,inp_type). The kernel:
 *   1. wraps (inp_ptr, inp_shape_ptr, inp_dim) with na::create_array,
 *   2. wraps (out_ptr, out_shape_ptr, out_dim) with na::create_mutable_array,
 *   3. builds view::atleast_1d over the input,
 *   4. hands both to opencl::assign_array, which writes the view into the
 *      output (per-work-item indexing is done there, not in this macro).
 *
 * The parameter order is part of the host/device contract: the host-side
 * evaluator binds its arguments positionally as
 * (out, inp, out_shape, inp_shape, out_dim, inp_dim).
 *
 * The sibling kernel headers define a macro with the same name but a
 * different body, so any earlier definition is removed first to avoid an
 * incompatible macro redefinition (#undef of an undefined name is a no-op).
 */
#undef nmtools_cl_kernel
#define nmtools_cl_kernel(out_type,inp_type) \
kernel void nmtools_cl_kernel_name(out_type,inp_type) \
    ( global out_type* out_ptr \
    , global const inp_type* inp_ptr \
    , global const nm_cl_index_t* out_shape_ptr \
    , global const nm_cl_index_t* inp_shape_ptr \
    , const nm_cl_size_t out_dim \
    , const nm_cl_size_t inp_dim \
    ) \
{ \
    auto input = na::create_array(inp_ptr,inp_shape_ptr,inp_dim); \
    auto output = na::create_mutable_array(out_ptr,out_shape_ptr,out_dim); \
    auto result = view::atleast_1d(input); \
    opencl::assign_array(output,result); \
}

// Element-type combinations compiled into the kernel binary; the host side
// only looks up these two names (float->float, double->double).
nmtools_cl_kernel(float,float)
nmtools_cl_kernel(double,double)

#else // NMTOOLS_OPENCL_BUILD_KERNELS

#include "nmtools/array/eval/opencl/context.hpp"
#include <cstring>

// SPIR-V binary holding the compiled atleast_1d kernels, and its length in
// bytes. Both symbols are only declared here; they are defined elsewhere.
extern unsigned char nm_cl_atleast_1d_spv[];
extern unsigned int nm_cl_atleast_1d_spv_len;

namespace nmtools::array::opencl
{
    /**
     * @brief Host-side OpenCL evaluator for the atleast_1d view.
     *
     * Holds the view to evaluate and the OpenCL context used to create
     * buffers, build the kernel and launch it.
     *
     * @tparam args_t template arguments of the decorated atleast_1d view
     */
    template <typename...args_t>
    struct kernel_t<
        view::decorator_t<view::atleast_1d_t,args_t...>
    > {
        using view_t = view::decorator_t<view::atleast_1d_t,args_t...>;

        view_t view;
        std::shared_ptr<context_t> context;

        /**
         * @brief Copy the embedded SPIR-V binary into an owning byte list.
         *
         * @return nmtools_list<unsigned char> of nm_cl_atleast_1d_spv_len bytes
         */
        static auto get_spirv()
        {
            using vector = nmtools_list<unsigned char>;
            auto spirv = vector();
            spirv.resize(nm_cl_atleast_1d_spv_len);
            // <cstring> is included by this header, so use the std:: qualified name.
            std::memcpy(spirv.data(),nm_cl_atleast_1d_spv,sizeof(unsigned char)*nm_cl_atleast_1d_spv_len);
            return spirv;
        }

        /**
         * @brief Select the device kernel name for an element-type pair.
         *
         * Only float->float and double->double are handled. For any other
         * combination no return statement is instantiated, so the deduced
         * return type is void and initializing a variable from the result
         * (as eval() does) fails to compile.
         *
         * @tparam inp_t input element type
         * @tparam out_t output element type (defaults to inp_t)
         * @return the stringified kernel name for the selected pair
         */
        template <typename inp_t, typename out_t=inp_t>
        static auto kernel_name()
        {
            if constexpr (meta::is_same_v<inp_t,float> && meta::is_same_v<out_t,float>) {
                return nmtools_cl_kernel_name_str(float,float);
            } else if constexpr (meta::is_same_v<inp_t,double> && meta::is_same_v<out_t,double>) {
                return nmtools_cl_kernel_name_str(double,double);
            }
        }

        /**
         * @brief Run the atleast_1d kernel for the stored view.
         *
         * Creates device buffers for the input array, the output and both
         * shapes, creates the kernel from the SPIR-V binary on first use,
         * binds the arguments and launches it through the context.
         * NOTE(review): copying the device result back into `output` is
         * presumably done by context_t::run, which is not visible here.
         *
         * @tparam output_t host array type receiving the result
         * @param output destination array; its size and shape define the launch
         */
        template <typename output_t>
        auto eval(output_t& output)
        {
            using out_t = meta::get_element_type_t<output_t>;

            const auto& inp_array = *get_array(view);

            using inp_t = meta::get_element_type_t<meta::remove_cvref_pointer_t<decltype(inp_array)>>;

            auto inp_buffer = context->create_buffer(inp_array);
            auto out_buffer = context->create_buffer<out_t>(nmtools::size(output));

            auto kernel_name = this->kernel_name<inp_t,out_t>();

            // The kernel is created from the SPIR-V binary only if the context
            // does not already have one registered under this name.
            if (!context->has_kernel(kernel_name)) {
                context->create_kernel(get_spirv(),kernel_name);
            }

            auto kernel = context->get_kernel(kernel_name);

            auto out_size = nmtools::size(output);

            auto out_shape = nmtools::shape(output);
            auto inp_shape = nmtools::shape(inp_array);

            auto out_shape_buffer = context->create_buffer(index::cast<nm_cl_index_t>(out_shape));
            auto inp_shape_buffer = context->create_buffer(index::cast<nm_cl_index_t>(inp_shape));

            auto out_dim = nmtools::len(out_shape);
            auto inp_dim = nmtools::len(inp_shape);

            auto kernel_info = kernel.kernel_info_;
            auto local_size  = nmtools_array{kernel_info->preferred_work_group_size_multiple};

            // Round the number of work items up to a whole number of work
            // groups. This is done in integer arithmetic: going through float
            // (24-bit mantissa) can round large sizes down and leave trailing
            // elements without a work item.
            const auto group_size = local_size[0];
            const auto item_count = static_cast<size_t>(out_size);
            const auto num_groups = item_count / group_size + (item_count % group_size != 0 ? 1 : 0);
            auto global_size = nmtools_array{static_cast<size_t>(num_groups) * local_size[0]};

            // Positional arguments; the order must match the device kernel signature.
            auto default_args = nmtools_tuple{
                out_buffer
                , inp_buffer
                , out_shape_buffer
                , inp_shape_buffer
                , static_cast<nm_cl_size_t>(out_dim)
                , static_cast<nm_cl_size_t>(inp_dim)
            };

            context->set_args(kernel,default_args);
            context->run(kernel,out_buffer,output,global_size,local_size);
        }
    };
}
#endif // NMTOOLS_OPENCL_BUILD_KERNELS

#endif // NMTOOLS_ARRAY_EVAL_OPENCL_KERNELS_ATLEAST_1D_HPP
129 changes: 129 additions & 0 deletions include/nmtools/array/eval/opencl/kernels/atleast_2d.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,129 @@
#ifndef NMTOOLS_ARRAY_EVAL_OPENCL_KERNELS_ATLEAST_2D_HPP
#define NMTOOLS_ARRAY_EVAL_OPENCL_KERNELS_ATLEAST_2D_HPP

#include "nmtools/array/ndarray.hpp"
#include "nmtools/array/view/atleast_2d.hpp"
#include "nmtools/array/eval/opencl/kernel_helper.hpp"
#include "nmtools/array/index/cast.hpp"

// Kernel naming helpers for atleast_2d.
//   nmtools_cl_kernel_name(out,inp)     -> identifier      atleast_2d_<out>_<inp>
//   nmtools_cl_kernel_name_str(out,inp) -> nm_stringify of that same identifier
//     (nm_stringify is defined elsewhere; presumably yields a string literal).
// The same macro names are defined, with different bodies, by the sibling
// atleast_1d/atleast_3d kernel headers. Redefining a function-like macro with
// a different replacement list is ill-formed, so drop any earlier definition
// first; #undef of a name that is not defined is a no-op.
#undef nmtools_cl_kernel_name
#undef nmtools_cl_kernel_name_str
#define nmtools_cl_kernel_name(out_type,inp_type) atleast_2d_##out_type##_##inp_type
#define nmtools_cl_kernel_name_str(out_type,inp_type) nm_stringify(atleast_2d_##out_type##_##inp_type)

#ifdef NMTOOLS_OPENCL_BUILD_KERNELS

namespace nm = nmtools;
namespace na = nmtools::array;
namespace view = nmtools::view;
namespace meta = nmtools::meta;
namespace opencl = nmtools::array::opencl;

/*
 * Device-side kernel generator for atleast_2d.
 *
 * nmtools_cl_kernel(out_type,inp_type) expands to an OpenCL `kernel void`
 * function named by nmtools_cl_kernel_name(out_type,inp_type). The kernel:
 *   1. wraps (inp_ptr, inp_shape_ptr, inp_dim) with na::create_array,
 *   2. wraps (out_ptr, out_shape_ptr, out_dim) with na::create_mutable_array,
 *   3. builds view::atleast_2d over the input,
 *   4. hands both to opencl::assign_array, which writes the view into the
 *      output (per-work-item indexing is done there, not in this macro).
 *
 * The parameter order is part of the host/device contract: the host-side
 * evaluator binds its arguments positionally as
 * (out, inp, out_shape, inp_shape, out_dim, inp_dim).
 *
 * The sibling kernel headers define a macro with the same name but a
 * different body, so any earlier definition is removed first to avoid an
 * incompatible macro redefinition (#undef of an undefined name is a no-op).
 */
#undef nmtools_cl_kernel
#define nmtools_cl_kernel(out_type,inp_type) \
kernel void nmtools_cl_kernel_name(out_type,inp_type) \
    ( global out_type* out_ptr \
    , global const inp_type* inp_ptr \
    , global const nm_cl_index_t* out_shape_ptr \
    , global const nm_cl_index_t* inp_shape_ptr \
    , const nm_cl_size_t out_dim \
    , const nm_cl_size_t inp_dim \
    ) \
{ \
    auto input = na::create_array(inp_ptr,inp_shape_ptr,inp_dim); \
    auto output = na::create_mutable_array(out_ptr,out_shape_ptr,out_dim); \
    auto result = view::atleast_2d(input); \
    opencl::assign_array(output,result); \
}

// Element-type combinations compiled into the kernel binary; the host side
// only looks up these two names (float->float, double->double).
nmtools_cl_kernel(float,float)
nmtools_cl_kernel(double,double)

#else // NMTOOLS_OPENCL_BUILD_KERNELS

#include "nmtools/array/eval/opencl/context.hpp"
#include <cstring>

// SPIR-V binary holding the compiled atleast_2d kernels, and its length in
// bytes. Both symbols are only declared here; they are defined elsewhere.
extern unsigned char nm_cl_atleast_2d_spv[];
extern unsigned int nm_cl_atleast_2d_spv_len;

namespace nmtools::array::opencl
{
    /**
     * @brief Host-side OpenCL evaluator for the atleast_2d view.
     *
     * Holds the view to evaluate and the OpenCL context used to create
     * buffers, build the kernel and launch it.
     *
     * @tparam args_t template arguments of the decorated atleast_2d view
     */
    template <typename...args_t>
    struct kernel_t<
        view::decorator_t<view::atleast_2d_t,args_t...>
    > {
        using view_t = view::decorator_t<view::atleast_2d_t,args_t...>;

        view_t view;
        std::shared_ptr<context_t> context;

        /**
         * @brief Copy the embedded SPIR-V binary into an owning byte list.
         *
         * @return nmtools_list<unsigned char> of nm_cl_atleast_2d_spv_len bytes
         */
        static auto get_spirv()
        {
            using vector = nmtools_list<unsigned char>;
            auto spirv = vector();
            spirv.resize(nm_cl_atleast_2d_spv_len);
            // <cstring> is included by this header, so use the std:: qualified name.
            std::memcpy(spirv.data(),nm_cl_atleast_2d_spv,sizeof(unsigned char)*nm_cl_atleast_2d_spv_len);
            return spirv;
        }

        /**
         * @brief Select the device kernel name for an element-type pair.
         *
         * Only float->float and double->double are handled. For any other
         * combination no return statement is instantiated, so the deduced
         * return type is void and initializing a variable from the result
         * (as eval() does) fails to compile.
         *
         * @tparam inp_t input element type
         * @tparam out_t output element type (defaults to inp_t)
         * @return the stringified kernel name for the selected pair
         */
        template <typename inp_t, typename out_t=inp_t>
        static auto kernel_name()
        {
            if constexpr (meta::is_same_v<inp_t,float> && meta::is_same_v<out_t,float>) {
                return nmtools_cl_kernel_name_str(float,float);
            } else if constexpr (meta::is_same_v<inp_t,double> && meta::is_same_v<out_t,double>) {
                return nmtools_cl_kernel_name_str(double,double);
            }
        }

        /**
         * @brief Run the atleast_2d kernel for the stored view.
         *
         * Creates device buffers for the input array, the output and both
         * shapes, creates the kernel from the SPIR-V binary on first use,
         * binds the arguments and launches it through the context.
         * NOTE(review): copying the device result back into `output` is
         * presumably done by context_t::run, which is not visible here.
         *
         * @tparam output_t host array type receiving the result
         * @param output destination array; its size and shape define the launch
         */
        template <typename output_t>
        auto eval(output_t& output)
        {
            using out_t = meta::get_element_type_t<output_t>;

            const auto& inp_array = *get_array(view);

            using inp_t = meta::get_element_type_t<meta::remove_cvref_pointer_t<decltype(inp_array)>>;

            auto inp_buffer = context->create_buffer(inp_array);
            auto out_buffer = context->create_buffer<out_t>(nmtools::size(output));

            auto kernel_name = this->kernel_name<inp_t,out_t>();

            // The kernel is created from the SPIR-V binary only if the context
            // does not already have one registered under this name.
            if (!context->has_kernel(kernel_name)) {
                context->create_kernel(get_spirv(),kernel_name);
            }

            auto kernel = context->get_kernel(kernel_name);

            auto out_size = nmtools::size(output);

            auto out_shape = nmtools::shape(output);
            auto inp_shape = nmtools::shape(inp_array);

            auto out_shape_buffer = context->create_buffer(index::cast<nm_cl_index_t>(out_shape));
            auto inp_shape_buffer = context->create_buffer(index::cast<nm_cl_index_t>(inp_shape));

            auto out_dim = nmtools::len(out_shape);
            auto inp_dim = nmtools::len(inp_shape);

            auto kernel_info = kernel.kernel_info_;
            auto local_size  = nmtools_array{kernel_info->preferred_work_group_size_multiple};

            // Round the number of work items up to a whole number of work
            // groups. This is done in integer arithmetic: going through float
            // (24-bit mantissa) can round large sizes down and leave trailing
            // elements without a work item.
            const auto group_size = local_size[0];
            const auto item_count = static_cast<size_t>(out_size);
            const auto num_groups = item_count / group_size + (item_count % group_size != 0 ? 1 : 0);
            auto global_size = nmtools_array{static_cast<size_t>(num_groups) * local_size[0]};

            // Positional arguments; the order must match the device kernel signature.
            auto default_args = nmtools_tuple{
                out_buffer
                , inp_buffer
                , out_shape_buffer
                , inp_shape_buffer
                , static_cast<nm_cl_size_t>(out_dim)
                , static_cast<nm_cl_size_t>(inp_dim)
            };

            context->set_args(kernel,default_args);
            context->run(kernel,out_buffer,output,global_size,local_size);
        }
    };
}
#endif // NMTOOLS_OPENCL_BUILD_KERNELS

#endif // NMTOOLS_ARRAY_EVAL_OPENCL_KERNELS_ATLEAST_2D_HPP
129 changes: 129 additions & 0 deletions include/nmtools/array/eval/opencl/kernels/atleast_3d.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,129 @@
#ifndef NMTOOLS_ARRAY_EVAL_OPENCL_KERNELS_ATLEAST_3D_HPP
#define NMTOOLS_ARRAY_EVAL_OPENCL_KERNELS_ATLEAST_3D_HPP

#include "nmtools/array/ndarray.hpp"
#include "nmtools/array/view/atleast_3d.hpp"
#include "nmtools/array/eval/opencl/kernel_helper.hpp"
#include "nmtools/array/index/cast.hpp"

// Kernel naming helpers for atleast_3d.
//   nmtools_cl_kernel_name(out,inp)     -> identifier      atleast_3d_<out>_<inp>
//   nmtools_cl_kernel_name_str(out,inp) -> nm_stringify of that same identifier
//     (nm_stringify is defined elsewhere; presumably yields a string literal).
// The same macro names are defined, with different bodies, by the sibling
// atleast_1d/atleast_2d kernel headers. Redefining a function-like macro with
// a different replacement list is ill-formed, so drop any earlier definition
// first; #undef of a name that is not defined is a no-op.
#undef nmtools_cl_kernel_name
#undef nmtools_cl_kernel_name_str
#define nmtools_cl_kernel_name(out_type,inp_type) atleast_3d_##out_type##_##inp_type
#define nmtools_cl_kernel_name_str(out_type,inp_type) nm_stringify(atleast_3d_##out_type##_##inp_type)

#ifdef NMTOOLS_OPENCL_BUILD_KERNELS

namespace nm = nmtools;
namespace na = nmtools::array;
namespace view = nmtools::view;
namespace meta = nmtools::meta;
namespace opencl = nmtools::array::opencl;

/*
 * Device-side kernel generator for atleast_3d.
 *
 * nmtools_cl_kernel(out_type,inp_type) expands to an OpenCL `kernel void`
 * function named by nmtools_cl_kernel_name(out_type,inp_type). The kernel:
 *   1. wraps (inp_ptr, inp_shape_ptr, inp_dim) with na::create_array,
 *   2. wraps (out_ptr, out_shape_ptr, out_dim) with na::create_mutable_array,
 *   3. builds view::atleast_3d over the input,
 *   4. hands both to opencl::assign_array, which writes the view into the
 *      output (per-work-item indexing is done there, not in this macro).
 *
 * The parameter order is part of the host/device contract: the host-side
 * evaluator binds its arguments positionally as
 * (out, inp, out_shape, inp_shape, out_dim, inp_dim).
 *
 * The sibling kernel headers define a macro with the same name but a
 * different body, so any earlier definition is removed first to avoid an
 * incompatible macro redefinition (#undef of an undefined name is a no-op).
 */
#undef nmtools_cl_kernel
#define nmtools_cl_kernel(out_type,inp_type) \
kernel void nmtools_cl_kernel_name(out_type,inp_type) \
    ( global out_type* out_ptr \
    , global const inp_type* inp_ptr \
    , global const nm_cl_index_t* out_shape_ptr \
    , global const nm_cl_index_t* inp_shape_ptr \
    , const nm_cl_size_t out_dim \
    , const nm_cl_size_t inp_dim \
    ) \
{ \
    auto input = na::create_array(inp_ptr,inp_shape_ptr,inp_dim); \
    auto output = na::create_mutable_array(out_ptr,out_shape_ptr,out_dim); \
    auto result = view::atleast_3d(input); \
    opencl::assign_array(output,result); \
}

// Element-type combinations compiled into the kernel binary; the host side
// only looks up these two names (float->float, double->double).
nmtools_cl_kernel(float,float)
nmtools_cl_kernel(double,double)

#else // NMTOOLS_OPENCL_BUILD_KERNELS

#include "nmtools/array/eval/opencl/context.hpp"
#include <cstring>

// SPIR-V binary holding the compiled atleast_3d kernels, and its length in
// bytes. Both symbols are only declared here; they are defined elsewhere.
extern unsigned char nm_cl_atleast_3d_spv[];
extern unsigned int nm_cl_atleast_3d_spv_len;

namespace nmtools::array::opencl
{
    /**
     * @brief Host-side OpenCL evaluator for the atleast_3d view.
     *
     * Holds the view to evaluate and the OpenCL context used to create
     * buffers, build the kernel and launch it.
     *
     * @tparam args_t template arguments of the decorated atleast_3d view
     */
    template <typename...args_t>
    struct kernel_t<
        view::decorator_t<view::atleast_3d_t,args_t...>
    > {
        using view_t = view::decorator_t<view::atleast_3d_t,args_t...>;

        view_t view;
        std::shared_ptr<context_t> context;

        /**
         * @brief Copy the embedded SPIR-V binary into an owning byte list.
         *
         * @return nmtools_list<unsigned char> of nm_cl_atleast_3d_spv_len bytes
         */
        static auto get_spirv()
        {
            using vector = nmtools_list<unsigned char>;
            auto spirv = vector();
            spirv.resize(nm_cl_atleast_3d_spv_len);
            // <cstring> is included by this header, so use the std:: qualified name.
            std::memcpy(spirv.data(),nm_cl_atleast_3d_spv,sizeof(unsigned char)*nm_cl_atleast_3d_spv_len);
            return spirv;
        }

        /**
         * @brief Select the device kernel name for an element-type pair.
         *
         * Only float->float and double->double are handled. For any other
         * combination no return statement is instantiated, so the deduced
         * return type is void and initializing a variable from the result
         * (as eval() does) fails to compile.
         *
         * @tparam inp_t input element type
         * @tparam out_t output element type (defaults to inp_t)
         * @return the stringified kernel name for the selected pair
         */
        template <typename inp_t, typename out_t=inp_t>
        static auto kernel_name()
        {
            if constexpr (meta::is_same_v<inp_t,float> && meta::is_same_v<out_t,float>) {
                return nmtools_cl_kernel_name_str(float,float);
            } else if constexpr (meta::is_same_v<inp_t,double> && meta::is_same_v<out_t,double>) {
                return nmtools_cl_kernel_name_str(double,double);
            }
        }

        /**
         * @brief Run the atleast_3d kernel for the stored view.
         *
         * Creates device buffers for the input array, the output and both
         * shapes, creates the kernel from the SPIR-V binary on first use,
         * binds the arguments and launches it through the context.
         * NOTE(review): copying the device result back into `output` is
         * presumably done by context_t::run, which is not visible here.
         *
         * @tparam output_t host array type receiving the result
         * @param output destination array; its size and shape define the launch
         */
        template <typename output_t>
        auto eval(output_t& output)
        {
            using out_t = meta::get_element_type_t<output_t>;

            const auto& inp_array = *get_array(view);

            using inp_t = meta::get_element_type_t<meta::remove_cvref_pointer_t<decltype(inp_array)>>;

            auto inp_buffer = context->create_buffer(inp_array);
            auto out_buffer = context->create_buffer<out_t>(nmtools::size(output));

            auto kernel_name = this->kernel_name<inp_t,out_t>();

            // The kernel is created from the SPIR-V binary only if the context
            // does not already have one registered under this name.
            if (!context->has_kernel(kernel_name)) {
                context->create_kernel(get_spirv(),kernel_name);
            }

            auto kernel = context->get_kernel(kernel_name);

            auto out_size = nmtools::size(output);

            auto out_shape = nmtools::shape(output);
            auto inp_shape = nmtools::shape(inp_array);

            auto out_shape_buffer = context->create_buffer(index::cast<nm_cl_index_t>(out_shape));
            auto inp_shape_buffer = context->create_buffer(index::cast<nm_cl_index_t>(inp_shape));

            auto out_dim = nmtools::len(out_shape);
            auto inp_dim = nmtools::len(inp_shape);

            auto kernel_info = kernel.kernel_info_;
            auto local_size  = nmtools_array{kernel_info->preferred_work_group_size_multiple};

            // Round the number of work items up to a whole number of work
            // groups. This is done in integer arithmetic: going through float
            // (24-bit mantissa) can round large sizes down and leave trailing
            // elements without a work item.
            const auto group_size = local_size[0];
            const auto item_count = static_cast<size_t>(out_size);
            const auto num_groups = item_count / group_size + (item_count % group_size != 0 ? 1 : 0);
            auto global_size = nmtools_array{static_cast<size_t>(num_groups) * local_size[0]};

            // Positional arguments; the order must match the device kernel signature.
            auto default_args = nmtools_tuple{
                out_buffer
                , inp_buffer
                , out_shape_buffer
                , inp_shape_buffer
                , static_cast<nm_cl_size_t>(out_dim)
                , static_cast<nm_cl_size_t>(inp_dim)
            };

            context->set_args(kernel,default_args);
            context->run(kernel,out_buffer,output,global_size,local_size);
        }
    };
}
#endif // NMTOOLS_OPENCL_BUILD_KERNELS

#endif // NMTOOLS_ARRAY_EVAL_OPENCL_KERNELS_ATLEAST_3D_HPP
Loading

0 comments on commit 61d19b1

Please sign in to comment.