Skip to content

Commit

Permalink
use USM for computing kernel
Browse files Browse the repository at this point in the history
  • Loading branch information
Xiangyu-Hu committed Aug 5, 2024
1 parent 4d8db78 commit d6af491
Show file tree
Hide file tree
Showing 7 changed files with 62 additions and 125 deletions.
7 changes: 4 additions & 3 deletions src/shared/particle_dynamics/dynamics_algorithms.h
Original file line number Diff line number Diff line change
Expand Up @@ -141,10 +141,11 @@ class SimpleDynamicsCK : public LocalDynamicsType, public BaseDynamics<void>
{
this->setUpdated(this->identifier_.getSPHBody());
this->setupDynamics(dt);
particle_for(kernel_implementation_,
ComputingKernel *computing_kernel = kernel_implementation_.getComputingKernel();
particle_for(ExecutionPolicy{},
this->identifier_.LoopRange(),
[=](size_t i, auto &&computing_kernel)
{ computing_kernel.update(i, dt); });
[=](size_t i)
{ computing_kernel->update(i, dt); });
};

protected:
Expand Down
12 changes: 6 additions & 6 deletions src/shared/particle_dynamics/execution/execution.h
Original file line number Diff line number Diff line change
Expand Up @@ -50,21 +50,21 @@ class Implementation<LocalDynamicsType, ExecutionPolicy>

public:
/**
 * Stores a reference to the local dynamics and starts with a null kernel
 * pointer; the kernel itself is only created when it is first requested.
 * NOTE(review): two initializer lists are listed back to back
 * (delegated_kernel_ and computing_kernel_), apparently the removed and the
 * added side of a rename -- confirm which one applies.
 */
explicit Implementation(LocalDynamicsType &local_dynamics)
: local_dynamics_(local_dynamics), delegated_kernel_(nullptr) {}
: local_dynamics_(local_dynamics), computing_kernel_(nullptr) {}

/**
 * Returns a pointer to the computing kernel of the local dynamics.
 * On the first call the kernel is created from local_dynamics_ through
 * kernel_ptr_keeper_; later calls return the cached pointer.
 * NOTE(review): the getDelegatedKernel/delegated_kernel_ lines and the
 * getComputingKernel/computing_kernel_ lines are listed together here,
 * apparently the removed and added side of a rename -- confirm.
 */
ComputingKernel *getDelegatedKernel()
ComputingKernel *getComputingKernel()
{
// A null pointer means the kernel has not been created yet.
if (delegated_kernel_ == nullptr)
if (computing_kernel_ == nullptr)
{
delegated_kernel_ =
computing_kernel_ =
kernel_ptr_keeper_.template createPtr<ComputingKernel>(local_dynamics_);
}
return delegated_kernel_;
return computing_kernel_;
}

private:
LocalDynamicsType &local_dynamics_;
ComputingKernel *delegated_kernel_;
ComputingKernel *computing_kernel_;
};
} // namespace execution
} // namespace SPH
Expand Down
26 changes: 0 additions & 26 deletions src/shared/particle_dynamics/particle_iterators.h
Original file line number Diff line number Diff line change
Expand Up @@ -74,32 +74,6 @@ inline void particle_for(const ParallelPolicy &par, const IndexRange &particles_
ap);
};

/**
 * Sequential particle loop driven by a kernel implementation.
 * The delegated kernel is fetched once from the implementation and handed,
 * together with each index of the range in ascending order, to kernel_function.
 */
template <class LocalDynamicsType, class ComputingKernelFunction>
inline void particle_for(Implementation<LocalDynamicsType, SequencedPolicy> &kernel_implementation,
                         const IndexRange &particles_range, const ComputingKernelFunction &kernel_function)
{
    auto *kernel_ptr = kernel_implementation.getDelegatedKernel();
    auto &kernel = *kernel_ptr;

    size_t index = particles_range.begin();
    while (index < particles_range.end())
    {
        kernel_function(index, kernel);
        ++index;
    }
};

/**
 * Parallel particle loop driven by a kernel implementation.
 * The delegated kernel is fetched once; parallel_for then splits the range
 * into sub-ranges and every index of a sub-range is passed, together with
 * the kernel, to kernel_function.
 */
template <class LocalDynamicsType, class ComputingKernelFunction>
inline void particle_for(Implementation<LocalDynamicsType, ParallelPolicy> &kernel_implementation,
                         const IndexRange &particles_range, const ComputingKernelFunction &kernel_function)
{
    auto *kernel_ptr = kernel_implementation.getDelegatedKernel();

    // Work done on one sub-range handed out by parallel_for.
    auto process_sub_range = [&](const IndexRange &sub_range)
    {
        size_t index = sub_range.begin();
        while (index < sub_range.end())
        {
            kernel_function(index, *kernel_ptr);
            ++index;
        }
    };

    parallel_for(particles_range, process_sub_range, ap);
};

/**
* Bodypart By Particle-wise iterators (for sequential and parallel computing).
*/
Expand Down
58 changes: 45 additions & 13 deletions src/src_sycl/shared/particle_dynamics/execution_sycl.h
Original file line number Diff line number Diff line change
Expand Up @@ -115,37 +115,69 @@ class ExecutionEvent
private:
std::vector<sycl::event> event_list_;
};
} // namespace execution

/* SYCL memory transfer utilities */
/**
 * Allocate room for `size` objects of type T with sycl::malloc_device on the
 * queue of the global execution instance and return the resulting pointer.
 */
template <class T>
inline T *allocateDeviceOnly(std::size_t size)
{
    auto &queue = execution::execution_instance.getQueue();
    T *device_ptr = sycl::malloc_device<T>(size, queue);
    return device_ptr;
}

/**
 * Allocate room for `size` objects of type T with sycl::malloc_shared on the
 * queue of the global execution instance and return the resulting pointer.
 */
template <class T>
inline T *allocateDeviceShared(std::size_t size)
{
    auto &queue = execution::execution_instance.getQueue();
    T *shared_ptr = sycl::malloc_shared<T>(size, queue);
    return shared_ptr;
}

/**
 * Release memory that was allocated on the queue of the global execution
 * instance (for example by allocateDeviceOnly() or allocateDeviceShared()).
 *
 * A null pointer is ignored, so owners that allocate lazily may call this
 * unconditionally, e.g. from a destructor, without depending on how the
 * SYCL runtime treats a null argument.
 *
 * @param device_mem pointer to release; may be nullptr.
 */
template <class T>
inline void freeDeviceData(T *device_mem)
{
    if (device_mem == nullptr)
    {
        return; // nothing was ever allocated, so there is nothing to release
    }
    sycl::free(device_mem, execution::execution_instance.getQueue());
}

/**
 * Enqueue a copy of `size` objects of type T from `host` to `device` on the
 * queue of the global execution instance.
 *
 * @return the event of the enqueued memcpy, wrapped as an ExecutionEvent.
 *         The function itself does not wait on it, so `host` has to stay
 *         valid until the caller knows the copy has completed.
 */
template <class T>
inline execution::ExecutionEvent copyToDevice(const T *host, T *device, std::size_t size)
{
    auto &queue = execution::execution_instance.getQueue();
    const std::size_t byte_count = size * sizeof(T);
    return queue.memcpy(device, host, byte_count);
}

/**
 * Enqueue a copy of `size` objects of type T from `device` back to `host` on
 * the queue of the global execution instance.
 *
 * @return the event of the enqueued memcpy, wrapped as an ExecutionEvent.
 *         The function itself does not wait on it, so `host` should not be
 *         read until the caller knows the copy has completed.
 */
template <class T>
inline execution::ExecutionEvent copyFromDevice(T *host, const T *device, std::size_t size)
{
    auto &queue = execution::execution_instance.getQueue();
    const std::size_t byte_count = size * sizeof(T);
    return queue.memcpy(host, device, byte_count);
}

namespace execution
{
/**
 * Specialization of Implementation for ParallelDevicePolicy: hands out a
 * pointer to the ComputingKernel of a local dynamics, created on first use.
 * NOTE(review): this listing appears to interleave the removed sycl::buffer
 * based lines with the added USM based lines of the commit, so it is not
 * compilable as shown -- confirm against the repository.
 */
template <class LocalDynamicsType>
class Implementation<LocalDynamicsType, ParallelDevicePolicy>
{
using ComputingKernel = typename LocalDynamicsType::ComputingKernel;
using ComputingKernelBuffer = sycl::buffer<ComputingKernel>;
UniquePtrKeeper<ComputingKernel> kernel_ptr_keeper_;
UniquePtrKeeper<ComputingKernelBuffer> kernel_buffer_ptr_keeper_;

public:
// Keeps a reference to the local dynamics; nothing is allocated here, the
// kernel pointer starts out null.
explicit Implementation(LocalDynamicsType &local_dynamics)
: local_dynamics_(local_dynamics), computing_kernel_(nullptr),
computing_kernel_buffer_(nullptr) {}

ComputingKernelBuffer &getBuffer()
: local_dynamics_(local_dynamics), computing_kernel_(nullptr) {}
// Releases the kernel allocation through freeDeviceData.
// NOTE(review): computing_kernel_ is still nullptr when getComputingKernel()
// was never called, so this relies on a null pointer being accepted
// downstream -- confirm.
~Implementation()
{
freeDeviceData(computing_kernel_);
}
// Returns the kernel pointer, creating the kernel on the first call and
// returning the cached pointer afterwards.
ComputingKernel *getComputingKernel()
{
if (computing_kernel_ == nullptr)
{
// Buffer path (presumably the removed side): kernel created through
// kernel_ptr_keeper_ and wrapped in a sycl::buffer of one element.
computing_kernel_ = kernel_ptr_keeper_
.template createPtr<ComputingKernel>(local_dynamics_);
computing_kernel_buffer_ = kernel_buffer_ptr_keeper_
.template createPtr<ComputingKernelBuffer>(computing_kernel_, 1);

// USM path: allocate one kernel with allocateDeviceOnly, build a copy from
// local_dynamics_ on the host and copy it over with copyToDevice.
// NOTE(review): the ExecutionEvent returned by copyToDevice is discarded and
// `host` is a local object; if the copy runs asynchronously, `host` may go
// out of scope before it has finished -- confirm, and consider waiting on
// the event before leaving this scope.
computing_kernel_ = allocateDeviceOnly<ComputingKernel>(1);
ComputingKernel host = ComputingKernel(local_dynamics_);
copyToDevice(&host, computing_kernel_, 1);
}

return *computing_kernel_buffer_;
return computing_kernel_;
}

private:
LocalDynamicsType &local_dynamics_;
// NOTE(review): raw pointer released in the destructor, with no copy/move
// control visible in this class -- confirm that instances are never copied.
ComputingKernel *computing_kernel_;
ComputingKernelBuffer *computing_kernel_buffer_;
};
} // namespace execution
} // namespace SPH
Expand Down
15 changes: 6 additions & 9 deletions src/src_sycl/shared/particle_dynamics/particle_iterators_sycl.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,20 +34,17 @@

namespace SPH
{
/**
 * Particle loop for ParallelDevicePolicy: submits one SYCL parallel_for over
 * execution_instance.getUniformNdRange(particles_size) to the queue of the
 * execution instance and calls wait_and_throw() on the submission before
 * returning. Work-items whose global id is not below particles_size do nothing.
 * NOTE(review): two signatures and two kernel bodies are listed back to back
 * (apparently the removed buffer/accessor side and the added side of the
 * commit) -- confirm which one applies.
 * NOTE(review): only particles_range.size() is used; the global id is passed
 * on as the particle index without adding particles_range.begin() -- confirm
 * that ranges handed to this overload always start at zero.
 */
template <class ComputingKernelType, class ComputingKernelFunction>
inline void particle_for(Implementation<ComputingKernelType, ParallelDevicePolicy> &kernel_implementation,
const IndexRange &particles_range, const ComputingKernelFunction &kernel_function)
template <class LocalDynamicsFunction>
inline void particle_for(const ParallelDevicePolicy &par_device,
const IndexRange &particles_range, const LocalDynamicsFunction &local_dynamics_function)
{
auto &sycl_queue = execution_instance.getQueue();
auto &sycl_buffer = kernel_implementation.getBuffer();
const size_t particles_size = particles_range.size();
sycl_queue.submit([&](sycl::handler &cgh)
{
auto sycl_accessor = sycl_buffer.get_access(cgh, sycl::read_write);
cgh.parallel_for(execution_instance.getUniformNdRange(particles_size), [=](sycl::nd_item<1> index) {
{ cgh.parallel_for(execution_instance.getUniformNdRange(particles_size), [=](sycl::nd_item<1> index)
{
// Skip the work-items whose global id is not a valid particle index.
if(index.get_global_id(0) < particles_size)
kernel_function(index.get_global_id(0), sycl_accessor[0]);
}); })
local_dynamics_function(index.get_global_id(0)); }); })
.wait_and_throw();
}
} // namespace SPH
Expand Down
67 changes: 0 additions & 67 deletions src/src_sycl/shared/variable/base_variable_sycl.h

This file was deleted.

2 changes: 1 addition & 1 deletion src/src_sycl/shared/variable/base_variable_sycl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@
#define BASE_VARIABLE_SYCL_HPP

#include "base_variable.h"
#include "base_variable_sycl.h"
#include "execution_sycl.h"

namespace SPH
{
Expand Down

0 comments on commit d6af491

Please sign in to comment.