Skip to content

Commit

Permalink
Formatting
Browse files Browse the repository at this point in the history
  • Loading branch information
sbaldu committed Jan 31, 2024
1 parent de7ba0d commit 89f875b
Show file tree
Hide file tree
Showing 7 changed files with 111 additions and 71 deletions.
37 changes: 27 additions & 10 deletions CLUEstering/alpaka/AlpakaCore/CachedBufAlloc.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,16 +11,24 @@ namespace cms::alpakatools {
namespace traits {

//! The caching memory allocator trait.
template <typename TElem, typename TDim, typename TIdx, typename TDev, typename TQueue, typename TSfinae = void>
template <typename TElem,
typename TDim,
typename TIdx,
typename TDev,
typename TQueue,
typename TSfinae = void>
struct CachedBufAlloc {
static_assert(alpaka::meta::DependentFalseType<TDev>::value, "This device does not support a caching allocator");
static_assert(alpaka::meta::DependentFalseType<TDev>::value,
"This device does not support a caching allocator");
};

//! The caching memory allocator implementation for the CPU device
template <typename TElem, typename TDim, typename TIdx, typename TQueue>
struct CachedBufAlloc<TElem, TDim, TIdx, alpaka::DevCpu, TQueue, void> {
template <typename TExtent>
ALPAKA_FN_HOST static auto allocCachedBuf(alpaka::DevCpu const& dev, TQueue queue, TExtent const& extent)
ALPAKA_FN_HOST static auto allocCachedBuf(alpaka::DevCpu const& dev,
TQueue queue,
TExtent const& extent)
-> alpaka::BufCpu<TElem, TDim, TIdx> {
// non-cached host-only memory
return alpaka::allocAsyncBuf<TElem, TIdx>(queue, extent);
Expand All @@ -35,7 +43,8 @@ namespace cms::alpakatools {
template <typename TExtent>
ALPAKA_FN_HOST static auto allocCachedBuf(alpaka::DevCpu const& dev,
alpaka::QueueCudaRtNonBlocking queue,
TExtent const& extent) -> alpaka::BufCpu<TElem, TDim, TIdx> {
TExtent const& extent)
-> alpaka::BufCpu<TElem, TDim, TIdx> {
ALPAKA_DEBUG_MINIMAL_LOG_SCOPE;

auto& allocator = getHostCachingAllocator<alpaka::QueueCudaRtNonBlocking>();
Expand All @@ -48,15 +57,18 @@ namespace cms::alpakatools {
// use a custom deleter to return the buffer to the CachingAllocator
auto deleter = [alloc = &allocator](TElem* ptr) { alloc->free(ptr); };

return alpaka::BufCpu<TElem, TDim, TIdx>(dev, reinterpret_cast<TElem*>(memPtr), std::move(deleter), extent);
return alpaka::BufCpu<TElem, TDim, TIdx>(
dev, reinterpret_cast<TElem*>(memPtr), std::move(deleter), extent);
}
};

//! The caching memory allocator implementation for the CUDA device
template <typename TElem, typename TDim, typename TIdx, typename TQueue>
struct CachedBufAlloc<TElem, TDim, TIdx, alpaka::DevCudaRt, TQueue, void> {
template <typename TExtent>
ALPAKA_FN_HOST static auto allocCachedBuf(alpaka::DevCudaRt const& dev, TQueue queue, TExtent const& extent)
ALPAKA_FN_HOST static auto allocCachedBuf(alpaka::DevCudaRt const& dev,
TQueue queue,
TExtent const& extent)
-> alpaka::BufCudaRt<TElem, TDim, TIdx> {
ALPAKA_DEBUG_MINIMAL_LOG_SCOPE;

Expand Down Expand Up @@ -88,7 +100,8 @@ namespace cms::alpakatools {
template <typename TExtent>
ALPAKA_FN_HOST static auto allocCachedBuf(alpaka::DevCpu const& dev,
alpaka::QueueHipRtNonBlocking queue,
TExtent const& extent) -> alpaka::BufCpu<TElem, TDim, TIdx> {
TExtent const& extent)
-> alpaka::BufCpu<TElem, TDim, TIdx> {
ALPAKA_DEBUG_MINIMAL_LOG_SCOPE;

auto& allocator = getHostCachingAllocator<alpaka::QueueHipRtNonBlocking>();
Expand All @@ -101,15 +114,18 @@ namespace cms::alpakatools {
// use a custom deleter to return the buffer to the CachingAllocator
auto deleter = [alloc = &allocator](TElem* ptr) { alloc->free(ptr); };

return alpaka::BufCpu<TElem, TDim, TIdx>(dev, reinterpret_cast<TElem*>(memPtr), std::move(deleter), extent);
return alpaka::BufCpu<TElem, TDim, TIdx>(
dev, reinterpret_cast<TElem*>(memPtr), std::move(deleter), extent);
}
};

//! The caching memory allocator implementation for the ROCm/HIP device
template <typename TElem, typename TDim, typename TIdx, typename TQueue>
struct CachedBufAlloc<TElem, TDim, TIdx, alpaka::DevHipRt, TQueue, void> {
template <typename TExtent>
ALPAKA_FN_HOST static auto allocCachedBuf(alpaka::DevHipRt const& dev, TQueue queue, TExtent const& extent)
ALPAKA_FN_HOST static auto allocCachedBuf(alpaka::DevHipRt const& dev,
TQueue queue,
TExtent const& extent)
-> alpaka::BufHipRt<TElem, TDim, TIdx> {
ALPAKA_DEBUG_MINIMAL_LOG_SCOPE;

Expand Down Expand Up @@ -137,7 +153,8 @@ namespace cms::alpakatools {

template <typename TElem, typename TIdx, typename TExtent, typename TQueue, typename TDev>
ALPAKA_FN_HOST auto allocCachedBuf(TDev const& dev, TQueue queue, TExtent const& extent = TExtent()) {
return traits::CachedBufAlloc<TElem, alpaka::Dim<TExtent>, TIdx, TDev, TQueue>::allocCachedBuf(dev, queue, extent);
return traits::CachedBufAlloc<TElem, alpaka::Dim<TExtent>, TIdx, TDev, TQueue>::allocCachedBuf(
dev, queue, extent);
}

} // namespace cms::alpakatools
Expand Down
64 changes: 34 additions & 30 deletions CLUEstering/alpaka/AlpakaCore/CachingAllocator.h
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,8 @@ namespace cms::alpakatools {

// The "memory device" type can either be the same as the "synchronisation device" type, or be the host CPU.
static_assert(std::is_same_v<Device, alpaka::Dev<Queue>> or std::is_same_v<Device, alpaka::DevCpu>,
"The \"memory device\" type can either be the same as the \"synchronisation device\" type, or be the "
"The \"memory device\" type can either be the same as the \"synchronisation device\" "
"type, or be the "
"host CPU.");

struct CachedBytes {
Expand All @@ -103,17 +104,18 @@ namespace cms::alpakatools {

explicit CachingAllocator(
Device const& device,
unsigned int binGrowth, // bin growth factor;
unsigned int minBin, // smallest bin, corresponds to binGrowth^minBin bytes;
// smaller allocations are rounded to this value;
unsigned int maxBin, // largest bin, corresponds to binGrowth^maxBin bytes;
// larger allocations will fail;
size_t maxCachedBytes, // total storage for the allocator (0 means no limit);
double maxCachedFraction, // fraction of total device memory taken for the allocator (0 means no limit);
// if both maxCachedBytes and maxCachedFraction are non-zero,
// the smallest resulting value is used.
unsigned int binGrowth, // bin growth factor;
unsigned int minBin, // smallest bin, corresponds to binGrowth^minBin bytes;
// smaller allocations are rounded to this value;
unsigned int maxBin, // largest bin, corresponds to binGrowth^maxBin bytes;
// larger allocations will fail;
size_t maxCachedBytes, // total storage for the allocator (0 means no limit);
double
maxCachedFraction, // fraction of total device memory taken for the allocator (0 means no limit);
// if both maxCachedBytes and maxCachedFraction are non-zero,
// the smallest resulting value is used.
bool reuseSameQueueAllocations, // reuse non-ready allocations if they are in the same queue as the new one;
// this is safe only if all memory operations are scheduled in the same queue
// this is safe only if all memory operations are scheduled in the same queue
bool debug)
: device_(device),
binGrowth_(binGrowth),
Expand Down Expand Up @@ -199,22 +201,22 @@ namespace cms::alpakatools {

if (debug_) {
std::ostringstream out;
out << "\t" << deviceType_ << " " << alpaka::getName(device_) << " returned " << block.bytes << " bytes at "
<< ptr << " from associated queue " << block.queue->m_spQueueImpl.get() << " , event "
<< block.event->m_spEventImpl.get() << " .\n\t\t " << cachedBlocks_.size() << " available blocks cached ("
<< cachedBytes_.free << " bytes), " << liveBlocks_.size() << " live blocks (" << cachedBytes_.live
<< " bytes) outstanding." << std::endl;
out << "\t" << deviceType_ << " " << alpaka::getName(device_) << " returned " << block.bytes
<< " bytes at " << ptr << " from associated queue " << block.queue->m_spQueueImpl.get()
<< " , event " << block.event->m_spEventImpl.get() << " .\n\t\t " << cachedBlocks_.size()
<< " available blocks cached (" << cachedBytes_.free << " bytes), " << liveBlocks_.size()
<< " live blocks (" << cachedBytes_.live << " bytes) outstanding." << std::endl;
std::cout << out.str() << std::endl;
}
} else {
// if the buffer is not recached, it is automatically freed when block goes out of scope
if (debug_) {
std::ostringstream out;
out << "\t" << deviceType_ << " " << alpaka::getName(device_) << " freed " << block.bytes << " bytes at "
<< ptr << " from associated queue " << block.queue->m_spQueueImpl.get() << ", event "
<< block.event->m_spEventImpl.get() << " .\n\t\t " << cachedBlocks_.size() << " available blocks cached ("
<< cachedBytes_.free << " bytes), " << liveBlocks_.size() << " live blocks (" << cachedBytes_.live
<< " bytes) outstanding." << std::endl;
out << "\t" << deviceType_ << " " << alpaka::getName(device_) << " freed " << block.bytes
<< " bytes at " << ptr << " from associated queue " << block.queue->m_spQueueImpl.get()
<< ", event " << block.event->m_spEventImpl.get() << " .\n\t\t " << cachedBlocks_.size()
<< " available blocks cached (" << cachedBytes_.free << " bytes), " << liveBlocks_.size()
<< " live blocks (" << cachedBytes_.live << " bytes) outstanding." << std::endl;
std::cout << out.str() << std::endl;
}
}
Expand Down Expand Up @@ -302,8 +304,8 @@ namespace cms::alpakatools {
out << "\t" << deviceType_ << " " << alpaka::getName(device_) << " reused cached block at "
<< block.buffer->data() << " (" << block.bytes << " bytes) for queue "
<< block.queue->m_spQueueImpl.get() << ", event " << block.event->m_spEventImpl.get()
<< " (previously associated with stream " << iBlock->second.queue->m_spQueueImpl.get() << " , event "
<< iBlock->second.event->m_spEventImpl.get() << ")." << std::endl;
<< " (previously associated with stream " << iBlock->second.queue->m_spQueueImpl.get()
<< " , event " << iBlock->second.event->m_spEventImpl.get() << ")." << std::endl;
std::cout << out.str() << std::endl;
}

Expand All @@ -326,7 +328,8 @@ namespace cms::alpakatools {
} else {
// unsupported combination
static_assert(std::is_same_v<Device, alpaka::Dev<Queue>> or std::is_same_v<Device, alpaka::DevCpu>,
"The \"memory device\" type can either be the same as the \"synchronisation device\" type, or be "
"The \"memory device\" type can either be the same as the \"synchronisation device\" "
"type, or be "
"the host CPU.");
}
}
Expand All @@ -338,8 +341,8 @@ namespace cms::alpakatools {
// the allocation attempt failed: free all cached blocks on the device and retry
if (debug_) {
std::ostringstream out;
out << "\t" << deviceType_ << " " << alpaka::getName(device_) << " failed to allocate " << block.bytes
<< " bytes for queue " << block.queue->m_spQueueImpl.get()
out << "\t" << deviceType_ << " " << alpaka::getName(device_) << " failed to allocate "
<< block.bytes << " bytes for queue " << block.queue->m_spQueueImpl.get()
<< ", retrying after freeing cached allocations" << std::endl;
std::cout << out.str() << std::endl;
}
Expand All @@ -365,7 +368,8 @@ namespace cms::alpakatools {
std::ostringstream out;
out << "\t" << deviceType_ << " " << alpaka::getName(device_) << " allocated new block at "
<< block.buffer->data() << " (" << block.bytes << " bytes associated with queue "
<< block.queue->m_spQueueImpl.get() << ", event " << block.event->m_spEventImpl.get() << "." << std::endl;
<< block.queue->m_spQueueImpl.get() << ", event " << block.event->m_spEventImpl.get() << "."
<< std::endl;
std::cout << out.str() << std::endl;
}
}
Expand All @@ -380,9 +384,9 @@ namespace cms::alpakatools {
if (debug_) {
std::ostringstream out;
out << "\t" << deviceType_ << " " << alpaka::getName(device_) << " freed " << iBlock->second.bytes
<< " bytes.\n\t\t " << (cachedBlocks_.size() - 1) << " available blocks cached (" << cachedBytes_.free
<< " bytes), " << liveBlocks_.size() << " live blocks (" << cachedBytes_.live << " bytes) outstanding."
<< std::endl;
<< " bytes.\n\t\t " << (cachedBlocks_.size() - 1) << " available blocks cached ("
<< cachedBytes_.free << " bytes), " << liveBlocks_.size() << " live blocks ("
<< cachedBytes_.live << " bytes) outstanding." << std::endl;
std::cout << out.str() << std::endl;
}

Expand Down
8 changes: 4 additions & 4 deletions CLUEstering/alpaka/AlpakaCore/HostOnlyTask.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,8 @@ namespace alpaka {

ALPAKA_FN_HOST static auto enqueue(QueueCudaRtNonBlocking& queue, HostOnlyTask task) -> void {
auto pTask = std::make_unique<HostOnlyTask>(std::move(task));
ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK(
cudaStreamAddCallback(alpaka::getNativeHandle(queue), callback, static_cast<void*>(pTask.release()), 0u));
ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK(cudaStreamAddCallback(
alpaka::getNativeHandle(queue), callback, static_cast<void*>(pTask.release()), 0u));
}
};
#endif // ALPAKA_ACC_GPU_CUDA_ENABLED
Expand All @@ -54,8 +54,8 @@ namespace alpaka {

ALPAKA_FN_HOST static auto enqueue(QueueHipRtNonBlocking& queue, HostOnlyTask task) -> void {
auto pTask = std::make_unique<HostOnlyTask>(std::move(task));
ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK(
hipStreamAddCallback(alpaka::getNativeHandle(queue), callback, static_cast<void*>(pTask.release()), 0u));
ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK(hipStreamAddCallback(
alpaka::getNativeHandle(queue), callback, static_cast<void*>(pTask.release()), 0u));
}
};
#endif // ALPAKA_ACC_GPU_HIP_ENABLED
Expand Down
3 changes: 2 additions & 1 deletion CLUEstering/alpaka/AlpakaCore/StreamCache.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,8 @@ namespace cms::alpakatools {
// will be returned to the cache by the shared_ptr destructor.
// This function is thread safe
ALPAKA_FN_HOST std::shared_ptr<Queue> get(Device const& dev) {
return cache_[cms::alpakatools::getDeviceIndex(dev)].makeOrGet([dev]() { return std::make_unique<Queue>(dev); });
return cache_[cms::alpakatools::getDeviceIndex(dev)].makeOrGet(
[dev]() { return std::make_unique<Queue>(dev); });
}

private:
Expand Down
Loading

0 comments on commit 89f875b

Please sign in to comment.