Formatting

cms-patatrack · Jan 31, 2024 · 89f875b · 89f875b
1 parent de7ba0d
commit 89f875b
Show file tree

Hide file tree

Showing 7 changed files with 111 additions and 71 deletions.
diff --git a/CLUEstering/alpaka/AlpakaCore/CachedBufAlloc.h b/CLUEstering/alpaka/AlpakaCore/CachedBufAlloc.h
@@ -11,16 +11,24 @@ namespace cms::alpakatools {
   namespace traits {
 
     //! The caching memory allocator trait.
-    template <typename TElem, typename TDim, typename TIdx, typename TDev, typename TQueue, typename TSfinae = void>
+    template <typename TElem,
+              typename TDim,
+              typename TIdx,
+              typename TDev,
+              typename TQueue,
+              typename TSfinae = void>
     struct CachedBufAlloc {
-      static_assert(alpaka::meta::DependentFalseType<TDev>::value, "This device does not support a caching allocator");
+      static_assert(alpaka::meta::DependentFalseType<TDev>::value,
+                    "This device does not support a caching allocator");
     };
 
     //! The caching memory allocator implementation for the CPU device
     template <typename TElem, typename TDim, typename TIdx, typename TQueue>
     struct CachedBufAlloc<TElem, TDim, TIdx, alpaka::DevCpu, TQueue, void> {
       template <typename TExtent>
-      ALPAKA_FN_HOST static auto allocCachedBuf(alpaka::DevCpu const& dev, TQueue queue, TExtent const& extent)
+      ALPAKA_FN_HOST static auto allocCachedBuf(alpaka::DevCpu const& dev,
+                                                TQueue queue,
+                                                TExtent const& extent)
           -> alpaka::BufCpu<TElem, TDim, TIdx> {
         // non-cached host-only memory
         return alpaka::allocAsyncBuf<TElem, TIdx>(queue, extent);
@@ -35,7 +43,8 @@ namespace cms::alpakatools {
       template <typename TExtent>
       ALPAKA_FN_HOST static auto allocCachedBuf(alpaka::DevCpu const& dev,
                                                 alpaka::QueueCudaRtNonBlocking queue,
-                                                TExtent const& extent) -> alpaka::BufCpu<TElem, TDim, TIdx> {
+                                                TExtent const& extent)
+          -> alpaka::BufCpu<TElem, TDim, TIdx> {
         ALPAKA_DEBUG_MINIMAL_LOG_SCOPE;
 
         auto& allocator = getHostCachingAllocator<alpaka::QueueCudaRtNonBlocking>();
@@ -48,15 +57,18 @@ namespace cms::alpakatools {
         // use a custom deleter to return the buffer to the CachingAllocator
         auto deleter = [alloc = &allocator](TElem* ptr) { alloc->free(ptr); };
 
-        return alpaka::BufCpu<TElem, TDim, TIdx>(dev, reinterpret_cast<TElem*>(memPtr), std::move(deleter), extent);
+        return alpaka::BufCpu<TElem, TDim, TIdx>(
+            dev, reinterpret_cast<TElem*>(memPtr), std::move(deleter), extent);
       }
     };
 
     //! The caching memory allocator implementation for the CUDA device
     template <typename TElem, typename TDim, typename TIdx, typename TQueue>
     struct CachedBufAlloc<TElem, TDim, TIdx, alpaka::DevCudaRt, TQueue, void> {
       template <typename TExtent>
-      ALPAKA_FN_HOST static auto allocCachedBuf(alpaka::DevCudaRt const& dev, TQueue queue, TExtent const& extent)
+      ALPAKA_FN_HOST static auto allocCachedBuf(alpaka::DevCudaRt const& dev,
+                                                TQueue queue,
+                                                TExtent const& extent)
           -> alpaka::BufCudaRt<TElem, TDim, TIdx> {
         ALPAKA_DEBUG_MINIMAL_LOG_SCOPE;
 
@@ -88,7 +100,8 @@ namespace cms::alpakatools {
       template <typename TExtent>
       ALPAKA_FN_HOST static auto allocCachedBuf(alpaka::DevCpu const& dev,
                                                 alpaka::QueueHipRtNonBlocking queue,
-                                                TExtent const& extent) -> alpaka::BufCpu<TElem, TDim, TIdx> {
+                                                TExtent const& extent)
+          -> alpaka::BufCpu<TElem, TDim, TIdx> {
         ALPAKA_DEBUG_MINIMAL_LOG_SCOPE;
 
         auto& allocator = getHostCachingAllocator<alpaka::QueueHipRtNonBlocking>();
@@ -101,15 +114,18 @@ namespace cms::alpakatools {
         // use a custom deleter to return the buffer to the CachingAllocator
         auto deleter = [alloc = &allocator](TElem* ptr) { alloc->free(ptr); };
 
-        return alpaka::BufCpu<TElem, TDim, TIdx>(dev, reinterpret_cast<TElem*>(memPtr), std::move(deleter), extent);
+        return alpaka::BufCpu<TElem, TDim, TIdx>(
+            dev, reinterpret_cast<TElem*>(memPtr), std::move(deleter), extent);
       }
     };
 
     //! The caching memory allocator implementation for the ROCm/HIP device
     template <typename TElem, typename TDim, typename TIdx, typename TQueue>
     struct CachedBufAlloc<TElem, TDim, TIdx, alpaka::DevHipRt, TQueue, void> {
       template <typename TExtent>
-      ALPAKA_FN_HOST static auto allocCachedBuf(alpaka::DevHipRt const& dev, TQueue queue, TExtent const& extent)
+      ALPAKA_FN_HOST static auto allocCachedBuf(alpaka::DevHipRt const& dev,
+                                                TQueue queue,
+                                                TExtent const& extent)
           -> alpaka::BufHipRt<TElem, TDim, TIdx> {
         ALPAKA_DEBUG_MINIMAL_LOG_SCOPE;
 
@@ -137,7 +153,8 @@ namespace cms::alpakatools {
 
   template <typename TElem, typename TIdx, typename TExtent, typename TQueue, typename TDev>
   ALPAKA_FN_HOST auto allocCachedBuf(TDev const& dev, TQueue queue, TExtent const& extent = TExtent()) {
-    return traits::CachedBufAlloc<TElem, alpaka::Dim<TExtent>, TIdx, TDev, TQueue>::allocCachedBuf(dev, queue, extent);
+    return traits::CachedBufAlloc<TElem, alpaka::Dim<TExtent>, TIdx, TDev, TQueue>::allocCachedBuf(
+        dev, queue, extent);
   }
 
 }  // namespace cms::alpakatools

diff --git a/CLUEstering/alpaka/AlpakaCore/CachingAllocator.h b/CLUEstering/alpaka/AlpakaCore/CachingAllocator.h
@@ -92,7 +92,8 @@ namespace cms::alpakatools {
 
     // The "memory device" type can either be the same as the "synchronisation device" type, or be the host CPU.
     static_assert(std::is_same_v<Device, alpaka::Dev<Queue>> or std::is_same_v<Device, alpaka::DevCpu>,
-                  "The \"memory device\" type can either be the same as the \"synchronisation device\" type, or be the "
+                  "The \"memory device\" type can either be the same as the \"synchronisation device\" "
+                  "type, or be the "
                   "host CPU.");
 
     struct CachedBytes {
@@ -103,17 +104,18 @@ namespace cms::alpakatools {
 
     explicit CachingAllocator(
         Device const& device,
-        unsigned int binGrowth,          // bin growth factor;
-        unsigned int minBin,             // smallest bin, corresponds to binGrowth^minBin bytes;
-                                         // smaller allocations are rounded to this value;
-        unsigned int maxBin,             // largest bin, corresponds to binGrowth^maxBin bytes;
-                                         // larger allocations will fail;
-        size_t maxCachedBytes,           // total storage for the allocator (0 means no limit);
-        double maxCachedFraction,        // fraction of total device memory taken for the allocator (0 means no limit);
-                                         // if both maxCachedBytes and maxCachedFraction are non-zero,
-                                         // the smallest resulting value is used.
+        unsigned int binGrowth,  // bin growth factor;
+        unsigned int minBin,     // smallest bin, corresponds to binGrowth^minBin bytes;
+                                 // smaller allocations are rounded to this value;
+        unsigned int maxBin,     // largest bin, corresponds to binGrowth^maxBin bytes;
+                                 // larger allocations will fail;
+        size_t maxCachedBytes,   // total storage for the allocator (0 means no limit);
+        double
+            maxCachedFraction,  // fraction of total device memory taken for the allocator (0 means no limit);
+                                // if both maxCachedBytes and maxCachedFraction are non-zero,
+                                // the smallest resulting value is used.
         bool reuseSameQueueAllocations,  // reuse non-ready allocations if they are in the same queue as the new one;
-                                         // this is safe only if all memory operations are scheduled in the same queue
+        // this is safe only if all memory operations are scheduled in the same queue
         bool debug)
         : device_(device),
           binGrowth_(binGrowth),
@@ -199,22 +201,22 @@ namespace cms::alpakatools {
 
         if (debug_) {
           std::ostringstream out;
-          out << "\t" << deviceType_ << " " << alpaka::getName(device_) << " returned " << block.bytes << " bytes at "
-              << ptr << " from associated queue " << block.queue->m_spQueueImpl.get() << " , event "
-              << block.event->m_spEventImpl.get() << " .\n\t\t " << cachedBlocks_.size() << " available blocks cached ("
-              << cachedBytes_.free << " bytes), " << liveBlocks_.size() << " live blocks (" << cachedBytes_.live
-              << " bytes) outstanding." << std::endl;
+          out << "\t" << deviceType_ << " " << alpaka::getName(device_) << " returned " << block.bytes
+              << " bytes at " << ptr << " from associated queue " << block.queue->m_spQueueImpl.get()
+              << " , event " << block.event->m_spEventImpl.get() << " .\n\t\t " << cachedBlocks_.size()
+              << " available blocks cached (" << cachedBytes_.free << " bytes), " << liveBlocks_.size()
+              << " live blocks (" << cachedBytes_.live << " bytes) outstanding." << std::endl;
           std::cout << out.str() << std::endl;
         }
       } else {
         // if the buffer is not recached, it is automatically freed when block goes out of scope
         if (debug_) {
           std::ostringstream out;
-          out << "\t" << deviceType_ << " " << alpaka::getName(device_) << " freed " << block.bytes << " bytes at "
-              << ptr << " from associated queue " << block.queue->m_spQueueImpl.get() << ", event "
-              << block.event->m_spEventImpl.get() << " .\n\t\t " << cachedBlocks_.size() << " available blocks cached ("
-              << cachedBytes_.free << " bytes), " << liveBlocks_.size() << " live blocks (" << cachedBytes_.live
-              << " bytes) outstanding." << std::endl;
+          out << "\t" << deviceType_ << " " << alpaka::getName(device_) << " freed " << block.bytes
+              << " bytes at " << ptr << " from associated queue " << block.queue->m_spQueueImpl.get()
+              << ", event " << block.event->m_spEventImpl.get() << " .\n\t\t " << cachedBlocks_.size()
+              << " available blocks cached (" << cachedBytes_.free << " bytes), " << liveBlocks_.size()
+              << " live blocks (" << cachedBytes_.live << " bytes) outstanding." << std::endl;
           std::cout << out.str() << std::endl;
         }
       }
@@ -302,8 +304,8 @@ namespace cms::alpakatools {
             out << "\t" << deviceType_ << " " << alpaka::getName(device_) << " reused cached block at "
                 << block.buffer->data() << " (" << block.bytes << " bytes) for queue "
                 << block.queue->m_spQueueImpl.get() << ", event " << block.event->m_spEventImpl.get()
-                << " (previously associated with stream " << iBlock->second.queue->m_spQueueImpl.get() << " , event "
-                << iBlock->second.event->m_spEventImpl.get() << ")." << std::endl;
+                << " (previously associated with stream " << iBlock->second.queue->m_spQueueImpl.get()
+                << " , event " << iBlock->second.event->m_spEventImpl.get() << ")." << std::endl;
             std::cout << out.str() << std::endl;
           }
 
@@ -326,7 +328,8 @@ namespace cms::alpakatools {
       } else {
         // unsupported combination
         static_assert(std::is_same_v<Device, alpaka::Dev<Queue>> or std::is_same_v<Device, alpaka::DevCpu>,
-                      "The \"memory device\" type can either be the same as the \"synchronisation device\" type, or be "
+                      "The \"memory device\" type can either be the same as the \"synchronisation device\" "
+                      "type, or be "
                       "the host CPU.");
       }
     }
@@ -338,8 +341,8 @@ namespace cms::alpakatools {
         // the allocation attempt failed: free all cached blocks on the device and retry
         if (debug_) {
           std::ostringstream out;
-          out << "\t" << deviceType_ << " " << alpaka::getName(device_) << " failed to allocate " << block.bytes
-              << " bytes for queue " << block.queue->m_spQueueImpl.get()
+          out << "\t" << deviceType_ << " " << alpaka::getName(device_) << " failed to allocate "
+              << block.bytes << " bytes for queue " << block.queue->m_spQueueImpl.get()
               << ", retrying after freeing cached allocations" << std::endl;
           std::cout << out.str() << std::endl;
         }
@@ -365,7 +368,8 @@ namespace cms::alpakatools {
         std::ostringstream out;
         out << "\t" << deviceType_ << " " << alpaka::getName(device_) << " allocated new block at "
             << block.buffer->data() << " (" << block.bytes << " bytes associated with queue "
-            << block.queue->m_spQueueImpl.get() << ", event " << block.event->m_spEventImpl.get() << "." << std::endl;
+            << block.queue->m_spQueueImpl.get() << ", event " << block.event->m_spEventImpl.get() << "."
+            << std::endl;
         std::cout << out.str() << std::endl;
       }
     }
@@ -380,9 +384,9 @@ namespace cms::alpakatools {
         if (debug_) {
           std::ostringstream out;
           out << "\t" << deviceType_ << " " << alpaka::getName(device_) << " freed " << iBlock->second.bytes
-              << " bytes.\n\t\t  " << (cachedBlocks_.size() - 1) << " available blocks cached (" << cachedBytes_.free
-              << " bytes), " << liveBlocks_.size() << " live blocks (" << cachedBytes_.live << " bytes) outstanding."
-              << std::endl;
+              << " bytes.\n\t\t  " << (cachedBlocks_.size() - 1) << " available blocks cached ("
+              << cachedBytes_.free << " bytes), " << liveBlocks_.size() << " live blocks ("
+              << cachedBytes_.live << " bytes) outstanding." << std::endl;
           std::cout << out.str() << std::endl;
         }
 

diff --git a/CLUEstering/alpaka/AlpakaCore/HostOnlyTask.h b/CLUEstering/alpaka/AlpakaCore/HostOnlyTask.h
@@ -34,8 +34,8 @@ namespace alpaka {
 
       ALPAKA_FN_HOST static auto enqueue(QueueCudaRtNonBlocking& queue, HostOnlyTask task) -> void {
         auto pTask = std::make_unique<HostOnlyTask>(std::move(task));
-        ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK(
-            cudaStreamAddCallback(alpaka::getNativeHandle(queue), callback, static_cast<void*>(pTask.release()), 0u));
+        ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK(cudaStreamAddCallback(
+            alpaka::getNativeHandle(queue), callback, static_cast<void*>(pTask.release()), 0u));
       }
     };
 #endif  // ALPAKA_ACC_GPU_CUDA_ENABLED
@@ -54,8 +54,8 @@ namespace alpaka {
 
       ALPAKA_FN_HOST static auto enqueue(QueueHipRtNonBlocking& queue, HostOnlyTask task) -> void {
         auto pTask = std::make_unique<HostOnlyTask>(std::move(task));
-        ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK(
-            hipStreamAddCallback(alpaka::getNativeHandle(queue), callback, static_cast<void*>(pTask.release()), 0u));
+        ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK(hipStreamAddCallback(
+            alpaka::getNativeHandle(queue), callback, static_cast<void*>(pTask.release()), 0u));
       }
     };
 #endif  // ALPAKA_ACC_GPU_HIP_ENABLED

diff --git a/CLUEstering/alpaka/AlpakaCore/StreamCache.h b/CLUEstering/alpaka/AlpakaCore/StreamCache.h
@@ -24,7 +24,8 @@ namespace cms::alpakatools {
     // will be returned to the cache by the shared_ptr destructor.
     // This function is thread safe
     ALPAKA_FN_HOST std::shared_ptr<Queue> get(Device const& dev) {
-      return cache_[cms::alpakatools::getDeviceIndex(dev)].makeOrGet([dev]() { return std::make_unique<Queue>(dev); });
+      return cache_[cms::alpakatools::getDeviceIndex(dev)].makeOrGet(
+          [dev]() { return std::make_unique<Queue>(dev); });
     }
 
   private: