From 9b43d33e91c5263fb50c47cccc53b2becbc5d851 Mon Sep 17 00:00:00 2001 From: Gabriele Cimador Date: Thu, 14 Nov 2024 16:37:43 +0100 Subject: [PATCH] Removed TPCCluterDecompressor.inc --- GPU/GPUTracking/CMakeLists.txt | 1 - .../DataCompression/GPUTPCDecompression.h | 1 + .../GPUTPCDecompressionKernels.h | 8 - .../TPCClusterDecompressionCore.inc | 267 +++++++++--------- .../DataCompression/TPCClusterDecompressor.h | 5 - .../TPCClusterDecompressor.inc | 164 ----------- 6 files changed, 137 insertions(+), 309 deletions(-) delete mode 100644 GPU/GPUTracking/DataCompression/TPCClusterDecompressor.inc diff --git a/GPU/GPUTracking/CMakeLists.txt b/GPU/GPUTracking/CMakeLists.txt index 282f8b8f25031..6400fbc65dc61 100644 --- a/GPU/GPUTracking/CMakeLists.txt +++ b/GPU/GPUTracking/CMakeLists.txt @@ -121,7 +121,6 @@ set(HDRS_INSTALL Base/GPUReconstructionKernels.h DataCompression/GPUTPCClusterRejection.h DataCompression/GPUTPCCompressionKernels.inc - DataCompression/TPCClusterDecompressor.inc DataCompression/TPCClusterDecompressionCore.inc DataTypes/GPUdEdxInfo.h DataTypes/GPUHostDataTypes.h diff --git a/GPU/GPUTracking/DataCompression/GPUTPCDecompression.h b/GPU/GPUTracking/DataCompression/GPUTPCDecompression.h index 038fbd905db4f..d9871613d8401 100644 --- a/GPU/GPUTracking/DataCompression/GPUTPCDecompression.h +++ b/GPU/GPUTracking/DataCompression/GPUTPCDecompression.h @@ -44,6 +44,7 @@ class GPUTPCDecompression : public GPUProcessor friend class GPUTPCDecompressionUtilKernels; friend class GPUChainTracking; friend class TPCClusterDecompressionCore; + public: #ifndef GPUCA_GPUCODE void InitializeProcessor(); diff --git a/GPU/GPUTracking/DataCompression/GPUTPCDecompressionKernels.h b/GPU/GPUTracking/DataCompression/GPUTPCDecompressionKernels.h index cfa3589dd21f7..622e1fd984fa7 100644 --- a/GPU/GPUTracking/DataCompression/GPUTPCDecompressionKernels.h +++ b/GPU/GPUTracking/DataCompression/GPUTPCDecompressionKernels.h @@ -45,14 +45,6 @@ class GPUTPCDecompressionKernels : public GPUKernelTemplate template GPUd() static void Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& GPUrestrict() processors, Args... args); - - /*template - GPUd() static void decompressTrack(o2::tpc::CompressedClusters& cmprClusters, const GPUParam& param, const uint32_t maxTime, const uint32_t trackIndex, uint32_t& clusterOffset, Args&... args); - GPUdi() static o2::tpc::ClusterNative decompressTrackStore(const o2::tpc::CompressedClusters& cmprClusters, const uint32_t clusterOffset, uint32_t slice, uint32_t row, uint32_t pad, uint32_t time, GPUTPCDecompression& decompressor); - - template - GPUdi() static void decompressHits(const o2::tpc::CompressedClusters& cmprClusters, const uint32_t start, const uint32_t end, Args&... args); - GPUdi() static void decompressHitsStore(const o2::tpc::CompressedClusters& cmprClusters, uint32_t k, uint32_t time, uint16_t pad, o2::tpc::ClusterNative*& clusterNativeBuffer);*/ GPUd() static uint32_t computeLinearTmpBufferIndex(uint32_t slice, uint32_t row, uint32_t maxClustersPerBuffer) { diff --git a/GPU/GPUTracking/DataCompression/TPCClusterDecompressionCore.inc b/GPU/GPUTracking/DataCompression/TPCClusterDecompressionCore.inc index 5a2fc1e85c71d..73352182328d5 100644 --- a/GPU/GPUTracking/DataCompression/TPCClusterDecompressionCore.inc +++ b/GPU/GPUTracking/DataCompression/TPCClusterDecompressionCore.inc @@ -21,165 +21,170 @@ #include "GPUCommonAlgorithm.h" #include "GPUO2DataTypes.h" +#ifndef GPUCA_GPUCODE +#include +#endif + using namespace o2::tpc; namespace GPUCA_NAMESPACE::gpu { -class TPCClusterDecompressionCore{ - public: - -#ifndef GPUCA_GPUCODE -GPUhi() static auto decompressTrackStore(const CompressedClusters& clustersCompressed, const uint32_t offset, uint32_t slice, uint32_t row, uint32_t pad, uint32_t time, std::function func) +class TPCClusterDecompressionCore { - const auto cluster = ClusterNative(time, clustersCompressed.flagsA[offset], pad, clustersCompressed.sigmaTimeA[offset], clustersCompressed.sigmaPadA[offset], clustersCompressed.qMaxA[offset], clustersCompressed.qTotA[offset]); - func(cluster, offset); - return cluster; -} + public: +#ifndef GPUCA_GPUCODE + GPUhi() static auto decompressTrackStore(const CompressedClusters& clustersCompressed, const uint32_t offset, uint32_t slice, uint32_t row, uint32_t pad, uint32_t time, std::function func) + { + const auto cluster = ClusterNative(time, clustersCompressed.flagsA[offset], pad, clustersCompressed.sigmaTimeA[offset], clustersCompressed.sigmaPadA[offset], clustersCompressed.qMaxA[offset], clustersCompressed.qTotA[offset]); + func(cluster, offset); + return cluster; + } -GPUhi() static const auto& decompressTrackStore(const CompressedClusters& clustersCompressed, const uint32_t offset, uint32_t slice, uint32_t row, uint32_t pad, uint32_t time, std::vector& clusterVector) -{ - clusterVector.emplace_back(time, clustersCompressed.flagsA[offset], pad, clustersCompressed.sigmaTimeA[offset], clustersCompressed.sigmaPadA[offset], clustersCompressed.qMaxA[offset], clustersCompressed.qTotA[offset]); - return clusterVector.back(); -} + GPUhi() static const auto& decompressTrackStore(const CompressedClusters& clustersCompressed, const uint32_t offset, uint32_t slice, uint32_t row, uint32_t pad, uint32_t time, std::vector& clusterVector) + { + clusterVector.emplace_back(time, clustersCompressed.flagsA[offset], pad, clustersCompressed.sigmaTimeA[offset], clustersCompressed.sigmaPadA[offset], clustersCompressed.qMaxA[offset], clustersCompressed.qTotA[offset]); + return clusterVector.back(); + } -GPUhi() static auto decompressTrackStore(const CompressedClusters& clustersCompressed, const uint32_t offset, uint32_t slice, uint32_t row, uint32_t pad, uint32_t time, std::vector (&clusters)[GPUCA_NSLICES][GPUCA_ROW_COUNT], std::atomic_flag (&locks)[GPUCA_NSLICES][GPUCA_ROW_COUNT]) -{ - std::vector& clusterVector = clusters[slice][row]; - auto& lock = locks[slice][row]; - while (lock.test_and_set(std::memory_order_acquire)) { + GPUhi() static auto decompressTrackStore(const CompressedClusters& clustersCompressed, const uint32_t offset, uint32_t slice, uint32_t row, uint32_t pad, uint32_t time, std::vector (&clusters)[GPUCA_NSLICES][GPUCA_ROW_COUNT], std::atomic_flag (&locks)[GPUCA_NSLICES][GPUCA_ROW_COUNT]) + { + std::vector& clusterVector = clusters[slice][row]; + auto& lock = locks[slice][row]; + while (lock.test_and_set(std::memory_order_acquire)) { + } + ClusterNative retVal = decompressTrackStore(clustersCompressed, offset, slice, row, pad, time, clusterVector); + lock.clear(std::memory_order_release); + return retVal; } - ClusterNative retVal = decompressTrackStore(clustersCompressed, offset, slice, row, pad, time, clusterVector); - lock.clear(std::memory_order_release); - return retVal; -} #endif -GPUdi() static ClusterNative decompressTrackStore(const CompressedClusters& cmprClusters, const uint32_t clusterOffset, uint32_t slice, uint32_t row, uint32_t pad, uint32_t time, GPUTPCDecompression& decompressor) -{ - uint32_t tmpBufferIndex = slice * (GPUCA_ROW_COUNT * decompressor.mMaxNativeClustersPerBuffer) + row * decompressor.mMaxNativeClustersPerBuffer; - uint32_t currentClusterIndex = CAMath::AtomicAdd(decompressor.mNativeClustersIndex + (slice * GPUCA_ROW_COUNT + row), 1u); - const ClusterNative c(time, cmprClusters.flagsA[clusterOffset], pad, cmprClusters.sigmaTimeA[clusterOffset], cmprClusters.sigmaPadA[clusterOffset], cmprClusters.qMaxA[clusterOffset], cmprClusters.qTotA[clusterOffset]); - if (currentClusterIndex < decompressor.mMaxNativeClustersPerBuffer) { - decompressor.mTmpNativeClusters[tmpBufferIndex + currentClusterIndex] = c; - } else { - decompressor.raiseError(GPUErrors::ERROR_DECOMPRESSION_ATTACHED_CLUSTER_OVERFLOW, slice * 1000 + row, currentClusterIndex, decompressor.mMaxNativeClustersPerBuffer); - CAMath::AtomicExch(decompressor.mNativeClustersIndex + (slice * GPUCA_ROW_COUNT + row), decompressor.mMaxNativeClustersPerBuffer); + GPUdi() static ClusterNative decompressTrackStore(const CompressedClusters& cmprClusters, const uint32_t clusterOffset, uint32_t slice, uint32_t row, uint32_t pad, uint32_t time, GPUTPCDecompression& decompressor) + { + uint32_t tmpBufferIndex = slice * (GPUCA_ROW_COUNT * decompressor.mMaxNativeClustersPerBuffer) + row * decompressor.mMaxNativeClustersPerBuffer; + uint32_t currentClusterIndex = CAMath::AtomicAdd(decompressor.mNativeClustersIndex + (slice * GPUCA_ROW_COUNT + row), 1u); + const ClusterNative c(time, cmprClusters.flagsA[clusterOffset], pad, cmprClusters.sigmaTimeA[clusterOffset], cmprClusters.sigmaPadA[clusterOffset], cmprClusters.qMaxA[clusterOffset], cmprClusters.qTotA[clusterOffset]); + if (currentClusterIndex < decompressor.mMaxNativeClustersPerBuffer) { + decompressor.mTmpNativeClusters[tmpBufferIndex + currentClusterIndex] = c; + } else { + decompressor.raiseError(GPUErrors::ERROR_DECOMPRESSION_ATTACHED_CLUSTER_OVERFLOW, slice * 1000 + row, currentClusterIndex, decompressor.mMaxNativeClustersPerBuffer); + CAMath::AtomicExch(decompressor.mNativeClustersIndex + (slice * GPUCA_ROW_COUNT + row), decompressor.mMaxNativeClustersPerBuffer); + } + return c; } - return c; -} -template -GPUhdi() static void decompressTrack(const CompressedClusters& cmprClusters, const GPUParam& param, const uint32_t maxTime, const uint32_t trackIndex, uint32_t& clusterOffset, Args&... args) -{ - float zOffset = 0; - uint32_t slice = cmprClusters.sliceA[trackIndex]; - uint32_t row = cmprClusters.rowA[trackIndex]; - GPUTPCCompressionTrackModel track; - uint32_t clusterIndex; - for (clusterIndex = 0; clusterIndex < cmprClusters.nTrackClusters[trackIndex]; clusterIndex++) { - uint32_t pad = 0, time = 0; - if (clusterIndex != 0) { - uint8_t tmpSlice = cmprClusters.sliceLegDiffA[clusterOffset - trackIndex - 1]; - bool changeLeg = (tmpSlice >= GPUCA_NSLICES); - if (changeLeg) { - tmpSlice -= GPUCA_NSLICES; - } - if (cmprClusters.nComppressionModes & GPUSettings::CompressionDifferences) { - slice += tmpSlice; - if (slice >= GPUCA_NSLICES) { - slice -= GPUCA_NSLICES; + template + GPUdi() static void decompressTrack(const CompressedClusters& cmprClusters, const GPUParam& param, const uint32_t maxTime, const uint32_t& trackIndex, uint32_t& clusterOffset, Args&... args) + { + float zOffset = 0; + uint32_t slice = cmprClusters.sliceA[trackIndex]; + uint32_t row = cmprClusters.rowA[trackIndex]; + GPUTPCCompressionTrackModel track; + uint32_t clusterIndex; + for (clusterIndex = 0; clusterIndex < cmprClusters.nTrackClusters[trackIndex]; clusterIndex++) { + uint32_t pad = 0, time = 0; + if (clusterIndex != 0) { + uint8_t tmpSlice = cmprClusters.sliceLegDiffA[clusterOffset - trackIndex - 1]; + bool changeLeg = (tmpSlice >= GPUCA_NSLICES); + if (changeLeg) { + tmpSlice -= GPUCA_NSLICES; + } + if (cmprClusters.nComppressionModes & GPUSettings::CompressionDifferences) { + slice += tmpSlice; + if (slice >= GPUCA_NSLICES) { + slice -= GPUCA_NSLICES; + } + row += cmprClusters.rowDiffA[clusterOffset - trackIndex - 1]; + if (row >= GPUCA_ROW_COUNT) { + row -= GPUCA_ROW_COUNT; + } + } else { + slice = tmpSlice; + row = cmprClusters.rowDiffA[clusterOffset - trackIndex - 1]; + } + if (changeLeg && track.Mirror()) { + break; + } + if (track.Propagate(param.tpcGeometry.Row2X(row), param.SliceParam[slice].Alpha)) { + break; + } + uint32_t timeTmp = cmprClusters.timeResA[clusterOffset - trackIndex - 1]; + if (timeTmp & 800000) { + timeTmp |= 0xFF000000; + } + time = timeTmp + ClusterNative::packTime(CAMath::Max(0.f, param.tpcGeometry.LinearZ2Time(slice, track.Z() + zOffset))); + float tmpPad = CAMath::Max(0.f, CAMath::Min((float)param.tpcGeometry.NPads(GPUCA_ROW_COUNT - 1), param.tpcGeometry.LinearY2Pad(slice, row, track.Y()))); + pad = cmprClusters.padResA[clusterOffset - trackIndex - 1] + ClusterNative::packPad(tmpPad); + time = time & 0xFFFFFF; + pad = (uint16_t)pad; + if (pad >= param.tpcGeometry.NPads(row) * ClusterNative::scalePadPacked) { + if (pad >= 0xFFFF - 11968) { // Constant 11968 = (2^15 - MAX_PADS(138) * scalePadPacked(64)) / 2 + pad = 0; + } else { + pad = param.tpcGeometry.NPads(row) * ClusterNative::scalePadPacked - 1; + } } - row += cmprClusters.rowDiffA[clusterOffset - trackIndex - 1]; - if (row >= GPUCA_ROW_COUNT) { - row -= GPUCA_ROW_COUNT; + if (param.continuousMaxTimeBin > 0 && time >= maxTime) { + if (time >= 0xFFFFFF - 544768) { // Constant 544768 = (2^23 - LHCMAXBUNCHES(3564) * MAXORBITS(256) * scaleTimePacked(64) / BCPERTIMEBIN(8)) / 2) + time = 0; + } else { + time = maxTime; + } } } else { - slice = tmpSlice; - row = cmprClusters.rowDiffA[clusterOffset - trackIndex - 1]; + time = cmprClusters.timeA[trackIndex]; + pad = cmprClusters.padA[trackIndex]; } - if (changeLeg && track.Mirror()) { - break; + const auto cluster = decompressTrackStore(cmprClusters, clusterOffset, slice, row, pad, time, args...); + float y = param.tpcGeometry.LinearPad2Y(slice, row, cluster.getPad()); + float z = param.tpcGeometry.LinearTime2Z(slice, cluster.getTime()); + if (clusterIndex == 0) { + zOffset = z; + track.Init(param.tpcGeometry.Row2X(row), y, z - zOffset, param.SliceParam[slice].Alpha, cmprClusters.qPtA[trackIndex], param); } - if (track.Propagate(param.tpcGeometry.Row2X(row), param.SliceParam[slice].Alpha)) { + if (clusterIndex + 1 < cmprClusters.nTrackClusters[trackIndex] && track.Filter(y, z - zOffset, row)) { break; } - uint32_t timeTmp = cmprClusters.timeResA[clusterOffset - trackIndex - 1]; - if (timeTmp & 800000) { - timeTmp |= 0xFF000000; - } - time = timeTmp + ClusterNative::packTime(CAMath::Max(0.f, param.tpcGeometry.LinearZ2Time(slice, track.Z() + zOffset))); - float tmpPad = CAMath::Max(0.f, CAMath::Min((float)param.tpcGeometry.NPads(GPUCA_ROW_COUNT - 1), param.tpcGeometry.LinearY2Pad(slice, row, track.Y()))); - pad = cmprClusters.padResA[clusterOffset - trackIndex - 1] + ClusterNative::packPad(tmpPad); - time = time & 0xFFFFFF; - pad = (uint16_t)pad; - if (pad >= param.tpcGeometry.NPads(row) * ClusterNative::scalePadPacked) { - if (pad >= 0xFFFF - 11968) { // Constant 11968 = (2^15 - MAX_PADS(138) * scalePadPacked(64)) / 2 - pad = 0; - } else { - pad = param.tpcGeometry.NPads(row) * ClusterNative::scalePadPacked - 1; - } - } - if (param.continuousMaxTimeBin > 0 && time >= maxTime) { - if (time >= 0xFFFFFF - 544768) { // Constant 544768 = (2^23 - LHCMAXBUNCHES(3564) * MAXORBITS(256) * scaleTimePacked(64) / BCPERTIMEBIN(8)) / 2) - time = 0; - } else { - time = maxTime; - } - } - } else { - time = cmprClusters.timeA[trackIndex]; - pad = cmprClusters.padA[trackIndex]; + clusterOffset++; } - const auto cluster = decompressTrackStore(cmprClusters, clusterOffset, slice, row, pad, time, args...); - float y = param.tpcGeometry.LinearPad2Y(slice, row, cluster.getPad()); - float z = param.tpcGeometry.LinearTime2Z(slice, cluster.getTime()); - if (clusterIndex == 0) { - zOffset = z; - track.Init(param.tpcGeometry.Row2X(row), y, z - zOffset, param.SliceParam[slice].Alpha, cmprClusters.qPtA[trackIndex], param); - } - if (clusterIndex + 1 < cmprClusters.nTrackClusters[trackIndex] && track.Filter(y, z - zOffset, row)) { - break; - } - clusterOffset++; + clusterOffset += cmprClusters.nTrackClusters[trackIndex] - clusterIndex; } - clusterOffset += cmprClusters.nTrackClusters[trackIndex] - clusterIndex; -} -GPUhdi() static const auto& decompressHitsStore(const CompressedClusters& cmprClusters, uint32_t k, uint32_t time, uint16_t pad, ClusterNative*& clusterBuffer) -{ - return ((*(clusterBuffer++) = ClusterNative(time, cmprClusters.flagsU[k], pad, cmprClusters.sigmaTimeU[k], cmprClusters.sigmaPadU[k], cmprClusters.qMaxU[k], cmprClusters.qTotU[k]))); -} + GPUdi() static const auto& decompressHitsStore(const CompressedClusters& cmprClusters, uint32_t k, uint32_t time, uint16_t pad, ClusterNative*& clusterBuffer) + { + return ((*(clusterBuffer++) = ClusterNative(time, cmprClusters.flagsU[k], pad, cmprClusters.sigmaTimeU[k], cmprClusters.sigmaPadU[k], cmprClusters.qMaxU[k], cmprClusters.qTotU[k]))); + } -GPUhdi() static auto decompressHitsStore(const CompressedClusters& cmprClusters, uint32_t k, uint32_t time, uint16_t pad, std::function func) -{ - const auto cluster = ClusterNative(time, cmprClusters.flagsU[k], pad, cmprClusters.sigmaTimeU[k], cmprClusters.sigmaPadU[k], cmprClusters.qMaxU[k], cmprClusters.qTotU[k]); - func(cluster, k); - return cluster; -} +#ifndef GPUCA_GPUCODE + GPUhi() static auto decompressHitsStore(const CompressedClusters& cmprClusters, uint32_t k, uint32_t time, uint16_t pad, std::function func) + { + const auto cluster = ClusterNative(time, cmprClusters.flagsU[k], pad, cmprClusters.sigmaTimeU[k], cmprClusters.sigmaPadU[k], cmprClusters.qMaxU[k], cmprClusters.qTotU[k]); + func(cluster, k); + return cluster; + } +#endif -template -GPUdi() static void decompressHits(const CompressedClusters& cmprClusters, const uint32_t start, const uint32_t end, Args&... args) -{ - uint32_t time = 0; - uint16_t pad = 0; - for (uint32_t k = start; k < end; k++) { - if (cmprClusters.nComppressionModes & GPUSettings::CompressionDifferences) { - uint32_t timeTmp = cmprClusters.timeDiffU[k]; - if (timeTmp & 800000) { - timeTmp |= 0xFF000000; + template + GPUdi() static void decompressHits(const CompressedClusters& cmprClusters, const uint32_t start, const uint32_t end, Args&... args) + { + uint32_t time = 0; + uint16_t pad = 0; + for (uint32_t k = start; k < end; k++) { + if (cmprClusters.nComppressionModes & GPUSettings::CompressionDifferences) { + uint32_t timeTmp = cmprClusters.timeDiffU[k]; + if (timeTmp & 800000) { + timeTmp |= 0xFF000000; + } + time += timeTmp; + pad += cmprClusters.padDiffU[k]; + } else { + time = cmprClusters.timeDiffU[k]; + pad = cmprClusters.padDiffU[k]; } - time += timeTmp; - pad += cmprClusters.padDiffU[k]; - } else { - time = cmprClusters.timeDiffU[k]; - pad = cmprClusters.padDiffU[k]; + decompressHitsStore(cmprClusters, k, time, pad, args...); } - decompressHitsStore(cmprClusters, k, time, pad, args...); } -} - }; -} +} // namespace GPUCA_NAMESPACE::gpu #endif \ No newline at end of file diff --git a/GPU/GPUTracking/DataCompression/TPCClusterDecompressor.h b/GPU/GPUTracking/DataCompression/TPCClusterDecompressor.h index fc96f5fc72e28..d8e404b8a2ab7 100644 --- a/GPU/GPUTracking/DataCompression/TPCClusterDecompressor.h +++ b/GPU/GPUTracking/DataCompression/TPCClusterDecompressor.h @@ -35,11 +35,6 @@ class TPCClusterDecompressor static constexpr uint32_t NSLICES = GPUCA_NSLICES; static int32_t decompress(const o2::tpc::CompressedClustersFlat* clustersCompressed, o2::tpc::ClusterNativeAccess& clustersNative, std::function allocator, const GPUParam& param, bool deterministicRec); static int32_t decompress(const o2::tpc::CompressedClusters* clustersCompressed, o2::tpc::ClusterNativeAccess& clustersNative, std::function allocator, const GPUParam& param, bool deterministicRec); - - template - static void decompressTrack(const o2::tpc::CompressedClusters* clustersCompressed, const GPUParam& param, const uint32_t maxTime, const uint32_t i, uint32_t& offset, Args&... args); - template - static void decompressHits(const o2::tpc::CompressedClusters* clustersCompressed, const uint32_t start, const uint32_t end, Args&... args); }; } // namespace GPUCA_NAMESPACE::gpu diff --git a/GPU/GPUTracking/DataCompression/TPCClusterDecompressor.inc b/GPU/GPUTracking/DataCompression/TPCClusterDecompressor.inc deleted file mode 100644 index 2ea75b21bf22e..0000000000000 --- a/GPU/GPUTracking/DataCompression/TPCClusterDecompressor.inc +++ /dev/null @@ -1,164 +0,0 @@ -// Copyright 2019-2020 CERN and copyright holders of ALICE O2. -// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. -// All rights not expressly granted are reserved. -// -// This software is distributed under the terms of the GNU General Public -// License v3 (GPL Version 3), copied verbatim in the file "COPYING". -// -// In applying this license CERN does not waive the privileges and immunities -// granted to it by virtue of its status as an Intergovernmental Organization -// or submit itself to any jurisdiction. - -/// \file TPCClusterDecompressor.inc -/// \author David Rohr - -#include "TPCClusterDecompressor.h" -#include "GPUO2DataTypes.h" -#include "GPUParam.h" -#include "GPUTPCCompressionTrackModel.h" -#include -#include -#include -#include - -using namespace GPUCA_NAMESPACE::gpu; -using namespace o2::tpc; - -static inline auto decompressTrackStore(const o2::tpc::CompressedClusters* clustersCompressed, const uint32_t offset, uint32_t slice, uint32_t row, uint32_t pad, uint32_t time, std::function func) -{ - const auto cluster = ClusterNative(time, clustersCompressed->flagsA[offset], pad, clustersCompressed->sigmaTimeA[offset], clustersCompressed->sigmaPadA[offset], clustersCompressed->qMaxA[offset], clustersCompressed->qTotA[offset]); - func(cluster, offset); - return cluster; -} - -static inline const auto& decompressTrackStore(const o2::tpc::CompressedClusters* clustersCompressed, const uint32_t offset, uint32_t slice, uint32_t row, uint32_t pad, uint32_t time, std::vector& clusterVector) -{ - clusterVector.emplace_back(time, clustersCompressed->flagsA[offset], pad, clustersCompressed->sigmaTimeA[offset], clustersCompressed->sigmaPadA[offset], clustersCompressed->qMaxA[offset], clustersCompressed->qTotA[offset]); - return clusterVector.back(); -} - -static inline auto decompressTrackStore(const o2::tpc::CompressedClusters* clustersCompressed, const uint32_t offset, uint32_t slice, uint32_t row, uint32_t pad, uint32_t time, std::vector (&clusters)[GPUCA_NSLICES][GPUCA_ROW_COUNT], std::atomic_flag (&locks)[GPUCA_NSLICES][GPUCA_ROW_COUNT]) -{ - std::vector& clusterVector = clusters[slice][row]; - auto& lock = locks[slice][row]; - while (lock.test_and_set(std::memory_order_acquire)) { - } - // Note the return type is ClusterNative, not auto&, since a different thread might append another cluster, and the vector expansion can change the cluster pointer, so the cluster reference might be invalid - // TODO: A new version that might use a plain array + counter to fill the clusters should change this and the function return type to auto& - ClusterNative retVal = decompressTrackStore(clustersCompressed, offset, slice, row, pad, time, clusterVector); - lock.clear(std::memory_order_release); - return retVal; -} - -template -inline void TPCClusterDecompressor::decompressTrack(const CompressedClusters* clustersCompressed, const GPUParam& param, const uint32_t maxTime, const uint32_t i, uint32_t& offset, Args&... args) -{ - float zOffset = 0; - uint32_t slice = clustersCompressed->sliceA[i]; - uint32_t row = clustersCompressed->rowA[i]; - GPUTPCCompressionTrackModel track; - uint32_t j; - for (j = 0; j < clustersCompressed->nTrackClusters[i]; j++) { - uint32_t pad = 0, time = 0; - if (j) { - uint8_t tmpSlice = clustersCompressed->sliceLegDiffA[offset - i - 1]; - bool changeLeg = (tmpSlice >= NSLICES); - if (changeLeg) { - tmpSlice -= NSLICES; - } - if (clustersCompressed->nComppressionModes & GPUSettings::CompressionDifferences) { - slice += tmpSlice; - if (slice >= NSLICES) { - slice -= NSLICES; - } - row += clustersCompressed->rowDiffA[offset - i - 1]; - if (row >= GPUCA_ROW_COUNT) { - row -= GPUCA_ROW_COUNT; - } - } else { - slice = tmpSlice; - row = clustersCompressed->rowDiffA[offset - i - 1]; - } - if (changeLeg && track.Mirror()) { - break; - } - if (track.Propagate(param.tpcGeometry.Row2X(row), param.SliceParam[slice].Alpha)) { - break; - } - uint32_t timeTmp = clustersCompressed->timeResA[offset - i - 1]; - if (timeTmp & 800000) { - timeTmp |= 0xFF000000; - } - time = timeTmp + ClusterNative::packTime(CAMath::Max(0.f, param.tpcGeometry.LinearZ2Time(slice, track.Z() + zOffset))); - float tmpPad = CAMath::Max(0.f, CAMath::Min((float)param.tpcGeometry.NPads(GPUCA_ROW_COUNT - 1), param.tpcGeometry.LinearY2Pad(slice, row, track.Y()))); - pad = clustersCompressed->padResA[offset - i - 1] + ClusterNative::packPad(tmpPad); - time = time & 0xFFFFFF; - pad = (uint16_t)pad; - if (pad >= param.tpcGeometry.NPads(row) * ClusterNative::scalePadPacked) { - if (pad >= 0xFFFF - 11968) { // Constant 11968 = (2^15 - MAX_PADS(138) * scalePadPacked(64)) / 2 - pad = 0; - } else { - pad = param.tpcGeometry.NPads(row) * ClusterNative::scalePadPacked - 1; - } - } - if (param.continuousMaxTimeBin > 0 && time >= maxTime) { - if (time >= 0xFFFFFF - 544768) { // Constant 544768 = (2^23 - LHCMAXBUNCHES(3564) * MAXORBITS(256) * scaleTimePacked(64) / BCPERTIMEBIN(8)) / 2) - time = 0; - } else { - time = maxTime; - } - } - } else { - time = clustersCompressed->timeA[i]; - pad = clustersCompressed->padA[i]; - } - const auto& cluster = decompressTrackStore(clustersCompressed, offset, slice, row, pad, time, args...); - float y = param.tpcGeometry.LinearPad2Y(slice, row, cluster.getPad()); - float z = param.tpcGeometry.LinearTime2Z(slice, cluster.getTime()); - if (j == 0) { - zOffset = z; - track.Init(param.tpcGeometry.Row2X(row), y, z - zOffset, param.SliceParam[slice].Alpha, clustersCompressed->qPtA[i], param); - } - if (j + 1 < clustersCompressed->nTrackClusters[i] && track.Filter(y, z - zOffset, row)) { - break; - } - offset++; - } - offset += clustersCompressed->nTrackClusters[i] - j; -} - -static inline const auto& decompressHitsStore(const CompressedClusters* clustersCompressed, uint32_t k, uint32_t time, uint16_t pad, ClusterNative*& cl) -{ - return ((*(cl++) = ClusterNative(time, clustersCompressed->flagsU[k], pad, clustersCompressed->sigmaTimeU[k], clustersCompressed->sigmaPadU[k], clustersCompressed->qMaxU[k], clustersCompressed->qTotU[k]))); -} - -static inline auto decompressHitsStore(const CompressedClusters* clustersCompressed, uint32_t k, uint32_t time, uint16_t pad, std::function func) -{ - const auto cluster = ClusterNative(time, clustersCompressed->flagsU[k], pad, clustersCompressed->sigmaTimeU[k], clustersCompressed->sigmaPadU[k], clustersCompressed->qMaxU[k], clustersCompressed->qTotU[k]); - func(cluster, k); - return cluster; -} - -template -inline void TPCClusterDecompressor::decompressHits(const CompressedClusters* clustersCompressed, const uint32_t start, const uint32_t end, Args&... args) -{ - uint32_t time = 0; - uint16_t pad = 0; - for (uint32_t k = start; k < end; k++) { - /*if (cl >= clustersNative.clustersLinear + nTotalClusters) { - throw std::runtime_error("Bad TPC CTF data, decoded more clusters than announced"); - }*/ - if (clustersCompressed->nComppressionModes & GPUSettings::CompressionDifferences) { - uint32_t timeTmp = clustersCompressed->timeDiffU[k]; - if (timeTmp & 800000) { - timeTmp |= 0xFF000000; - } - time += timeTmp; - pad += clustersCompressed->padDiffU[k]; - } else { - time = clustersCompressed->timeDiffU[k]; - pad = clustersCompressed->padDiffU[k]; - } - decompressHitsStore(clustersCompressed, k, time, pad, args...); - } -}