From 8e70f221d07f23239cbe7aa16a97c321e4218fa6 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Wed, 28 Feb 2024 19:51:23 +0100 Subject: [PATCH] GPU: Auto-generate include-list for kernel headers --- GPU/GPUTracking/Base/GPUReconstructionCPU.h | 25 +-- ...GPUReconstructionKernelIncludes.template.h | 18 ++ GPU/GPUTracking/CMakeLists.txt | 4 + GPU/GPUTracking/cmake/kernel_helpers.cmake | 12 +- GPU/GPUTracking/kernels.cmake | 184 +++++++++--------- 5 files changed, 123 insertions(+), 120 deletions(-) create mode 100644 GPU/GPUTracking/Base/GPUReconstructionKernelIncludes.template.h diff --git a/GPU/GPUTracking/Base/GPUReconstructionCPU.h b/GPU/GPUTracking/Base/GPUReconstructionCPU.h index b3ebc1ef3e8ab..6f72074b53f8f 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionCPU.h +++ b/GPU/GPUTracking/Base/GPUReconstructionCPU.h @@ -23,34 +23,13 @@ #include #include "GPUGeneralKernels.h" -#include "GPUTPCCreateSliceData.h" -#include "GPUTPCNeighboursFinder.h" -#include "GPUTPCNeighboursCleaner.h" -#include "GPUTPCStartHitsFinder.h" -#include "GPUTPCStartHitsSorter.h" -#include "GPUTPCTrackletConstructor.h" -#include "GPUTPCTrackletSelector.h" -#include "GPUTPCGlobalTracking.h" -#include "GPUTRDTrackerKernels.h" -#include "GPUTPCCreateOccupancyMap.h" -#include "GPUTPCSectorDebugSortKernels.h" -#include "GPUTPCGlobalDebugSortKernels.h" -#ifdef GPUCA_NOCOMPAT -#include "GPUTPCGMMergerGPU.h" -#endif -#ifdef GPUCA_HAVE_O2HEADERS -#include "GPUITSFitterKernels.h" -#include "GPUTPCConvertKernel.h" -#include "GPUTPCCompressionKernels.h" -#include "GPUTPCClusterFinderKernels.h" -#include "GPUTrackingRefitKernel.h" -#include "GPUTPCGMO2Output.h" -#endif +#include "GPUReconstructionKernelIncludes.h" namespace GPUCA_NAMESPACE { namespace gpu { + class GPUReconstructionCPUBackend : public GPUReconstruction { public: diff --git a/GPU/GPUTracking/Base/GPUReconstructionKernelIncludes.template.h b/GPU/GPUTracking/Base/GPUReconstructionKernelIncludes.template.h new file mode 100644 index 
0000000000000..40d53fe949153 --- /dev/null +++ b/GPU/GPUTracking/Base/GPUReconstructionKernelIncludes.template.h @@ -0,0 +1,18 @@ +// Copyright 2019-2020 CERN and copyright holders of ALICE O2. +// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. +// All rights not expressly granted are reserved. +// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". +// +// In applying this license CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction. + +/// \file GPUReconstructionKernelIncludes.h +/// \author David Rohr + +// clang-format off +$<JOIN:$<LIST:TRANSFORM,$<LIST:TRANSFORM,$<LIST:REMOVE_DUPLICATES,$<TARGET_PROPERTY:O2_GPU_KERNELS,O2_GPU_KERNEL_INCLUDES>>,APPEND,.h">,PREPEND,#include ">, +> +// clang-format on diff --git a/GPU/GPUTracking/CMakeLists.txt b/GPU/GPUTracking/CMakeLists.txt index 52f1b3804e97d..5f60db99b55e3 100644 --- a/GPU/GPUTracking/CMakeLists.txt +++ b/GPU/GPUTracking/CMakeLists.txt @@ -247,6 +247,10 @@ file(GENERATE OUTPUT include_gpu_onthefly/GPUReconstructionKernels.h INPUT Base/GPUReconstructionKernels.template.h ) +file(GENERATE + OUTPUT include_gpu_onthefly/GPUReconstructionKernelIncludes.h + INPUT Base/GPUReconstructionKernelIncludes.template.h +) if(NOT ALIGPU_BUILD_TYPE STREQUAL "O2") include_directories(${CMAKE_CURRENT_BINARY_DIR}/include_gpu_onthefly) endif() diff --git a/GPU/GPUTracking/cmake/kernel_helpers.cmake b/GPU/GPUTracking/cmake/kernel_helpers.cmake index 929f359b93eaf..0b9c5b40524f2 100644 --- a/GPU/GPUTracking/cmake/kernel_helpers.cmake +++ b/GPU/GPUTracking/cmake/kernel_helpers.cmake @@ -15,20 +15,21 @@ add_custom_target(O2_GPU_KERNELS) define_property(TARGET PROPERTY O2_GPU_KERNELS) define_property(TARGET PROPERTY O2_GPU_KERNEL_NAMES) +define_property(TARGET PROPERTY O2_GPU_KERNEL_INCLUDES) set(O2_GPU_KERNEL_WRAPPER_FOLDER "${CMAKE_CURRENT_BINARY_DIR}/include_gpu_onthefly") file(MAKE_DIRECTORY 
${O2_GPU_KERNEL_WRAPPER_FOLDER}) set(O2_GPU_BASE_DIR "${CMAKE_CURRENT_LIST_DIR}/../") -function(o2_gpu_add_kernel kernel_name kernel_bounds kernel_type) +function(o2_gpu_add_kernel kernel_name kernel_files kernel_bounds kernel_type) math(EXPR TMP_CHK "${ARGC} & 1") - if(NOT ${TMP_CHK}) + if(${TMP_CHK}) message(FATAL_ERROR "Invalid number of arguments to kernel ${TMP_CHK}, must be odd to have pairs of argument type, argument name") endif() list(LENGTH ARGV n) set(OPT1 "") set(OPT2 "") - if(${n} GREATER 3) + if(${n} GREATER 4) math(EXPR n "${n} - 1") - foreach(i RANGE 3 ${n} 2) + foreach(i RANGE 4 ${n} 2) math(EXPR j "${i} + 1") if(${ARGV${i}} MATCHES "\\*$") string(APPEND OPT1 ",GPUPtr1(${ARGV${i}},${ARGV${j}})") @@ -54,9 +55,10 @@ function(o2_gpu_add_kernel kernel_name kernel_bounds kernel_type) endif() set(TMP_KERNEL "GPUCA_KRNL${TMP_BOUNDS}((${kernel_name}), (${kernel_type}), (${OPT1}), (${OPT2}))\n") set(TMP_KERNEL_IN_HEADER "${TMP_PRE}${TMP_KERNEL}${TMP_POST}") + list(GET kernel_files 0 TMP_KERNEL_CLASS) set_property(TARGET O2_GPU_KERNELS APPEND PROPERTY O2_GPU_KERNELS "${TMP_KERNEL_IN_HEADER}") set_property(TARGET O2_GPU_KERNELS APPEND PROPERTY O2_GPU_KERNEL_NAMES "${kernel_name}") - + set_property(TARGET O2_GPU_KERNELS APPEND PROPERTY O2_GPU_KERNEL_INCLUDES "${TMP_KERNEL_CLASS}") # add_custom_command OUTPUT option does not support target-dependend generator expressions, thus this workaround if(CUDA_ENABLED) string(REPLACE ", " "_" TMP_FILENAME "${kernel_name}") diff --git a/GPU/GPUTracking/kernels.cmake b/GPU/GPUTracking/kernels.cmake index ab25cccbba7d0..c6b380149297d 100644 --- a/GPU/GPUTracking/kernels.cmake +++ b/GPU/GPUTracking/kernels.cmake @@ -12,98 +12,98 @@ # file kernels.cmake # author David Rohr -o2_gpu_add_kernel("GPUTPCNeighboursFinder" LB_OCL1 single) -o2_gpu_add_kernel("GPUTPCNeighboursCleaner" LB_OCL1 single) -o2_gpu_add_kernel("GPUTPCStartHitsFinder" LB_OCL1 single) -o2_gpu_add_kernel("GPUTPCStartHitsSorter" LB_OCL1 single) 
-o2_gpu_add_kernel("GPUTPCTrackletConstructor, singleSlice" LB_OCL1 single) -o2_gpu_add_kernel("GPUTPCTrackletConstructor, allSlices" LB_OCL1 single) -o2_gpu_add_kernel("GPUTPCTrackletSelector" LB_OCL1 both) -o2_gpu_add_kernel("GPUMemClean16" NO_OCL1 "simple, REG, (GPUCA_THREAD_COUNT, 1)" void* ptr "unsigned long" size) -o2_gpu_add_kernel("GPUitoa" NO_OCL1 "simple, REG, (GPUCA_THREAD_COUNT, 1)" int* ptr "unsigned long" size) -o2_gpu_add_kernel("GPUTPCGlobalTrackingCopyNumbers" NO_OCL1 single int n) -o2_gpu_add_kernel("GPUTPCCreateSliceData" LB single) -o2_gpu_add_kernel("GPUTPCGlobalTracking" LB single) -o2_gpu_add_kernel("GPUTPCSectorDebugSortKernels, hitData" NO single) -o2_gpu_add_kernel("GPUTPCSectorDebugSortKernels, startHits" NO single) -o2_gpu_add_kernel("GPUTPCSectorDebugSortKernels, sliceTracks" NO single) -o2_gpu_add_kernel("GPUTPCGlobalDebugSortKernels, clearIds" NO single char parameter) -o2_gpu_add_kernel("GPUTPCGlobalDebugSortKernels, sectorTracks" NO single char parameter) -o2_gpu_add_kernel("GPUTPCGlobalDebugSortKernels, globalTracks1" NO single char parameter) -o2_gpu_add_kernel("GPUTPCGlobalDebugSortKernels, globalTracks2" NO single char parameter) -o2_gpu_add_kernel("GPUTPCGlobalDebugSortKernels, borderTracks" NO single char parameter) -o2_gpu_add_kernel("GPUTPCCreateOccupancyMap, fill" LB simple GPUTPCClusterOccupancyMapBin* map) -o2_gpu_add_kernel("GPUTPCCreateOccupancyMap, fold" LB simple GPUTPCClusterOccupancyMapBin* map) -o2_gpu_add_kernel("GPUTPCGMMergerTrackFit" LB simple int mode) -o2_gpu_add_kernel("GPUTPCGMMergerFollowLoopers" LB simple) -o2_gpu_add_kernel("GPUTPCGMMergerUnpackResetIds" LB simple int iSlice) -o2_gpu_add_kernel("GPUTPCGMMergerSliceRefit" LB simple int iSlice) -o2_gpu_add_kernel("GPUTPCGMMergerUnpackGlobal" LB simple int iSlice) -o2_gpu_add_kernel("GPUTPCGMMergerUnpackSaveNumber" NO simple int id) -o2_gpu_add_kernel("GPUTPCGMMergerResolve, step0" LB simple) -o2_gpu_add_kernel("GPUTPCGMMergerResolve, step1" LB simple) 
-o2_gpu_add_kernel("GPUTPCGMMergerResolve, step2" LB simple) -o2_gpu_add_kernel("GPUTPCGMMergerResolve, step3" LB simple) -o2_gpu_add_kernel("GPUTPCGMMergerResolve, step4" LB simple char useOrigTrackParam char mergeAll) -o2_gpu_add_kernel("GPUTPCGMMergerClearLinks" LB simple char output) -o2_gpu_add_kernel("GPUTPCGMMergerMergeWithinPrepare" LB simple) -o2_gpu_add_kernel("GPUTPCGMMergerMergeSlicesPrepare" LB simple int border0 int border1 char useOrigTrackParam) -o2_gpu_add_kernel("GPUTPCGMMergerMergeBorders, step0" LB simple int iSlice char withinSlice char mergeMode) -o2_gpu_add_kernel("GPUTPCGMMergerMergeBorders, step1" NO simple int iSlice char withinSlice char mergeMode) -o2_gpu_add_kernel("GPUTPCGMMergerMergeBorders, step2" LB simple int iSlice char withinSlice char mergeMode) -o2_gpu_add_kernel("GPUTPCGMMergerMergeBorders, variant" NO simple gputpcgmmergertypes::GPUTPCGMBorderRange* range int N int cmpMax) -o2_gpu_add_kernel("GPUTPCGMMergerMergeCE" LB simple) -o2_gpu_add_kernel("GPUTPCGMMergerLinkGlobalTracks" LB simple) -o2_gpu_add_kernel("GPUTPCGMMergerCollect" LB simple) -o2_gpu_add_kernel("GPUTPCGMMergerSortTracks" NO simple) -o2_gpu_add_kernel("GPUTPCGMMergerSortTracksQPt" NO simple) -o2_gpu_add_kernel("GPUTPCGMMergerSortTracksPrepare" LB simple) -o2_gpu_add_kernel("GPUTPCGMMergerPrepareClusters, step0" LB simple) -o2_gpu_add_kernel("GPUTPCGMMergerPrepareClusters, step1" LB simple) -o2_gpu_add_kernel("GPUTPCGMMergerPrepareClusters, step2" LB simple) -o2_gpu_add_kernel("GPUTPCGMMergerFinalize, step0" LB simple) -o2_gpu_add_kernel("GPUTPCGMMergerFinalize, step1" LB simple) -o2_gpu_add_kernel("GPUTPCGMMergerFinalize, step2" LB simple) -o2_gpu_add_kernel("GPUTPCGMMergerMergeLoopers, step0" LB simple) -o2_gpu_add_kernel("GPUTPCGMMergerMergeLoopers, step1" LB simple) -o2_gpu_add_kernel("GPUTPCGMMergerMergeLoopers, step2" LB simple) +o2_gpu_add_kernel("GPUTPCNeighboursFinder" "GPUTPCNeighboursFinder" LB_OCL1 single) +o2_gpu_add_kernel("GPUTPCNeighboursCleaner" 
"GPUTPCNeighboursCleaner" LB_OCL1 single) +o2_gpu_add_kernel("GPUTPCStartHitsFinder" "GPUTPCStartHitsFinder" LB_OCL1 single) +o2_gpu_add_kernel("GPUTPCStartHitsSorter" "GPUTPCStartHitsSorter" LB_OCL1 single) +o2_gpu_add_kernel("GPUTPCTrackletConstructor, singleSlice" "GPUTPCTrackletConstructor" LB_OCL1 single) +o2_gpu_add_kernel("GPUTPCTrackletConstructor, allSlices" "GPUTPCTrackletConstructor" LB_OCL1 single) +o2_gpu_add_kernel("GPUTPCTrackletSelector" "GPUTPCTrackletSelector" LB_OCL1 both) +o2_gpu_add_kernel("GPUMemClean16" "GPUGeneralKernels" NO_OCL1 "simple, REG, (GPUCA_THREAD_COUNT, 1)" void* ptr "unsigned long" size) +o2_gpu_add_kernel("GPUitoa" "GPUGeneralKernels" NO_OCL1 "simple, REG, (GPUCA_THREAD_COUNT, 1)" int* ptr "unsigned long" size) +o2_gpu_add_kernel("GPUTPCGlobalTrackingCopyNumbers" "GPUTPCGlobalTracking" NO_OCL1 single int n) +o2_gpu_add_kernel("GPUTPCCreateSliceData" "GPUTPCCreateSliceData" LB single) +o2_gpu_add_kernel("GPUTPCGlobalTracking" "GPUTPCGlobalTracking" LB single) +o2_gpu_add_kernel("GPUTPCSectorDebugSortKernels, hitData" "GPUTPCSectorDebugSortKernels" NO single) +o2_gpu_add_kernel("GPUTPCSectorDebugSortKernels, startHits" "GPUTPCSectorDebugSortKernels" NO single) +o2_gpu_add_kernel("GPUTPCSectorDebugSortKernels, sliceTracks" "GPUTPCSectorDebugSortKernels" NO single) +o2_gpu_add_kernel("GPUTPCGlobalDebugSortKernels, clearIds" "GPUTPCGlobalDebugSortKernels" NO single char parameter) +o2_gpu_add_kernel("GPUTPCGlobalDebugSortKernels, sectorTracks" "GPUTPCGlobalDebugSortKernels" NO single char parameter) +o2_gpu_add_kernel("GPUTPCGlobalDebugSortKernels, globalTracks1" "GPUTPCGlobalDebugSortKernels" NO single char parameter) +o2_gpu_add_kernel("GPUTPCGlobalDebugSortKernels, globalTracks2" "GPUTPCGlobalDebugSortKernels" NO single char parameter) +o2_gpu_add_kernel("GPUTPCGlobalDebugSortKernels, borderTracks" "GPUTPCGlobalDebugSortKernels" NO single char parameter) +o2_gpu_add_kernel("GPUTPCCreateOccupancyMap, fill" 
"GPUTPCCreateOccupancyMap" LB simple GPUTPCClusterOccupancyMapBin* map) +o2_gpu_add_kernel("GPUTPCCreateOccupancyMap, fold" "GPUTPCCreateOccupancyMap" LB simple GPUTPCClusterOccupancyMapBin* map) +o2_gpu_add_kernel("GPUTPCGMMergerTrackFit" "GPUTPCGMMergerGPU" LB simple int mode) +o2_gpu_add_kernel("GPUTPCGMMergerFollowLoopers" "GPUTPCGMMergerGPU" LB simple) +o2_gpu_add_kernel("GPUTPCGMMergerUnpackResetIds" "GPUTPCGMMergerGPU" LB simple int iSlice) +o2_gpu_add_kernel("GPUTPCGMMergerSliceRefit" "GPUTPCGMMergerGPU" LB simple int iSlice) +o2_gpu_add_kernel("GPUTPCGMMergerUnpackGlobal" "GPUTPCGMMergerGPU" LB simple int iSlice) +o2_gpu_add_kernel("GPUTPCGMMergerUnpackSaveNumber" "GPUTPCGMMergerGPU" NO simple int id) +o2_gpu_add_kernel("GPUTPCGMMergerResolve, step0" "GPUTPCGMMergerGPU" LB simple) +o2_gpu_add_kernel("GPUTPCGMMergerResolve, step1" "GPUTPCGMMergerGPU" LB simple) +o2_gpu_add_kernel("GPUTPCGMMergerResolve, step2" "GPUTPCGMMergerGPU" LB simple) +o2_gpu_add_kernel("GPUTPCGMMergerResolve, step3" "GPUTPCGMMergerGPU" LB simple) +o2_gpu_add_kernel("GPUTPCGMMergerResolve, step4" "GPUTPCGMMergerGPU" LB simple char useOrigTrackParam char mergeAll) +o2_gpu_add_kernel("GPUTPCGMMergerClearLinks" "GPUTPCGMMergerGPU" LB simple char output) +o2_gpu_add_kernel("GPUTPCGMMergerMergeWithinPrepare" "GPUTPCGMMergerGPU" LB simple) +o2_gpu_add_kernel("GPUTPCGMMergerMergeSlicesPrepare" "GPUTPCGMMergerGPU" LB simple int border0 int border1 char useOrigTrackParam) +o2_gpu_add_kernel("GPUTPCGMMergerMergeBorders, step0" "GPUTPCGMMergerGPU" LB simple int iSlice char withinSlice char mergeMode) +o2_gpu_add_kernel("GPUTPCGMMergerMergeBorders, step1" "GPUTPCGMMergerGPU" NO simple int iSlice char withinSlice char mergeMode) +o2_gpu_add_kernel("GPUTPCGMMergerMergeBorders, step2" "GPUTPCGMMergerGPU" LB simple int iSlice char withinSlice char mergeMode) +o2_gpu_add_kernel("GPUTPCGMMergerMergeBorders, variant" "GPUTPCGMMergerGPU" NO simple gputpcgmmergertypes::GPUTPCGMBorderRange* range int N int 
cmpMax) +o2_gpu_add_kernel("GPUTPCGMMergerMergeCE" "GPUTPCGMMergerGPU" LB simple) +o2_gpu_add_kernel("GPUTPCGMMergerLinkGlobalTracks" "GPUTPCGMMergerGPU" LB simple) +o2_gpu_add_kernel("GPUTPCGMMergerCollect" "GPUTPCGMMergerGPU" LB simple) +o2_gpu_add_kernel("GPUTPCGMMergerSortTracks" "GPUTPCGMMergerGPU" NO simple) +o2_gpu_add_kernel("GPUTPCGMMergerSortTracksQPt" "GPUTPCGMMergerGPU" NO simple) +o2_gpu_add_kernel("GPUTPCGMMergerSortTracksPrepare" "GPUTPCGMMergerGPU" LB simple) +o2_gpu_add_kernel("GPUTPCGMMergerPrepareClusters, step0" "GPUTPCGMMergerGPU" LB simple) +o2_gpu_add_kernel("GPUTPCGMMergerPrepareClusters, step1" "GPUTPCGMMergerGPU" LB simple) +o2_gpu_add_kernel("GPUTPCGMMergerPrepareClusters, step2" "GPUTPCGMMergerGPU" LB simple) +o2_gpu_add_kernel("GPUTPCGMMergerFinalize, step0" "GPUTPCGMMergerGPU" LB simple) +o2_gpu_add_kernel("GPUTPCGMMergerFinalize, step1" "GPUTPCGMMergerGPU" LB simple) +o2_gpu_add_kernel("GPUTPCGMMergerFinalize, step2" "GPUTPCGMMergerGPU" LB simple) +o2_gpu_add_kernel("GPUTPCGMMergerMergeLoopers, step0" "GPUTPCGMMergerGPU" LB simple) +o2_gpu_add_kernel("GPUTPCGMMergerMergeLoopers, step1" "GPUTPCGMMergerGPU" LB simple) +o2_gpu_add_kernel("GPUTPCGMMergerMergeLoopers, step2" "GPUTPCGMMergerGPU" LB simple) if(ALIGPU_BUILD_TYPE STREQUAL "O2" OR CONFIG_O2_EXTENSIONS) -o2_gpu_add_kernel("GPUTPCGMO2Output, prepare" LB simple) -o2_gpu_add_kernel("GPUTPCGMO2Output, sort" NO simple) -o2_gpu_add_kernel("GPUTPCGMO2Output, output" LB simple) -o2_gpu_add_kernel("GPUTPCGMO2Output, mc" NO simple) -o2_gpu_add_kernel("GPUTRDTrackerKernels, gpuVersion" LB simple GPUTRDTrackerGPU* externalInstance) -o2_gpu_add_kernel("GPUTRDTrackerKernels, o2Version" LB simple GPUTRDTracker* externalInstance) -o2_gpu_add_kernel("GPUITSFitterKernel" LB simple) -o2_gpu_add_kernel("GPUTPCConvertKernel" LB simple) -o2_gpu_add_kernel("GPUTPCCompressionKernels, step0attached" LB simple) -o2_gpu_add_kernel("GPUTPCCompressionKernels, step1unattached" LB simple) 
-o2_gpu_add_kernel("GPUTPCCompressionGatherKernels, unbuffered" LB simple) -o2_gpu_add_kernel("GPUTPCCompressionGatherKernels, buffered32" LB simple) -o2_gpu_add_kernel("GPUTPCCompressionGatherKernels, buffered64" LB simple) -o2_gpu_add_kernel("GPUTPCCompressionGatherKernels, buffered128" LB simple) -o2_gpu_add_kernel("GPUTPCCompressionGatherKernels, multiBlock" LB simple) -o2_gpu_add_kernel("GPUTPCCFCheckPadBaseline" LB single) -o2_gpu_add_kernel("GPUTPCCFChargeMapFiller, fillIndexMap" LB single) -o2_gpu_add_kernel("GPUTPCCFChargeMapFiller, fillFromDigits" LB single) -o2_gpu_add_kernel("GPUTPCCFChargeMapFiller, findFragmentStart" LB single char setPositions) -o2_gpu_add_kernel("GPUTPCCFPeakFinder" LB single) -o2_gpu_add_kernel("GPUTPCCFNoiseSuppression, noiseSuppression" LB single) -o2_gpu_add_kernel("GPUTPCCFNoiseSuppression, updatePeaks" LB single) -o2_gpu_add_kernel("GPUTPCCFDeconvolution" LB single) -o2_gpu_add_kernel("GPUTPCCFClusterizer" LB single char onlyMC) -o2_gpu_add_kernel("GPUTPCCFMCLabelFlattener, setRowOffsets" NO single) -o2_gpu_add_kernel("GPUTPCCFMCLabelFlattener, flatten" NO single GPUTPCLinearLabels* out) -o2_gpu_add_kernel("GPUTPCCFStreamCompaction, scanStart" LB single int iBuf int stage) -o2_gpu_add_kernel("GPUTPCCFStreamCompaction, scanUp" LB single int iBuf int nElems) -o2_gpu_add_kernel("GPUTPCCFStreamCompaction, scanTop" LB single int iBuf int nElems) -o2_gpu_add_kernel("GPUTPCCFStreamCompaction, scanDown" LB single int iBuf "unsigned int" offset int nElems) -o2_gpu_add_kernel("GPUTPCCFStreamCompaction, compactDigits" LB single int iBuf int stage ChargePos* in ChargePos* out) -o2_gpu_add_kernel("GPUTPCCFDecodeZS" LB single int firstHBF) -o2_gpu_add_kernel("GPUTPCCFDecodeZSLink" LB single int firstHBF) -o2_gpu_add_kernel("GPUTPCCFDecodeZSDenseLink" LB single int firstHBF) -o2_gpu_add_kernel("GPUTPCCFGather" LB single o2::tpc::ClusterNative* dest) -o2_gpu_add_kernel("GPUTrackingRefitKernel, mode0asGPU" LB simple) 
-o2_gpu_add_kernel("GPUTrackingRefitKernel, mode1asTrackParCov" LB simple) +o2_gpu_add_kernel("GPUTPCGMO2Output, prepare" "GPUTPCGMO2Output" LB simple) +o2_gpu_add_kernel("GPUTPCGMO2Output, sort" "GPUTPCGMO2Output" NO simple) +o2_gpu_add_kernel("GPUTPCGMO2Output, output" "GPUTPCGMO2Output" LB simple) +o2_gpu_add_kernel("GPUTPCGMO2Output, mc" "GPUTPCGMO2Output" NO simple) +o2_gpu_add_kernel("GPUTRDTrackerKernels, gpuVersion" "GPUTRDTrackerKernels" LB simple GPUTRDTrackerGPU* externalInstance) +o2_gpu_add_kernel("GPUTRDTrackerKernels, o2Version" "GPUTRDTrackerKernels" LB simple GPUTRDTracker* externalInstance) +o2_gpu_add_kernel("GPUITSFitterKernel" "GPUITSFitterKernels" LB simple) +o2_gpu_add_kernel("GPUTPCConvertKernel" "GPUTPCConvertKernel" LB simple) +o2_gpu_add_kernel("GPUTPCCompressionKernels, step0attached" "GPUTPCCompressionKernels" LB simple) +o2_gpu_add_kernel("GPUTPCCompressionKernels, step1unattached" "GPUTPCCompressionKernels" LB simple) +o2_gpu_add_kernel("GPUTPCCompressionGatherKernels, unbuffered" "GPUTPCCompressionKernels" LB simple) +o2_gpu_add_kernel("GPUTPCCompressionGatherKernels, buffered32" "GPUTPCCompressionKernels" LB simple) +o2_gpu_add_kernel("GPUTPCCompressionGatherKernels, buffered64" "GPUTPCCompressionKernels" LB simple) +o2_gpu_add_kernel("GPUTPCCompressionGatherKernels, buffered128" "GPUTPCCompressionKernels" LB simple) +o2_gpu_add_kernel("GPUTPCCompressionGatherKernels, multiBlock" "GPUTPCCompressionKernels" LB simple) +o2_gpu_add_kernel("GPUTPCCFCheckPadBaseline" "GPUTPCClusterFinderKernels" LB single) +o2_gpu_add_kernel("GPUTPCCFChargeMapFiller, fillIndexMap" "GPUTPCClusterFinderKernels" LB single) +o2_gpu_add_kernel("GPUTPCCFChargeMapFiller, fillFromDigits" "GPUTPCClusterFinderKernels" LB single) +o2_gpu_add_kernel("GPUTPCCFChargeMapFiller, findFragmentStart" "GPUTPCClusterFinderKernels" LB single char setPositions) +o2_gpu_add_kernel("GPUTPCCFPeakFinder" "GPUTPCClusterFinderKernels" LB single) 
+o2_gpu_add_kernel("GPUTPCCFNoiseSuppression, noiseSuppression" "GPUTPCClusterFinderKernels" LB single) +o2_gpu_add_kernel("GPUTPCCFNoiseSuppression, updatePeaks" "GPUTPCClusterFinderKernels" LB single) +o2_gpu_add_kernel("GPUTPCCFDeconvolution" "GPUTPCClusterFinderKernels" LB single) +o2_gpu_add_kernel("GPUTPCCFClusterizer" "GPUTPCClusterFinderKernels" LB single char onlyMC) +o2_gpu_add_kernel("GPUTPCCFMCLabelFlattener, setRowOffsets" "GPUTPCClusterFinderKernels" NO single) +o2_gpu_add_kernel("GPUTPCCFMCLabelFlattener, flatten" "GPUTPCClusterFinderKernels" NO single GPUTPCLinearLabels* out) +o2_gpu_add_kernel("GPUTPCCFStreamCompaction, scanStart" "GPUTPCClusterFinderKernels" LB single int iBuf int stage) +o2_gpu_add_kernel("GPUTPCCFStreamCompaction, scanUp" "GPUTPCClusterFinderKernels" LB single int iBuf int nElems) +o2_gpu_add_kernel("GPUTPCCFStreamCompaction, scanTop" "GPUTPCClusterFinderKernels" LB single int iBuf int nElems) +o2_gpu_add_kernel("GPUTPCCFStreamCompaction, scanDown" "GPUTPCClusterFinderKernels" LB single int iBuf "unsigned int" offset int nElems) +o2_gpu_add_kernel("GPUTPCCFStreamCompaction, compactDigits" "GPUTPCClusterFinderKernels" LB single int iBuf int stage ChargePos* in ChargePos* out) +o2_gpu_add_kernel("GPUTPCCFDecodeZS" "GPUTPCClusterFinderKernels" LB single int firstHBF) +o2_gpu_add_kernel("GPUTPCCFDecodeZSLink" "GPUTPCClusterFinderKernels" LB single int firstHBF) +o2_gpu_add_kernel("GPUTPCCFDecodeZSDenseLink" "GPUTPCClusterFinderKernels" LB single int firstHBF) +o2_gpu_add_kernel("GPUTPCCFGather" "GPUTPCClusterFinderKernels" LB single o2::tpc::ClusterNative* dest) +o2_gpu_add_kernel("GPUTrackingRefitKernel, mode0asGPU" "GPUTrackingRefitKernel" LB simple) +o2_gpu_add_kernel("GPUTrackingRefitKernel, mode1asTrackParCov" "GPUTrackingRefitKernel" LB simple) endif()