From 53d1f1eac8934f19991dc79381846f294788beea Mon Sep 17 00:00:00 2001 From: David Rohr Date: Sun, 17 Mar 2024 16:17:57 +0100 Subject: [PATCH] Do not use std::MATH code in GPU code but o2::gpu::CAMath (#12870) * Do not use std::MATH code in GPU code but o2::gpu::CAMath * Update Detectors/ITSMFT/ITS/tracking/src/TrackerTraits.cxx Co-authored-by: Matteo Concas --------- Co-authored-by: Matteo Concas --- Detectors/Base/src/MatLayerCyl.cxx | 8 ++++---- Detectors/Base/src/MatLayerCylSet.cxx | 2 +- .../ITS/tracking/GPU/cuda/TrackingKernels.cu | 4 ++-- .../include/ITStracking/ClusterLines.h | 6 +++--- .../tracking/include/ITStracking/TimeFrame.h | 2 +- .../ITSMFT/ITS/tracking/src/ClusterLines.cxx | 10 +++++----- .../ITSMFT/ITS/tracking/src/TimeFrame.cxx | 14 +++++++------- .../ITSMFT/ITS/tracking/src/TrackerTraits.cxx | 18 +++++++++--------- 8 files changed, 32 insertions(+), 32 deletions(-) diff --git a/Detectors/Base/src/MatLayerCyl.cxx b/Detectors/Base/src/MatLayerCyl.cxx index 6ff822bb7faab..04f68fb81866f 100644 --- a/Detectors/Base/src/MatLayerCyl.cxx +++ b/Detectors/Base/src/MatLayerCyl.cxx @@ -97,8 +97,8 @@ void MatLayerCyl::initSegmentation(float rMin, float rMax, float zHalfSpan, int offs = alignSize(offs + nphi * sizeof(float), getBufferAlignmentBytes()); // account for alignment for (int i = nphi; i--;) { - mSliceCos[i] = std::cos(getPhiBinMin(i)); - mSliceSin[i] = std::sin(getPhiBinMin(i)); + mSliceCos[i] = o2::math_utils::cos(getPhiBinMin(i)); + mSliceSin[i] = o2::math_utils::sin(getPhiBinMin(i)); } o2::gpu::resizeArray(mCells, 0, getNCells(), reinterpret_cast(mFlatBufferPtr + offs)); @@ -273,8 +273,8 @@ void MatLayerCyl::getMeanRMS(MatCell& mean, MatCell& rms) const rms.meanX2X0 /= nc; rms.meanRho -= mean.meanRho * mean.meanRho; rms.meanX2X0 -= mean.meanX2X0 * mean.meanX2X0; - rms.meanRho = rms.meanRho > 0.f ? std::sqrt(rms.meanRho) : 0.f; - rms.meanX2X0 = rms.meanX2X0 > 0.f ? std::sqrt(rms.meanX2X0) : 0.f; + rms.meanRho = rms.meanRho > 0.f ? o2::math_utils::sqrt(rms.meanRho) : 0.f; + rms.meanX2X0 = rms.meanX2X0 > 0.f ? o2::math_utils::sqrt(rms.meanX2X0) : 0.f; } //________________________________________________________________________________ diff --git a/Detectors/Base/src/MatLayerCylSet.cxx b/Detectors/Base/src/MatLayerCylSet.cxx index fb2e720427016..12156fc55b381 100644 --- a/Detectors/Base/src/MatLayerCylSet.cxx +++ b/Detectors/Base/src/MatLayerCylSet.cxx @@ -108,7 +108,7 @@ void MatLayerCylSet::finalizeStructures() for (int i = 1; i < nlr; i++) { const auto& lr = getLayer(i); - if (std::sqrt(lr.getRMin2()) > std::sqrt(get()->mR2Intervals[nRIntervals] + Ray::Tiny)) { + if (o2::math_utils::sqrt(lr.getRMin2()) > o2::math_utils::sqrt(get()->mR2Intervals[nRIntervals] + Ray::Tiny)) { // register gap get()->mInterval2LrID[nRIntervals] = -1; get()->mR2Intervals[++nRIntervals] = lr.getRMin2(); diff --git a/Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackingKernels.cu b/Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackingKernels.cu index 6ca04a94efe93..9c1acc20f3abd 100644 --- a/Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackingKernels.cu +++ b/Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackingKernels.cu @@ -335,7 +335,7 @@ GPUg() void computeLayerTrackletsKernelSingleRof( const float zAtRmin{tanLambda * (minR - currentCluster.radius) + currentCluster.zCoordinate}; const float zAtRmax{tanLambda * (maxR - currentCluster.radius) + currentCluster.zCoordinate}; const float sqInverseDeltaZ0{1.f / (Sq(currentCluster.zCoordinate - primaryVertex.getZ()) + 2.e-8f)}; /// protecting from overflows adding the detector resolution - const float sigmaZ{std::sqrt(Sq(resolution) * Sq(tanLambda) * ((Sq(inverseR0) + sqInverseDeltaZ0) * Sq(meanDeltaR) + 1.f) + Sq(meanDeltaR * mSAngle))}; + const float sigmaZ{o2::gpu::CAMath::Sqrt(Sq(resolution) * Sq(tanLambda) * ((Sq(inverseR0) + sqInverseDeltaZ0) * Sq(meanDeltaR) + 1.f) + Sq(meanDeltaR * mSAngle))}; const int4 selectedBinsRect{getBinsRect(currentCluster, layerIndex, *utils, zAtRmin, zAtRmax, sigmaZ * trkPars->NSigmaCut, phiCut)}; if (selectedBinsRect.x == 0 && selectedBinsRect.y == 0 && selectedBinsRect.z == 0 && selectedBinsRect.w == 0) { @@ -458,7 +458,7 @@ GPUg() void computeLayerTrackletsKernelMultipleRof( const float zAtRmin{tanLambda * (minR - currentCluster.radius) + currentCluster.zCoordinate}; const float zAtRmax{tanLambda * (maxR - currentCluster.radius) + currentCluster.zCoordinate}; const float sqInverseDeltaZ0{1.f / (Sq(currentCluster.zCoordinate - primaryVertex.getZ()) + 2.e-8f)}; /// protecting from overflows adding the detector resolution - const float sigmaZ{std::sqrt(Sq(resolution) * Sq(tanLambda) * ((Sq(inverseR0) + sqInverseDeltaZ0) * Sq(meanDeltaR) + 1.f) + Sq(meanDeltaR * mSAngle))}; + const float sigmaZ{o2::gpu::CAMath::Sqrt(Sq(resolution) * Sq(tanLambda) * ((Sq(inverseR0) + sqInverseDeltaZ0) * Sq(meanDeltaR) + 1.f) + Sq(meanDeltaR * mSAngle))}; const int4 selectedBinsRect{getBinsRect(currentCluster, layerIndex, *utils, zAtRmin, zAtRmax, sigmaZ * trkPars->NSigmaCut, phiCut)}; diff --git a/Detectors/ITSMFT/ITS/tracking/include/ITStracking/ClusterLines.h b/Detectors/ITSMFT/ITS/tracking/include/ITStracking/ClusterLines.h index 41077c2f0eeb1..550b8405770dc 100644 --- a/Detectors/ITSMFT/ITS/tracking/include/ITStracking/ClusterLines.h +++ b/Detectors/ITSMFT/ITS/tracking/include/ITStracking/ClusterLines.h @@ -173,9 +173,9 @@ GPUhdi() void Line::getDCAComponents(const Line& line, const float point[3], flo destArray[0] = line.originPoint[0] - point[0] + line.cosinesDirector[0] * cdelta; destArray[3] = line.originPoint[1] - point[1] + line.cosinesDirector[1] * cdelta; destArray[5] = line.originPoint[2] - point[2] + line.cosinesDirector[2] * cdelta; - destArray[1] = std::sqrt(destArray[0] * destArray[0] + destArray[3] * destArray[3]); - destArray[2] = std::sqrt(destArray[0] * destArray[0] + destArray[5] * destArray[5]); - destArray[4] = std::sqrt(destArray[3] * destArray[3] + destArray[5] * destArray[5]); + destArray[1] = o2::gpu::CAMath::Sqrt(destArray[0] * destArray[0] + destArray[3] * destArray[3]); + destArray[2] = o2::gpu::CAMath::Sqrt(destArray[0] * destArray[0] + destArray[5] * destArray[5]); + destArray[4] = o2::gpu::CAMath::Sqrt(destArray[3] * destArray[3] + destArray[5] * destArray[5]); } inline bool Line::operator==(const Line& rhs) const diff --git a/Detectors/ITSMFT/ITS/tracking/include/ITStracking/TimeFrame.h b/Detectors/ITSMFT/ITS/tracking/include/ITStracking/TimeFrame.h index 5e9f5e80b6ec3..b9e955c396b94 100644 --- a/Detectors/ITSMFT/ITS/tracking/include/ITStracking/TimeFrame.h +++ b/Detectors/ITSMFT/ITS/tracking/include/ITStracking/TimeFrame.h @@ -108,7 +108,7 @@ class TimeFrame void setBeamPosition(const float x, const float y, const float s2, const float base = 50.f, const float systematic = 0.f) { isBeamPositionOverridden = true; - resetBeamXY(x, y, s2 / std::sqrt(base * base + systematic)); + resetBeamXY(x, y, s2 / o2::gpu::CAMath::Sqrt(base * base + systematic)); } float getBeamX() const; diff --git a/Detectors/ITSMFT/ITS/tracking/src/ClusterLines.cxx b/Detectors/ITSMFT/ITS/tracking/src/ClusterLines.cxx index 6ad842a83d78f..8605bb0ecada2 100644 --- a/Detectors/ITSMFT/ITS/tracking/src/ClusterLines.cxx +++ b/Detectors/ITSMFT/ITS/tracking/src/ClusterLines.cxx @@ -26,8 +26,8 @@ Line::Line(std::array firstPoint, std::array secondPoint) cosinesDirector[index] = secondPoint[index] - firstPoint[index]; } - float inverseNorm{1.f / std::sqrt(cosinesDirector[0] * cosinesDirector[0] + cosinesDirector[1] * cosinesDirector[1] + - cosinesDirector[2] * cosinesDirector[2])}; + float inverseNorm{1.f / o2::gpu::CAMath::Sqrt(cosinesDirector[0] * cosinesDirector[0] + cosinesDirector[1] * cosinesDirector[1] + + cosinesDirector[2] * cosinesDirector[2])}; for (int index{0}; index < 3; ++index) { cosinesDirector[index] *= inverseNorm; } @@ -73,9 +73,9 @@ std::array Line::getDCAComponents(const Line& line, const std::array(mNrof * (trkParam.ZBins * trkParam.PhiBins + 1), 0)); @@ -375,18 +375,18 @@ void TimeFrame::initialise(const int iteration, const TrackingParameters& trkPar float oneOverR{0.001f * 0.3f * std::abs(mBz) / trkParam.TrackletMinPt}; for (unsigned int iLayer{0}; iLayer < mClusters.size(); ++iLayer) { mMSangles[iLayer] = MSangle(0.14f, trkParam.TrackletMinPt, trkParam.LayerxX0[iLayer]); - mPositionResolution[iLayer] = std::sqrt(0.5f * (trkParam.SystErrorZ2[iLayer] + trkParam.SystErrorY2[iLayer]) + trkParam.LayerResolution[iLayer] * trkParam.LayerResolution[iLayer]); + mPositionResolution[iLayer] = o2::gpu::CAMath::Sqrt(0.5f * (trkParam.SystErrorZ2[iLayer] + trkParam.SystErrorY2[iLayer]) + trkParam.LayerResolution[iLayer] * trkParam.LayerResolution[iLayer]); if (iLayer < mClusters.size() - 1) { const float& r1 = trkParam.LayerRadii[iLayer]; const float& r2 = trkParam.LayerRadii[iLayer + 1]; const float res1 = std::hypot(trkParam.PVres, mPositionResolution[iLayer]); const float res2 = std::hypot(trkParam.PVres, mPositionResolution[iLayer + 1]); - const float cosTheta1half = std::sqrt(1.f - Sq(0.5f * r1 * oneOverR)); - const float cosTheta2half = std::sqrt(1.f - Sq(0.5f * r2 * oneOverR)); + const float cosTheta1half = o2::gpu::CAMath::Sqrt(1.f - Sq(0.5f * r1 * oneOverR)); + const float cosTheta2half = o2::gpu::CAMath::Sqrt(1.f - Sq(0.5f * r2 * oneOverR)); float x = r2 * cosTheta1half - r1 * cosTheta2half; - float delta = std::sqrt(1. / (1.f - 0.25f * Sq(x * oneOverR)) * (Sq(0.25f * r1 * r2 * Sq(oneOverR) / cosTheta2half + cosTheta1half) * Sq(res1) + Sq(0.25f * r1 * r2 * Sq(oneOverR) / cosTheta1half + cosTheta2half) * Sq(res2))); - mPhiCuts[iLayer] = std::min(std::asin(0.5f * x * oneOverR) + 2.f * mMSangles[iLayer] + delta, constants::math::Pi * 0.5f); + float delta = o2::gpu::CAMath::Sqrt(1. / (1.f - 0.25f * Sq(x * oneOverR)) * (Sq(0.25f * r1 * r2 * Sq(oneOverR) / cosTheta2half + cosTheta1half) * Sq(res1) + Sq(0.25f * r1 * r2 * Sq(oneOverR) / cosTheta1half + cosTheta2half) * Sq(res2))); + mPhiCuts[iLayer] = std::min(o2::gpu::CAMath::ASin(0.5f * x * oneOverR) + 2.f * mMSangles[iLayer] + delta, constants::math::Pi * 0.5f); } } diff --git a/Detectors/ITSMFT/ITS/tracking/src/TrackerTraits.cxx b/Detectors/ITSMFT/ITS/tracking/src/TrackerTraits.cxx index 1efd31ab50d7b..079ac3a775c12 100644 --- a/Detectors/ITSMFT/ITS/tracking/src/TrackerTraits.cxx +++ b/Detectors/ITSMFT/ITS/tracking/src/TrackerTraits.cxx @@ -98,7 +98,7 @@ void TrackerTraits::computeLayerTracklets(const int iteration, int iROFslice, in for (int iV{startVtx}; iV < endVtx; ++iV) { auto& primaryVertex{primaryVertices[iV]}; - const float resolution = std::sqrt(Sq(mTrkParams[iteration].PVres) / primaryVertex.getNContributors() + Sq(tf->getPositionResolution(iLayer))); + const float resolution = o2::gpu::CAMath::Sqrt(Sq(mTrkParams[iteration].PVres) / primaryVertex.getNContributors() + Sq(tf->getPositionResolution(iLayer))); const float tanLambda{(currentCluster.zCoordinate - primaryVertex.getZ()) * inverseR0}; @@ -106,7 +106,7 @@ void TrackerTraits::computeLayerTracklets(const int iteration, int iROFslice, in const float zAtRmax{tanLambda * (tf->getMaxR(iLayer + 1) - currentCluster.radius) + currentCluster.zCoordinate}; const float sqInverseDeltaZ0{1.f / (Sq(currentCluster.zCoordinate - primaryVertex.getZ()) + 2.e-8f)}; /// protecting from overflows adding the detector resolution - const float sigmaZ{std::sqrt(Sq(resolution) * Sq(tanLambda) * ((Sq(inverseR0) + sqInverseDeltaZ0) * Sq(meanDeltaR) + 1.f) + Sq(meanDeltaR * tf->getMSangle(iLayer)))}; + const float sigmaZ{o2::gpu::CAMath::Sqrt(Sq(resolution) * Sq(tanLambda) * ((Sq(inverseR0) + sqInverseDeltaZ0) * Sq(meanDeltaR) + 1.f) + Sq(meanDeltaR * tf->getMSangle(iLayer)))}; const int4 selectedBinsRect{getBinsRect(currentCluster, iLayer, zAtRmin, zAtRmax, sigmaZ * mTrkParams[iteration].NSigmaCut, tf->getPhiCut(iLayer))}; @@ -291,7 +291,7 @@ void TrackerTraits::computeLayerCells(const int iteration) } #ifdef OPTIMISATION_OUTPUT - float resolution{std::sqrt(0.5f * (mTrkParams[iteration].SystErrorZ2[iLayer] + mTrkParams[iteration].SystErrorZ2[iLayer + 1] + mTrkParams[iteration].SystErrorZ2[iLayer + 2] + mTrkParams[iteration].SystErrorY2[iLayer] + mTrkParams[iteration].SystErrorY2[iLayer + 1] + mTrkParams[iteration].SystErrorY2[iLayer + 2])) / mTrkParams[iteration].LayerResolution[iLayer]}; + float resolution{o2::gpu::CAMath::Sqrt(0.5f * (mTrkParams[iteration].SystErrorZ2[iLayer] + mTrkParams[iteration].SystErrorZ2[iLayer + 1] + mTrkParams[iteration].SystErrorZ2[iLayer + 2] + mTrkParams[iteration].SystErrorY2[iLayer] + mTrkParams[iteration].SystErrorY2[iLayer + 1] + mTrkParams[iteration].SystErrorY2[iLayer + 2])) / mTrkParams[iteration].LayerResolution[iLayer]}; resolution = resolution > 1.e-12 ? resolution : 1.f; #endif const int currentLayerTrackletsNum{static_cast(tf->getTracklets()[iLayer].size())}; @@ -774,7 +774,7 @@ void TrackerTraits::findShortPrimaries() continue; } - float pvRes{mTrkParams[0].PVres / std::sqrt(float(pvs[iV].getNContributors()))}; + float pvRes{mTrkParams[0].PVres / o2::gpu::CAMath::Sqrt(float(pvs[iV].getNContributors()))}; const float posVtx[2]{0.f, pvs[iV].getZ()}; const float covVtx[3]{pvRes, 0.f, pvRes}; float chi2 = temporaryTrack.getPredictedChi2(posVtx, covVtx); @@ -876,9 +876,9 @@ bool TrackerTraits::trackFollowing(TrackITSExt* track, int rof, bool outward, co } } const float phi{hypoParam.getPhi()}; - const float ePhi{std::sqrt(hypoParam.getSigmaSnp2() / hypoParam.getCsp2())}; + const float ePhi{o2::gpu::CAMath::Sqrt(hypoParam.getSigmaSnp2() / hypoParam.getCsp2())}; const float z{hypoParam.getZ()}; - const float eZ{std::sqrt(hypoParam.getSigmaZ2())}; + const float eZ{o2::gpu::CAMath::Sqrt(hypoParam.getSigmaZ2())}; const int4 selectedBinsRect{getBinsRect(iLayer, phi, mTrkParams[iteration].NSigmaCut * ePhi, z, mTrkParams[iteration].NSigmaCut * eZ)}; if (selectedBinsRect.x == 0 && selectedBinsRect.y == 0 && selectedBinsRect.z == 0 && selectedBinsRect.w == 0) { @@ -965,7 +965,7 @@ bool TrackerTraits::trackFollowing(TrackITSExt* track, int rof, bool outward, co /// frame coordinates whereas the others are referred to the global frame. track::TrackParCov TrackerTraits::buildTrackSeed(const Cluster& cluster1, const Cluster& cluster2, const TrackingFrameInfo& tf3) { - const float ca = std::cos(tf3.alphaTrackingFrame), sa = std::sin(tf3.alphaTrackingFrame); + const float ca = o2::gpu::CAMath::Cos(tf3.alphaTrackingFrame), sa = o2::gpu::CAMath::Sin(tf3.alphaTrackingFrame); const float x1 = cluster1.xCoordinate * ca + cluster1.yCoordinate * sa; const float y1 = -cluster1.xCoordinate * sa + cluster1.yCoordinate * ca; const float z1 = cluster1.zCoordinate; @@ -977,9 +977,9 @@ track::TrackParCov TrackerTraits::buildTrackSeed(const Cluster& cluster1, const const float z3 = tf3.positionTrackingFrame[1]; const bool zeroField{std::abs(getBz()) < o2::constants::math::Almost0}; - const float tgp = zeroField ? std::atan2(y3 - y1, x3 - x1) : 1.f; + const float tgp = zeroField ? o2::gpu::CAMath::ATan2(y3 - y1, x3 - x1) : 1.f; const float crv = zeroField ? 1.f : math_utils::computeCurvature(x3, y3, x2, y2, x1, y1); - const float snp = zeroField ? tgp / std::sqrt(1.f + tgp * tgp) : crv * (x3 - math_utils::computeCurvatureCentreX(x3, y3, x2, y2, x1, y1)); + const float snp = zeroField ? tgp / o2::gpu::CAMath::Sqrt(1.f + tgp * tgp) : crv * (x3 - math_utils::computeCurvatureCentreX(x3, y3, x2, y2, x1, y1)); const float tgl12 = math_utils::computeTanDipAngle(x1, y1, x2, y2, z1, z2); const float tgl23 = math_utils::computeTanDipAngle(x2, y2, x3, y3, z2, z3); const float q2pt = zeroField ? 1.f / o2::track::kMostProbablePt : crv / (getBz() * o2::constants::math::B2C);