GPU: Clean up rounding, and use deterministic functions in GPUCA_NO_FAST_MATH mode
AliceO2Group · Feb 22, 2024 · 25fed02 · 25fed02
1 parent 6685127
commit 25fed02
Show file tree

Hide file tree

Showing 13 changed files with 31 additions and 38 deletions.
diff --git a/Common/MathUtils/include/MathUtils/detail/basicMath.h b/Common/MathUtils/include/MathUtils/detail/basicMath.h
@@ -65,7 +65,7 @@ GPUhdi() T abs(T x)
 template <class T>
 GPUdi() int nint(T x)
 {
-  return o2::gpu::GPUCommonMath::Nint(x);
+  return o2::gpu::GPUCommonMath::Float2IntRn(x);
 };
 
 template <class T>

diff --git a/DataFormats/Detectors/TRD/include/DataFormatsTRD/Tracklet64.h b/DataFormats/Detectors/TRD/include/DataFormatsTRD/Tracklet64.h
@@ -142,7 +142,7 @@ class Tracklet64
 
   // pad column number inside pad row as int can be off by +-1 pad (same function name as for TRD digit)
   // FIXME: understand why the offset seems to be 8 pads and not nChannels / 2 = 10.5
-  GPUd() int getPadCol() const { return GPUCA_NAMESPACE::gpu::CAMath::Nint(getPadColFloat() - 2); }
+  GPUd() int getPadCol() const { return GPUCA_NAMESPACE::gpu::CAMath::Float2IntRn(getPadColFloat() - 2); }
 
   // translate local position into global y (in cm) not taking into account calibrations (ExB, vDrift, t0)
   GPUd() float getUncalibratedY() const

diff --git a/Detectors/TRD/qc/src/RawDisplay.cxx b/Detectors/TRD/qc/src/RawDisplay.cxx
@@ -36,7 +36,7 @@ float PadColF(o2::trd::Tracklet64& tracklet)
 
   // original calculation
   // FIXME: understand why the offset seems to be 6 pads and not nChannels / 2 = 10.5
-  // return CAMath::Nint(6.f + mcmCol * ((float)constants::NCOLMCM) + padLocal);
+  // return CAMath::Round(6.f + mcmCol * ((float)constants::NCOLMCM) + padLocal);
 
   // my calculation
   return float((mcmCol + 1) * constants::NCOLMCM) + padLocal - 10.0;

diff --git a/GPU/Common/GPUCommonMath.h b/GPU/Common/GPUCommonMath.h
@@ -73,7 +73,10 @@ class GPUCommonMath
   GPUhdni() static float Copysign(float x, float y);
   GPUd() static float TwoPi() { return 6.2831853f; }
   GPUd() static float Pi() { return 3.1415927f; }
-  GPUd() static int Nint(float x);
+  GPUd() static float Round(float x);
+  GPUd() static float Floor(float x);
+  GPUd() static unsigned int Float2UIntRn(float x);
+  GPUd() static int Float2IntRn(float x);
   GPUd() static float Modf(float x, float y);
   GPUd() static bool Finite(float x);
   GPUd() static unsigned int Clz(unsigned int val);
@@ -203,25 +206,13 @@ GPUdi() float2 GPUCommonMath::MakeFloat2(float x, float y)
 #endif // GPUCA_GPUCODE
 }
 
-GPUdi() int GPUCommonMath::Nint(float x)
-{
-  int i;
-  if (x >= 0) {
-    i = int(x + 0.5f);
-    if (x + 0.5f == float(i) && i & 1) {
-      i--;
-    }
-  } else {
-    i = int(x - 0.5f);
-    if (x - 0.5f == float(i) && i & 1) {
-      i++;
-    }
-  }
-  return i;
-}
-
 GPUdi() float GPUCommonMath::Modf(float x, float y) { return CHOICE(fmodf(x, y), fmodf(x, y), fmod(x, y)); }
+GPUdi() unsigned int GPUCommonMath::Float2UIntRn(float x) { return (unsigned int)(int)(x + 0.5f); } // Adds 0.5 and truncates through int: ties go up (2.5f -> 3), and a negative int result wraps when converted to unsigned. NOTE(review): this is not round-to-nearest-even as the "Rn" suffix may suggest - confirm intended.
+GPUdi() float GPUCommonMath::Floor(float x) { return CHOICE(floorf(x), floorf(x), floor(x)); } // Rounds toward negative infinity; CHOICE presumably selects one expression per compile target (macro is defined outside this diff).
+
 #ifdef GPUCA_NO_FAST_MATH
+GPUdi() float GPUCommonMath::Round(float x) { return CHOICE(roundf(x), roundf(x), round(x)); } // GPUCA_NO_FAST_MATH variant: every CHOICE arm calls round()/roundf(), so halfway cases round away from zero in all arms.
+GPUdi() int GPUCommonMath::Float2IntRn(float x) { return (int)Round(x); } // Converts the result of Round() to int, so it inherits Round()'s tie handling in this mode.
 GPUdi() bool GPUCommonMath::Finite(float x) { return CHOICE(std::isfinite(x), isfinite(x), true); }
 GPUhdi() float GPUCommonMath::Sqrt(float x) { return CHOICE(sqrtf(x), (float)sqrt((double)x), sqrt(x)); }
 GPUdi() float GPUCommonMath::ATan(float x) { return CHOICE((float)atan((double)x), (float)atan((double)x), atan(x)); }
@@ -235,6 +226,8 @@ GPUdi() float GPUCommonMath::ACos(float x) { return CHOICE((float)acos((double)x
 GPUdi() float GPUCommonMath::Log(float x) { return CHOICE((float)log((double)x), (float)log((double)x), log(x)); }
 GPUdi() float GPUCommonMath::Exp(float x) { return CHOICE((float)exp((double)x), (float)exp((double)x), exp(x)); }
 #else
+GPUdi() float GPUCommonMath::Round(float x) { return CHOICE(roundf(x), rintf(x), rint(x)); } // Variant used when GPUCA_NO_FAST_MATH is not defined. NOTE(review): roundf() rounds ties away from zero while rintf()/rint() follow the current rounding mode (ties-to-even by default), so the CHOICE arms can disagree on x.5 inputs - confirm intended.
+GPUdi() int GPUCommonMath::Float2IntRn(float x) { return CHOICE((int)Round(x), __float2int_rn(x), (int)Round(x)); } // NOTE(review): __float2int_rn is presumably the CUDA round-to-nearest-even intrinsic, whereas the first arm goes through roundf() via Round() - tie results may differ between arms; confirm.
 GPUdi() bool GPUCommonMath::Finite(float x) { return CHOICE(std::isfinite(x), true, true); }
 GPUhdi() float GPUCommonMath::Sqrt(float x) { return CHOICE(sqrtf(x), sqrtf(x), sqrt(x)); }
 GPUdi() float GPUCommonMath::ATan(float x) { return CHOICE(atanf(x), atanf(x), atan(x)); }

diff --git a/GPU/GPUTracking/Base/GPUReconstructionConvert.cxx b/GPU/GPUTracking/Base/GPUReconstructionConvert.cxx
@@ -1308,7 +1308,7 @@ size_t zsEncoderRun<T>::compare(std::vector<zsPage>* buffer, std::vector<o2::tpc
   } else {
     for (unsigned int j = 0; j < tmpBuffer.size(); j++) {
       const float decodeBitsFactor = (1 << (encodeBits - 10));
-      const float c = (float)((int)(tmpBuffer[j].getChargeFloat() * decodeBitsFactor + 0.5f)) / decodeBitsFactor;
+      const float c = CAMath::Round(tmpBuffer[j].getChargeFloat() * decodeBitsFactor) / decodeBitsFactor;
       int ok = c == compareBuffer[j].getChargeFloat() && (int)tmpBuffer[j].getTimeStamp() == (int)compareBuffer[j].getTimeStamp() && (int)tmpBuffer[j].getPad() == (int)compareBuffer[j].getPad() && (int)tmpBuffer[j].getRow() == (int)compareBuffer[j].getRow();
       if (ok) {
         continue;
@@ -1450,9 +1450,9 @@ void GPUReconstructionConvert::RunZSFilter(std::unique_ptr<o2::tpc::Digit[]>* bu
           buffers[i][j] = buffers[i][k];
         }
         if (zs12bit) {
-          buffers[i][j].setCharge((float)((int)(buffers[i][j].getChargeFloat() * decodeBitsFactor + 0.5f)) / decodeBitsFactor);
+          buffers[i][j].setCharge(CAMath::Round(buffers[i][j].getChargeFloat() * decodeBitsFactor) / decodeBitsFactor);
         } else {
-          buffers[i][j].setCharge((float)((int)(buffers[i][j].getChargeFloat() + 0.5f)));
+          buffers[i][j].setCharge(CAMath::Round(buffers[i][j].getChargeFloat()));
         }
         j++;
       }

diff --git a/GPU/GPUTracking/DataTypes/TPCPadGainCalib.h b/GPU/GPUTracking/DataTypes/TPCPadGainCalib.h
@@ -132,7 +132,7 @@ struct TPCPadGainCalib {
       f -= mMinCorrectionFactor;
       f *= float(NumOfSteps);
       f /= (mMaxCorrectionFactor - mMinCorrectionFactor);
-      return CAMath::Nint(f);
+      return CAMath::Round(f);
     }
 
     GPUd() float unpack(T c) const

diff --git a/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx b/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx
@@ -114,7 +114,7 @@ GPUd() bool GPUTPCGMTrackParam::Fit(GPUTPCGMMerger* GPUrestrict() merger, int iT
     prop.SetMatLUT((param.rec.useMatLUT && iWay == nWays - 1) ? merger->GetConstantMem()->calibObjects.matLUT : nullptr);
     prop.SetTrack(this, iWay ? prop.GetAlpha() : Alpha);
     ConstrainSinPhi(prop.GetFitInProjections() ? 0.95f : GPUCA_MAX_SIN_PHI_LOW);
-    CADEBUG(printf("Fitting track %d way %d (sector %d, alpha %f)\n", iTrk, iWay, (int)(prop.GetAlpha() / kSectAngle + 0.5f) + (mP[1] < 0 ? 18 : 0), prop.GetAlpha()));
+    CADEBUG(printf("Fitting track %d way %d (sector %d, alpha %f)\n", iTrk, iWay, CAMath::Float2IntRn(prop.GetAlpha() / kSectAngle) + (mP[1] < 0 ? 18 : 0), prop.GetAlpha()));
 
     N = 0;
     lastUpdateX = -1;
@@ -357,7 +357,7 @@ GPUd() bool GPUTPCGMTrackParam::Fit(GPUTPCGMMerger* GPUrestrict() merger, int iT
             qtot = cl.qTot;
             qmax = cl.qMax;
             pad = cl.getPad();
-            relTime = cl.getTime() - int(cl.getTime() + 0.5f);
+            relTime = cl.getTime() - CAMath::Round(cl.getTime());
           }
           dEdx.fillCluster(qtot, qmax, clusters[ihit].row, clusters[ihit].slice, mP[2], mP[3], param, merger->GetConstantMem()->calibObjects, zz, pad, relTime);
         }
@@ -408,7 +408,7 @@ GPUdni() void GPUTPCGMTrackParam::MoveToReference(GPUTPCGMPropagator& prop, cons
     GPUTPCGMTrackParam save = *this;
     float saveAlpha = Alpha;
     for (int attempt = 0; attempt < 3; attempt++) {
-      float dAngle = floor(CAMath::ATan2(mP[0], mX) / kDeg2Rad / 20.f + 0.5f) * kSectAngle;
+      float dAngle = CAMath::Round(CAMath::ATan2(mP[0], mX) / kDeg2Rad / 20.f) * kSectAngle;
       Alpha += dAngle;
       if (prop.PropagateToXAlpha(param.rec.tpc.trackReferenceX, Alpha, 0)) {
         break;
@@ -422,7 +422,7 @@ GPUdni() void GPUTPCGMTrackParam::MoveToReference(GPUTPCGMPropagator& prop, cons
     Alpha = saveAlpha;
   }
   if (CAMath::Abs(mP[0]) > mX * CAMath::Tan(kSectAngle / 2.f)) {
-    float dAngle = floor(CAMath::ATan2(mP[0], mX) / kDeg2Rad / 20.f + 0.5f) * kSectAngle;
+    float dAngle = CAMath::Round(CAMath::ATan2(mP[0], mX) / kDeg2Rad / 20.f) * kSectAngle;
     Rotate(dAngle);
     ConstrainSinPhi();
     Alpha += dAngle;
@@ -827,7 +827,7 @@ GPUdni() void GPUTPCGMTrackParam::AttachClustersMirror(const GPUTPCGMMerger* GPU
   }
   float b = prop.GetBz(prop.GetAlpha(), mX, mP[0], mP[1]);
 
-  int count = CAMath::Abs((toX - X) / 0.5f) + 0.5f;
+  int count = CAMath::Float2IntRn(CAMath::Abs((toX - X) * 2.f));
   if (count == 0) {
     return;
   }

diff --git a/GPU/GPUTracking/Merger/GPUTPCGlobalMergerComponent.cxx b/GPU/GPUTracking/Merger/GPUTPCGlobalMergerComponent.cxx
@@ -448,7 +448,7 @@ int GPUTPCGlobalMergerComponent::DoEvent(const AliHLTComponentEventData& evtData
 
       // normalize the angle to +-Pi
 
-      currOutTrack->fAlpha = tp.GetAlpha() - CAMath::Nint(tp.GetAlpha() / CAMath::TwoPi()) * CAMath::TwoPi();
+      currOutTrack->fAlpha = tp.GetAlpha() - CAMath::Round(tp.GetAlpha() / CAMath::TwoPi()) * CAMath::TwoPi();
       currOutTrack->fX = tp.GetX();
       currOutTrack->fY = tp.GetY();
       currOutTrack->fZ = tp.GetZ();
@@ -518,7 +518,7 @@ int GPUTPCGlobalMergerComponent::DoEvent(const AliHLTComponentEventData& evtData
 
       // normalize the angle to +-Pi
 
-      currOutTrack->fAlpha = track.OuterParam().alpha - CAMath::Nint(tp.GetAlpha() / CAMath::TwoPi()) * CAMath::TwoPi();
+      currOutTrack->fAlpha = track.OuterParam().alpha - CAMath::Round(tp.GetAlpha() / CAMath::TwoPi()) * CAMath::TwoPi();
       currOutTrack->fX = track.OuterParam().X;
       currOutTrack->fY = track.OuterParam().P[0];
       currOutTrack->fZ = track.OuterParam().P[1];

diff --git a/GPU/GPUTracking/Refit/GPUTrackingRefit.cxx b/GPU/GPUTracking/Refit/GPUTrackingRefit.cxx
@@ -398,7 +398,7 @@ GPUd() int GPUTrackingRefit::RefitTrack(T& trkX, bool outward, bool resetCov)
     if (mPparam->rec.tpc.trackReferenceX <= 500) {
       if (prop->PropagateToXBxByBz(trk, mPparam->rec.tpc.trackReferenceX)) {
         if (CAMath::Abs(trk.getY()) > trk.getX() * CAMath::Tan(kSectAngle / 2.f)) {
-          float newAlpha = trk.getAlpha() + floor(CAMath::ATan2(trk.getY(), trk.getX()) / kDeg2Rad / 20.f + 0.5f) * kSectAngle;
+          float newAlpha = trk.getAlpha() + CAMath::Round(CAMath::ATan2(trk.getY(), trk.getX()) / kDeg2Rad / 20.f) * kSectAngle;
           GPUTPCGMTrackParam::NormalizeAlpha(newAlpha);
           trk.rotate(newAlpha) && prop->PropagateToXBxByBz(trk, mPparam->rec.tpc.trackReferenceX);
         }

diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCTrackParam.cxx b/GPU/GPUTracking/SliceTracker/GPUTPCTrackParam.cxx
@@ -88,7 +88,7 @@ GPUd() void MEM_LG(GPUTPCTrackParam)::GetDCAPoint(float x, float y, float z, flo
   if (CAMath::Abs(k) > 1.e-2f) {
     float dZ = CAMath::Abs(GetDzDs() * CAMath::TwoPi() / k);
     if (dZ > .1f) {
-      zp += CAMath::Nint((z - zp) / dZ) * dZ;
+      zp += CAMath::Round((z - zp) / dZ) * dZ;
     }
   }
 }

diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCTrackletConstructor.cxx b/GPU/GPUTracking/SliceTracker/GPUTPCTrackletConstructor.cxx
@@ -384,8 +384,8 @@ GPUdic(2, 1) void GPUTPCTrackletConstructor::UpdateTracklet(int /*nBlocks*/, int
       (void)found;
 #if !defined(__OPENCL__) || defined(__OPENCLCPP__)
       if (!found && tracker.GetConstantMem()->calibObjects.dEdxCalibContainer) {
-        int pad = tracker.Param().tpcGeometry.LinearY2Pad(tracker.ISlice(), iRow, yUncorrected) + 0.5f;
-        if ((unsigned int)pad < tracker.Param().tpcGeometry.NPads(iRow) && tracker.GetConstantMem()->calibObjects.dEdxCalibContainer->isDead(tracker.ISlice(), iRow, pad)) {
+        unsigned int pad = CAMath::Float2UIntRn(tracker.Param().tpcGeometry.LinearY2Pad(tracker.ISlice(), iRow, yUncorrected));
+        if (pad < tracker.Param().tpcGeometry.NPads(iRow) && tracker.GetConstantMem()->calibObjects.dEdxCalibContainer->isDead(tracker.ISlice(), iRow, pad)) {
           r.mNMissed--;
           rowHit = CALINK_DEAD_CHANNEL;
         }

diff --git a/GPU/GPUTracking/TPCClusterFinder/ClusterAccumulator.cxx b/GPU/GPUTracking/TPCClusterFinder/ClusterAccumulator.cxx
@@ -21,7 +21,7 @@ using namespace GPUCA_NAMESPACE::gpu::tpccf;
 
 GPUd() bool ClusterAccumulator::toNative(const ChargePos& pos, Charge q, tpc::ClusterNative& cn, const GPUParam& param) const
 {
-  cn.qTot = mQtot + 0.5f; // Round to integer
+  cn.qTot = CAMath::Float2UIntRn(mQtot);
   if (cn.qTot <= param.rec.tpc.cfQTotCutoff) {
     return false;
   }

diff --git a/GPU/GPUTracking/dEdx/GPUdEdx.h b/GPU/GPUTracking/dEdx/GPUdEdx.h
@@ -133,7 +133,7 @@ GPUdnii() void GPUdEdx::fillCluster(float qtot, float qmax, int padRow, unsigned
   const float tanTheta = CAMath::Sqrt(tgl2 * sec2);
 
   // getting the topology correction
-  const int padPos = int(pad + 0.5f); // position of the pad is shifted half a pad ( pad=3 -> centre position of third pad)
+  const unsigned int padPos = CAMath::Float2UIntRn(pad); // position of the pad is shifted half a pad ( pad=3 -> centre position of third pad)
   const float absRelPad = CAMath::Abs(pad - padPos);
   const int region = param.tpcGeometry.GetRegion(padRow);
   z = CAMath::Abs(z);