Merge branch 'main' into improve_compilation_error_handling

cms-patatrack · Aug 4, 2024 · 3954883 · 3954883
2 parents 677bf4b + 9dac229
commit 3954883
Show file tree

Hide file tree

Showing 14 changed files with 202 additions and 1,115 deletions.
diff --git a/CLUEstering/CLUEstering.py b/CLUEstering/CLUEstering.py
@@ -262,7 +262,7 @@ def __init__(self, dc_: float, rhoc_: float, outlier_: float, ppbin: int = 10):
         self.elapsed_time = 0.
 
     def set_params(self, dc: float, rhoc: float,
-                   outlier: float, ppbin: int = 10) -> None:
+                   outlier: float, ppbin: int = 128) -> None:
         self.dc_ = dc
         self.rhoc = rhoc
         self.outlier = outlier
@@ -292,19 +292,19 @@ def _read_array(self, input_data: Union[list, np.ndarray]) -> None:
             if len(input_data) < 2 or len(input_data) > 11:
                 raise ValueError("Inadequate data. The supported dimensions are between" +
                                  "1 and 10.")
-            self.clust_data = clustering_data(np.asarray(input_data[:-1]).T,
-                                              np.copy(np.asarray(input_data[:-1]).T),
-                                              np.asarray(input_data[-1]),
+            self.clust_data = clustering_data(np.asarray(input_data[:-1], dtype=float).T,
+                                              np.copy(np.asarray(input_data[:-1], dtype=float).T),
+                                              np.asarray(input_data[-1], dtype=float),
                                               len(input_data[:-1]),
                                               len(input_data[-1]))
         # [[[x0, y0, z0, ...], [x1, y1, z1, ...], ...], [weights]]
         else:
             if len(input_data) != 2:
                 raise ValueError("Inadequate data. The data must contain a weight value" +
                                  "for each point.")
-            self.clust_data = clustering_data(np.asarray(input_data[0]),
-                                              np.copy(np.asarray(input_data[0])),
-                                              np.asarray(input_data[-1]),
+            self.clust_data = clustering_data(np.asarray(input_data[0], dtype=float),
+                                              np.copy(np.asarray(input_data[0], dtype=float)),
+                                              np.asarray(input_data[-1], dtype=float),
                                               len(input_data[0][0]),
                                               len(input_data[-1]))
 
@@ -329,7 +329,7 @@ def _read_string(self, input_data: str) -> Union[pd.DataFrame,None]:
 
         if not input_data.endswith('.csv'):
             raise ValueError('Wrong type of file. The file is not a csv file.')
-        df_ = pd.read_csv(input_data)
+        df_ = pd.read_csv(input_data, dtype=float)
         return df_
 
     def _read_dict_df(self, input_data: Union[dict, pd.DataFrame]) -> pd.DataFrame:
@@ -351,7 +351,7 @@ def _read_dict_df(self, input_data: Union[dict, pd.DataFrame]) -> pd.DataFrame:
             Dataframe containing the input data
         """
 
-        df_ = pd.DataFrame(input_data, copy=False)
+        df_ = pd.DataFrame(input_data, copy=False, dtype=float)
         return df_
 
     def _handle_dataframe(self, df_: pd.DataFrame) -> None:
@@ -1215,7 +1215,7 @@ def to_csv(self, output_folder: str, file_name: str) -> None:
         df_.to_csv(out_path,index=False)
 
 if __name__ == "__main__":
-    c = clusterer(0.4,5,1.)
+    c = clusterer(0.8, 5, 1.)
     c.read_data('./blob.csv')
     c.input_plotter()
     c.run_clue(backend="cpu serial", verbose=True)

diff --git a/CLUEstering/alpaka/BindingModules/binding_cpu.cc b/CLUEstering/alpaka/BindingModules/binding_cpu.cc
@@ -2,15 +2,11 @@
 #include <alpaka/alpaka.hpp>
 #include <vector>
 
-#include "../CLUE/CLUEAlgoAlpaka.h"
 #include "../CLUE/Run.h"
-#include "../DataFormats/Points.h"
-#include "../DataFormats/alpaka/PointsAlpaka.h"
 
 #include <pybind11/pybind11.h>
 #include <pybind11/stl.h>
 #include <pybind11/functional.h>
-#include <stdint.h>
 
 namespace alpaka_serial_sync {
   void listDevices(const std::string& backend) {
@@ -27,158 +23,53 @@ namespace alpaka_serial_sync {
     }
   }
 
+  template <typename Kernel>
   std::vector<std::vector<int>> mainRun(float dc,
                                         float rhoc,
                                         float outlier,
                                         int pPBin,
                                         const std::vector<std::vector<float>>& coords,
                                         const std::vector<float>& weights,
-                                        const FlatKernel& kernel,
+                                        const Kernel& kernel,
                                         int Ndim,
                                         size_t block_size,
                                         size_t device_id) {
-    auto const dev_acc = alpaka::getDevByIdx<Acc1D>(device_id);
+    const auto dev_acc = alpaka::getDevByIdx<Acc1D>(device_id);
 
     // Create the queue
     Queue queue_(dev_acc);
 
     // Running the clustering algorithm //
     switch (Ndim) {
       [[unlikely]] case (1):
-        return run1(
+        return run<1, Kernel>(
             dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_, block_size);
       [[likely]] case (2):
-        return run2(
+        return run<2, Kernel>(
             dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_, block_size);
       [[likely]] case (3):
-        return run3(
+        return run<3, Kernel>(
             dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_, block_size);
       [[unlikely]] case (4):
-        return run4(
+        return run<4, Kernel>(
             dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_, block_size);
       [[unlikely]] case (5):
-        return run5(
+        return run<5, Kernel>(
             dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_, block_size);
       [[unlikely]] case (6):
-        return run6(
+        return run<6, Kernel>(
             dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_, block_size);
       [[unlikely]] case (7):
-        return run7(
+        return run<7, Kernel>(
             dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_, block_size);
       [[unlikely]] case (8):
-        return run8(
+        return run<8, Kernel>(
             dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_, block_size);
       [[unlikely]] case (9):
-        return run9(
+        return run<9, Kernel>(
             dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_, block_size);
       [[unlikely]] case (10):
-        return run10(
-            dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_, block_size);
-      [[unlikely]] default:
-        std::cout << "This library only works up to 10 dimensions\n";
-        return {};
-    }
-  }
-
-  std::vector<std::vector<int>> mainRun(float dc,
-                                        float rhoc,
-                                        float outlier,
-                                        int pPBin,
-                                        const std::vector<std::vector<float>>& coords,
-                                        const std::vector<float>& weights,
-                                        const ExponentialKernel& kernel,
-                                        int Ndim,
-                                        size_t block_size,
-                                        size_t device_id) {
-    auto const dev_acc = alpaka::getDevByIdx<Acc1D>(device_id);
-
-    // Create the queue
-    Queue queue_(dev_acc);
-
-    // Running the clustering algorithm //
-    switch (Ndim) {
-      [[unlikely]] case (1):
-        return run1(
-            dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_, block_size);
-      [[likely]] case (2):
-        return run2(
-            dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_, block_size);
-      [[likely]] case (3):
-        return run3(
-            dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_, block_size);
-      [[unlikely]] case (4):
-        return run4(
-            dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_, block_size);
-      [[unlikely]] case (5):
-        return run5(
-            dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_, block_size);
-      [[unlikely]] case (6):
-        return run6(
-            dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_, block_size);
-      [[unlikely]] case (7):
-        return run7(
-            dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_, block_size);
-      [[unlikely]] case (8):
-        return run8(
-            dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_, block_size);
-      [[unlikely]] case (9):
-        return run9(
-            dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_, block_size);
-      [[unlikely]] case (10):
-        return run10(
-            dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_, block_size);
-      [[unlikely]] default:
-        std::cout << "This library only works up to 10 dimensions\n";
-        return {};
-    }
-  }
-
-  std::vector<std::vector<int>> mainRun(float dc,
-                                        float rhoc,
-                                        float outlier,
-                                        int pPBin,
-                                        const std::vector<std::vector<float>>& coords,
-                                        const std::vector<float>& weights,
-                                        const GaussianKernel& kernel,
-                                        int Ndim,
-                                        size_t block_size,
-                                        size_t device_id) {
-    auto const dev_acc = alpaka::getDevByIdx<Acc1D>(device_id);
-
-    // Create the queue
-    Queue queue_(dev_acc);
-
-    // Running the clustering algorithm //
-    switch (Ndim) {
-      [[unlikely]] case (1):
-        return run1(
-            dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_, block_size);
-      [[likely]] case (2):
-        return run2(
-            dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_, block_size);
-      [[likely]] case (3):
-        return run3(
-            dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_, block_size);
-      [[unlikely]] case (4):
-        return run4(
-            dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_, block_size);
-      [[unlikely]] case (5):
-        return run5(
-            dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_, block_size);
-      [[unlikely]] case (6):
-        return run6(
-            dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_, block_size);
-      [[unlikely]] case (7):
-        return run7(
-            dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_, block_size);
-      [[unlikely]] case (8):
-        return run8(
-            dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_, block_size);
-      [[unlikely]] case (9):
-        return run9(
-            dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_, block_size);
-      [[unlikely]] case (10):
-        return run10(
+        return run<10, Kernel>(
             dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_, block_size);
       [[unlikely]] default:
         std::cout << "This library only works up to 10 dimensions\n";
@@ -202,7 +93,7 @@ namespace alpaka_serial_sync {
                                   const FlatKernel&,
                                   int,
                                   size_t,
-                                  size_t>(&mainRun),
+                                  size_t>(&mainRun<FlatKernel>),
           "mainRun");
     m.def("mainRun",
           pybind11::overload_cast<float,
@@ -214,7 +105,7 @@ namespace alpaka_serial_sync {
                                   const ExponentialKernel&,
                                   int,
                                   size_t,
-                                  size_t>(&mainRun),
+                                  size_t>(&mainRun<ExponentialKernel>),
           "mainRun");
     m.def("mainRun",
           pybind11::overload_cast<float,
@@ -226,7 +117,7 @@ namespace alpaka_serial_sync {
                                   const GaussianKernel&,
                                   int,
                                   size_t,
-                                  size_t>(&mainRun),
+                                  size_t>(&mainRun<GaussianKernel>),
           "mainRun");
   }
 };  // namespace alpaka_serial_sync