Fixed (rolled back some of the earlier optimizations)

USCbiostats · Sep 28, 2023 · 8481f54
1 parent 60c05a8
commit 8481f54
Show file tree

Hide file tree

Showing 5 changed files with 38 additions and 65 deletions.
diff --git a/.vscode/tasks.json b/.vscode/tasks.json
@@ -7,9 +7,6 @@
             "args": [
                 "-fopenmp",
                 "-fdiagnostics-color=always",
-                "-ftree-vectorize", 
-                "-march=native",
-                "-ffast-math",
                 "-g",
                 "${file}",
                 "-o",

diff --git a/barry.hpp b/barry.hpp
@@ -3684,21 +3684,13 @@ inline BArrayDense<Cell_Type, Data_Type>::BArrayDense(
     const std::vector< size_t > & target,
     const std::vector< Cell_Type > & value,
     bool add
-) {
+) : N(N_), M(M_), el(N_ * M_, ZERO_CELL), el_rowsums(N_, ZERO_CELL), el_colsums(M_, ZERO_CELL) {
 
     if (source.size() != target.size())
         throw std::length_error("-source- and -target- don't match on length.");
     if (source.size() != value.size())
         throw std::length_error("-sorce- and -value- don't match on length.");
 
-    // Initializing
-    N = N_;
-    M = M_;
-
-    el.resize(N * M, ZERO_CELL);
-    el_rowsums.resize(N, ZERO_CELL);
-    el_colsums.resize(M, ZERO_CELL);
-
     // Writing the data
     for (size_t i = 0u; i < source.size(); ++i)
     {
@@ -3741,22 +3733,15 @@ inline BArrayDense<Cell_Type, Data_Type>:: BArrayDense(
     const std::vector< size_t > & source,
     const std::vector< size_t > & target,
     bool add
-) {
+) : N(N_), M(M_), el(N_ * M_, ZERO_CELL), el_rowsums(N_, ZERO_CELL), el_colsums(M_, ZERO_CELL) {
 
     std::vector< Cell_Type > value(source.size(), static_cast<Cell_Type>(1.0));
 
     if (source.size() != target.size())
         throw std::length_error("-source- and -target- don't match on length.");
     if (source.size() != value.size())
         throw std::length_error("-sorce- and -value- don't match on length.");
-
-    // Initializing
-    N = N_;
-    M = M_;
 
-    el.resize(N * M, ZERO_CELL);
-    el_rowsums.resize(N, ZERO_CELL);
-    el_colsums.resize(M, ZERO_CELL);
 
     // Writing the data
     for (size_t i = 0u; i < source.size(); ++i)
@@ -5385,7 +5370,7 @@ COUNTERS_TEMPLATE(void, add_counter)(
 )
 {
 
-    data.emplace_back(Counter<Array_Type,Data_Type>(
+    data.push_back(Counter<Array_Type,Data_Type>(
         count_fun_,
         init_fun_,
         hasher_fun_,
@@ -7482,41 +7467,40 @@ class Model {
  */
 
 inline double update_normalizing_constant(
-    const double * params,
+    const std::vector<double> & params,
     const double * support,
     size_t k,
     size_t n
 )
 {
-    std::vector< double > resv(n);
+    double res = 0.0;
 
     if (n > 1000u)
     {
 
+        std::vector< double > resv(n, 0.0);
+
         #if defined(__OPENMP) || defined(_OPENMP)
-        #pragma omp parallel for shared(resv)
+        #pragma omp parallel for shared(resv) firstprivate(params, n, k) 
         #elif defined(__GNUC__) && !defined(__clang__)
             #pragma GCC ivdep
         #endif
         for (size_t j = 0u; j < (k - 1u); ++j)
         {
 
-            double p = *(params + j);
-            double tmp = 0.0;
-            const double * support_n = support + i * k + 1u;
+            const double p = params[j];
 
             #if defined(__OPENMP) || defined(_OPENMP)
-            #pragma omp simd reduction(+:tmp)
+            #pragma omp simd 
             #elif defined(__GNUC__) && !defined(__clang__)
                 #pragma GCC ivdep
             #endif
             for (size_t i = 0u; i < n; ++i)
-                resv[i] += (*(support_n + j)) * p;
+                resv[i] += (*(support + i * k + 1u + j)) * p;
 
         }
 
-        // Accumulate resv to a double res
-        double res = 0.0;
+        // Accumulate resv to a double res        
         #if defined(__OPENMP) || defined(_OPENMP)
         #pragma omp simd reduction(+:res)
         #elif defined(__GNUC__) && !defined(__clang__)
@@ -7536,17 +7520,12 @@ inline double update_normalizing_constant(
             const double * support_n = support + i * k + 1u;
 
             for (size_t j = 0u; j < (k - 1u); ++j)
-                tmp += (*(support_n + j)) * (*(params + j));
+                tmp += (*(support_n + j)) * params[j];
 
-            resv[i] = std::exp(tmp BARRY_SAFE_EXP) * (*(support + i * k));
+            res += std::exp(tmp BARRY_SAFE_EXP) * (*(support + i * k));
 
         }
 
-        // Accumulate resv to a double res
-        double res = 0.0;
-        for (size_t i = 0u; i < n; ++i)
-            res += resv[i];
-
 
     }
 
@@ -8109,7 +8088,7 @@ inline double Model<Array_Type,Data_Counter_Type, Data_Rule_Type, Data_Rule_Dyn_
         size_t n = stats_support[idx].size() / k;
 
         normalizing_constants[idx] = update_normalizing_constant(
-            &params[0u], &stats_support[idx][0u], k, n
+            params, &stats_support[idx][0u], k, n
         );
 
         params_last[idx] = params;
@@ -8191,7 +8170,7 @@ inline double Model<Array_Type,Data_Counter_Type, Data_Rule_Type, Data_Rule_Dyn_
         size_t n = stats_support[loc].size() / k;
 
         normalizing_constants[loc] = update_normalizing_constant(
-            &params[0u], &stats_support[loc][0u], k, n
+            params, &stats_support[loc][0u], k, n
         );
 
         params_last[loc] = params;
@@ -8263,7 +8242,7 @@ inline double Model<Array_Type,Data_Counter_Type, Data_Rule_Type, Data_Rule_Dyn_
         size_t n = stats_support[loc].size() / k;
 
         normalizing_constants[loc] = update_normalizing_constant(
-            &params[0u], &stats_support[loc][0u], k, n
+            params, &stats_support[loc][0u], k, n
         );
 
         params_last[loc] = params;
@@ -8337,7 +8316,7 @@ inline double Model<Array_Type,Data_Counter_Type, Data_Rule_Type, Data_Rule_Dyn_
         size_t n = stats_support[loc].size() / k;
 
         normalizing_constants[loc] = update_normalizing_constant(
-            &params[0u], &stats_support[loc][0u], k, n
+            params, &stats_support[loc][0u], k, n
         );
 
         params_last[loc] = params;
@@ -8378,7 +8357,7 @@ inline double Model<Array_Type,Data_Counter_Type, Data_Rule_Type, Data_Rule_Dyn_
 
             first_calc_done[i] = true;
             normalizing_constants[i] = update_normalizing_constant(
-                &params[0u], &stats_support[i][0u], k, n
+                params, &stats_support[i][0u], k, n
             );
 
             params_last[i] = params;
@@ -8449,7 +8428,7 @@ inline double Model<Array_Type,Data_Counter_Type, Data_Rule_Type, Data_Rule_Dyn_
         size_t n = stats_support[id].size() / k;
 
         normalizing_constants[id] = update_normalizing_constant(
-            &params[0u], &stats_support[id][0u], k, n
+            params, &stats_support[id][0u], k, n
         );
 
         params_last[id] = params;
@@ -9542,8 +9521,8 @@ class NetCounterData {
 
     NetCounterData() : indices(0u), numbers(0u) {};
     NetCounterData(
-        const std::vector< size_t > indices_,
-        const std::vector< double > numbers_
+        const std::vector< size_t > & indices_,
+        const std::vector< double > & numbers_
     ): indices(indices_), numbers(numbers_) {};
 
     ~NetCounterData() {};

diff --git a/include/barry/counters/network.hpp b/include/barry/counters/network.hpp
@@ -61,8 +61,8 @@ class NetCounterData {
 
     NetCounterData() : indices(0u), numbers(0u) {};
     NetCounterData(
-        const std::vector< size_t > indices_,
-        const std::vector< double > numbers_
+        const std::vector< size_t > & indices_,
+        const std::vector< double > & numbers_
     ): indices(indices_), numbers(numbers_) {};
 
     ~NetCounterData() {};

diff --git a/include/barry/model-meat.hpp b/include/barry/model-meat.hpp
@@ -7,27 +7,28 @@
  */
 
 inline double update_normalizing_constant(
-    const double * params,
+    const std::vector<double> & params,
     const double * support,
     size_t k,
     size_t n
 )
 {
-    std::vector< double > resv(n);
     double res = 0.0;
 
     if (n > 1000u)
     {
 
+        std::vector< double > resv(n, 0.0);
+
         #if defined(__OPENMP) || defined(_OPENMP)
-        #pragma omp parallel for shared(resv)
+        #pragma omp parallel for shared(resv) firstprivate(params, n, k) 
         #elif defined(__GNUC__) && !defined(__clang__)
             #pragma GCC ivdep
         #endif
         for (size_t j = 0u; j < (k - 1u); ++j)
         {
 
-            double p = *(params + j);
+            const double p = params[j];
 
             #if defined(__OPENMP) || defined(_OPENMP)
             #pragma omp simd 
@@ -59,16 +60,12 @@ inline double update_normalizing_constant(
             const double * support_n = support + i * k + 1u;
 
             for (size_t j = 0u; j < (k - 1u); ++j)
-                tmp += (*(support_n + j)) * (*(params + j));
+                tmp += (*(support_n + j)) * params[j];
 
-            resv[i] = std::exp(tmp BARRY_SAFE_EXP) * (*(support + i * k));
+            res += std::exp(tmp BARRY_SAFE_EXP) * (*(support + i * k));
 
         }
 
-        // Accumulate resv to a double res
-        for (size_t i = 0u; i < n; ++i)
-            res += resv[i];
-
 
     }
 
@@ -631,7 +628,7 @@ inline double Model<Array_Type,Data_Counter_Type, Data_Rule_Type, Data_Rule_Dyn_
         size_t n = stats_support[idx].size() / k;
 
         normalizing_constants[idx] = update_normalizing_constant(
-            &params[0u], &stats_support[idx][0u], k, n
+            params, &stats_support[idx][0u], k, n
         );
 
         params_last[idx] = params;
@@ -713,7 +710,7 @@ inline double Model<Array_Type,Data_Counter_Type, Data_Rule_Type, Data_Rule_Dyn_
         size_t n = stats_support[loc].size() / k;
 
         normalizing_constants[loc] = update_normalizing_constant(
-            &params[0u], &stats_support[loc][0u], k, n
+            params, &stats_support[loc][0u], k, n
         );
 
         params_last[loc] = params;
@@ -785,7 +782,7 @@ inline double Model<Array_Type,Data_Counter_Type, Data_Rule_Type, Data_Rule_Dyn_
         size_t n = stats_support[loc].size() / k;
 
         normalizing_constants[loc] = update_normalizing_constant(
-            &params[0u], &stats_support[loc][0u], k, n
+            params, &stats_support[loc][0u], k, n
         );
 
         params_last[loc] = params;
@@ -859,7 +856,7 @@ inline double Model<Array_Type,Data_Counter_Type, Data_Rule_Type, Data_Rule_Dyn_
         size_t n = stats_support[loc].size() / k;
 
         normalizing_constants[loc] = update_normalizing_constant(
-            &params[0u], &stats_support[loc][0u], k, n
+            params, &stats_support[loc][0u], k, n
         );
 
         params_last[loc] = params;
@@ -900,7 +897,7 @@ inline double Model<Array_Type,Data_Counter_Type, Data_Rule_Type, Data_Rule_Dyn_
 
             first_calc_done[i] = true;
             normalizing_constants[i] = update_normalizing_constant(
-                &params[0u], &stats_support[i][0u], k, n
+                params, &stats_support[i][0u], k, n
             );
 
             params_last[i] = params;
@@ -971,7 +968,7 @@ inline double Model<Array_Type,Data_Counter_Type, Data_Rule_Type, Data_Rule_Dyn_
         size_t n = stats_support[id].size() / k;
 
         normalizing_constants[id] = update_normalizing_constant(
-            &params[0u], &stats_support[id][0u], k, n
+            params, &stats_support[id][0u], k, n
         );
 
         params_last[id] = params;

diff --git a/tests/Makefile b/tests/Makefile
@@ -5,7 +5,7 @@ BFILES=barr*.gcov geese-*.gcov flock-*.gcov \
        cell-*.gcov powerset-*.gcov support-*.gcov \
        statscounter*.gcov statsdb*.gcov
 
-CPPFLAGS= -std=c++17 -fopenmp -DBARRY_USE_OPENMP -Wall -pedantic -ftree-vectorize -march=native -ffast-math -Wbool-operation # only works with g++10
+CPPFLAGS= -std=c++17 -fopenmp -Wall -pedantic -march=native -Wbool-operation # only works with g++10
 
 CPPFLAGSV= -std=c++17 -Wall -pedantic