From 2efefadaf22da9634fa9728e3139bfa287220a5f Mon Sep 17 00:00:00 2001 From: "George G. Vega Yon" Date: Wed, 27 Sep 2023 15:55:28 -0600 Subject: [PATCH 01/14] More explicit omp instructions --- barry.hpp | 110 +++++++++++++++++--------- include/barry/barry-configuration.hpp | 2 +- include/barry/barry.hpp | 6 +- include/barry/model-meat.hpp | 32 +++++++- 4 files changed, 105 insertions(+), 45 deletions(-) diff --git a/barry.hpp b/barry.hpp index 1cb3ef644..f35072c0a 100644 --- a/barry.hpp +++ b/barry.hpp @@ -17,8 +17,12 @@ #include #include -#ifdef BARRY_USE_OMP +#ifdef __OPENMP #include + +// Set the number of threads to match the number of cores +// in the machine + #endif #ifndef BARRY_HPP @@ -113,7 +117,7 @@ namespace barry { #define BARRY_MAX_NUM_ELEMENTS static_cast< size_t >(std::numeric_limits< size_t >::max() /2u) #endif -#ifdef BARRY_USE_OMP +#ifdef __OPENMP #define BARRY_WITH_OMP #include #endif @@ -3171,9 +3175,9 @@ class BArrayDense { void rm_cell(size_t i, size_t j, bool check_bounds = true, bool check_exists = true); - void insert_cell(size_t i, size_t j, const Cell< Cell_Type > & v, bool check_bounds, bool check_exists); + void insert_cell(size_t i, size_t j, const Cell< Cell_Type > & v, bool check_bounds, bool); // void insert_cell(size_t i, size_t j, Cell< Cell_Type > && v, bool check_bounds, bool check_exists); - void insert_cell(size_t i, size_t j, Cell_Type v, bool check_bounds, bool check_exists); + void insert_cell(size_t i, size_t j, Cell_Type v, bool check_bounds, bool); void swap_cells( size_t i0, size_t j0, size_t i1, size_t j1, bool check_bounds = true, @@ -3795,13 +3799,16 @@ inline BArrayDense:: BArrayDense( ) : N(Array_.N), M(Array_.M){ // Dimensions - el.resize(0u); - el_rowsums.resize(0u); - el_colsums.resize(0u); + el = Array_.el; + el_rowsums = Array_.el_rowsums; + el_colsums = Array_.el_colsums; + // el.resize(0u); + // el_rowsums.resize(0u); + // el_colsums.resize(0u); - std::copy(Array_.el.begin(), Array_.el.end(), std::back_inserter(el)); - std::copy(Array_.el_rowsums.begin(), Array_.el_rowsums.end(), std::back_inserter(el_rowsums)); - std::copy(Array_.el_colsums.begin(), Array_.el_colsums.end(), std::back_inserter(el_colsums)); + // std::copy(Array_.el.begin(), Array_.el.end(), std::back_inserter(el)); + // std::copy(Array_.el_rowsums.begin(), Array_.el_rowsums.end(), std::back_inserter(el_rowsums)); + // std::copy(Array_.el_colsums.begin(), Array_.el_colsums.end(), std::back_inserter(el_colsums)); // this->NCells = Array_.NCells; this->visited = Array_.visited; @@ -3838,14 +3845,17 @@ inline BArrayDense & BArrayDense::ope if (this != &Array_) { - el.resize(0u); - el_rowsums.resize(0u); - el_colsums.resize(0u); + el = Array_.el; + el_rowsums = Array_.el_rowsums; + el_colsums = Array_.el_colsums; + // el.resize(0u); + // el_rowsums.resize(0u); + // el_colsums.resize(0u); - // Entries - std::copy(Array_.el.begin(), Array_.el.end(), std::back_inserter(el)); - std::copy(Array_.el_rowsums.begin(), Array_.el_rowsums.end(), std::back_inserter(el_rowsums)); - std::copy(Array_.el_colsums.begin(), Array_.el_colsums.end(), std::back_inserter(el_colsums)); + // // Entries + // std::copy(Array_.el.begin(), Array_.el.end(), std::back_inserter(el)); + // std::copy(Array_.el_rowsums.begin(), Array_.el_rowsums.end(), std::back_inserter(el_rowsums)); + // std::copy(Array_.el_colsums.begin(), Array_.el_colsums.end(), std::back_inserter(el_colsums)); // this->NCells = Array_.NCells; @@ -4048,9 +4058,10 @@ inline std::vector< Cell_Type > BArrayDense::get_row_vec ( if (check_bounds) out_of_range(i, 0u); - std::vector< Cell_Type > ans(ncol(), static_cast< Cell_Type >(false)); + std::vector< Cell_Type > ans; + ans.reserve(ncol()); for (size_t j = 0u; j < M; ++j) - ans[j] = el[POS(i, j)]; + ans.push_back(el[POS(i, j)]); return ans; @@ -4067,7 +4078,7 @@ template inline void BArrayDenseat(j) = el[POS(i, j)]; + x->operator[](j) = el[POS(i, j)]; } @@ -4080,9 +4091,10 @@ template inline std::vector< Cell_Type > if (check_bounds) out_of_range(0u, i); - std::vector< Cell_Type > ans(nrow(), static_cast< Cell_Type >(false)); + std::vector< Cell_Type > ans; + ans.reserve(nrow()); for (size_t j = 0u; j < N; ++j) - ans[j] = el[POS(j, i)]; + ans.push_back(el[POS(j, i)]); return ans; @@ -4099,7 +4111,7 @@ template inline void BArrayDenseat(j) = el[POS(j, i)];//this->get_cell(iter->first, i, false); + x->operator[](j) = el[POS(j, i)];//this->get_cell(iter->first, i, false); } template @@ -4297,7 +4309,7 @@ inline void BArrayDense::rm_cell ( if (check_bounds) out_of_range(i,j); - BARRY_UNUSED(check_exists) + // BARRY_UNUSED(check_exists) // Remove the pointer first (so it wont point to empty) el_rowsums[i] -= el[POS(i, j)]; @@ -4314,13 +4326,11 @@ inline void BArrayDense::insert_cell ( size_t j, const Cell< Cell_Type> & v, bool check_bounds, - bool check_exists + bool ) { if (check_bounds) out_of_range(i,j); - - BARRY_UNUSED(check_exists) if (el[POS(i,j)] == BARRY_ZERO_DENSE) { @@ -4350,13 +4360,11 @@ template inline void BArrayDense resv(n); #ifdef __OPENMP - #pragma omp simd reduction(+:res) + omp_set_num_threads(omp_get_num_procs()); + #pragma omp parallel for #else #ifdef __GNUC__ #ifndef __clang__ @@ -7494,13 +7502,36 @@ inline double update_normalizing_constant( double tmp = 0.0; const double * support_n = support + i * k + 1u; + #ifdef __OPENMP + #pragma omp simd reduction(+:tmp) + #else + #ifdef __GNUC__ + #ifndef __clang__ + #pragma GCC ivdep + #endif + #endif + #endif for (size_t j = 0u; j < (k - 1u); ++j) tmp += (*(support_n + j)) * (*(params + j)); - res += std::exp(tmp BARRY_SAFE_EXP) * (*(support + i * k)); + resv[i] = std::exp(tmp BARRY_SAFE_EXP) * (*(support + i * k)); } + // Accumulate resv to a double res + double res = 0.0; + #ifdef __OPENMP + #pragma omp parallel for simd reduction(+:res) + #else + #ifdef __GNUC__ + #ifndef __clang__ + #pragma GCC ivdep + #endif + #endif + #endif + for (size_t i = 0u; i < n; ++i) + res += resv[i]; + #ifdef BARRY_DEBUG if (std::isnan(res)) throw std::overflow_error( @@ -7538,6 +7569,7 @@ inline double likelihood_( // Computing the numerator #ifdef __OPENMP + omp_set_num_threads(omp_get_num_procs()); #pragma omp simd reduction(+:numerator) #else #ifdef __GNUC__ @@ -7546,13 +7578,14 @@ inline double likelihood_( #endif #endif #endif + #pragma code_align(32) for (size_t j = 0u; j < params.size(); ++j) numerator += *(stats_target + j) * params[j]; if (!log_) - numerator = exp(numerator BARRY_SAFE_EXP); + numerator = std::exp(numerator BARRY_SAFE_EXP); else - return numerator BARRY_SAFE_EXP - log(normalizing_constant); + return numerator BARRY_SAFE_EXP - std::log(normalizing_constant); double ans = numerator/normalizing_constant; @@ -8772,8 +8805,7 @@ MODEL_TEMPLATE(Array_Type, sample)( } else { probs.resize(pset_arrays[a].size()); - std::vector< double > temp_stats; - temp_stats.reserve(params.size()); + std::vector< double > temp_stats(params.size()); const std::vector< double > & stats = pset_stats[a]; int i_matches = -1; @@ -8782,7 +8814,7 @@ MODEL_TEMPLATE(Array_Type, sample)( // Filling out the parameters for (auto p = 0u; p < params.size(); ++p) - temp_stats.push_back(stats[array * k + p]); + temp_stats[p] = stats[array * k + p]; probs[array] = this->likelihood(params, temp_stats, i, false); cumprob += probs[array]; diff --git a/include/barry/barry-configuration.hpp b/include/barry/barry-configuration.hpp index 4a3ce97f5..f827f3384 100644 --- a/include/barry/barry-configuration.hpp +++ b/include/barry/barry-configuration.hpp @@ -55,7 +55,7 @@ #define BARRY_MAX_NUM_ELEMENTS static_cast< size_t >(std::numeric_limits< size_t >::max() /2u) #endif -#ifdef BARRY_USE_OMP +#ifdef __OPENMP #define BARRY_WITH_OMP #include #endif diff --git a/include/barry/barry.hpp b/include/barry/barry.hpp index eb3d2f5fd..7cff9f8c7 100644 --- a/include/barry/barry.hpp +++ b/include/barry/barry.hpp @@ -17,8 +17,12 @@ #include #include -#ifdef BARRY_USE_OMP +#ifdef __OPENMP #include + +// Set the number of threads to match the number of cores +// in the machine + #endif #ifndef BARRY_HPP diff --git a/include/barry/model-meat.hpp b/include/barry/model-meat.hpp index 75f9f9886..9da2ead0f 100644 --- a/include/barry/model-meat.hpp +++ b/include/barry/model-meat.hpp @@ -13,11 +13,11 @@ inline double update_normalizing_constant( size_t n ) { - - double res = 0.0; + std::vector< double > resv(n); #ifdef __OPENMP - #pragma omp simd reduction(+:res) + omp_set_num_threads(omp_get_num_procs()); + #pragma omp parallel for shared(resv) #else #ifdef __GNUC__ #ifndef __clang__ @@ -31,13 +31,36 @@ inline double update_normalizing_constant( double tmp = 0.0; const double * support_n = support + i * k + 1u; + #ifdef __OPENMP + #pragma omp simd reduction(+:tmp) + #else + #ifdef __GNUC__ + #ifndef __clang__ + #pragma GCC ivdep + #endif + #endif + #endif for (size_t j = 0u; j < (k - 1u); ++j) tmp += (*(support_n + j)) * (*(params + j)); - res += std::exp(tmp BARRY_SAFE_EXP) * (*(support + i * k)); + resv[i] = std::exp(tmp BARRY_SAFE_EXP) * (*(support + i * k)); } + // Accumulate resv to a double res + double res = 0.0; + #ifdef __OPENMP + #pragma omp parallel for simd reduction(+:res) + #else + #ifdef __GNUC__ + #ifndef __clang__ + #pragma GCC ivdep + #endif + #endif + #endif + for (size_t i = 0u; i < n; ++i) + res += resv[i]; + #ifdef BARRY_DEBUG if (std::isnan(res)) throw std::overflow_error( @@ -75,6 +98,7 @@ inline double likelihood_( // Computing the numerator #ifdef __OPENMP + omp_set_num_threads(omp_get_num_procs()); #pragma omp simd reduction(+:numerator) #else #ifdef __GNUC__ From bf2ace68c62d38e883025e6165810c60cef89a89 Mon Sep 17 00:00:00 2001 From: "George G. Vega Yon" Date: Wed, 27 Sep 2023 16:28:59 -0600 Subject: [PATCH 02/14] OpenMP was not being used! --- include/barry/barry-configuration.hpp | 2 +- include/barry/barry.hpp | 2 +- include/barry/model-meat.hpp | 14 ++++++++------ include/barry/support-meat.hpp | 4 ++-- include/barry/typedefs.hpp | 2 +- 5 files changed, 13 insertions(+), 11 deletions(-) diff --git a/include/barry/barry-configuration.hpp b/include/barry/barry-configuration.hpp index f827f3384..b85645b7c 100644 --- a/include/barry/barry-configuration.hpp +++ b/include/barry/barry-configuration.hpp @@ -55,7 +55,7 @@ #define BARRY_MAX_NUM_ELEMENTS static_cast< size_t >(std::numeric_limits< size_t >::max() /2u) #endif -#ifdef __OPENMP +#if defined(__OPENMP) || defined(_OPENMP) #define BARRY_WITH_OMP #include #endif diff --git a/include/barry/barry.hpp b/include/barry/barry.hpp index 7cff9f8c7..74b42a033 100644 --- a/include/barry/barry.hpp +++ b/include/barry/barry.hpp @@ -17,7 +17,7 @@ #include #include -#ifdef __OPENMP +#if defined(__OPENMP) || defined(_OPENMP) #include // Set the number of threads to match the number of cores diff --git a/include/barry/model-meat.hpp b/include/barry/model-meat.hpp index 9da2ead0f..08714277a 100644 --- a/include/barry/model-meat.hpp +++ b/include/barry/model-meat.hpp @@ -15,8 +15,7 @@ inline double update_normalizing_constant( { std::vector< double > resv(n); - #ifdef __OPENMP - omp_set_num_threads(omp_get_num_procs()); + #if defined(__OPENMP) || defined(_OPENMP) #pragma omp parallel for shared(resv) #else #ifdef __GNUC__ @@ -31,7 +30,7 @@ inline double update_normalizing_constant( double tmp = 0.0; const double * support_n = support + i * k + 1u; - #ifdef __OPENMP + #if defined(__OPENMP) || defined(_OPENMP) #pragma omp simd reduction(+:tmp) #else #ifdef __GNUC__ @@ -49,7 +48,7 @@ inline double update_normalizing_constant( // Accumulate resv to a double res double res = 0.0; - #ifdef __OPENMP + #if defined(__OPENMP) || defined(_OPENMP) #pragma omp parallel for simd reduction(+:res) #else #ifdef __GNUC__ @@ -97,8 +96,7 @@ inline double likelihood_( double numerator = 0.0; // Computing the numerator - #ifdef __OPENMP - omp_set_num_threads(omp_get_num_procs()); + #if defined(__OPENMP) || defined(_OPENMP) #pragma omp simd reduction(+:numerator) #else #ifdef __GNUC__ @@ -595,6 +593,10 @@ MODEL_TEMPLATE(double, likelihood)( const size_t & i, bool as_log ) { + + #if defined(__OPENMP) || defined(_OPENMP) + omp_set_num_threads(omp_get_num_procs()); + #endif // Checking if the index exists if (i >= arrays2support.size()) diff --git a/include/barry/support-meat.hpp b/include/barry/support-meat.hpp index 27e892844..e6f4c7f03 100644 --- a/include/barry/support-meat.hpp +++ b/include/barry/support-meat.hpp @@ -244,7 +244,7 @@ inline void Support 0u) { - #ifdef __OPENMP + #if defined(__OPENMP) || defined(_OPENMP) #pragma omp simd #endif for (size_t n = 0u; n < n_counters; ++n) @@ -367,7 +367,7 @@ inline void Support 0u) { - #ifdef __OPENMP + #if defined(__OPENMP) || defined(_OPENMP) #pragma omp simd #endif for (size_t n = 0u; n < n_counters; ++n) diff --git a/include/barry/typedefs.hpp b/include/barry/typedefs.hpp index bb4678726..28ae44b2c 100644 --- a/include/barry/typedefs.hpp +++ b/include/barry/typedefs.hpp @@ -294,7 +294,7 @@ inline double vec_inner_prod( ) { double res = 0.0; - #ifdef __OPENMP + #if defined(__OPENMP) || defined(_OPENMP) #pragma omp simd reduction(+:res) #else #ifdef __GNUC__ From c57353c6f1584861c09a93ce9d00c988c696a2a5 Mon Sep 17 00:00:00 2001 From: "George G. Vega Yon" Date: Wed, 27 Sep 2023 16:50:41 -0600 Subject: [PATCH 03/14] Adding the cores arg --- include/barry/barry-macros.hpp | 7 + include/barry/model-bones.hpp | 15 +- include/barry/model-meat.hpp | 173 +++++++++++------- include/barry/models/geese/flock-bones.hpp | 7 +- include/barry/models/geese/flock-meat.hpp | 3 +- include/barry/models/geese/geese-bones.hpp | 3 +- .../models/geese/geese-meat-likelihood.hpp | 6 +- 7 files changed, 140 insertions(+), 74 deletions(-) diff --git a/include/barry/barry-macros.hpp b/include/barry/barry-macros.hpp index 1107911e1..20b261e3b 100644 --- a/include/barry/barry-macros.hpp +++ b/include/barry/barry-macros.hpp @@ -9,4 +9,11 @@ #define BARRY_UNUSED(expr) do { (void)(expr); } while (0); +#if defined(_OPENMP) || defined(__OPENMP) +#define BARRY_NCORES_ARG(default) size_t ncores default +#else +#define BARRY_NCORES_ARG(default) size_t +#endif + + #endif \ No newline at end of file diff --git a/include/barry/model-bones.hpp b/include/barry/model-bones.hpp index 270f0c2c7..80ecfbac6 100644 --- a/include/barry/model-bones.hpp +++ b/include/barry/model-bones.hpp @@ -240,33 +240,38 @@ class Model { double likelihood( const std::vector & params, const size_t & i, - bool as_log = false + bool as_log = false, + BARRY_NCORES_ARG(=2) ); double likelihood( const std::vector & params, const Array_Type & Array_, int i = -1, - bool as_log = false + bool as_log = false, + BARRY_NCORES_ARG(=2) ); double likelihood( const std::vector & params, const std::vector & target_, const size_t & i, - bool as_log = false + bool as_log = false, + BARRY_NCORES_ARG(=2) ); double likelihood( const std::vector & params, const double * target_, const size_t & i, - bool as_log = false + bool as_log = false, + BARRY_NCORES_ARG(=2) ); double likelihood_total( const std::vector & params, - bool as_log = false + bool as_log = false, + BARRY_NCORES_ARG(=2) ); ///@} diff --git a/include/barry/model-meat.hpp b/include/barry/model-meat.hpp index 08714277a..202e2bbce 100644 --- a/include/barry/model-meat.hpp +++ b/include/barry/model-meat.hpp @@ -144,15 +144,6 @@ inline double likelihood_( } -#define MODEL_TYPE() Model - -#define MODEL_TEMPLATE_ARGS() - -#define MODEL_TEMPLATE(a,b) \ - template MODEL_TEMPLATE_ARGS() inline a MODEL_TYPE()::b - template < typename Array_Type, typename Data_Counter_Type, @@ -324,20 +315,23 @@ inline Model & } -MODEL_TEMPLATE(void, store_psets)() noexcept { +template +inline void Model:: store_psets() noexcept { // if (with_pset) // throw std::logic_error("Powerset storage alreay activated."); with_pset = true; return; } -MODEL_TEMPLATE(std::vector< double >, gen_key)( +template +inline std::vector< double > Model:: gen_key( const Array_Type & Array_ ) { return this->counters->gen_hash(Array_); } -MODEL_TEMPLATE(void, add_counter)( +template +inline void Model:: add_counter( Counter & counter ) { @@ -345,7 +339,8 @@ MODEL_TEMPLATE(void, add_counter)( return; } -MODEL_TEMPLATE(void, add_counter)( +template +inline void Model:: add_counter( Counter_fun_type count_fun_, Counter_fun_type init_fun_, Data_Counter_Type data_ @@ -361,7 +356,8 @@ MODEL_TEMPLATE(void, add_counter)( } -MODEL_TEMPLATE(void, set_counters)( +template +inline void Model:: set_counters( Counters * counters_ ) { @@ -378,7 +374,8 @@ MODEL_TEMPLATE(void, set_counters)( } -MODEL_TEMPLATE(void, add_hasher)( +template +inline void Model:: add_hasher( Hasher_fun_type fun_ ) { @@ -388,7 +385,8 @@ MODEL_TEMPLATE(void, add_hasher)( //////////////////////////////////////////////////////////////////////////////// -MODEL_TEMPLATE(void, add_rule)( +template +inline void Model:: add_rule( Rule & rules ) { @@ -397,7 +395,8 @@ MODEL_TEMPLATE(void, add_rule)( } -MODEL_TEMPLATE(void, set_rules)( +template +inline void Model:: set_rules( Rules * rules_ ) { @@ -414,7 +413,8 @@ MODEL_TEMPLATE(void, set_rules)( //////////////////////////////////////////////////////////////////////////////// -MODEL_TEMPLATE(void, add_rule_dyn)( +template +inline void Model:: add_rule_dyn( Rule & rules_ ) { @@ -422,7 +422,8 @@ MODEL_TEMPLATE(void, add_rule_dyn)( return; } -MODEL_TEMPLATE(void, add_rule_dyn)( +template +inline void Model:: add_rule_dyn( Rule_fun_type rule_fun_, Data_Rule_Dyn_Type data_ ) { @@ -436,7 +437,8 @@ MODEL_TEMPLATE(void, add_rule_dyn)( } -MODEL_TEMPLATE(void, set_rules_dyn)( +template +inline void Model:: set_rules_dyn( Rules * rules_ ) { @@ -450,10 +452,10 @@ MODEL_TEMPLATE(void, set_rules_dyn)( } - ///////////////////////////////////////////////////////////////////////////////////////////////////////////////// -MODEL_TEMPLATE(size_t, add_array)( +template +inline size_t Model:: add_array( const Array_Type & Array_, bool force_new ) { @@ -588,14 +590,16 @@ MODEL_TEMPLATE(size_t, add_array)( } -MODEL_TEMPLATE(double, likelihood)( +template +inline double Model::likelihood( const std::vector & params, const size_t & i, - bool as_log + bool as_log, + BARRY_NCORES_ARG() ) { #if defined(__OPENMP) || defined(_OPENMP) - omp_set_num_threads(omp_get_num_procs()); + omp_set_num_threads(ncores); #endif // Checking if the index exists @@ -635,12 +639,18 @@ MODEL_TEMPLATE(double, likelihood)( } -MODEL_TEMPLATE(double, likelihood)( +template +inline double Model::likelihood( const std::vector & params, const Array_Type & Array_, int i, - bool as_log + bool as_log, + BARRY_NCORES_ARG() ) { + + #if defined(__OPENMP) || defined(_OPENMP) + omp_set_num_threads(ncores); + #endif // Key of the support set to use int loc; @@ -715,12 +725,18 @@ MODEL_TEMPLATE(double, likelihood)( } -MODEL_TEMPLATE(double, likelihood)( +template +inline double Model::likelihood( const std::vector & params, const std::vector & target_, const size_t & i, - bool as_log + bool as_log, + BARRY_NCORES_ARG() ) { + + #if defined(__OPENMP) || defined(_OPENMP) + omp_set_num_threads(ncores); + #endif // Checking if the index exists if (i >= arrays2support.size()) @@ -778,12 +794,18 @@ MODEL_TEMPLATE(double, likelihood)( } -MODEL_TEMPLATE(double, likelihood)( +template +inline double Model::likelihood( const std::vector & params, const double * target_, const size_t & i, - bool as_log + bool as_log, + BARRY_NCORES_ARG() ) { + + #if defined(__OPENMP) || defined(_OPENMP) + omp_set_num_threads(ncores); + #endif // Checking if the index exists if (i >= arrays2support.size()) @@ -848,10 +870,16 @@ MODEL_TEMPLATE(double, likelihood)( } -MODEL_TEMPLATE(double, likelihood_total)( +template +inline double Model::likelihood_total( const std::vector & params, - bool as_log + bool as_log, + BARRY_NCORES_ARG() ) { + + #if defined(__OPENMP) || defined(_OPENMP) + omp_set_num_threads(ncores); + #endif size_t params_last_size = params_last.size(); @@ -914,7 +942,8 @@ MODEL_TEMPLATE(double, likelihood_total)( } -MODEL_TEMPLATE(double, get_norm_const)( +template +inline double Model:: get_norm_const( const std::vector & params, const size_t & i, bool as_log @@ -950,7 +979,8 @@ MODEL_TEMPLATE(double, get_norm_const)( } -MODEL_TEMPLATE(const std::vector< Array_Type > *, get_pset)( +template +inline const std::vector< Array_Type > * Model:: get_pset( const size_t & i ) { @@ -962,7 +992,8 @@ MODEL_TEMPLATE(const std::vector< Array_Type > *, get_pset)( } -MODEL_TEMPLATE(const std::vector< double > *, get_pset_stats)( +template +inline const std::vector< double > * Model:: get_pset_stats( const size_t & i ) { @@ -973,7 +1004,8 @@ MODEL_TEMPLATE(const std::vector< double > *, get_pset_stats)( } -MODEL_TEMPLATE(void, print_stats)(size_t i) const +template +inline void Model:: print_stats(size_t i) const { if (i >= arrays2support.size()) @@ -1063,14 +1095,16 @@ inline void Model +inline size_t Model:: size() const noexcept { // INITIALIZED() return this->stats_target.size(); } -MODEL_TEMPLATE(size_t, size_unique)() const noexcept +template +inline size_t Model:: size_unique() const noexcept { // INITIALIZED() @@ -1078,7 +1112,8 @@ MODEL_TEMPLATE(size_t, size_unique)() const noexcept } -MODEL_TEMPLATE(size_t, nterms)() const noexcept +template +inline size_t Model:: nterms() const noexcept { if (transform_model_fun) @@ -1088,21 +1123,24 @@ MODEL_TEMPLATE(size_t, nterms)() const noexcept } -MODEL_TEMPLATE(size_t, nrules)() const noexcept +template +inline size_t Model:: nrules() const noexcept { return this->rules->size(); } -MODEL_TEMPLATE(size_t, nrules_dyn)() const noexcept +template +inline size_t Model:: nrules_dyn() const noexcept { return this->rules_dyn->size(); } -MODEL_TEMPLATE(size_t, support_size)() const noexcept +template +inline size_t Model:: support_size() const noexcept { // INITIALIZED() @@ -1114,7 +1152,8 @@ MODEL_TEMPLATE(size_t, support_size)() const noexcept } -MODEL_TEMPLATE(std::vector< std::string >, colnames)() const +template +inline std::vector< std::string > Model:: colnames() const { if (transform_model_fun) @@ -1209,7 +1248,8 @@ inline Array_Type Model +inline Array_Type Model:: sample( const Array_Type & Array_, const std::vector & params ) { @@ -1377,7 +1417,8 @@ MODEL_TEMPLATE(Array_Type, sample)( } -MODEL_TEMPLATE(double, conditional_prob)( +template +inline double Model:: conditional_prob( const Array_Type & Array_, const std::vector< double > & params, size_t i, @@ -1409,59 +1450,67 @@ MODEL_TEMPLATE(double, conditional_prob)( } -MODEL_TEMPLATE(const std::mt19937 *, get_rengine)() const { +template +inline const std::mt19937 * Model:: get_rengine() const { return this->rengine; } -template MODEL_TEMPLATE_ARGS() -inline Counters * MODEL_TYPE()::get_counters() { +template +inline Counters * Model::get_counters() { return this->counters; } -template MODEL_TEMPLATE_ARGS() -inline Rules * MODEL_TYPE()::get_rules() { +template +inline Rules * Model::get_rules() { return this->rules; } -template MODEL_TEMPLATE_ARGS() -inline Rules * MODEL_TYPE()::get_rules_dyn() { +template +inline Rules * Model::get_rules_dyn() { return this->rules_dyn; } -template MODEL_TEMPLATE_ARGS() +template inline Support * -MODEL_TYPE()::get_support_fun() { +Model::get_support_fun() { return &this->support_fun; } -MODEL_TEMPLATE(std::vector< std::vector< double > > *, get_stats_target)() +template +inline std::vector< std::vector< double > > * Model:: get_stats_target() { return &stats_target; } -MODEL_TEMPLATE(std::vector< std::vector< double > > *, get_stats_support)() +template +inline std::vector< std::vector< double > > * Model:: get_stats_support() { return &stats_support; } -MODEL_TEMPLATE(std::vector< size_t > *, get_arrays2support)() +template +inline std::vector< size_t > * Model:: get_arrays2support() { return &arrays2support; } -MODEL_TEMPLATE(std::vector< std::vector< Array_Type > > *, get_pset_arrays)() { +template +inline std::vector< std::vector< Array_Type > > * Model:: get_pset_arrays() { return &pset_arrays; } -MODEL_TEMPLATE(std::vector< std::vector > *, get_pset_stats)() { +template +inline std::vector< std::vector > * Model:: get_pset_stats() { return &pset_stats; } -MODEL_TEMPLATE(std::vector< std::vector > *, get_pset_probs)() { +template +inline std::vector< std::vector > * Model:: get_pset_probs() { return &pset_probs; } -MODEL_TEMPLATE(void, set_transform_model)( +template +inline void Model:: set_transform_model( std::function(double *,size_t)> fun, std::vector< std::string > names ) diff --git a/include/barry/models/geese/flock-bones.hpp b/include/barry/models/geese/flock-bones.hpp index 0628ee580..581a85e1d 100644 --- a/include/barry/models/geese/flock-bones.hpp +++ b/include/barry/models/geese/flock-bones.hpp @@ -15,8 +15,8 @@ class Flock { public: std::vector< Geese > dat; - size_t nfunctions = 0u; - bool initialized = false; + size_t nfunctions = 0u; + bool initialized = false; // Common components std::mt19937 rengine; @@ -69,7 +69,8 @@ class Flock { double likelihood_joint( const std::vector< double > & par, bool as_log = false, - bool use_reduced_sequence = true + bool use_reduced_sequence = true, + BARRY_NCORES_ARG(=1) ); /** diff --git a/include/barry/models/geese/flock-meat.hpp b/include/barry/models/geese/flock-meat.hpp index 8dd680aeb..901981ab1 100644 --- a/include/barry/models/geese/flock-meat.hpp +++ b/include/barry/models/geese/flock-meat.hpp @@ -138,7 +138,8 @@ inline PhyloModel * Flock::get_model() inline double Flock::likelihood_joint( const std::vector< double > & par, bool as_log, - bool use_reduced_sequence + bool use_reduced_sequence, + BARRY_NCORES_ARG() ) { diff --git a/include/barry/models/geese/geese-bones.hpp b/include/barry/models/geese/geese-bones.hpp index 85ae0296b..e8ead9cfb 100644 --- a/include/barry/models/geese/geese-bones.hpp +++ b/include/barry/models/geese/geese-bones.hpp @@ -215,7 +215,8 @@ class Geese { double likelihood( const std::vector< double > & par, bool as_log = false, - bool use_reduced_sequence = true + bool use_reduced_sequence = true, + BARRY_NCORES_ARG(= 1) ); double likelihood_exhaust(const std::vector< double > & par); diff --git a/include/barry/models/geese/geese-meat-likelihood.hpp b/include/barry/models/geese/geese-meat-likelihood.hpp index 3e5edcbce..a76b218b8 100644 --- a/include/barry/models/geese/geese-meat-likelihood.hpp +++ b/include/barry/models/geese/geese-meat-likelihood.hpp @@ -6,7 +6,8 @@ inline double Geese::likelihood( const std::vector< double > & par, bool as_log, - bool use_reduced_sequence + bool use_reduced_sequence, + BARRY_NCORES_ARG() ) { INITIALIZED() @@ -148,7 +149,8 @@ inline double Geese::likelihood( off_mult *= model->likelihood( par0, temp_stats, - node.narray[s] + node.narray[s], + ncores ); } catch (std::exception & e) { From d57476a45fb47825786005b1b3bccdc71f8dbe31 Mon Sep 17 00:00:00 2001 From: "George G. Vega Yon" Date: Wed, 27 Sep 2023 17:32:45 -0600 Subject: [PATCH 04/14] Adding ncores --- barry.hpp | 220 +++++++++++------- include/barry/barry-macros.hpp | 2 +- include/barry/counters/network.hpp | 2 +- include/barry/model-meat.hpp | 4 +- include/barry/models/geese/flock-bones.hpp | 2 +- include/barry/models/geese/flock-meat.hpp | 6 +- include/barry/models/geese/geese-bones.hpp | 2 +- .../models/geese/geese-meat-likelihood.hpp | 3 +- include/barry/typedefs.hpp | 6 +- 9 files changed, 155 insertions(+), 92 deletions(-) diff --git a/barry.hpp b/barry.hpp index f35072c0a..0d44fb688 100644 --- a/barry.hpp +++ b/barry.hpp @@ -17,7 +17,7 @@ #include #include -#ifdef __OPENMP +#if defined(__OPENMP) || defined(_OPENMP) #include // Set the number of threads to match the number of cores @@ -117,7 +117,7 @@ namespace barry { #define BARRY_MAX_NUM_ELEMENTS static_cast< size_t >(std::numeric_limits< size_t >::max() /2u) #endif -#ifdef __OPENMP +#if defined(__OPENMP) || defined(_OPENMP) #define BARRY_WITH_OMP #include #endif @@ -570,7 +570,7 @@ inline double vec_inner_prod( ) { double res = 0.0; - #ifdef __OPENMP + #if defined(__OPENMP) || defined(_OPENMP) #pragma omp simd reduction(+:res) #else #ifdef __GNUC__ @@ -617,6 +617,13 @@ inline double vec_inner_prod( #define BARRY_UNUSED(expr) do { (void)(expr); } while (0); +#if defined(_OPENMP) || defined(__OPENMP) +#define BARRY_NCORES_ARG(default) size_t ncores default +#else +#define BARRY_NCORES_ARG(default) size_t +#endif + + #endif /*////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////// @@ -6356,7 +6363,7 @@ inline void Support 0u) { - #ifdef __OPENMP + #if defined(__OPENMP) || defined(_OPENMP) #pragma omp simd #endif for (size_t n = 0u; n < n_counters; ++n) @@ -6479,7 +6486,7 @@ inline void Support 0u) { - #ifdef __OPENMP + #if defined(__OPENMP) || defined(_OPENMP) #pragma omp simd #endif for (size_t n = 0u; n < n_counters; ++n) @@ -7295,33 +7302,38 @@ class Model { double likelihood( const std::vector & params, const size_t & i, - bool as_log = false + bool as_log = false, + BARRY_NCORES_ARG(=2) ); double likelihood( const std::vector & params, const Array_Type & Array_, int i = -1, - bool as_log = false + bool as_log = false, + BARRY_NCORES_ARG(=2) ); double likelihood( const std::vector & params, const std::vector & target_, const size_t & i, - bool as_log = false + bool as_log = false, + BARRY_NCORES_ARG(=2) ); double likelihood( const std::vector & params, const double * target_, const size_t & i, - bool as_log = false + bool as_log = false, + BARRY_NCORES_ARG(=2) ); double likelihood_total( const std::vector & params, - bool as_log = false + bool as_log = false, + BARRY_NCORES_ARG(=2) ); ///@} @@ -7486,9 +7498,8 @@ inline double update_normalizing_constant( { std::vector< double > resv(n); - #ifdef __OPENMP - omp_set_num_threads(omp_get_num_procs()); - #pragma omp parallel for + #if defined(__OPENMP) || defined(_OPENMP) + #pragma omp parallel for shared(resv) #else #ifdef __GNUC__ #ifndef __clang__ @@ -7502,7 +7513,7 @@ inline double update_normalizing_constant( double tmp = 0.0; const double * support_n = support + i * k + 1u; - #ifdef __OPENMP + #if defined(__OPENMP) || defined(_OPENMP) #pragma omp simd reduction(+:tmp) #else #ifdef __GNUC__ @@ -7520,7 +7531,7 @@ inline double update_normalizing_constant( // Accumulate resv to a double res double res = 0.0; - #ifdef __OPENMP + #if defined(__OPENMP) || defined(_OPENMP) #pragma omp parallel for simd reduction(+:res) #else #ifdef __GNUC__ @@ -7568,8 +7579,7 @@ inline double likelihood_( double numerator = 0.0; // Computing the numerator - #ifdef __OPENMP - omp_set_num_threads(omp_get_num_procs()); + #if defined(__OPENMP) || defined(_OPENMP) #pragma omp simd reduction(+:numerator) #else #ifdef __GNUC__ @@ -7578,7 +7588,6 @@ inline double likelihood_( #endif #endif #endif - #pragma code_align(32) for (size_t j = 0u; j < params.size(); ++j) numerator += *(stats_target + j) * params[j]; @@ -7618,15 +7627,6 @@ inline double likelihood_( } -#define MODEL_TYPE() Model - -#define MODEL_TEMPLATE_ARGS() - -#define MODEL_TEMPLATE(a,b) \ - template MODEL_TEMPLATE_ARGS() inline a MODEL_TYPE()::b - template < typename Array_Type, typename Data_Counter_Type, @@ -7798,20 +7798,23 @@ inline Model & } -MODEL_TEMPLATE(void, store_psets)() noexcept { +template +inline void Model:: store_psets() noexcept { // if (with_pset) // throw std::logic_error("Powerset storage alreay activated."); with_pset = true; return; } -MODEL_TEMPLATE(std::vector< double >, gen_key)( +template +inline std::vector< double > Model:: gen_key( const Array_Type & Array_ ) { return this->counters->gen_hash(Array_); } -MODEL_TEMPLATE(void, add_counter)( +template +inline void Model:: add_counter( Counter & counter ) { @@ -7819,7 +7822,8 @@ MODEL_TEMPLATE(void, add_counter)( return; } -MODEL_TEMPLATE(void, add_counter)( +template +inline void Model:: add_counter( Counter_fun_type count_fun_, Counter_fun_type init_fun_, Data_Counter_Type data_ @@ -7835,7 +7839,8 @@ MODEL_TEMPLATE(void, add_counter)( } -MODEL_TEMPLATE(void, set_counters)( +template +inline void Model:: set_counters( Counters * counters_ ) { @@ -7852,7 +7857,8 @@ MODEL_TEMPLATE(void, set_counters)( } -MODEL_TEMPLATE(void, add_hasher)( +template +inline void Model:: add_hasher( Hasher_fun_type fun_ ) { @@ -7862,7 +7868,8 @@ MODEL_TEMPLATE(void, add_hasher)( //////////////////////////////////////////////////////////////////////////////// -MODEL_TEMPLATE(void, add_rule)( +template +inline void Model:: add_rule( Rule & rules ) { @@ -7871,7 +7878,8 @@ MODEL_TEMPLATE(void, add_rule)( } -MODEL_TEMPLATE(void, set_rules)( +template +inline void Model:: set_rules( Rules * rules_ ) { @@ -7888,7 +7896,8 @@ MODEL_TEMPLATE(void, set_rules)( //////////////////////////////////////////////////////////////////////////////// -MODEL_TEMPLATE(void, add_rule_dyn)( +template +inline void Model:: add_rule_dyn( Rule & rules_ ) { @@ -7896,7 +7905,8 @@ MODEL_TEMPLATE(void, add_rule_dyn)( return; } -MODEL_TEMPLATE(void, add_rule_dyn)( +template +inline void Model:: add_rule_dyn( Rule_fun_type rule_fun_, Data_Rule_Dyn_Type data_ ) { @@ -7910,7 +7920,8 @@ MODEL_TEMPLATE(void, add_rule_dyn)( } -MODEL_TEMPLATE(void, set_rules_dyn)( +template +inline void Model:: set_rules_dyn( Rules * rules_ ) { @@ -7924,10 +7935,10 @@ MODEL_TEMPLATE(void, set_rules_dyn)( } - ///////////////////////////////////////////////////////////////////////////////////////////////////////////////// -MODEL_TEMPLATE(size_t, add_array)( +template +inline size_t Model:: add_array( const Array_Type & Array_, bool force_new ) { @@ -8062,11 +8073,17 @@ MODEL_TEMPLATE(size_t, add_array)( } -MODEL_TEMPLATE(double, likelihood)( +template +inline double Model::likelihood( const std::vector & params, const size_t & i, - bool as_log + bool as_log, + BARRY_NCORES_ARG() ) { + + #if defined(__OPENMP) || defined(_OPENMP) + omp_set_num_threads(ncores); + #endif // Checking if the index exists if (i >= arrays2support.size()) @@ -8105,12 +8122,18 @@ MODEL_TEMPLATE(double, likelihood)( } -MODEL_TEMPLATE(double, likelihood)( +template +inline double Model::likelihood( const std::vector & params, const Array_Type & Array_, int i, - bool as_log + bool as_log, + BARRY_NCORES_ARG() ) { + + #if defined(__OPENMP) || defined(_OPENMP) + omp_set_num_threads(ncores); + #endif // Key of the support set to use int loc; @@ -8185,12 +8208,18 @@ MODEL_TEMPLATE(double, likelihood)( } -MODEL_TEMPLATE(double, likelihood)( +template +inline double Model::likelihood( const std::vector & params, const std::vector & target_, const size_t & i, - bool as_log + bool as_log, + BARRY_NCORES_ARG() ) { + + #if defined(__OPENMP) || defined(_OPENMP) + omp_set_num_threads(ncores); + #endif // Checking if the index exists if (i >= arrays2support.size()) @@ -8248,12 +8277,18 @@ MODEL_TEMPLATE(double, likelihood)( } -MODEL_TEMPLATE(double, likelihood)( +template +inline double Model::likelihood( const std::vector & params, const double * target_, const size_t & i, - bool as_log + bool as_log, + BARRY_NCORES_ARG() ) { + + #if defined(__OPENMP) || defined(_OPENMP) + omp_set_num_threads(ncores); + #endif // Checking if the index exists if (i >= arrays2support.size()) @@ -8318,10 +8353,16 @@ MODEL_TEMPLATE(double, likelihood)( } -MODEL_TEMPLATE(double, likelihood_total)( +template +inline double Model::likelihood_total( const std::vector & params, - bool as_log + bool as_log, + BARRY_NCORES_ARG() ) { + + #if defined(__OPENMP) || defined(_OPENMP) + omp_set_num_threads(ncores); + #endif size_t params_last_size = params_last.size(); @@ -8384,7 +8425,8 @@ MODEL_TEMPLATE(double, likelihood_total)( } -MODEL_TEMPLATE(double, get_norm_const)( +template +inline double Model:: get_norm_const( const std::vector & params, const size_t & i, bool as_log @@ -8420,7 +8462,8 @@ MODEL_TEMPLATE(double, get_norm_const)( } -MODEL_TEMPLATE(const std::vector< Array_Type > *, get_pset)( +template +inline const std::vector< Array_Type > * Model:: get_pset( const size_t & i ) { @@ -8432,7 +8475,8 @@ MODEL_TEMPLATE(const std::vector< Array_Type > *, get_pset)( } -MODEL_TEMPLATE(const std::vector< double > *, get_pset_stats)( +template +inline const std::vector< double > * Model:: get_pset_stats( const size_t & i ) { @@ -8443,7 +8487,8 @@ MODEL_TEMPLATE(const std::vector< double > *, get_pset_stats)( } -MODEL_TEMPLATE(void, print_stats)(size_t i) const +template +inline void Model:: print_stats(size_t i) const { if (i >= arrays2support.size()) @@ -8533,14 +8578,16 @@ inline void Model +inline size_t Model:: size() const noexcept { // INITIALIZED() return this->stats_target.size(); } -MODEL_TEMPLATE(size_t, size_unique)() const noexcept +template +inline size_t Model:: size_unique() const noexcept { // INITIALIZED() @@ -8548,7 +8595,8 @@ MODEL_TEMPLATE(size_t, size_unique)() const noexcept } -MODEL_TEMPLATE(size_t, nterms)() const noexcept +template +inline size_t Model:: nterms() const noexcept { if (transform_model_fun) @@ -8558,21 +8606,24 @@ MODEL_TEMPLATE(size_t, nterms)() const noexcept } -MODEL_TEMPLATE(size_t, nrules)() const noexcept +template +inline size_t Model:: nrules() const noexcept { return this->rules->size(); } -MODEL_TEMPLATE(size_t, nrules_dyn)() const noexcept +template +inline size_t Model:: nrules_dyn() const noexcept { return this->rules_dyn->size(); } -MODEL_TEMPLATE(size_t, support_size)() const noexcept +template +inline size_t Model:: support_size() const noexcept { // INITIALIZED() @@ -8584,7 +8635,8 @@ MODEL_TEMPLATE(size_t, support_size)() const noexcept } -MODEL_TEMPLATE(std::vector< std::string >, colnames)() const +template +inline std::vector< std::string > Model:: colnames() const { if (transform_model_fun) @@ -8679,7 +8731,8 @@ inline Array_Type Model +inline Array_Type Model:: sample( const Array_Type & Array_, const std::vector & params ) { @@ -8847,7 +8900,8 @@ MODEL_TEMPLATE(Array_Type, sample)( } -MODEL_TEMPLATE(double, conditional_prob)( +template +inline double Model:: conditional_prob( const Array_Type & Array_, const std::vector< double > & params, size_t i, @@ -8879,59 +8933,67 @@ MODEL_TEMPLATE(double, conditional_prob)( } -MODEL_TEMPLATE(const std::mt19937 *, get_rengine)() const { +template +inline const std::mt19937 * Model:: get_rengine() const { return this->rengine; } -template MODEL_TEMPLATE_ARGS() -inline Counters * MODEL_TYPE()::get_counters() { +template +inline Counters * Model::get_counters() { return this->counters; } -template MODEL_TEMPLATE_ARGS() -inline Rules * MODEL_TYPE()::get_rules() { +template +inline Rules * Model::get_rules() { return this->rules; } -template MODEL_TEMPLATE_ARGS() -inline Rules * MODEL_TYPE()::get_rules_dyn() { +template +inline Rules * Model::get_rules_dyn() { return this->rules_dyn; } -template MODEL_TEMPLATE_ARGS() +template inline Support * -MODEL_TYPE()::get_support_fun() { +Model::get_support_fun() { return &this->support_fun; } -MODEL_TEMPLATE(std::vector< std::vector< double > > *, get_stats_target)() +template +inline std::vector< std::vector< double > > * Model:: get_stats_target() { return &stats_target; } -MODEL_TEMPLATE(std::vector< std::vector< double > > *, get_stats_support)() +template +inline std::vector< std::vector< double > > * Model:: get_stats_support() { return &stats_support; } -MODEL_TEMPLATE(std::vector< size_t > *, get_arrays2support)() +template +inline std::vector< size_t > * Model:: get_arrays2support() { return &arrays2support; } -MODEL_TEMPLATE(std::vector< std::vector< Array_Type > > *, get_pset_arrays)() { +template +inline std::vector< std::vector< Array_Type > > * Model:: get_pset_arrays() { return &pset_arrays; } -MODEL_TEMPLATE(std::vector< std::vector > *, get_pset_stats)() { +template +inline std::vector< std::vector > * Model:: get_pset_stats() { return &pset_stats; } -MODEL_TEMPLATE(std::vector< std::vector > *, get_pset_probs)() { +template +inline std::vector< std::vector > * Model:: get_pset_probs() { return &pset_probs; } -MODEL_TEMPLATE(void, set_transform_model)( +template +inline void Model:: set_transform_model( std::function(double *,size_t)> fun, std::vector< std::string > names ) diff --git a/include/barry/barry-macros.hpp b/include/barry/barry-macros.hpp index 20b261e3b..c32f36c9c 100644 --- a/include/barry/barry-macros.hpp +++ b/include/barry/barry-macros.hpp @@ -12,7 +12,7 @@ #if defined(_OPENMP) || defined(__OPENMP) #define BARRY_NCORES_ARG(default) size_t ncores default #else -#define BARRY_NCORES_ARG(default) size_t +#define BARRY_NCORES_ARG(default) size_t ncores default #endif diff --git a/include/barry/counters/network.hpp b/include/barry/counters/network.hpp index e6b5b8ec8..642cf2981 100644 --- a/include/barry/counters/network.hpp +++ b/include/barry/counters/network.hpp @@ -673,7 +673,7 @@ inline void counter_ctriads(NetCounters * counters) // i->j->k->i double ans = 0.0; - #ifdef __OPENM + #if defined(__OPENMP) || defined(_OPENMP) #pragma omp simd reduction(+:ans) #endif for (size_t k = 0u; k < Array.nrow(); ++k) diff --git a/include/barry/model-meat.hpp b/include/barry/model-meat.hpp index 202e2bbce..69fc5cf0e 100644 --- a/include/barry/model-meat.hpp +++ b/include/barry/model-meat.hpp @@ -914,7 +914,7 @@ inline double Model & par, bool as_log = false, bool use_reduced_sequence = true, - BARRY_NCORES_ARG(=1) + size_t ncores = 1u ); /** diff --git a/include/barry/models/geese/flock-meat.hpp b/include/barry/models/geese/flock-meat.hpp index 901981ab1..5ddae20c1 100644 --- a/include/barry/models/geese/flock-meat.hpp +++ b/include/barry/models/geese/flock-meat.hpp @@ -139,7 +139,7 @@ inline double Flock::likelihood_joint( const std::vector< double > & par, bool as_log, bool use_reduced_sequence, - BARRY_NCORES_ARG() + size_t ncores ) { @@ -150,14 +150,14 @@ inline double Flock::likelihood_joint( if (as_log) { for (auto& d : this->dat) - ans += d.likelihood(par, as_log, use_reduced_sequence); + ans += d.likelihood(par, as_log, use_reduced_sequence, ncores); } else { for (auto& d : this->dat) - ans *= d.likelihood(par, as_log, use_reduced_sequence); + ans *= d.likelihood(par, as_log, use_reduced_sequence, ncores); } diff --git a/include/barry/models/geese/geese-bones.hpp b/include/barry/models/geese/geese-bones.hpp index e8ead9cfb..9492dcac3 100644 --- a/include/barry/models/geese/geese-bones.hpp +++ b/include/barry/models/geese/geese-bones.hpp @@ -216,7 +216,7 @@ class Geese { const std::vector< double > & par, bool as_log = false, bool use_reduced_sequence = true, - BARRY_NCORES_ARG(= 1) + size_t ncores = 1u ); double likelihood_exhaust(const std::vector< double > & par); diff --git a/include/barry/models/geese/geese-meat-likelihood.hpp b/include/barry/models/geese/geese-meat-likelihood.hpp index a76b218b8..91bf84e40 100644 --- a/include/barry/models/geese/geese-meat-likelihood.hpp +++ b/include/barry/models/geese/geese-meat-likelihood.hpp @@ -7,7 +7,7 @@ inline double Geese::likelihood( const std::vector< double > & par, bool as_log, bool use_reduced_sequence, - BARRY_NCORES_ARG() + size_t ncores ) { INITIALIZED() @@ -150,6 +150,7 @@ inline double Geese::likelihood( par0, temp_stats, node.narray[s], + as_log, ncores ); } catch (std::exception & e) { diff --git a/include/barry/typedefs.hpp b/include/barry/typedefs.hpp index 28ae44b2c..1d4bbe004 100644 --- a/include/barry/typedefs.hpp +++ b/include/barry/typedefs.hpp @@ -256,7 +256,7 @@ inline bool vec_equal_approx( } ///@} -#ifdef __OPENM +#if defined(__OPENMP) || defined(_OPENMP) #pragma omp declare simd #endif template @@ -267,7 +267,7 @@ inline T vec_inner_prod( ) { double res = 0.0; - #ifdef __OPENM + #if defined(__OPENMP) || defined(_OPENMP) #pragma omp simd reduction(+:res) #else #ifdef __GNUC__ @@ -283,7 +283,7 @@ inline T vec_inner_prod( } -#ifdef __OPENM +#if defined(__OPENMP) || defined(_OPENMP) #pragma omp declare simd #endif template <> From 47309fc651266331429d61b56490e1d52c0f0ef6 Mon Sep 17 00:00:00 2001 From: "George G. Vega Yon" Date: Wed, 27 Sep 2023 22:49:01 -0600 Subject: [PATCH 05/14] Removing comments --- include/barry/model-meat.hpp | 3 --- tests/07-geese-flock.cpp | 2 +- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/include/barry/model-meat.hpp b/include/barry/model-meat.hpp index 69fc5cf0e..cc5d210ad 100644 --- a/include/barry/model-meat.hpp +++ b/include/barry/model-meat.hpp @@ -764,7 +764,6 @@ inline double Modelstats_support[loc].size() == 0u) { - // return as_log ? -std::numeric_limits::infinity() : 0.0; throw std::logic_error("The support set for this array is empty."); } @@ -832,7 +831,6 @@ inline double Model::infinity() : 0.0; } } @@ -840,7 +838,6 @@ inline double Modelstats_support[loc].size() == 0u) { - // return as_log ? -std::numeric_limits::infinity() : 0.0; throw std::logic_error("The support set for this array is empty."); } diff --git a/tests/07-geese-flock.cpp b/tests/07-geese-flock.cpp index e29f661f0..06ae7bf8d 100644 --- a/tests/07-geese-flock.cpp +++ b/tests/07-geese-flock.cpp @@ -64,7 +64,7 @@ BARRY_TEST_CASE("Flock likelihood", "[flock-likelihood]") { aflock.init(); - double ans1 = aflock.likelihood_joint(params, true); + double ans1 = aflock.likelihood_joint(params, true, true, 2u); #ifdef CATCH_CONFIG_MAIN REQUIRE(std::abs(ans0 - ans1) < .00000001); #endif From 60c05a8106dc03bd1f2c7c468a97448ae0a7056b Mon Sep 17 00:00:00 2001 From: "George G. Vega Yon" Date: Thu, 28 Sep 2023 00:15:06 -0600 Subject: [PATCH 06/14] Reorg pragmas --- .vscode/launch.json | 32 ++++- .vscode/tasks.json | 24 ++++ barry.hpp | 127 +++++++++--------- include/barry/barraydense-meat.hpp | 19 +-- include/barry/model-meat.hpp | 91 +++++++------ .../models/geese/geese-meat-likelihood.hpp | 2 +- include/barry/typedefs.hpp | 16 +-- 7 files changed, 173 insertions(+), 138 deletions(-) diff --git a/.vscode/launch.json b/.vscode/launch.json index 787b44f83..ed175e310 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -1,7 +1,31 @@ { - // Use IntelliSense to learn about possible attributes. - // Hover to view descriptions of existing attributes. - // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 "version": "0.2.0", - "configurations": [] + "configurations": [ + { + "name": "C/C++: g++ build and debug active file", + "type": "cppdbg", + "request": "launch", + "program": "${fileDirname}/${fileBasenameNoExtension}", + "args": [], + "stopAtEntry": false, + "cwd": "${fileDirname}", + "environment": [], + "externalConsole": false, + "MIMode": "gdb", + "setupCommands": [ + { + "description": "Enable pretty-printing for gdb", + "text": "-enable-pretty-printing", + "ignoreFailures": true + }, + { + "description": "Set Disassembly Flavor to Intel", + "text": "-gdb-set disassembly-flavor intel", + "ignoreFailures": true + } + ], + "preLaunchTask": "C/C++: g++ build active file", + "miDebuggerPath": "/usr/bin/gdb" + } + ] } \ No newline at end of file diff --git a/.vscode/tasks.json b/.vscode/tasks.json index 05054c5cd..005ef595e 100644 --- a/.vscode/tasks.json +++ b/.vscode/tasks.json @@ -5,7 +5,11 @@ "label": "C/C++: g++ build active file", "command": "/usr/bin/g++", "args": [ + "-fopenmp", "-fdiagnostics-color=always", + "-ftree-vectorize", + "-march=native", + "-ffast-math", "-g", "${file}", "-o", @@ -22,6 +26,26 @@ "isDefault": true }, "detail": "Task generated by Debugger." + }, + { + "type": "cppbuild", + "label": "C/C++: gcc build active file", + "command": "/usr/bin/gcc", + "args": [ + "-fdiagnostics-color=always", + "-g", + "${file}", + "-o", + "${fileDirname}/${fileBasenameNoExtension}" + ], + "options": { + "cwd": "${fileDirname}" + }, + "problemMatcher": [ + "$gcc" + ], + "group": "build", + "detail": "compiler: /usr/bin/gcc" } ], "version": "2.0.0" diff --git a/barry.hpp b/barry.hpp index 0d44fb688..57f1e3452 100644 --- a/barry.hpp +++ b/barry.hpp @@ -532,7 +532,7 @@ inline bool vec_equal_approx( } ///@} -#ifdef __OPENM +#if defined(__OPENMP) || defined(_OPENMP) #pragma omp declare simd #endif template @@ -543,14 +543,10 @@ inline T vec_inner_prod( ) { double res = 0.0; - #ifdef __OPENM + #if defined(__OPENMP) || defined(_OPENMP) #pragma omp simd reduction(+:res) - #else - #ifdef __GNUC__ - #ifndef __clang__ - #pragma GCC ivdep - #endif - #endif + #elif defined(__GNUC__) && !defined(__clang__) + #pragma GCC ivdep #endif for (size_t i = 0u; i < n; ++i) res += (*(a + i) * *(b + i)); @@ -559,7 +555,7 @@ inline T vec_inner_prod( } -#ifdef __OPENM +#if defined(__OPENMP) || defined(_OPENMP) #pragma omp declare simd #endif template <> @@ -572,12 +568,8 @@ inline double vec_inner_prod( double res = 0.0; #if defined(__OPENMP) || defined(_OPENMP) #pragma omp simd reduction(+:res) - #else - #ifdef __GNUC__ - #ifndef __clang__ - #pragma GCC ivdep - #endif - #endif + #elif defined(__GNUC__) && !defined(__clang__) + #pragma GCC ivdep #endif for (size_t i = 0u; i < n; ++i) res += (*(a + i) * *(b + i)); @@ -620,7 +612,7 @@ inline double vec_inner_prod( #if defined(_OPENMP) || defined(__OPENMP) #define BARRY_NCORES_ARG(default) size_t ncores default #else -#define BARRY_NCORES_ARG(default) size_t +#define BARRY_NCORES_ARG(default) size_t ncores default #endif @@ -7498,50 +7490,66 @@ inline double update_normalizing_constant( { std::vector< double > resv(n); - #if defined(__OPENMP) || defined(_OPENMP) - #pragma omp parallel for shared(resv) - #else - #ifdef __GNUC__ - #ifndef __clang__ - #pragma GCC ivdep - #endif - #endif - #endif - for (size_t i = 0u; i < n; ++i) + if (n > 1000u) { - double tmp = 0.0; - const double * support_n = support + i * k + 1u; - #if defined(__OPENMP) || defined(_OPENMP) - #pragma omp simd reduction(+:tmp) - #else - #ifdef __GNUC__ - #ifndef __clang__ - #pragma GCC ivdep - #endif - #endif + #pragma omp parallel for shared(resv) + #elif defined(__GNUC__) && !defined(__clang__) + #pragma GCC ivdep #endif for (size_t j = 0u; j < (k - 1u); ++j) - tmp += (*(support_n + j)) * (*(params + j)); - - resv[i] = std::exp(tmp BARRY_SAFE_EXP) * (*(support + i * k)); + { - } + double p = *(params + j); + double tmp = 0.0; + const double * support_n = support + i * k + 1u; + + #if defined(__OPENMP) || defined(_OPENMP) + #pragma omp simd reduction(+:tmp) + #elif defined(__GNUC__) && !defined(__clang__) + #pragma GCC ivdep + #endif + for (size_t i = 0u; i < n; ++i) + resv[i] += (*(support_n + j)) * p; - // Accumulate resv to a double res - double res = 0.0; - #if defined(__OPENMP) || defined(_OPENMP) - #pragma omp parallel for simd reduction(+:res) - #else - #ifdef __GNUC__ - #ifndef __clang__ + } + + // Accumulate resv to a double res + double res = 0.0; + #if defined(__OPENMP) || defined(_OPENMP) + #pragma omp simd reduction(+:res) + #elif defined(__GNUC__) && !defined(__clang__) #pragma GCC ivdep - #endif #endif - #endif - for (size_t i = 0u; i < n; ++i) - res += resv[i]; + for (size_t i = 0u; i < n; ++i) + { + res += std::exp(resv[i] BARRY_SAFE_EXP) * (*(support + i * k)); + } + + } else { + + for (size_t i = 0u; i < n; ++i) + { + + double tmp = 0.0; + const double * support_n = support + i * k + 1u; + + for (size_t j = 0u; j < (k - 1u); ++j) + tmp += (*(support_n + j)) * (*(params + j)); + + resv[i] = std::exp(tmp BARRY_SAFE_EXP) * (*(support + i * k)); + + } + + // Accumulate resv to a double res + double res = 0.0; + for (size_t i = 0u; i < n; ++i) + res += resv[i]; + + + } + #ifdef BARRY_DEBUG if (std::isnan(res)) @@ -7581,12 +7589,8 @@ inline double likelihood_( // Computing the numerator #if defined(__OPENMP) || defined(_OPENMP) #pragma omp simd reduction(+:numerator) - #else - #ifdef __GNUC__ - #ifndef __clang__ - #pragma GCC ivdep - #endif - #endif + #elif defined(__GNUC__) && !defined(__clang__) + #pragma GCC ivdep #endif for (size_t j = 0u; j < params.size(); ++j) numerator += *(stats_target + j) * params[j]; @@ -8247,7 +8251,6 @@ inline double Modelstats_support[loc].size() == 0u) { - // return as_log ? -std::numeric_limits::infinity() : 0.0; throw std::logic_error("The support set for this array is empty."); } @@ -8315,7 +8318,6 @@ inline double Model::infinity() : 0.0; } } @@ -8323,7 +8325,6 @@ inline double Modelstats_support[loc].size() == 0u) { - // return as_log ? -std::numeric_limits::infinity() : 0.0; throw std::logic_error("The support set for this array is empty."); } @@ -8397,7 +8398,7 @@ inline double Model * counters) // i->j->k->i double ans = 0.0; - #ifdef __OPENM + #if defined(__OPENMP) || defined(_OPENMP) #pragma omp simd reduction(+:ans) #endif for (size_t k = 0u; k < Array.nrow(); ++k) diff --git a/include/barry/barraydense-meat.hpp b/include/barry/barraydense-meat.hpp index 57c2bc62a..3b0bccf49 100644 --- a/include/barry/barraydense-meat.hpp +++ b/include/barry/barraydense-meat.hpp @@ -39,21 +39,13 @@ inline BArrayDense::BArrayDense( const std::vector< size_t > & target, const std::vector< Cell_Type > & value, bool add -) { +) : N(N_), M(M_), el(N_ * M_, ZERO_CELL), el_rowsums(N_, ZERO_CELL), el_colsums(M_, ZERO_CELL) { if (source.size() != target.size()) throw std::length_error("-source- and -target- don't match on length."); if (source.size() != value.size()) throw std::length_error("-sorce- and -value- don't match on length."); - // Initializing - N = N_; - M = M_; - - el.resize(N * M, ZERO_CELL); - el_rowsums.resize(N, ZERO_CELL); - el_colsums.resize(M, ZERO_CELL); - // Writing the data for (size_t i = 0u; i < source.size(); ++i) { @@ -96,7 +88,7 @@ inline BArrayDense:: BArrayDense( const std::vector< size_t > & source, const std::vector< size_t > & target, bool add -) { +) : N(N_), M(M_), el(N_ * M_, ZERO_CELL), el_rowsums(N_, ZERO_CELL), el_colsums(M_, ZERO_CELL) { std::vector< Cell_Type > value(source.size(), static_cast(1.0)); @@ -104,14 +96,7 @@ inline BArrayDense:: BArrayDense( throw std::length_error("-source- and -target- don't match on length."); if (source.size() != value.size()) throw std::length_error("-sorce- and -value- don't match on length."); - - // Initializing - N = N_; - M = M_; - el.resize(N * M, ZERO_CELL); - el_rowsums.resize(N, ZERO_CELL); - el_colsums.resize(M, ZERO_CELL); // Writing the data for (size_t i = 0u; i < source.size(); ++i) diff --git a/include/barry/model-meat.hpp b/include/barry/model-meat.hpp index cc5d210ad..cedc69d44 100644 --- a/include/barry/model-meat.hpp +++ b/include/barry/model-meat.hpp @@ -14,51 +14,64 @@ inline double update_normalizing_constant( ) { std::vector< double > resv(n); + double res = 0.0; - #if defined(__OPENMP) || defined(_OPENMP) - #pragma omp parallel for shared(resv) - #else - #ifdef __GNUC__ - #ifndef __clang__ - #pragma GCC ivdep - #endif - #endif - #endif - for (size_t i = 0u; i < n; ++i) + if (n > 1000u) { - double tmp = 0.0; - const double * support_n = support + i * k + 1u; - #if defined(__OPENMP) || defined(_OPENMP) - #pragma omp simd reduction(+:tmp) - #else - #ifdef __GNUC__ - #ifndef __clang__ - #pragma GCC ivdep - #endif - #endif + #pragma omp parallel for shared(resv) + #elif defined(__GNUC__) && !defined(__clang__) + #pragma GCC ivdep #endif for (size_t j = 0u; j < (k - 1u); ++j) - tmp += (*(support_n + j)) * (*(params + j)); - - resv[i] = std::exp(tmp BARRY_SAFE_EXP) * (*(support + i * k)); + { - } + double p = *(params + j); + + #if defined(__OPENMP) || defined(_OPENMP) + #pragma omp simd + #elif defined(__GNUC__) && !defined(__clang__) + #pragma GCC ivdep + #endif + for (size_t i = 0u; i < n; ++i) + resv[i] += (*(support + i * k + 1u + j)) * p; - // Accumulate resv to a double res - double res = 0.0; - #if defined(__OPENMP) || defined(_OPENMP) - #pragma omp parallel for simd reduction(+:res) - #else - #ifdef __GNUC__ - #ifndef __clang__ + } + + // Accumulate resv to a double res + #if defined(__OPENMP) || defined(_OPENMP) + #pragma omp simd reduction(+:res) + #elif defined(__GNUC__) && !defined(__clang__) #pragma GCC ivdep - #endif #endif - #endif - for (size_t i = 0u; i < n; ++i) - res += resv[i]; + for (size_t i = 0u; i < n; ++i) + { + res += std::exp(resv[i] BARRY_SAFE_EXP) * (*(support + i * k)); + } + + } else { + + for (size_t i = 0u; i < n; ++i) + { + + double tmp = 0.0; + const double * support_n = support + i * k + 1u; + + for (size_t j = 0u; j < (k - 1u); ++j) + tmp += (*(support_n + j)) * (*(params + j)); + + resv[i] = std::exp(tmp BARRY_SAFE_EXP) * (*(support + i * k)); + + } + + // Accumulate resv to a double res + for (size_t i = 0u; i < n; ++i) + res += resv[i]; + + + } + #ifdef BARRY_DEBUG if (std::isnan(res)) @@ -98,12 +111,8 @@ inline double likelihood_( // Computing the numerator #if defined(__OPENMP) || defined(_OPENMP) #pragma omp simd reduction(+:numerator) - #else - #ifdef __GNUC__ - #ifndef __clang__ - #pragma GCC ivdep - #endif - #endif + #elif defined(__GNUC__) && !defined(__clang__) + #pragma GCC ivdep #endif for (size_t j = 0u; j < params.size(); ++j) numerator += *(stats_target + j) * params[j]; diff --git a/include/barry/models/geese/geese-meat-likelihood.hpp b/include/barry/models/geese/geese-meat-likelihood.hpp index 91bf84e40..ad908bf52 100644 --- a/include/barry/models/geese/geese-meat-likelihood.hpp +++ b/include/barry/models/geese/geese-meat-likelihood.hpp @@ -150,7 +150,7 @@ inline double Geese::likelihood( par0, temp_stats, node.narray[s], - as_log, + false, ncores ); } catch (std::exception & e) { diff --git a/include/barry/typedefs.hpp b/include/barry/typedefs.hpp index 1d4bbe004..5d34c90bf 100644 --- a/include/barry/typedefs.hpp +++ b/include/barry/typedefs.hpp @@ -269,12 +269,8 @@ inline T vec_inner_prod( double res = 0.0; #if defined(__OPENMP) || defined(_OPENMP) #pragma omp simd reduction(+:res) - #else - #ifdef __GNUC__ - #ifndef __clang__ - #pragma GCC ivdep - #endif - #endif + #elif defined(__GNUC__) && !defined(__clang__) + #pragma GCC ivdep #endif for (size_t i = 0u; i < n; ++i) res += (*(a + i) * *(b + i)); @@ -296,12 +292,8 @@ inline double vec_inner_prod( double res = 0.0; #if defined(__OPENMP) || defined(_OPENMP) #pragma omp simd reduction(+:res) - #else - #ifdef __GNUC__ - #ifndef __clang__ - #pragma GCC ivdep - #endif - #endif + #elif defined(__GNUC__) && !defined(__clang__) + #pragma GCC ivdep #endif for (size_t i = 0u; i < n; ++i) res += (*(a + i) * *(b + i)); From 8481f54045fab40b687f22896e38b93668f77d6c Mon Sep 17 00:00:00 2001 From: "George G. Vega Yon" Date: Thu, 28 Sep 2023 01:08:40 -0600 Subject: [PATCH 07/14] Fixed (didn't like all optims) --- .vscode/tasks.json | 3 -- barry.hpp | 65 ++++++++++-------------------- include/barry/counters/network.hpp | 4 +- include/barry/model-meat.hpp | 29 ++++++------- tests/Makefile | 2 +- 5 files changed, 38 insertions(+), 65 deletions(-) diff --git a/.vscode/tasks.json b/.vscode/tasks.json index 005ef595e..672d305b1 100644 --- a/.vscode/tasks.json +++ b/.vscode/tasks.json @@ -7,9 +7,6 @@ "args": [ "-fopenmp", "-fdiagnostics-color=always", - "-ftree-vectorize", - "-march=native", - "-ffast-math", "-g", "${file}", "-o", diff --git a/barry.hpp b/barry.hpp index 57f1e3452..243cfa2ca 100644 --- a/barry.hpp +++ b/barry.hpp @@ -3684,21 +3684,13 @@ inline BArrayDense::BArrayDense( const std::vector< size_t > & target, const std::vector< Cell_Type > & value, bool add -) { +) : N(N_), M(M_), el(N_ * M_, ZERO_CELL), el_rowsums(N_, ZERO_CELL), el_colsums(M_, ZERO_CELL) { if (source.size() != target.size()) throw std::length_error("-source- and -target- don't match on length."); if (source.size() != value.size()) throw std::length_error("-sorce- and -value- don't match on length."); - // Initializing - N = N_; - M = M_; - - el.resize(N * M, ZERO_CELL); - el_rowsums.resize(N, ZERO_CELL); - el_colsums.resize(M, ZERO_CELL); - // Writing the data for (size_t i = 0u; i < source.size(); ++i) { @@ -3741,7 +3733,7 @@ inline BArrayDense:: BArrayDense( const std::vector< size_t > & source, const std::vector< size_t > & target, bool add -) { +) : N(N_), M(M_), el(N_ * M_, ZERO_CELL), el_rowsums(N_, ZERO_CELL), el_colsums(M_, ZERO_CELL) { std::vector< Cell_Type > value(source.size(), static_cast(1.0)); @@ -3749,14 +3741,7 @@ inline BArrayDense:: BArrayDense( throw std::length_error("-source- and -target- don't match on length."); if (source.size() != value.size()) throw std::length_error("-sorce- and -value- don't match on length."); - - // Initializing - N = N_; - M = M_; - el.resize(N * M, ZERO_CELL); - el_rowsums.resize(N, ZERO_CELL); - el_colsums.resize(M, ZERO_CELL); // Writing the data for (size_t i = 0u; i < source.size(); ++i) @@ -5385,7 +5370,7 @@ COUNTERS_TEMPLATE(void, add_counter)( ) { - data.emplace_back(Counter( + data.push_back(Counter( count_fun_, init_fun_, hasher_fun_, @@ -7482,41 +7467,40 @@ class Model { */ inline double update_normalizing_constant( - const double * params, + const std::vector & params, const double * support, size_t k, size_t n ) { - std::vector< double > resv(n); + double res = 0.0; if (n > 1000u) { + std::vector< double > resv(n, 0.0); + #if defined(__OPENMP) || defined(_OPENMP) - #pragma omp parallel for shared(resv) + #pragma omp parallel for shared(resv) firstprivate(params, n, k) #elif defined(__GNUC__) && !defined(__clang__) #pragma GCC ivdep #endif for (size_t j = 0u; j < (k - 1u); ++j) { - double p = *(params + j); - double tmp = 0.0; - const double * support_n = support + i * k + 1u; + const double p = params[j]; #if defined(__OPENMP) || defined(_OPENMP) - #pragma omp simd reduction(+:tmp) + #pragma omp simd #elif defined(__GNUC__) && !defined(__clang__) #pragma GCC ivdep #endif for (size_t i = 0u; i < n; ++i) - resv[i] += (*(support_n + j)) * p; + resv[i] += (*(support + i * k + 1u + j)) * p; } - // Accumulate resv to a double res - double res = 0.0; + // Accumulate resv to a double res #if defined(__OPENMP) || defined(_OPENMP) #pragma omp simd reduction(+:res) #elif defined(__GNUC__) && !defined(__clang__) @@ -7536,17 +7520,12 @@ inline double update_normalizing_constant( const double * support_n = support + i * k + 1u; for (size_t j = 0u; j < (k - 1u); ++j) - tmp += (*(support_n + j)) * (*(params + j)); + tmp += (*(support_n + j)) * params[j]; - resv[i] = std::exp(tmp BARRY_SAFE_EXP) * (*(support + i * k)); + res += std::exp(tmp BARRY_SAFE_EXP) * (*(support + i * k)); } - // Accumulate resv to a double res - double res = 0.0; - for (size_t i = 0u; i < n; ++i) - res += resv[i]; - } @@ -8109,7 +8088,7 @@ inline double Model indices_, - const std::vector< double > numbers_ + const std::vector< size_t > & indices_, + const std::vector< double > & numbers_ ): indices(indices_), numbers(numbers_) {}; ~NetCounterData() {}; diff --git a/include/barry/counters/network.hpp b/include/barry/counters/network.hpp index 642cf2981..782738f5d 100644 --- a/include/barry/counters/network.hpp +++ b/include/barry/counters/network.hpp @@ -61,8 +61,8 @@ class NetCounterData { NetCounterData() : indices(0u), numbers(0u) {}; NetCounterData( - const std::vector< size_t > indices_, - const std::vector< double > numbers_ + const std::vector< size_t > & indices_, + const std::vector< double > & numbers_ ): indices(indices_), numbers(numbers_) {}; ~NetCounterData() {}; diff --git a/include/barry/model-meat.hpp b/include/barry/model-meat.hpp index cedc69d44..f1617272c 100644 --- a/include/barry/model-meat.hpp +++ b/include/barry/model-meat.hpp @@ -7,27 +7,28 @@ */ inline double update_normalizing_constant( - const double * params, + const std::vector & params, const double * support, size_t k, size_t n ) { - std::vector< double > resv(n); double res = 0.0; if (n > 1000u) { + std::vector< double > resv(n, 0.0); + #if defined(__OPENMP) || defined(_OPENMP) - #pragma omp parallel for shared(resv) + #pragma omp parallel for shared(resv) firstprivate(params, n, k) #elif defined(__GNUC__) && !defined(__clang__) #pragma GCC ivdep #endif for (size_t j = 0u; j < (k - 1u); ++j) { - double p = *(params + j); + const double p = params[j]; #if defined(__OPENMP) || defined(_OPENMP) #pragma omp simd @@ -59,16 +60,12 @@ inline double update_normalizing_constant( const double * support_n = support + i * k + 1u; for (size_t j = 0u; j < (k - 1u); ++j) - tmp += (*(support_n + j)) * (*(params + j)); + tmp += (*(support_n + j)) * params[j]; - resv[i] = std::exp(tmp BARRY_SAFE_EXP) * (*(support + i * k)); + res += std::exp(tmp BARRY_SAFE_EXP) * (*(support + i * k)); } - // Accumulate resv to a double res - for (size_t i = 0u; i < n; ++i) - res += resv[i]; - } @@ -631,7 +628,7 @@ inline double Model Date: Thu, 28 Sep 2023 15:31:46 -0600 Subject: [PATCH 08/14] Relocating the pragma instructions --- include/barry/model-bones.hpp | 12 ++--- include/barry/model-meat.hpp | 45 +++++----------- .../models/geese/geese-meat-likelihood.hpp | 51 +++++++++++-------- 3 files changed, 45 insertions(+), 63 deletions(-) diff --git a/include/barry/model-bones.hpp b/include/barry/model-bones.hpp index 80ecfbac6..b5afddd12 100644 --- a/include/barry/model-bones.hpp +++ b/include/barry/model-bones.hpp @@ -240,32 +240,28 @@ class Model { double likelihood( const std::vector & params, const size_t & i, - bool as_log = false, - BARRY_NCORES_ARG(=2) + bool as_log = false ); double likelihood( const std::vector & params, const Array_Type & Array_, int i = -1, - bool as_log = false, - BARRY_NCORES_ARG(=2) + bool as_log = false ); double likelihood( const std::vector & params, const std::vector & target_, const size_t & i, - bool as_log = false, - BARRY_NCORES_ARG(=2) + bool as_log = false ); double likelihood( const std::vector & params, const double * target_, const size_t & i, - bool as_log = false, - BARRY_NCORES_ARG(=2) + bool as_log = false ); double likelihood_total( diff --git a/include/barry/model-meat.hpp b/include/barry/model-meat.hpp index f1617272c..7cfa7caf9 100644 --- a/include/barry/model-meat.hpp +++ b/include/barry/model-meat.hpp @@ -20,11 +20,11 @@ inline double update_normalizing_constant( std::vector< double > resv(n, 0.0); - #if defined(__OPENMP) || defined(_OPENMP) - #pragma omp parallel for shared(resv) firstprivate(params, n, k) - #elif defined(__GNUC__) && !defined(__clang__) - #pragma GCC ivdep - #endif + // #if defined(__OPENMP) || defined(_OPENMP) + // #pragma omp parallel for shared(resv) firstprivate(params, n, k) + // #elif defined(__GNUC__) && !defined(__clang__) + // #pragma GCC ivdep + // #endif for (size_t j = 0u; j < (k - 1u); ++j) { @@ -600,13 +600,8 @@ template ::likelihood( const std::vector & params, const size_t & i, - bool as_log, - BARRY_NCORES_ARG() + bool as_log ) { - - #if defined(__OPENMP) || defined(_OPENMP) - omp_set_num_threads(ncores); - #endif // Checking if the index exists if (i >= arrays2support.size()) @@ -650,13 +645,8 @@ inline double Model & params, const Array_Type & Array_, int i, - bool as_log, - BARRY_NCORES_ARG() + bool as_log ) { - - #if defined(__OPENMP) || defined(_OPENMP) - omp_set_num_threads(ncores); - #endif // Key of the support set to use int loc; @@ -736,13 +726,8 @@ inline double Model & params, const std::vector & target_, const size_t & i, - bool as_log, - BARRY_NCORES_ARG() + bool as_log ) { - - #if defined(__OPENMP) || defined(_OPENMP) - omp_set_num_threads(ncores); - #endif // Checking if the index exists if (i >= arrays2support.size()) @@ -804,13 +789,8 @@ inline double Model & params, const double * target_, const size_t & i, - bool as_log, - BARRY_NCORES_ARG() + bool as_log ) { - - #if defined(__OPENMP) || defined(_OPENMP) - omp_set_num_threads(ncores); - #endif // Checking if the index exists if (i >= arrays2support.size()) @@ -879,13 +859,12 @@ inline double Model * preseq; @@ -60,38 +58,47 @@ inline double Geese::likelihood( { // Starting the prob - double totprob = 0.0; + size_t array_id = node.narray[s]; // Retrieving the sets of arrays - const std::vector< PhyloArray > * psets = - model->get_pset(node.narray[s]); + const std::vector< PhyloArray > & psets = + *(model->get_pset(array_id)); - const std::vector * psets_stats = - model->get_pset_stats(node.narray[s]); + const std::vector & psets_stats = + *(model->get_pset_stats(array_id)); std::vector< std::vector< size_t > > & locations = pset_loc[ - arrays2support->operator[](node.narray[s]) + arrays2support->operator[](array_id) ]; // Summation over all possible values of X + const auto & node_offspring = node.offspring; + std::vector< double > totprob_n(psets.size(), 0.0); size_t nstate = 0u; - size_t narray = 0u; - for (auto x = psets->begin(); x != psets->end(); ++x) + #if defined(_OPENMP) || defined(__OPENMP) + #pragma omp parallel for num_threads(ncores) \ + shared(locations, psets, psets_stats, totprob_n) \ + firstprivate(nfunctions, par0, node_offspring, array_id) + #endif + for (size_t n = 0u; n < psets.size(); ++n) // x = psets->begin(); x != psets->end(); ++x) { - if (!x->is_dense()) + // Retrieving the pset + const auto & x = psets[n]; + + if (!x.is_dense()) throw std::logic_error("This is only supported for dense arrays."); - std::vector< size_t > & location_x = locations[narray++]; + const std::vector< size_t > & location_x = locations[n]; // Extracting the possible values of each offspring double off_mult = 1.0; - for (auto o = 0u; o < x->ncol(); ++o) + for (auto o = 0u; o < x.ncol(); ++o) { // Setting the node - n_off = node.offspring[o]; + const Node * n_off = node_offspring[o]; // In the case that the offspring is a leaf, then we need to // check whether the state makes sense. @@ -102,7 +109,7 @@ inline double Geese::likelihood( if (n_off->annotations[f] != 9u) { - if (x->operator()(f, o) != n_off->annotations[f]) + if (x(f, o) != n_off->annotations[f]) { off_mult = -1.0; @@ -123,7 +130,7 @@ inline double Geese::likelihood( } // Retrieving the location to the respective set of probabilities - off_mult *= node.offspring[o]->subtree_prob[location_x[o]]; + off_mult *= n_off->subtree_prob[location_x[o]]; } @@ -140,7 +147,7 @@ inline double Geese::likelihood( std::vector< double > temp_stats; temp_stats.reserve(par0.size()); for (auto p = 0u; p < par0.size(); ++p) - temp_stats.push_back(psets_stats->operator[](par0.size() * nstate + p)); + temp_stats.push_back(psets_stats[par0.size() * nstate + p]); nstate++; @@ -149,9 +156,8 @@ inline double Geese::likelihood( off_mult *= model->likelihood( par0, temp_stats, - node.narray[s], - false, - ncores + array_id, + false ); } catch (std::exception & e) { @@ -171,12 +177,13 @@ inline double Geese::likelihood( } // Adding to the total probabilities - totprob += off_mult; + totprob_n[n] = off_mult; } // Setting the probability at the node - node.subtree_prob[s] = totprob; + for (size_t n = 0u; n < psets.size(); ++n) + node.subtree_prob[s] += totprob_n[n]; } From 70ba5fe710d081ebbecfbda778998b536bd91098 Mon Sep 17 00:00:00 2001 From: "George G. Vega Yon" Date: Mon, 2 Oct 2023 09:41:04 -0600 Subject: [PATCH 09/14] New function to update norm const (all) --- include/barry/model-bones.hpp | 27 ++++-- include/barry/model-meat.hpp | 91 ++++++++++++++----- include/barry/models/geese/flock-bones.hpp | 6 +- include/barry/models/geese/flock-meat.hpp | 7 +- include/barry/models/geese/geese-bones.hpp | 3 +- .../models/geese/geese-meat-likelihood.hpp | 11 ++- tests/10-geese-predict.cpp | 1 + 7 files changed, 108 insertions(+), 38 deletions(-) diff --git a/include/barry/model-bones.hpp b/include/barry/model-bones.hpp index b5afddd12..fd0dc1852 100644 --- a/include/barry/model-bones.hpp +++ b/include/barry/model-bones.hpp @@ -60,9 +60,9 @@ class Model { */ ///@{ std::vector< std::vector< double > > stats_support; ///< Sufficient statistics of the model (support) - std::vector< size_t > stats_support_n_arrays; ///< Number of arrays included per support. + std::vector< size_t > stats_support_n_arrays; ///< Number of arrays included per support. std::vector< std::vector< double > > stats_target; ///< Target statistics of the model - std::vector< size_t > arrays2support; + std::vector< size_t > arrays2support; ///@} /** @@ -125,6 +125,14 @@ class Model { std::vector< std::string > transform_model_term_names; public: + + /** + * @brief Computes the normalizing constant for a given set of parameters + * @details This function will compute the normalizing constant for a given + * set of parameters. It will also update the `normalizing_constants` member + * variable. + */ + void update_normalizing_constants(const std::vector< double > & params); void set_rengine(std::mt19937 * rengine_, bool delete_ = false) { @@ -240,34 +248,39 @@ class Model { double likelihood( const std::vector & params, const size_t & i, - bool as_log = false + bool as_log = false, + bool no_update_normconst = false ); double likelihood( const std::vector & params, const Array_Type & Array_, int i = -1, - bool as_log = false + bool as_log = false, + bool no_update_normconst = false ); double likelihood( const std::vector & params, const std::vector & target_, const size_t & i, - bool as_log = false + bool as_log = false, + bool no_update_normconst = false ); double likelihood( const std::vector & params, const double * target_, const size_t & i, - bool as_log = false + bool as_log = false, + bool no_update_normconst = false ); double likelihood_total( const std::vector & params, bool as_log = false, - BARRY_NCORES_ARG(=2) + BARRY_NCORES_ARG(=2), + bool no_update_normconst = false ); ///@} diff --git a/include/barry/model-meat.hpp b/include/barry/model-meat.hpp index 7cfa7caf9..7a977bd2b 100644 --- a/include/barry/model-meat.hpp +++ b/include/barry/model-meat.hpp @@ -150,6 +150,37 @@ inline double likelihood_( } +template < + typename Array_Type, + typename Data_Counter_Type, + typename Data_Rule_Type, + typename Data_Rule_Dyn_Type + > +inline void Model::update_normalizing_constants( + const std::vector< double > & params +) { + + #if defined(__OPENMP) || defined(_OPENMP) + #pragma omp parallel for firstprivate(params) \ + shared(stats_support, normalizing_constants, first_calc_done) + #endif + for (size_t i = 0u; i < stats_support.size(); ++i) + { + + size_t k = params.size() + 1u; + size_t n = stats_support[i].size() / k; + + first_calc_done[i] = true; + normalizing_constants[i] = update_normalizing_constant( + params, &stats_support[i][0u], k, n + ); + + } + + return; + +} + template < typename Array_Type, typename Data_Counter_Type, @@ -600,7 +631,8 @@ template ::likelihood( const std::vector & params, const size_t & i, - bool as_log + bool as_log, + bool no_update_normconst ) { // Checking if the index exists @@ -614,7 +646,7 @@ inline double Model::infinity() : 0.0; // Checking if we have updated the normalizing constant or not - if (!first_calc_done[idx] || !vec_equal_approx(params, params_last[idx]) ) + if (!no_update_normconst && (!first_calc_done[idx] || !vec_equal_approx(params, params_last[idx]))) { first_calc_done[idx] = true; @@ -645,7 +677,8 @@ inline double Model & params, const Array_Type & Array_, int i, - bool as_log + bool as_log, + bool no_update_normconst ) { // Key of the support set to use @@ -691,7 +724,7 @@ inline double Model & params, const std::vector & target_, const size_t & i, - bool as_log + bool as_log, + bool no_update_normconst ) { // Checking if the index exists @@ -759,7 +793,7 @@ inline double Model & params, const double * target_, const size_t & i, - bool as_log + bool as_log, + bool no_update_normconst ) { // Checking if the index exists @@ -828,7 +863,7 @@ inline double Model::likelihood_total( const std::vector & params, bool as_log, - BARRY_NCORES_ARG() + BARRY_NCORES_ARG(), + bool no_update_normconst ) { size_t params_last_size = params_last.size(); @@ -865,24 +901,33 @@ inline double Model > & annotations, std::vector< size_t > & geneid, - std::vector< int > & parent, - std::vector< bool > & duplication + std::vector< int > & parent, + std::vector< bool > & duplication ); /** diff --git a/include/barry/models/geese/flock-meat.hpp b/include/barry/models/geese/flock-meat.hpp index 5ddae20c1..11e1f2993 100644 --- a/include/barry/models/geese/flock-meat.hpp +++ b/include/barry/models/geese/flock-meat.hpp @@ -147,17 +147,20 @@ inline double Flock::likelihood_joint( double ans = as_log ? 0.0: 1.0; + std::vector< double > par0(par.begin(), par.end() - nfunctions); + model.update_normalizing_constants(par0); + if (as_log) { for (auto& d : this->dat) - ans += d.likelihood(par, as_log, use_reduced_sequence, ncores); + ans += d.likelihood(par, as_log, use_reduced_sequence, ncores, true); } else { for (auto& d : this->dat) - ans *= d.likelihood(par, as_log, use_reduced_sequence, ncores); + ans *= d.likelihood(par, as_log, use_reduced_sequence, ncores, true); } diff --git a/include/barry/models/geese/geese-bones.hpp b/include/barry/models/geese/geese-bones.hpp index 9492dcac3..8d38832fe 100644 --- a/include/barry/models/geese/geese-bones.hpp +++ b/include/barry/models/geese/geese-bones.hpp @@ -216,7 +216,8 @@ class Geese { const std::vector< double > & par, bool as_log = false, bool use_reduced_sequence = true, - size_t ncores = 1u + size_t ncores = 1u, + bool no_update_normalizing_constant = false ); double likelihood_exhaust(const std::vector< double > & par); diff --git a/include/barry/models/geese/geese-meat-likelihood.hpp b/include/barry/models/geese/geese-meat-likelihood.hpp index 0c22e70da..365df2ce2 100644 --- a/include/barry/models/geese/geese-meat-likelihood.hpp +++ b/include/barry/models/geese/geese-meat-likelihood.hpp @@ -7,9 +7,11 @@ inline double Geese::likelihood( const std::vector< double > & par, bool as_log, bool use_reduced_sequence, - size_t ncores + size_t ncores, + bool no_update_normalizing_constant ) { + // Checking whether the model is initialized INITIALIZED() // Splitting the probabilities @@ -22,6 +24,10 @@ inline double Geese::likelihood( double ll = 0.0; + // Updating normalizing constants + if (!no_update_normalizing_constant) + model->update_normalizing_constants(par0); + // Following the prunning sequence std::vector< size_t > * preseq; @@ -157,7 +163,8 @@ inline double Geese::likelihood( par0, temp_stats, array_id, - false + false, + true ); } catch (std::exception & e) { diff --git a/tests/10-geese-predict.cpp b/tests/10-geese-predict.cpp index 497054322..6dae5b4a6 100644 --- a/tests/10-geese-predict.cpp +++ b/tests/10-geese-predict.cpp @@ -1,3 +1,4 @@ +#undef _OPENMP #include "tests.hpp" #include "../include/barry/models/geese.hpp" From b55cc88348b49aaca319b589e3ec76a76d82c006 Mon Sep 17 00:00:00 2001 From: "George G. Vega Yon" Date: Mon, 2 Oct 2023 09:54:38 -0600 Subject: [PATCH 10/14] Fixing geese likelihood w openmp --- include/barry/models/geese/geese-meat-likelihood.hpp | 1 + tests/07-geese-flock.cpp | 1 + tests/10-geese-predict.cpp | 1 - 3 files changed, 2 insertions(+), 1 deletion(-) diff --git a/include/barry/models/geese/geese-meat-likelihood.hpp b/include/barry/models/geese/geese-meat-likelihood.hpp index 365df2ce2..4b78e289a 100644 --- a/include/barry/models/geese/geese-meat-likelihood.hpp +++ b/include/barry/models/geese/geese-meat-likelihood.hpp @@ -189,6 +189,7 @@ inline double Geese::likelihood( } // Setting the probability at the node + node.subtree_prob[s] = 0.0; for (size_t n = 0u; n < psets.size(); ++n) node.subtree_prob[s] += totprob_n[n]; diff --git a/tests/07-geese-flock.cpp b/tests/07-geese-flock.cpp index 06ae7bf8d..dd984ce36 100644 --- a/tests/07-geese-flock.cpp +++ b/tests/07-geese-flock.cpp @@ -1,3 +1,4 @@ +#undef _OPENMP #ifndef CATCH_CONFIG_MAIN // #include "/opt/intel/oneapi/advisor/2022.0.0/include/advisor-annotate.h" #include diff --git a/tests/10-geese-predict.cpp b/tests/10-geese-predict.cpp index 6dae5b4a6..497054322 100644 --- a/tests/10-geese-predict.cpp +++ b/tests/10-geese-predict.cpp @@ -1,4 +1,3 @@ -#undef _OPENMP #include "tests.hpp" #include "../include/barry/models/geese.hpp" From d6c45559eea18e4c454ddea2deebaa57c6337a75 Mon Sep 17 00:00:00 2001 From: "George G. Vega Yon" Date: Mon, 2 Oct 2023 11:05:51 -0600 Subject: [PATCH 11/14] OpenMP finally working --- include/barry/model-bones.hpp | 6 +-- include/barry/model-meat.hpp | 40 ++-------------- .../models/geese/geese-meat-likelihood.hpp | 47 ++++++------------- tests/07-geese-flock.cpp | 1 - 4 files changed, 19 insertions(+), 75 deletions(-) diff --git a/include/barry/model-bones.hpp b/include/barry/model-bones.hpp index fd0dc1852..e454b86b3 100644 --- a/include/barry/model-bones.hpp +++ b/include/barry/model-bones.hpp @@ -293,11 +293,7 @@ class Model { * constant. */ ///@{ - double get_norm_const( - const std::vector< double > & params, - const size_t & i, - bool as_log = false - ); + std::vector< double > & get_normalizing_constants(); const std::vector< Array_Type > * get_pset( const size_t & i diff --git a/include/barry/model-meat.hpp b/include/barry/model-meat.hpp index 7a977bd2b..7aa8893ac 100644 --- a/include/barry/model-meat.hpp +++ b/include/barry/model-meat.hpp @@ -106,11 +106,6 @@ inline double likelihood_( double numerator = 0.0; // Computing the numerator - #if defined(__OPENMP) || defined(_OPENMP) - #pragma omp simd reduction(+:numerator) - #elif defined(__GNUC__) && !defined(__clang__) - #pragma GCC ivdep - #endif for (size_t j = 0u; j < params.size(); ++j) numerator += *(stats_target + j) * params[j]; @@ -970,39 +965,10 @@ inline double Model -inline double Model:: get_norm_const( - const std::vector & params, - const size_t & i, - bool as_log -) { - - // Checking if the index exists - if (i >= arrays2support.size()) - throw std::range_error("The requested support is out of range"); - - const auto id = arrays2support[i]; - - // Checking if we have updated the normalizing constant or not - if (!first_calc_done[id] || !vec_equal_approx(params, params_last[id]) ) - { - - first_calc_done[id] = true; - - size_t k = params.size() + 1u; - size_t n = stats_support[id].size() / k; - - normalizing_constants[id] = update_normalizing_constant( - params, &stats_support[id][0u], k, n - ); - - params_last[id] = params; - - } +inline std::vector< double > & +Model:: get_normalizing_constants() { - return as_log ? - std::log(normalizing_constants[id]) : - normalizing_constants[id] - ; + return normalizing_constants; } diff --git a/include/barry/models/geese/geese-meat-likelihood.hpp b/include/barry/models/geese/geese-meat-likelihood.hpp index 4b78e289a..d7fa7877c 100644 --- a/include/barry/models/geese/geese-meat-likelihood.hpp +++ b/include/barry/models/geese/geese-meat-likelihood.hpp @@ -29,27 +29,16 @@ inline double Geese::likelihood( model->update_normalizing_constants(par0); // Following the prunning sequence - std::vector< size_t > * preseq; - - if (use_reduced_sequence) - { - - preseq = &this->reduced_sequence; - - } - else - { - - preseq = &this->sequence; - - } + const std::vector< size_t > & preseq = use_reduced_sequence ? + this->reduced_sequence : this->sequence; // The first time it is called, it need to generate the corresponding // hashes of the columns so it is fast to access then (saves time // hashing and looking in the map.) - auto arrays2support = model->get_arrays2support(); + const auto arrays2support = model->get_arrays2support(); + const auto & normconst = model->get_normalizing_constants(); - for (auto& i : *preseq) + for (auto& i : preseq) { // We cannot compute probability at the leaf, we need to continue @@ -80,11 +69,10 @@ inline double Geese::likelihood( // Summation over all possible values of X const auto & node_offspring = node.offspring; std::vector< double > totprob_n(psets.size(), 0.0); - size_t nstate = 0u; #if defined(_OPENMP) || defined(__OPENMP) #pragma omp parallel for num_threads(ncores) \ shared(locations, psets, psets_stats, totprob_n) \ - firstprivate(nfunctions, par0, node_offspring, array_id) + firstprivate(nfunctions, par0, node_offspring, array_id, normconst) #endif for (size_t n = 0u; n < psets.size(); ++n) // x = psets->begin(); x != psets->end(); ++x) { @@ -142,30 +130,25 @@ inline double Geese::likelihood( // Is this state valid? if (off_mult < 0.0) - { - - ++nstate; continue; - - } // Multiplying by P(x|x_n), the transition probability std::vector< double > temp_stats; temp_stats.reserve(par0.size()); for (auto p = 0u; p < par0.size(); ++p) - temp_stats.push_back(psets_stats[par0.size() * nstate + p]); - - nstate++; + temp_stats.push_back(psets_stats[par0.size() * n + p]); // Use try catch in the following line try { - off_mult *= model->likelihood( + + off_mult *= barry::likelihood_( + &temp_stats[0u], par0, - temp_stats, - array_id, - false, - true + normconst[arrays2support->operator[](array_id)], + par0.size(), + false ); + } catch (std::exception & e) { auto err = std::string(e.what()); @@ -221,7 +204,7 @@ inline double Geese::likelihood( // In the case that the sequence is empty, then it means // that we are looking at a completely unnanotated tree, // thus the likelihood should be one - if (preseq->size() == 0u) + if (preseq.size() == 0u) return as_log ? -std::numeric_limits::infinity() : 1.0; diff --git a/tests/07-geese-flock.cpp b/tests/07-geese-flock.cpp index dd984ce36..06ae7bf8d 100644 --- a/tests/07-geese-flock.cpp +++ b/tests/07-geese-flock.cpp @@ -1,4 +1,3 @@ -#undef _OPENMP #ifndef CATCH_CONFIG_MAIN // #include "/opt/intel/oneapi/advisor/2022.0.0/include/advisor-annotate.h" #include From 00f2c5b6561123a982739db42384a6a3e157c51c Mon Sep 17 00:00:00 2001 From: "George G. Vega Yon" Date: Mon, 2 Oct 2023 11:35:32 -0600 Subject: [PATCH 12/14] Barrier for make-sense optimization --- include/barry/model-bones.hpp | 5 ++++- include/barry/model-meat.hpp | 9 +++++++-- include/barry/models/geese/flock-meat.hpp | 2 +- include/barry/models/geese/geese-meat-likelihood.hpp | 6 +++++- 4 files changed, 17 insertions(+), 5 deletions(-) diff --git a/include/barry/model-bones.hpp b/include/barry/model-bones.hpp index e454b86b3..4f32a7b23 100644 --- a/include/barry/model-bones.hpp +++ b/include/barry/model-bones.hpp @@ -132,7 +132,10 @@ class Model { * set of parameters. It will also update the `normalizing_constants` member * variable. */ - void update_normalizing_constants(const std::vector< double > & params); + void update_normalizing_constants( + const std::vector< double > & params, + BARRY_NCORES_ARG(=1) + ); void set_rengine(std::mt19937 * rengine_, bool delete_ = false) { diff --git a/include/barry/model-meat.hpp b/include/barry/model-meat.hpp index 7aa8893ac..313a56b94 100644 --- a/include/barry/model-meat.hpp +++ b/include/barry/model-meat.hpp @@ -152,11 +152,16 @@ template < typename Data_Rule_Dyn_Type > inline void Model::update_normalizing_constants( - const std::vector< double > & params + const std::vector< double > & params, + size_t ncores ) { + + // Barrier to make sure paralelization makes sense + if ((ncores > 1u) && (stats_support.size() < 1000u)) + ncores = 1u; #if defined(__OPENMP) || defined(_OPENMP) - #pragma omp parallel for firstprivate(params) \ + #pragma omp parallel for firstprivate(params) num_threads(ncores) \ shared(stats_support, normalizing_constants, first_calc_done) #endif for (size_t i = 0u; i < stats_support.size(); ++i) diff --git a/include/barry/models/geese/flock-meat.hpp b/include/barry/models/geese/flock-meat.hpp index 11e1f2993..bd3297464 100644 --- a/include/barry/models/geese/flock-meat.hpp +++ b/include/barry/models/geese/flock-meat.hpp @@ -148,7 +148,7 @@ inline double Flock::likelihood_joint( double ans = as_log ? 0.0: 1.0; std::vector< double > par0(par.begin(), par.end() - nfunctions); - model.update_normalizing_constants(par0); + model.update_normalizing_constants(par0, ncores); if (as_log) { diff --git a/include/barry/models/geese/geese-meat-likelihood.hpp b/include/barry/models/geese/geese-meat-likelihood.hpp index d7fa7877c..fdae06552 100644 --- a/include/barry/models/geese/geese-meat-likelihood.hpp +++ b/include/barry/models/geese/geese-meat-likelihood.hpp @@ -26,7 +26,7 @@ inline double Geese::likelihood( // Updating normalizing constants if (!no_update_normalizing_constant) - model->update_normalizing_constants(par0); + model->update_normalizing_constants(par0, ncores); // Following the prunning sequence const std::vector< size_t > & preseq = use_reduced_sequence ? @@ -65,6 +65,10 @@ inline double Geese::likelihood( std::vector< std::vector< size_t > > & locations = pset_loc[ arrays2support->operator[](array_id) ]; + + // Making sure parallelization makes sense + if (psets.size() < 1000) + ncores = 1u; // Summation over all possible values of X const auto & node_offspring = node.offspring; From 0645d9204f84af5e9a45039463f4fe4b0be84b6a Mon Sep 17 00:00:00 2001 From: "George G. Vega Yon" Date: Mon, 2 Oct 2023 15:09:04 -0600 Subject: [PATCH 13/14] Avoiding overhead --- include/barry/model-meat.hpp | 12 +- .../models/geese/geese-meat-likelihood.hpp | 242 +++++++++++------- 2 files changed, 146 insertions(+), 108 deletions(-) diff --git a/include/barry/model-meat.hpp b/include/barry/model-meat.hpp index 313a56b94..11bbbd408 100644 --- a/include/barry/model-meat.hpp +++ b/include/barry/model-meat.hpp @@ -20,11 +20,6 @@ inline double update_normalizing_constant( std::vector< double > resv(n, 0.0); - // #if defined(__OPENMP) || defined(_OPENMP) - // #pragma omp parallel for shared(resv) firstprivate(params, n, k) - // #elif defined(__GNUC__) && !defined(__clang__) - // #pragma GCC ivdep - // #endif for (size_t j = 0u; j < (k - 1u); ++j) { @@ -162,7 +157,8 @@ inline void Model & totprob_n, + const std::vector< double > & par0, + const std::vector> & states, + const std::vector< PhyloArray > & psets, + const std::vector & psets_stats, + std::vector< std::vector< size_t > > & locations, + const std::vector & node_offspring +) +{ + // Retrieving the pset + const auto & x = psets[n]; + + if (!x.is_dense()) + throw std::logic_error("This is only supported for dense arrays."); + + const std::vector< size_t > & location_x = locations[n]; + + // Extracting the possible values of each offspring + double off_mult = 1.0; + + for (auto o = 0u; o < x.ncol(); ++o) + { + + // Setting the node + const Node * n_off = node_offspring[o]; + + // In the case that the offspring is a leaf, then we need to + // check whether the state makes sense. + if (n_off->is_leaf()) + { + for (auto f = 0u; f < nfunctions; ++f) + { + if (n_off->annotations[f] != 9u) + { + + if (x(f, o) != n_off->annotations[f]) + { + + off_mult = -1.0; + break; + + } + + } + + } + + // Going out + if (off_mult < 0) + break; + + continue; + + } + + // Retrieving the location to the respective set of probabilities + off_mult *= n_off->subtree_prob[location_x[o]]; + + } + + // Is this state valid? + if (off_mult < 0.0) + return; + + // Multiplying by P(x|x_n), the transition probability + std::vector< double > temp_stats; + temp_stats.reserve(par0.size()); + for (auto p = 0u; p < par0.size(); ++p) + temp_stats.push_back(psets_stats[par0.size() * n + p]); + + // Use try catch in the following line + try { + + off_mult *= barry::likelihood_( + &temp_stats[0u], + par0, + norm_const_i, + par0.size(), + false + ); + + } catch (std::exception & e) { + + auto err = std::string(e.what()); + + std::string state_str = ""; + for (const auto & ss : states[s]) + state_str += std::to_string(ss) + " "; + + err = "Error computing the likelihood at node " + + std::to_string(node_id) + " with state " + state_str + + ". Error message:\n" + + err; + + throw std::runtime_error(err); + + } + + // Adding to the total probabilities + totprob_n[n] = off_mult; + +} + inline double Geese::likelihood( const std::vector< double > & par, bool as_log, @@ -35,7 +144,7 @@ inline double Geese::likelihood( // The first time it is called, it need to generate the corresponding // hashes of the columns so it is fast to access then (saves time // hashing and looking in the map.) - const auto arrays2support = model->get_arrays2support(); + const auto & arrays2support = *(model->get_arrays2support()); const auto & normconst = model->get_normalizing_constants(); for (auto& i : preseq) @@ -47,6 +156,7 @@ inline double Geese::likelihood( // Since we are using this a lot... Node & node = nodes[i]; + const size_t node_id = node.id; // Iterating through states for (size_t s = 0u; s < states.size(); ++s) @@ -54,6 +164,8 @@ inline double Geese::likelihood( // Starting the prob size_t array_id = node.narray[s]; + size_t support_id = arrays2support[array_id]; + double norm_const_i = normconst[support_id]; // Retrieving the sets of arrays const std::vector< PhyloArray > & psets = @@ -62,9 +174,7 @@ inline double Geese::likelihood( const std::vector & psets_stats = *(model->get_pset_stats(array_id)); - std::vector< std::vector< size_t > > & locations = pset_loc[ - arrays2support->operator[](array_id) - ]; + std::vector< std::vector< size_t > > & locations = pset_loc[support_id]; // Making sure parallelization makes sense if (psets.size() < 1000) @@ -74,106 +184,42 @@ inline double Geese::likelihood( const auto & node_offspring = node.offspring; std::vector< double > totprob_n(psets.size(), 0.0); #if defined(_OPENMP) || defined(__OPENMP) - #pragma omp parallel for num_threads(ncores) \ - shared(locations, psets, psets_stats, totprob_n) \ - firstprivate(nfunctions, par0, node_offspring, array_id, normconst) - #endif - for (size_t n = 0u; n < psets.size(); ++n) // x = psets->begin(); x != psets->end(); ++x) + if (ncores > 1u) { - - // Retrieving the pset - const auto & x = psets[n]; - - if (!x.is_dense()) - throw std::logic_error("This is only supported for dense arrays."); - - const std::vector< size_t > & location_x = locations[n]; - - // Extracting the possible values of each offspring - double off_mult = 1.0; - - for (auto o = 0u; o < x.ncol(); ++o) + #pragma omp parallel for num_threads(ncores) \ + shared(\ + locations, psets, psets_stats, totprob_n, node, states,\ + par0, node_offspring, nfunctions, array_id, norm_const_i, \ + s, node_id) default(none) + for (size_t n = 0u; n < psets.size(); ++n) { - - // Setting the node - const Node * n_off = node_offspring[o]; - - // In the case that the offspring is a leaf, then we need to - // check whether the state makes sense. - if (n_off->is_leaf()) - { - for (auto f = 0u; f < nfunctions; ++f) - { - if (n_off->annotations[f] != 9u) - { - - if (x(f, o) != n_off->annotations[f]) - { - - off_mult = -1.0; - break; - - } - - } - - } - - // Going out - if (off_mult < 0) - break; - - continue; - - } - - // Retrieving the location to the respective set of probabilities - off_mult *= n_off->subtree_prob[location_x[o]]; - + pset_loop( + n, s, nfunctions, node_id, norm_const_i, totprob_n, + par0, states, psets, psets_stats, locations, + node_offspring + ); } - - // Is this state valid? - if (off_mult < 0.0) - continue; - - // Multiplying by P(x|x_n), the transition probability - std::vector< double > temp_stats; - temp_stats.reserve(par0.size()); - for (auto p = 0u; p < par0.size(); ++p) - temp_stats.push_back(psets_stats[par0.size() * n + p]); - - // Use try catch in the following line - try { - - off_mult *= barry::likelihood_( - &temp_stats[0u], - par0, - normconst[arrays2support->operator[](array_id)], - par0.size(), - false + } else { + for (size_t n = 0u; n < psets.size(); ++n) + { + pset_loop( + n, s, nfunctions, node_id, norm_const_i, totprob_n, + par0, states, psets, psets_stats, locations, + node_offspring ); - - } catch (std::exception & e) { - - auto err = std::string(e.what()); - - std::string state_str = ""; - for (const auto & ss : states[s]) - state_str += std::to_string(ss) + " "; - - err = "Error computing the likelihood at node " + - std::to_string(node.id) + " with state " + state_str + - ". Error message:\n" + - err; - - throw std::runtime_error(err); - } - - // Adding to the total probabilities - totprob_n[n] = off_mult; - } + #else + for (size_t n = 0u; n < psets.size(); ++n) + { + pset_loop( + n, s, nfunctions, node_id, norm_const_i, totprob_n, + par0, states, psets, psets_stats, locations, + node_offspring + ); + } + #endif + // Setting the probability at the node node.subtree_prob[s] = 0.0; From 20f32970e4941f6818b14a6212eb7622c73bd246 Mon Sep 17 00:00:00 2001 From: "George G. Vega Yon" Date: Mon, 2 Oct 2023 17:02:53 -0600 Subject: [PATCH 14/14] Removing extra mem allocation+copy in geese::likelihood --- include/barry/model-meat.hpp | 2 +- .../models/geese/geese-meat-likelihood.hpp | 17 ++++++++--------- 2 files changed, 9 insertions(+), 10 deletions(-) diff --git a/include/barry/model-meat.hpp b/include/barry/model-meat.hpp index 11bbbd408..78b00dcbd 100644 --- a/include/barry/model-meat.hpp +++ b/include/barry/model-meat.hpp @@ -101,7 +101,7 @@ inline double likelihood_( double numerator = 0.0; // Computing the numerator - for (size_t j = 0u; j < params.size(); ++j) + for (size_t j = 0u; j < n_params; ++j) numerator += *(stats_target + j) * params[j]; if (!log_) diff --git a/include/barry/models/geese/geese-meat-likelihood.hpp b/include/barry/models/geese/geese-meat-likelihood.hpp index 27616d4e8..ca55f9d60 100644 --- a/include/barry/models/geese/geese-meat-likelihood.hpp +++ b/include/barry/models/geese/geese-meat-likelihood.hpp @@ -14,7 +14,7 @@ inline void pset_loop( const std::vector> & states, const std::vector< PhyloArray > & psets, const std::vector & psets_stats, - std::vector< std::vector< size_t > > & locations, + const std::vector< std::vector< size_t > > & locations, const std::vector & node_offspring ) { @@ -73,17 +73,11 @@ inline void pset_loop( if (off_mult < 0.0) return; - // Multiplying by P(x|x_n), the transition probability - std::vector< double > temp_stats; - temp_stats.reserve(par0.size()); - for (auto p = 0u; p < par0.size(); ++p) - temp_stats.push_back(psets_stats[par0.size() * n + p]); - // Use try catch in the following line try { off_mult *= barry::likelihood_( - &temp_stats[0u], + &psets_stats[par0.size() * n], par0, norm_const_i, par0.size(), @@ -223,8 +217,13 @@ inline double Geese::likelihood( // Setting the probability at the node node.subtree_prob[s] = 0.0; + auto & nsp = node.subtree_prob[s]; + #if defined(_OPENMP) || defined(__OPENMP) + #pragma omp simd reduction(+:nsp) + #endif for (size_t n = 0u; n < psets.size(); ++n) - node.subtree_prob[s] += totprob_n[n]; + nsp += totprob_n[n]; + }