From 70ba5fe710d081ebbecfbda778998b536bd91098 Mon Sep 17 00:00:00 2001 From: "George G. Vega Yon" Date: Mon, 2 Oct 2023 09:41:04 -0600 Subject: [PATCH] New function to update norm const (all) --- include/barry/model-bones.hpp | 27 ++++-- include/barry/model-meat.hpp | 91 ++++++++++++++----- include/barry/models/geese/flock-bones.hpp | 6 +- include/barry/models/geese/flock-meat.hpp | 7 +- include/barry/models/geese/geese-bones.hpp | 3 +- .../models/geese/geese-meat-likelihood.hpp | 11 ++- tests/10-geese-predict.cpp | 1 + 7 files changed, 108 insertions(+), 38 deletions(-) diff --git a/include/barry/model-bones.hpp b/include/barry/model-bones.hpp index b5afddd12..fd0dc1852 100644 --- a/include/barry/model-bones.hpp +++ b/include/barry/model-bones.hpp @@ -60,9 +60,9 @@ class Model { */ ///@{ std::vector< std::vector< double > > stats_support; ///< Sufficient statistics of the model (support) - std::vector< size_t > stats_support_n_arrays; ///< Number of arrays included per support. + std::vector< size_t > stats_support_n_arrays; ///< Number of arrays included per support. std::vector< std::vector< double > > stats_target; ///< Target statistics of the model - std::vector< size_t > arrays2support; + std::vector< size_t > arrays2support; ///@} /** @@ -125,6 +125,14 @@ class Model { std::vector< std::string > transform_model_term_names; public: + + /** + * @brief Computes the normalizing constant for a given set of parameters + * @details This function will compute the normalizing constant for a given + * set of parameters. It will also update the `normalizing_constants` member + * variable. + */ + void update_normalizing_constants(const std::vector< double > & params); void set_rengine(std::mt19937 * rengine_, bool delete_ = false) { @@ -240,34 +248,39 @@ class Model { double likelihood( const std::vector & params, const size_t & i, - bool as_log = false + bool as_log = false, + bool no_update_normconst = false ); double likelihood( const std::vector & params, const Array_Type & Array_, int i = -1, - bool as_log = false + bool as_log = false, + bool no_update_normconst = false ); double likelihood( const std::vector & params, const std::vector & target_, const size_t & i, - bool as_log = false + bool as_log = false, + bool no_update_normconst = false ); double likelihood( const std::vector & params, const double * target_, const size_t & i, - bool as_log = false + bool as_log = false, + bool no_update_normconst = false ); double likelihood_total( const std::vector & params, bool as_log = false, - BARRY_NCORES_ARG(=2) + BARRY_NCORES_ARG(=2), + bool no_update_normconst = false ); ///@} diff --git a/include/barry/model-meat.hpp b/include/barry/model-meat.hpp index 7cfa7caf9..7a977bd2b 100644 --- a/include/barry/model-meat.hpp +++ b/include/barry/model-meat.hpp @@ -150,6 +150,37 @@ inline double likelihood_( } +template < + typename Array_Type, + typename Data_Counter_Type, + typename Data_Rule_Type, + typename Data_Rule_Dyn_Type + > +inline void Model::update_normalizing_constants( + const std::vector< double > & params +) { + + #if defined(__OPENMP) || defined(_OPENMP) + #pragma omp parallel for firstprivate(params) \ + shared(stats_support, normalizing_constants, first_calc_done) + #endif + for (size_t i = 0u; i < stats_support.size(); ++i) + { + + size_t k = params.size() + 1u; + size_t n = stats_support[i].size() / k; + + first_calc_done[i] = true; + normalizing_constants[i] = update_normalizing_constant( + params, &stats_support[i][0u], k, n + ); + + } + + return; + +} + template < typename Array_Type, typename Data_Counter_Type, @@ -600,7 +631,8 @@ template ::likelihood( const std::vector & params, const size_t & i, - bool as_log + bool as_log, + bool no_update_normconst ) { // Checking if the index exists @@ -614,7 +646,7 @@ inline double Model::infinity() : 0.0; // Checking if we have updated the normalizing constant or not - if (!first_calc_done[idx] || !vec_equal_approx(params, params_last[idx]) ) + if (!no_update_normconst && (!first_calc_done[idx] || !vec_equal_approx(params, params_last[idx]))) { first_calc_done[idx] = true; @@ -645,7 +677,8 @@ inline double Model & params, const Array_Type & Array_, int i, - bool as_log + bool as_log, + bool no_update_normconst ) { // Key of the support set to use @@ -691,7 +724,7 @@ inline double Model & params, const std::vector & target_, const size_t & i, - bool as_log + bool as_log, + bool no_update_normconst ) { // Checking if the index exists @@ -759,7 +793,7 @@ inline double Model & params, const double * target_, const size_t & i, - bool as_log + bool as_log, + bool no_update_normconst ) { // Checking if the index exists @@ -828,7 +863,7 @@ inline double Model::likelihood_total( const std::vector & params, bool as_log, - BARRY_NCORES_ARG() + BARRY_NCORES_ARG(), + bool no_update_normconst ) { size_t params_last_size = params_last.size(); @@ -865,24 +901,33 @@ inline double Model > & annotations, std::vector< size_t > & geneid, - std::vector< int > & parent, - std::vector< bool > & duplication + std::vector< int > & parent, + std::vector< bool > & duplication ); /** diff --git a/include/barry/models/geese/flock-meat.hpp b/include/barry/models/geese/flock-meat.hpp index 5ddae20c1..11e1f2993 100644 --- a/include/barry/models/geese/flock-meat.hpp +++ b/include/barry/models/geese/flock-meat.hpp @@ -147,17 +147,20 @@ inline double Flock::likelihood_joint( double ans = as_log ? 0.0: 1.0; + std::vector< double > par0(par.begin(), par.end() - nfunctions); + model.update_normalizing_constants(par0); + if (as_log) { for (auto& d : this->dat) - ans += d.likelihood(par, as_log, use_reduced_sequence, ncores); + ans += d.likelihood(par, as_log, use_reduced_sequence, ncores, true); } else { for (auto& d : this->dat) - ans *= d.likelihood(par, as_log, use_reduced_sequence, ncores); + ans *= d.likelihood(par, as_log, use_reduced_sequence, ncores, true); } diff --git a/include/barry/models/geese/geese-bones.hpp b/include/barry/models/geese/geese-bones.hpp index 9492dcac3..8d38832fe 100644 --- a/include/barry/models/geese/geese-bones.hpp +++ b/include/barry/models/geese/geese-bones.hpp @@ -216,7 +216,8 @@ class Geese { const std::vector< double > & par, bool as_log = false, bool use_reduced_sequence = true, - size_t ncores = 1u + size_t ncores = 1u, + bool no_update_normalizing_constant = false ); double likelihood_exhaust(const std::vector< double > & par); diff --git a/include/barry/models/geese/geese-meat-likelihood.hpp b/include/barry/models/geese/geese-meat-likelihood.hpp index 0c22e70da..365df2ce2 100644 --- a/include/barry/models/geese/geese-meat-likelihood.hpp +++ b/include/barry/models/geese/geese-meat-likelihood.hpp @@ -7,9 +7,11 @@ inline double Geese::likelihood( const std::vector< double > & par, bool as_log, bool use_reduced_sequence, - size_t ncores + size_t ncores, + bool no_update_normalizing_constant ) { + // Checking whether the model is initialized INITIALIZED() // Splitting the probabilities @@ -22,6 +24,10 @@ inline double Geese::likelihood( double ll = 0.0; + // Updating normalizing constants + if (!no_update_normalizing_constant) + model->update_normalizing_constants(par0); + // Following the prunning sequence std::vector< size_t > * preseq; @@ -157,7 +163,8 @@ inline double Geese::likelihood( par0, temp_stats, array_id, - false + false, + true ); } catch (std::exception & e) { diff --git a/tests/10-geese-predict.cpp b/tests/10-geese-predict.cpp index 497054322..6dae5b4a6 100644 --- a/tests/10-geese-predict.cpp +++ b/tests/10-geese-predict.cpp @@ -1,3 +1,4 @@ +#undef _OPENMP #include "tests.hpp" #include "../include/barry/models/geese.hpp"