diff --git a/barry.hpp b/barry.hpp index 243cfa2ca..54f9d2431 100644 --- a/barry.hpp +++ b/barry.hpp @@ -4094,6 +4094,12 @@ template inline void BArrayDenseoperator[](j) = el[POS(j, i)];//this->get_cell(iter->first, i, false); @@ -5370,7 +5376,7 @@ COUNTERS_TEMPLATE(void, add_counter)( ) { - data.push_back(Counter( + data.emplace_back(Counter( count_fun_, init_fun_, hasher_fun_, @@ -5954,9 +5960,9 @@ class Support { size_t max_num_elements = BARRY_MAX_NUM_ELEMENTS; // Temp variables to reduce memory allocation - std::vector< double > current_stats; - std::vector< size_t > coordinates_free; - std::vector< size_t > coordinates_locked; + std::vector< double > current_stats; + std::vector< size_t > coordinates_free; + std::vector< size_t > coordinates_locked; size_t coordiantes_n_free; size_t coordiantes_n_locked; std::vector< double > change_stats; @@ -6063,7 +6069,7 @@ class Support { size_t max_num_elements_ = 0u ); - std::vector< double > get_counts() const; + const std::vector< double > & get_counts() const; std::vector< double > * get_current_stats(); ///< List current statistics. void print() const; @@ -6475,10 +6481,11 @@ inline void Support -inline void Support::calc( - std::vector< Array_Type > * array_bank, - std::vector< double > * stats_bank, - size_t max_num_elements_ +inline void +Support::calc( + std::vector< Array_Type > * array_bank, + std::vector< double > * stats_bank, + size_t max_num_elements_ ) { if (max_num_elements_ != 0u) @@ -6648,9 +6655,8 @@ inline bool Support -inline std::vector< double > Support::get_counts() const { +inline const std::vector< double > & Support::get_counts() const { return data.get_data(); @@ -7098,10 +7104,13 @@ class Model { * - term k */ ///@{ - std::vector< std::vector< double > > stats_support; ///< Sufficient statistics of the model (support) - std::vector< size_t > stats_support_n_arrays; ///< Number of arrays included per support. - std::vector< std::vector< double > > stats_target; ///< Target statistics of the model - std::vector< size_t > arrays2support; + std::vector< double > stats_support; ///< Sufficient statistics of the model (support) + std::vector< size_t > stats_support_sizes; ///< Number of vectors included in the support. + std::vector< size_t > stats_support_sizes_acc; ///< Accumulated number of vectors included in the support. + std::vector< size_t > stats_support_n_arrays; ///< Number of arrays included per support. + std::vector< std::vector< double > > stats_target; ///< Target statistics of the model + std::vector< double > stats_likelihood; + std::vector< size_t > arrays2support; ///@} /** @@ -7118,8 +7127,10 @@ class Model { ///@{ bool with_pset = false; std::vector< std::vector< Array_Type > > pset_arrays; ///< Arrays of the support(s) - std::vector< std::vector > pset_stats; ///< Statistics of the support(s) - std::vector< std::vector > pset_probs; ///< Probabilities of the support(s) + std::vector< double > pset_stats; ///< Statistics of the support(s) + std::vector< double > pset_probs; ///< Probabilities of the support(s) + std::vector< size_t > pset_sizes; ///< Number of vectors included in the support. + std::vector< size_t > pset_locations; ///< Accumulated number of vectors included in the support. ///@} /** @@ -7164,6 +7175,29 @@ class Model { std::vector< std::string > transform_model_term_names; public: + + /** + * @brief Computes the normalizing constant for a given set of parameters + * @details This function will compute the normalizing constant for a given + * set of parameters. It will also update the `normalizing_constants` member + * variable. + */ + void update_normalizing_constants( + const std::vector< double > & params, + BARRY_NCORES_ARG(=1), + int i = -1 + ); + + void update_likelihoods( + const std::vector< double > & params, + BARRY_NCORES_ARG(=1) + ); + + void update_pset_probs( + const std::vector< double > & params, + BARRY_NCORES_ARG(=1), + int i = -1 + ); void set_rengine(std::mt19937 * rengine_, bool delete_ = false) { @@ -7280,7 +7314,7 @@ class Model { const std::vector & params, const size_t & i, bool as_log = false, - BARRY_NCORES_ARG(=2) + bool no_update_normconst = false ); double likelihood( @@ -7288,7 +7322,7 @@ class Model { const Array_Type & Array_, int i = -1, bool as_log = false, - BARRY_NCORES_ARG(=2) + bool no_update_normconst = false ); double likelihood( @@ -7296,7 +7330,7 @@ class Model { const std::vector & target_, const size_t & i, bool as_log = false, - BARRY_NCORES_ARG(=2) + bool no_update_normconst = false ); double likelihood( @@ -7304,13 +7338,14 @@ class Model { const double * target_, const size_t & i, bool as_log = false, - BARRY_NCORES_ARG(=2) + bool no_update_normconst = false ); double likelihood_total( const std::vector & params, bool as_log = false, - BARRY_NCORES_ARG(=2) + BARRY_NCORES_ARG(=2), + bool no_update_normconst = false ); ///@} @@ -7323,17 +7358,14 @@ class Model { * constant. */ ///@{ - double get_norm_const( - const std::vector< double > & params, - const size_t & i, - bool as_log = false - ); + const std::vector< double > & get_normalizing_constants() const; + const std::vector< double > & get_likelihoods() const; const std::vector< Array_Type > * get_pset( const size_t & i ); - const std::vector< double > * get_pset_stats( + const double * get_pset_stats( const size_t & i ); ///@} @@ -7409,11 +7441,15 @@ class Model { */ ///@{ std::vector< std::vector< double > > * get_stats_target(); - std::vector< std::vector< double > > * get_stats_support(); + std::vector< double > * get_stats_support(); ///< Sufficient statistics of the support(s) + std::vector< size_t > * get_stats_support_sizes(); ///< Number of vectors included in the support. + std::vector< size_t > * get_stats_support_sizes_acc(); ///< Accumulated number of vectors included in the support. std::vector< size_t > * get_arrays2support(); std::vector< std::vector< Array_Type > > * get_pset_arrays(); - std::vector< std::vector > * get_pset_stats(); ///< Statistics of the support(s) - std::vector< std::vector > * get_pset_probs(); + std::vector< double > * get_pset_stats(); ///< Statistics of the support(s) + std::vector< double > * get_pset_probs(); + std::vector< size_t > * get_pset_sizes(); + std::vector< size_t > * get_pset_locations(); ///@} /** @@ -7474,60 +7510,36 @@ inline double update_normalizing_constant( ) { double res = 0.0; - - if (n > 1000u) - { - std::vector< double > resv(n, 0.0); + std::vector< double > resv(n, 0.0); - #if defined(__OPENMP) || defined(_OPENMP) - #pragma omp parallel for shared(resv) firstprivate(params, n, k) - #elif defined(__GNUC__) && !defined(__clang__) - #pragma GCC ivdep - #endif - for (size_t j = 0u; j < (k - 1u); ++j) - { - - const double p = params[j]; - - #if defined(__OPENMP) || defined(_OPENMP) - #pragma omp simd - #elif defined(__GNUC__) && !defined(__clang__) - #pragma GCC ivdep - #endif - for (size_t i = 0u; i < n; ++i) - resv[i] += (*(support + i * k + 1u + j)) * p; - - } + for (size_t j = 0u; j < (k - 1u); ++j) + { - // Accumulate resv to a double res + const double p = params[j]; + #if defined(__OPENMP) || defined(_OPENMP) - #pragma omp simd reduction(+:res) + #pragma omp simd #elif defined(__GNUC__) && !defined(__clang__) #pragma GCC ivdep #endif for (size_t i = 0u; i < n; ++i) - { - res += std::exp(resv[i] BARRY_SAFE_EXP) * (*(support + i * k)); - } - - } else { - - for (size_t i = 0u; i < n; ++i) - { + resv[i] += (*(support + i * k + 1u + j)) * p; - double tmp = 0.0; - const double * support_n = support + i * k + 1u; - - for (size_t j = 0u; j < (k - 1u); ++j) - tmp += (*(support_n + j)) * params[j]; - - res += std::exp(tmp BARRY_SAFE_EXP) * (*(support + i * k)); + } - } + // Accumulate resv to a double res + #if defined(__OPENMP) || defined(_OPENMP) + #pragma omp simd reduction(+:res) + #elif defined(__GNUC__) && !defined(__clang__) + #pragma GCC ivdep + #endif + for (size_t i = 0u; i < n; ++i) + { + res += std::exp(resv[i] BARRY_SAFE_EXP) * (*(support + i * k)); + } - } #ifdef BARRY_DEBUG @@ -7566,12 +7578,13 @@ inline double likelihood_( double numerator = 0.0; // Computing the numerator + #ifdef __INTEL_LLVM_COMPILER + #pragma code_align 32 + #endif #if defined(__OPENMP) || defined(_OPENMP) #pragma omp simd reduction(+:numerator) - #elif defined(__GNUC__) && !defined(__clang__) - #pragma GCC ivdep #endif - for (size_t j = 0u; j < params.size(); ++j) + for (size_t j = 0u; j < n_params; ++j) numerator += *(stats_target + j) * params[j]; if (!log_) @@ -7610,6 +7623,179 @@ inline double likelihood_( } +template < + typename Array_Type, + typename Data_Counter_Type, + typename Data_Rule_Type, + typename Data_Rule_Dyn_Type + > +inline void Model::update_normalizing_constants( + const std::vector< double > & params, + size_t ncores, + int i +) { + + const size_t n = stats_support_sizes.size(); + + // Barrier to make sure paralelization makes sense + if ((ncores > 1u) && (n < 128u)) + ncores = 1u; + + + if (i >= 0) + ncores = 1u; + + #if defined(__OPENMP) || defined(_OPENMP) + #pragma omp parallel for firstprivate(params) num_threads(ncores) \ + shared(n, normalizing_constants, first_calc_done, \ + stats_support, stats_support_sizes, stats_support_sizes_acc, i) \ + default(none) + #endif + for (size_t s = 0u; s < n; ++s) + { + + if ((i > -1) && (i != static_cast(s))) + continue; + + size_t k = params.size() + 1u; + size_t n = stats_support_sizes[s]; + + first_calc_done[s] = true; + normalizing_constants[s] = update_normalizing_constant( + params, &stats_support[ + stats_support_sizes_acc[s] * k + ], k, n + ); + + } + + return; + +} + +template < + typename Array_Type, + typename Data_Counter_Type, + typename Data_Rule_Type, + typename Data_Rule_Dyn_Type + > +inline void Model::update_likelihoods( + const std::vector< double > & params, + size_t ncores +) { + + update_normalizing_constants(params, ncores); + + size_t n_params = params.size(); + + if (stats_likelihood.size() != stats_target.size()) + stats_likelihood.resize(stats_target.size()); + + #if defined(__OPENMP) || defined(_OPENMP) + #pragma omp parallel for simd num_threads(ncores) \ + shared(n_params, stats_target, normalizing_constants, arrays2support, \ + params) \ + default(none) + #endif + for (size_t s = 0u; s < stats_target.size(); ++s) + { + stats_likelihood[s] = 0.0; + for (size_t j = 0u; j < n_params; ++j) + stats_likelihood[s] += stats_target[s][j] * params[j]; + + stats_likelihood[s] = + std::exp(stats_likelihood[s] BARRY_SAFE_EXP)/ + normalizing_constants[arrays2support[s]]; + } + + return; + +} + +template < + typename Array_Type, + typename Data_Counter_Type, + typename Data_Rule_Type, + typename Data_Rule_Dyn_Type + > +inline void Model::update_pset_probs( + const std::vector< double > & params, + size_t ncores, + int i +) { + + update_normalizing_constants(params, ncores, i); + + if (i > -1) + params_last[i] = params; + + size_t n_params = params.size(); + pset_probs.resize( + pset_locations.back() + + pset_sizes.back() + ); + + // No need to paralelize if there is only one core + if (i >= 0) + ncores = 1u; + + #if defined(__OPENMP) || defined(_OPENMP) + #pragma omp parallel for num_threads(ncores) collapse(1) \ + shared(n_params, pset_stats, pset_probs, normalizing_constants, pset_sizes, \ + params, i) \ + default(none) + #endif + for (size_t s = 0u; s < pset_sizes.size(); ++s) + { + + if ((i >= 0) && (i != static_cast(s))) + continue; + + // When does the pset starts + size_t pset_start = pset_locations[s]; + + // Looping over observations of the pset + #if defined(__OPENMP) || defined(_OPENMP) + #pragma omp simd + #endif + for (size_t a = 0u; a < pset_sizes[s]; ++a) + { + + // Start location in the array + size_t start_loc = pset_start * n_params + a * n_params; + + pset_probs[pset_start + a] = 0.0; + + // Looping over the parameters + for (size_t j = 0u; j < n_params; ++j) + pset_probs[pset_start + a] += + pset_stats[start_loc + j] * params[j]; + + // Now turning into a probability + pset_probs[pset_start + a] = + std::exp(pset_probs[pset_start + a] BARRY_SAFE_EXP)/ + normalizing_constants[s]; + } + + #ifdef BARRY_DEBUG + // Making sure the probabilities add to one + double totprob = 0.0; + for (size_t i_ = 0u; i_ < pset_sizes[s]; ++i) + totprob =+ pset_probs[pset_start + i_]; + + if (std::abs(totprob - 1) > 1e-6) + throw std::runtime_error( + std::string("Probabilities do not add to one! ") + + std::string("totprob = ") + std::to_string(totprob) + ); + + #endif + } + + return; + +} + template < typename Array_Type, typename Data_Counter_Type, @@ -7618,8 +7804,12 @@ template < > inline Model::Model() : stats_support(0u), + stats_support_sizes(0u), + stats_support_sizes_acc(0u), stats_support_n_arrays(0u), - stats_target(0u), arrays2support(0u), + stats_target(0u), + stats_likelihood(0u), + arrays2support(0u), keys2support(0u), pset_arrays(0u), pset_stats(0u), counters(new Counters()), @@ -7654,8 +7844,12 @@ inline Model::M size_t size_ ) : stats_support(0u), + stats_support_sizes(0u), + stats_support_sizes_acc(0u), stats_support_n_arrays(0u), - stats_target(0u), arrays2support(0u), keys2support(0u), + stats_target(0u), + stats_likelihood(0u), + arrays2support(0u), keys2support(0u), pset_arrays(0u), pset_stats(0u), counters(new Counters()), rules(new Rules()), @@ -7692,12 +7886,18 @@ inline Model::M const Model & Model_ ) : stats_support(Model_.stats_support), + stats_support_sizes(Model_.stats_support_sizes), + stats_support_sizes_acc(Model_.stats_support_sizes_acc), stats_support_n_arrays(Model_.stats_support_n_arrays), stats_target(Model_.stats_target), + stats_likelihood(Model_.stats_likelihood), arrays2support(Model_.arrays2support), keys2support(Model_.keys2support), pset_arrays(Model_.pset_arrays), pset_stats(Model_.pset_stats), + pset_probs(Model_.pset_probs), + pset_sizes(Model_.pset_sizes), + pset_locations(Model_.pset_locations), counters(new Counters(*(Model_.counters))), rules(new Rules(*(Model_.rules))), rules_dyn(new Rules(*(Model_.rules_dyn))), @@ -7749,12 +7949,18 @@ inline Model & delete rules_dyn; stats_support = Model_.stats_support; + stats_support_sizes = Model_.stats_support_sizes; + stats_support_sizes_acc = Model_.stats_support_sizes_acc; stats_support_n_arrays = Model_.stats_support_n_arrays; stats_target = Model_.stats_target; + stats_likelihood = Model_.stats_likelihood; arrays2support = Model_.arrays2support; keys2support = Model_.keys2support; pset_arrays = Model_.pset_arrays; pset_stats = Model_.pset_stats; + pset_probs = Model_.pset_probs; + pset_sizes = Model_.pset_sizes; + pset_locations = Model_.pset_locations; counters = new Counters(*(Model_.counters)); rules = new Rules(*(Model_.rules)); rules_dyn = new Rules(*(Model_.rules_dyn)); @@ -7783,8 +7989,6 @@ inline Model & template inline void Model:: store_psets() noexcept { - // if (with_pset) - // throw std::logic_error("Powerset storage alreay activated."); with_pset = true; return; } @@ -7921,7 +8125,8 @@ inline void Model -inline size_t Model:: add_array( +inline size_t +Model::add_array( const Array_Type & Array_, bool force_new ) { @@ -7946,11 +8151,14 @@ inline size_t Model::const_iterator locator = keys2support.find(key); if (force_new | (locator == keys2support.end())) { + + // Current size of the support stats + size_t stats_support_size = stats_support.size(); // Adding to the map - keys2support[key] = stats_support.size(); + keys2support[key] = stats_support_sizes.size(); stats_support_n_arrays.push_back(1u); // How many elements now - arrays2support.push_back(stats_support.size()); // Map of the array id to the support + arrays2support.push_back(stats_support_sizes.size()); // Map of the array id to the support // Computing support using the counters included in the model support_fun.reset_array(Array_); @@ -7962,15 +8170,16 @@ inline size_t Model & params, const size_t & i, bool as_log, - BARRY_NCORES_ARG() + bool no_update_normconst ) { - - #if defined(__OPENMP) || defined(_OPENMP) - omp_set_num_threads(ncores); - #endif // Checking if the index exists if (i >= arrays2support.size()) @@ -8075,20 +8317,22 @@ inline double Modelstats_support[idx].size() == 0u) + if (this->stats_support_sizes[idx] == 0u) return as_log ? -std::numeric_limits::infinity() : 0.0; // Checking if we have updated the normalizing constant or not - if (!first_calc_done[idx] || !vec_equal_approx(params, params_last[idx]) ) + if (!no_update_normconst && (!first_calc_done[idx] || !vec_equal_approx(params, params_last[idx]))) { first_calc_done[idx] = true; size_t k = params.size() + 1u; - size_t n = stats_support[idx].size() / k; + size_t n = stats_support_sizes[idx]; normalizing_constants[idx] = update_normalizing_constant( - params, &stats_support[idx][0u], k, n + params, &stats_support[ + stats_support_sizes_acc[idx] * k + ], k, n ); params_last[idx] = params; @@ -8111,12 +8355,8 @@ inline double Modelstats_support[loc].size() == 0u) + if (this->stats_support_sizes[loc] == 0u) return as_log ? -std::numeric_limits::infinity() : 0.0; // Counting stats_target @@ -8161,16 +8401,18 @@ inline double Model & target_, const size_t & i, bool as_log, - BARRY_NCORES_ARG() + bool no_update_normconst ) { - - #if defined(__OPENMP) || defined(_OPENMP) - omp_set_num_threads(ncores); - #endif // Checking if the index exists if (i >= arrays2support.size()) @@ -8228,21 +8466,23 @@ inline double Modelstats_support[loc].size() == 0u) + if (this->stats_support_sizes[loc] == 0u) { throw std::logic_error("The support set for this array is empty."); } // Checking if we have updated the normalizing constant or not - if (!first_calc_done[loc] || !vec_equal_approx(params, params_last[loc]) ) { + if (!no_update_normconst && (!first_calc_done[loc] || !vec_equal_approx(params, params_last[loc])) ) { first_calc_done[loc] = true; size_t k = params.size() + 1u; - size_t n = stats_support[loc].size() / k; + size_t n = stats_support_sizes[loc]; normalizing_constants[loc] = update_normalizing_constant( - params, &stats_support[loc][0u], k, n + params, &stats_support[ + stats_support_sizes_acc[loc] * k + ], k, n ); params_last[loc] = params; @@ -8265,12 +8505,8 @@ inline double Model= arrays2support.size()) @@ -8302,21 +8538,23 @@ inline double Modelstats_support[loc].size() == 0u) + if (this->stats_support_sizes[loc] == 0u) { throw std::logic_error("The support set for this array is empty."); } // Checking if we have updated the normalizing constant or not - if (!first_calc_done[loc] || !vec_equal_approx(params, params_last[loc]) ) { + if (!no_update_normconst && (!first_calc_done[loc] || !vec_equal_approx(params, params_last[loc]) )) { first_calc_done[loc] = true; size_t k = params.size() + 1u; - size_t n = stats_support[loc].size() / k; + size_t n = stats_support_sizes[loc]; normalizing_constants[loc] = update_normalizing_constant( - params, &stats_support[loc][0u], k, n + params, &stats_support[ + stats_support_sizes_acc[loc] * k + ], k, n ); params_last[loc] = params; @@ -8337,33 +8575,41 @@ template ::likelihood_total( const std::vector & params, bool as_log, - BARRY_NCORES_ARG() + BARRY_NCORES_ARG(), + bool no_update_normconst ) { - - #if defined(__OPENMP) || defined(_OPENMP) - omp_set_num_threads(ncores); - #endif size_t params_last_size = params_last.size(); - for (size_t i = 0u; i < params_last_size; ++i) + if (!no_update_normconst) { - - if (!first_calc_done[i] || !vec_equal_approx(params, params_last[i]) ) + #if defined(__OPENMP) || defined(_OPENMP) + #pragma omp parallel for num_threads(ncores) \ + shared(normalizing_constants, params_last, first_calc_done, \ + stats_support, stats_support_sizes, stats_support_sizes_acc) \ + firstprivate(params) + #endif + for (size_t i = 0u; i < params_last_size; ++i) { - size_t k = params.size() + 1u; - size_t n = stats_support[i].size() / k; - - first_calc_done[i] = true; - normalizing_constants[i] = update_normalizing_constant( - params, &stats_support[i][0u], k, n - ); - - params_last[i] = params; - - } + if (!first_calc_done[i] || !vec_equal_approx(params, params_last[i]) ) + { + + size_t k = params.size() + 1u; + size_t n = stats_support_sizes[i]; + + first_calc_done[i] = true; + normalizing_constants[i] = update_normalizing_constant( + params, &stats_support[ + stats_support_sizes_acc[i] * k + ], k, n + ); + + params_last[i] = params; + + } + } } double res = 0.0; @@ -8405,45 +8651,35 @@ inline double Model -inline double Model:: get_norm_const( - const std::vector & params, - const size_t & i, - bool as_log -) { +template < + typename Array_Type, + typename Data_Counter_Type, + typename Data_Rule_Type, + typename Data_Rule_Dyn_Type + > +inline const std::vector< double > & +Model:: get_normalizing_constants() const { - // Checking if the index exists - if (i >= arrays2support.size()) - throw std::range_error("The requested support is out of range"); - - const auto id = arrays2support[i]; + return normalizing_constants; - // Checking if we have updated the normalizing constant or not - if (!first_calc_done[id] || !vec_equal_approx(params, params_last[id]) ) - { - - first_calc_done[id] = true; - - size_t k = params.size() + 1u; - size_t n = stats_support[id].size() / k; +} - normalizing_constants[id] = update_normalizing_constant( - params, &stats_support[id][0u], k, n - ); - - params_last[id] = params; - - } +template< + typename Array_Type, + typename Data_Counter_Type, + typename Data_Rule_Type, + typename Data_Rule_Dyn_Type + > +inline const std::vector< double > & +Model:: get_likelihoods() const { - return as_log ? - std::log(normalizing_constants[id]) : - normalizing_constants[id] - ; + return stats_likelihood; } template -inline const std::vector< Array_Type > * Model:: get_pset( +inline const std::vector< Array_Type > * +Model::get_pset( const size_t & i ) { @@ -8456,14 +8692,15 @@ inline const std::vector< Array_Type > * Model -inline const std::vector< double > * Model:: get_pset_stats( +inline const double * +Model::get_pset_stats( const size_t & i ) { if (i >= arrays2support.size()) throw std::range_error("The requested support is out of range"); - return &pset_stats[arrays2support[i]]; + return &pset_stats[pset_locations[arrays2support[i]] * counter_fun.size()]; } @@ -8474,20 +8711,30 @@ inline void Model= arrays2support.size()) throw std::range_error("The requested support is out of range"); - const auto & S = stats_support[arrays2support[i]]; + // const auto & S = stats_support[arrays2support[i]]; + size_t array_id = arrays2support[i]; size_t k = nterms(); - size_t nunique = S.size() / (k + 1u); + size_t nunique = stats_support_sizes.size(); for (size_t l = 0u; l < nunique; ++l) { printf_barry("% 5li ", l); - printf_barry("counts: %.0f motif: ", S[l * (k + 1u)]); + printf_barry("counts: %.0f motif: ", stats_support[ + stats_support_sizes_acc[l] * (k + 1u) + // l * (k + 1u) + ]); for (size_t j = 0u; j < k; ++j) - printf_barry("%.2f, ", S[l * (k + 1) + j + 1]); + { + printf_barry( + "%.2f, ", + stats_support[ + stats_support_sizes_acc[l] * (k + 1u) + j + 1u + ]); + } printf_barry("\n"); @@ -8509,14 +8756,14 @@ inline void Model::max(); int max_v = 0; - for (const auto & stat : this->stats_support) + for (const auto & stat : this->stats_support_sizes) { - if (static_cast(stat.size()) > max_v) - max_v = static_cast(stat.size()); + if (static_cast(stat) > max_v) + max_v = static_cast(stat); - if (static_cast(stat.size()) < min_v) - min_v = static_cast(stat.size()); + if (static_cast(stat) < min_v) + min_v = static_cast(stat); } @@ -8527,6 +8774,13 @@ inline void Modelsize()); printf_barry("Support size : %li\n", this->size_unique()); printf_barry("Support size range : [%i, %i]\n", min_v, max_v); + + if (with_pset) + { + printf_barry("Arrays in powerset : %li\n", + std::accumulate(pset_sizes.begin(), pset_sizes.end(), 0u) + ); + } printf_barry("Transform. Fun. : %s\n", transform_model_fun ? "yes": "no"); printf_barry("Model terms (%li) :\n", this->nterms()); for (auto & cn : this->colnames()) @@ -8571,7 +8825,7 @@ inline size_t Modelstats_support.size(); + return this->stats_support_sizes.size(); } @@ -8607,11 +8861,12 @@ inline size_t Model -inline Array_Type Model::sample( +inline Array_Type +Model::sample( const size_t & i, const std::vector & params ) { @@ -8652,44 +8908,36 @@ inline Array_Type Model(a)); // Sampling an array size_t j = 0u; - std::vector< double > & probs = pset_probs[a]; - if ((probs.size() > 0u) && (vec_equal_approx(params, params_last[a]))) + if (vec_equal_approx(params, params_last[a])) // If precomputed, then no need to recalc support { + const double * probs = &pset_probs[pset_locations[a]]; while (cumprob < r) - cumprob += probs[j++]; + cumprob += *(probs + j++); if (j > 0u) j--; } else { - probs.resize(pset_arrays[a].size()); - std::vector< double > temp_stats(params.size()); - const std::vector< double > & stats = pset_stats[a]; - - int i_matches = -1; - for (size_t array = 0u; array < probs.size(); ++array) - { - - // Filling out the parameters - for (auto p = 0u; p < params.size(); ++p) - temp_stats[p] = stats[array * k + p]; + update_pset_probs(params, 1u, static_cast(a)); - probs[array] = this->likelihood(params, temp_stats, i, false); - cumprob += probs[array]; + const double * probs = &pset_probs[pset_locations[a]]; + while (cumprob < r) + cumprob += *(probs + j++); - if (i_matches == -1 && cumprob >= r) - i_matches = array; - } + if (j > 0u) + j--; #ifdef BARRY_DEBUG - if (i_matches < 0) + if (j > pset_arrays.at(a).size()) throw std::logic_error( std::string( "Something went wrong when sampling from a different set of.") + @@ -8698,8 +8946,6 @@ inline Array_Type Model::const_iterator locator = keys2support.find(key); if (locator == keys2support.end()) { - // throw std::out_of_range("Sampling from an array that has no support in the model."); + size_t stats_support_size = stats_support.size(); // Adding to the map - keys2support[key] = stats_support.size(); + keys2support[key] = stats_support_sizes.size(); stats_support_n_arrays.push_back(1u); // How many elements now - arrays2support.push_back(stats_support.size()); // Map of the array id to the support + arrays2support.push_back(stats_support_sizes.size()); // Map of the array id to the support // Computing support using the counters included in the model support_fun.reset_array(Array_); @@ -8744,17 +8990,20 @@ inline Array_Type Model & probs = pset_probs[a]; - if ((probs.size() > 0u) && (vec_equal_approx(params, params_last[a]))) + double * probs = &pset_probs[ pset_locations[a] ]; + if (first_calc_done[a] && (vec_equal_approx(params, params_last[a]))) // If precomputed, then no need to recalc support { while (cumprob < r) - cumprob += probs[j++]; + cumprob += *(probs + j++); if (j > 0u) j--; } else { - probs.resize(pset_arrays[a].size()); + // probs.resize(pset_arrays[a].size()); std::vector< double > temp_stats(params.size()); - const std::vector< double > & stats = pset_stats[a]; + const double * stats = &pset_stats[pset_locations[a] * k]; int i_matches = -1; - for (size_t array = 0u; array < probs.size(); ++array) + for (size_t array = 0u; array < pset_sizes[a]; ++array) { // Filling out the parameters for (auto p = 0u; p < params.size(); ++p) temp_stats[p] = stats[array * k + p]; - probs[array] = this->likelihood(params, temp_stats, i, false); - cumprob += probs[array]; + *(probs + array) = this->likelihood(params, temp_stats, i, false); + cumprob += *(probs + array); if (i_matches == -1 && cumprob >= r) i_matches = array; @@ -8868,7 +9158,7 @@ inline Array_Type Model > * Model -inline std::vector< std::vector< double > > * Model:: get_stats_support() +inline std::vector< double > * +Model::get_stats_support() { return &stats_support; } +// Implementation of get_stats_support_sizes() +template +inline std::vector< size_t > * +Model::get_stats_support_sizes() +{ + return &stats_support_sizes; +} + +// Implementation of get_stats_support_sizes_acc() template -inline std::vector< size_t > * Model:: get_arrays2support() +inline std::vector< size_t > * +Model::get_stats_support_sizes_acc() +{ + return &stats_support_sizes_acc; +} + +template +inline std::vector< size_t > * +Model::get_arrays2support() { return &arrays2support; } template -inline std::vector< std::vector< Array_Type > > * Model:: get_pset_arrays() { +inline std::vector< std::vector< Array_Type > > * +Model::get_pset_arrays() { return &pset_arrays; } template -inline std::vector< std::vector > * Model:: get_pset_stats() { +inline std::vector * +Model::get_pset_stats() { return &pset_stats; } -template -inline std::vector< std::vector > * Model:: get_pset_probs() { +template < + typename Array_Type, + typename Data_Counter_Type, + typename Data_Rule_Type, + typename Data_Rule_Dyn_Type + > +inline std::vector * +Model::get_pset_probs() { return &pset_probs; } -template -inline void Model:: set_transform_model( +template < + typename Array_Type, + typename Data_Counter_Type, + typename Data_Rule_Type, + typename Data_Rule_Dyn_Type + > +inline std::vector< size_t > * Model:: get_pset_sizes() +{ + return &pset_sizes; +} + +template < + typename Array_Type, + typename Data_Counter_Type, + typename Data_Rule_Type, + typename Data_Rule_Dyn_Type + > +inline std::vector< size_t > * Model:: get_pset_locations() +{ + return &pset_locations; +} + +template < + typename Array_Type, + typename Data_Counter_Type, + typename Data_Rule_Type, + typename Data_Rule_Dyn_Type + > +inline void +Model::set_transform_model( std::function(double *,size_t)> fun, std::vector< std::string > names ) @@ -8987,35 +9331,62 @@ inline void Modelsize(); + auto stats_support_old = stats_support; + // Applying over the support - for (auto & s : stats_support) + for (size_t nsupport = 0u; nsupport < stats_support_sizes.size(); ++nsupport) { - // Making room for the new support - std::vector< double > s_new(0u); - s_new.reserve(s.size()); - - size_t n = s.size() / (k + 1u); + // How many observations in the support + size_t n = stats_support_sizes[nsupport]; - // Iterating through the unique sets + // Iterating through each observation in the nsupport'th for (size_t i = 0; i < n; ++i) { - // Appending size - s_new.push_back(s[i * (k + 1u)]); - // Applying transformation and adding to the new set - auto res = transform_model_fun(&s[i * (k + 1u) + 1u], k); + auto res = transform_model_fun( + &stats_support_old[ + stats_support_sizes_acc[nsupport] * (k + 1u) + + i * (k + 1u) + 1u + ], + k + ); if (res.size() != transform_model_term_names.size()) - throw std::length_error("The transform vector from -transform_model_fun- does not match the size of -transform_model_term_names-."); + throw std::length_error( + std::string("The transform vector from -transform_model_fun- ") + + std::string(" does not match the size of ") + + std::string("-transform_model_term_names-.") + ); - std::copy(res.begin(), res.end(), std::back_inserter(s_new)); + // Resizing stats_support if the transform stats do not match the + // previous size + if ((nsupport == 0u) && (i == 0u) && (res.size() != k)) + stats_support.resize( + (res.size() + 1) * ( + stats_support_sizes_acc.back() + + stats_support_sizes.back() + ) + ); - } + // Weigth + stats_support[ + stats_support_sizes_acc[nsupport] * (res.size() + 1u) + + (res.size() + 1u) * i + ] = stats_support_old[ + stats_support_sizes_acc[nsupport] * (k + 1u) + + i * (k + 1u) + ]; + + // Copying the rest of the elements + for (size_t j = 0u; j < res.size(); ++j) + stats_support[ + stats_support_sizes_acc[nsupport] * (res.size() + 1u) + + (res.size() + 1u) * i + j + 1u + ] = res[j]; - // Exchanging with the original - std::swap(s, s_new); + } } @@ -9031,12 +9402,13 @@ inline void Model new_stats; + size_t pset_stats_loc = pset_locations[s] * k; for (auto a = 0u; a < pset_arrays[s].size(); ++a) { // Computing the transformed version of the data auto tmpstats = transform_model_fun( - &pset_stats[s][a * k], k + &pset_stats[pset_stats_loc + a * k], k ); // Storing the new values @@ -9045,7 +9417,8 @@ inline void Model Rules::get_names() const std::vector< std::string > out; out.reserve(this->size()); - for (size_t i = 0u; i < out.size(); ++i) + for (size_t i = 0u; i < this->size(); ++i) out.push_back(this->data.at(i).get_name()); return out; diff --git a/include/barry/models/geese/geese-meat.hpp b/include/barry/models/geese/geese-meat.hpp index 5f36b43ca..67e9c973e 100644 --- a/include/barry/models/geese/geese-meat.hpp +++ b/include/barry/models/geese/geese-meat.hpp @@ -456,7 +456,6 @@ inline size_t Geese::nleafs() const noexcept inline size_t Geese::nterms() const { - INITIALIZED() return model->nterms() + this->nfuns(); } diff --git a/include/barry/rules-meat.hpp b/include/barry/rules-meat.hpp index 6413d5284..1e707abba 100644 --- a/include/barry/rules-meat.hpp +++ b/include/barry/rules-meat.hpp @@ -169,7 +169,7 @@ inline std::vector Rules::get_names() const std::vector< std::string > out; out.reserve(this->size()); - for (size_t i = 0u; i < out.size(); ++i) + for (size_t i = 0u; i < this->size(); ++i) out.push_back(this->data.at(i).get_name()); return out;