From 05d63903fcc71a067b8c4408b8c5a1b90d46a49c Mon Sep 17 00:00:00 2001 From: Clang Robot Date: Tue, 21 Nov 2023 16:40:52 +0000 Subject: [PATCH] Committing clang-format changes --- include/macis/asci/determinant_search.hpp | 22 +- include/macis/asci/determinant_sort.hpp | 2 - include/macis/asci/mask_constraints.hpp | 182 +++++----- include/macis/asci/pt2.hpp | 331 ++++++++++-------- .../sorted_double_loop.hpp | 4 +- include/macis/solvers/davidson.hpp | 24 +- include/macis/solvers/selected_ci_diag.hpp | 17 +- include/macis/util/mpi.hpp | 6 +- .../sparsexx/matrix_types/conversions.hpp | 37 +- .../include/sparsexx/spblas/pspmbv.hpp | 6 +- tests/standalone_driver.cxx | 43 ++- 11 files changed, 360 insertions(+), 314 deletions(-) diff --git a/include/macis/asci/determinant_search.hpp b/include/macis/asci/determinant_search.hpp index e8e2cc1e..7b5560f0 100644 --- a/include/macis/asci/determinant_search.hpp +++ b/include/macis/asci/determinant_search.hpp @@ -42,14 +42,14 @@ struct ASCISettings { double pt2_tol = 1e-16; size_t pt2_reserve_count = 70000000; - bool pt2_prune = false; - bool pt2_precompute_eps = false; - bool pt2_precompute_idx = false; - bool pt2_print_progress = false; + bool pt2_prune = false; + bool pt2_precompute_eps = false; + bool pt2_precompute_idx = false; + bool pt2_print_progress = false; size_t pt2_bigcon_thresh = 250; size_t nxtval_bcount_thresh = 1000; - size_t nxtval_bcount_inc = 10; + size_t nxtval_bcount_inc = 10; bool just_singles = false; size_t grow_factor = 8; @@ -306,15 +306,17 @@ asci_contrib_container> asci_contributions_constraint( // Atomically get the next task ID and increment for other // MPI ranks and threads - size_t ntake = ic < asci_settings.nxtval_bcount_thresh ? 1 : asci_settings.nxtval_bcount_inc; + size_t ntake = ic < asci_settings.nxtval_bcount_thresh + ? 1 + : asci_settings.nxtval_bcount_inc; ic = nxtval.fetch_and_add(ntake); // Loop over assigned tasks const size_t c_end = std::min(ncon_total, ic + ntake); for(; ic < c_end; ++ic) { const auto& con = constraints[ic].first; - //printf("[rank %4d tid:%4d] %10lu / %10lu\n", world_rank, - // omp_get_thread_num(), ic, ncon_total); + // printf("[rank %4d tid:%4d] %10lu / %10lu\n", world_rank, + // omp_get_thread_num(), ic, ncon_total); for(size_t i_alpha = 0, iw = 0; i_alpha < nuniq_alpha; ++i_alpha) { const auto& alpha_det = uniq_alpha[i_alpha].first; @@ -462,8 +464,8 @@ std::vector> asci_search( logger->info(" MAX_RV_SIZE = {}, JUST_SINGLES = {}", asci_settings.pair_size_max, asci_settings.just_singles); logger->info(" CDET_SUM = {:.2e}", - std::accumulate(C.begin(),C.begin() + ncdets, 0.0, - [](auto s, auto c){ return s + c*c; })); + std::accumulate(C.begin(), C.begin() + ncdets, 0.0, + [](auto s, auto c) { return s + c * c; })); MACIS_MPI_CODE(MPI_Barrier(comm);) auto asci_search_st = clock_type::now(); diff --git a/include/macis/asci/determinant_sort.hpp b/include/macis/asci/determinant_sort.hpp index 62370e55..ec738f14 100644 --- a/include/macis/asci/determinant_sort.hpp +++ b/include/macis/asci/determinant_sort.hpp @@ -62,11 +62,9 @@ void reorder_ci_on_alpha(WfnIterator begin, WfnIterator end, double* C) { std::copy(reorder_C.begin(), reorder_C.end(), C); } - template PairIterator accumulate_asci_pairs(PairIterator pairs_begin, PairIterator pairs_end) { - // Accumulate the ASCI scores into first instance of unique bitstrings auto cur_it = pairs_begin; for(auto it = cur_it + 1; it != pairs_end; ++it) { diff --git a/include/macis/asci/mask_constraints.hpp b/include/macis/asci/mask_constraints.hpp index a63db8e7..03881225 100644 --- a/include/macis/asci/mask_constraints.hpp +++ b/include/macis/asci/mask_constraints.hpp @@ -643,7 +643,7 @@ auto gen_constraints_general(size_t nlevels, size_t norb, size_t ns_othr, // Generate triplets + heuristic std::vector> constraint_sizes; constraint_sizes.reserve(norb * norb * norb); - #if 0 +#if 0 size_t total_work = 0; for(int t_i = 0; t_i < norb; ++t_i) for(int t_j = 0; t_j < t_i; ++t_j) @@ -664,39 +664,41 @@ auto gen_constraints_general(size_t nlevels, size_t norb, size_t ns_othr, } size_t local_average = (0.8 * total_work) / world_size; - #else +#else // Generate all the triplets for(int t_i = 0; t_i < norb; ++t_i) for(int t_j = 0; t_j < t_i; ++t_j) for(int t_k = 0; t_k < t_j; ++t_k) { auto constraint = constraint_type::make_triplet(t_i, t_j, t_k); constraint_sizes.emplace_back(constraint, 0ul); - } + } // Build up higher-order constraints as base if requested - if(nrec_min < 0 or nrec_min >= constraint_sizes.size()) // nrec_min < 0 implies that you want all the constraints upfront - for(size_t ilevel = 0; ilevel < nlevel_min; ++ilevel) { - decltype(constraint_sizes) cur_constraints; - cur_constraints.reserve(constraint_sizes.size() * norb); - for(auto [c,nw] : constraint_sizes) { - const auto C_min = c.C_min(); - for(auto q_l = 0; q_l < C_min; ++q_l) { - // Generate masks / counts - string_type cn_C = c.C(); - cn_C.flip(q_l); - string_type cn_B = c.B() >> (C_min - q_l); - constraint_type c_next(cn_C, cn_B, q_l); - cur_constraints.emplace_back(c_next, 0ul); + if(nrec_min < 0 or + nrec_min >= constraint_sizes.size()) // nrec_min < 0 implies that you want + // all the constraints upfront + for(size_t ilevel = 0; ilevel < nlevel_min; ++ilevel) { + decltype(constraint_sizes) cur_constraints; + cur_constraints.reserve(constraint_sizes.size() * norb); + for(auto [c, nw] : constraint_sizes) { + const auto C_min = c.C_min(); + for(auto q_l = 0; q_l < C_min; ++q_l) { + // Generate masks / counts + string_type cn_C = c.C(); + cn_C.flip(q_l); + string_type cn_B = c.B() >> (C_min - q_l); + constraint_type c_next(cn_C, cn_B, q_l); + cur_constraints.emplace_back(c_next, 0ul); + } } + constraint_sizes = std::move(cur_constraints); } - constraint_sizes = std::move(cur_constraints); - } struct atomic_wrapper { std::atomic value; - atomic_wrapper(size_t i = 0) : value(i) {}; - atomic_wrapper(const atomic_wrapper& other) : - atomic_wrapper(other.value.load()) {}; + atomic_wrapper(size_t i = 0) : value(i){}; + atomic_wrapper(const atomic_wrapper& other) + : atomic_wrapper(other.value.load()){}; atomic_wrapper& operator=(const atomic_wrapper& other) { value.store(other.value.load()); return *this; @@ -704,32 +706,32 @@ auto gen_constraints_general(size_t nlevels, size_t norb, size_t ns_othr, }; // Compute histogram - const auto ntrip_full = constraint_sizes.size(); + const auto ntrip_full = constraint_sizes.size(); std::vector constraint_work(ntrip_full, 0ul); { - global_atomic nxtval(MPI_COMM_WORLD); - #pragma omp parallel - { - size_t i_trip = 0; - while(i_trip < ntrip_full) { - i_trip = nxtval.fetch_and_add(1); - if(i_trip >= ntrip_full) break; - //if(!(i_trip%1000)) printf("cgen %lu / %lu\n", i_trip, ntrip_full); - auto& [constraint, __nw] = constraint_sizes[i_trip]; - auto& c_nw = constraint_work[i_trip]; - size_t nw = 0; - for(const auto& alpha : unique_alpha) { - if constexpr(flat_container) - nw += constraint_histogram(wfn_traits::alpha_string(alpha), ns_othr, - nd_othr, constraint); - else - nw += alpha.second * constraint_histogram(alpha.first, ns_othr, - nd_othr, constraint); + global_atomic nxtval(MPI_COMM_WORLD); +#pragma omp parallel + { + size_t i_trip = 0; + while(i_trip < ntrip_full) { + i_trip = nxtval.fetch_and_add(1); + if(i_trip >= ntrip_full) break; + // if(!(i_trip%1000)) printf("cgen %lu / %lu\n", i_trip, ntrip_full); + auto& [constraint, __nw] = constraint_sizes[i_trip]; + auto& c_nw = constraint_work[i_trip]; + size_t nw = 0; + for(const auto& alpha : unique_alpha) { + if constexpr(flat_container) + nw += constraint_histogram(wfn_traits::alpha_string(alpha), ns_othr, + nd_othr, constraint); + else + nw += alpha.second * constraint_histogram(alpha.first, ns_othr, + nd_othr, constraint); + } + if(nw) c_nw.value.fetch_add(nw); + } } - if(nw) c_nw.value.fetch_add(nw); - } - } - } // Scope nxtval + } // Scope nxtval std::vector constraint_work_bare(ntrip_full); for(auto i_trip = 0; i_trip < ntrip_full; ++i_trip) { @@ -744,28 +746,28 @@ auto gen_constraints_general(size_t nlevels, size_t norb, size_t ns_othr, // Remove zeros { - auto it = std::partition(constraint_sizes.begin(), constraint_sizes.end(), - [](const auto& p) { return p.second > 0; }); - constraint_sizes.erase(it, constraint_sizes.end()); + auto it = std::partition(constraint_sizes.begin(), constraint_sizes.end(), + [](const auto& p) { return p.second > 0; }); + constraint_sizes.erase(it, constraint_sizes.end()); } // Compute average - size_t total_work = std::accumulate(constraint_sizes.begin(), constraint_sizes.end(), - 0ul, [](auto s, const auto& p){ return s + p.second; }); + size_t total_work = + std::accumulate(constraint_sizes.begin(), constraint_sizes.end(), 0ul, + [](auto s, const auto& p) { return s + p.second; }); size_t local_average = total_work / world_size; - // Manual refinement of top configurations if(nrec_min > 0 and nrec_min < constraint_sizes.size()) { - const size_t nleave = constraint_sizes.size() - nrec_min; - std::vector> constraint_to_refine, - constraint_to_leave; + std::vector> constraint_to_refine, + constraint_to_leave; constraint_to_refine.reserve(nrec_min); constraint_to_refine.reserve(nleave); - std::copy_n(constraint_sizes.begin(), nrec_min, std::back_inserter(constraint_to_refine)); - std::copy_n(constraint_sizes.begin() + nrec_min, nleave, + std::copy_n(constraint_sizes.begin(), nrec_min, + std::back_inserter(constraint_to_refine)); + std::copy_n(constraint_sizes.begin() + nrec_min, nleave, std::back_inserter(constraint_to_leave)); // Deallocate original array @@ -774,8 +776,8 @@ auto gen_constraints_general(size_t nlevels, size_t norb, size_t ns_othr, // Generate refined constraints for(size_t ilevel = 0; ilevel < nlevel_min; ++ilevel) { decltype(constraint_sizes) cur_constraints; - cur_constraints.reserve(constraint_to_refine.size() * norb); - for(auto [c,nw] : constraint_to_refine) { + cur_constraints.reserve(constraint_to_refine.size() * norb); + for(auto [c, nw] : constraint_to_refine) { const auto C_min = c.C_min(); for(auto q_l = 0; q_l < C_min; ++q_l) { // Generate masks / counts @@ -795,27 +797,27 @@ auto gen_constraints_general(size_t nlevels, size_t norb, size_t ns_othr, std::vector().swap(constraint_work); std::vector().swap(constraint_work_bare); constraint_work.resize(nrefine, 0ul); - #pragma omp parallel +#pragma omp parallel { - size_t i_ref = 0; - while(i_ref < nrefine) { - i_ref = nxtval.fetch_and_add(1); - if(i_ref >= nrefine) break; - //if(!(i_ref%1000)) printf("cgen %lu / %lu\n", i_ref, nrefine); - auto& [constraint, __nw] = constraint_to_refine[i_ref]; - auto& c_nw = constraint_work[i_ref]; - size_t nw = 0; - for(const auto& alpha : unique_alpha) { - if constexpr(flat_container) - nw += constraint_histogram(wfn_traits::alpha_string(alpha), ns_othr, - nd_othr, constraint); - else - nw += alpha.second * constraint_histogram(alpha.first, ns_othr, - nd_othr, constraint); - } - if(nw) c_nw.value.fetch_add(nw); - } // constraint "loop" - } // OpenMP Context + size_t i_ref = 0; + while(i_ref < nrefine) { + i_ref = nxtval.fetch_and_add(1); + if(i_ref >= nrefine) break; + // if(!(i_ref%1000)) printf("cgen %lu / %lu\n", i_ref, nrefine); + auto& [constraint, __nw] = constraint_to_refine[i_ref]; + auto& c_nw = constraint_work[i_ref]; + size_t nw = 0; + for(const auto& alpha : unique_alpha) { + if constexpr(flat_container) + nw += constraint_histogram(wfn_traits::alpha_string(alpha), ns_othr, + nd_othr, constraint); + else + nw += alpha.second * constraint_histogram(alpha.first, ns_othr, + nd_othr, constraint); + } + if(nw) c_nw.value.fetch_add(nw); + } // constraint "loop" + } // OpenMP Context constraint_work_bare.resize(nrefine); for(auto i_ref = 0; i_ref < nrefine; ++i_ref) { @@ -830,24 +832,26 @@ auto gen_constraints_general(size_t nlevels, size_t norb, size_t ns_othr, // Remove zeros { - auto it = std::partition(constraint_to_refine.begin(), constraint_to_refine.end(), - [](const auto& p) { return p.second > 0; }); - constraint_to_refine.erase(it, constraint_to_refine.end()); + auto it = std::partition(constraint_to_refine.begin(), + constraint_to_refine.end(), + [](const auto& p) { return p.second > 0; }); + constraint_to_refine.erase(it, constraint_to_refine.end()); } // Concatenate the arrays constraint_sizes.reserve(nrefine + nleave); - std::copy_n(constraint_to_refine.begin(), nrefine, std::back_inserter(constraint_sizes)); - std::copy_n(constraint_to_leave.begin(), nleave, std::back_inserter(constraint_sizes)); - - size_t tmp = std::accumulate(constraint_sizes.begin(), constraint_sizes.end(), - 0ul, [](auto s, const auto& p){ return s + p.second; }); + std::copy_n(constraint_to_refine.begin(), nrefine, + std::back_inserter(constraint_sizes)); + std::copy_n(constraint_to_leave.begin(), nleave, + std::back_inserter(constraint_sizes)); + + size_t tmp = + std::accumulate(constraint_sizes.begin(), constraint_sizes.end(), 0ul, + [](auto s, const auto& p) { return s + p.second; }); if(tmp != total_work) throw std::runtime_error("Incorrect Refinement"); - } // Selective refinement logic + } // Selective refinement logic - - - #endif +#endif for(size_t ilevel = 0; ilevel < nlevels; ++ilevel) { // Select constraints larger than average to be broken apart diff --git a/include/macis/asci/pt2.hpp b/include/macis/asci/pt2.hpp index 639fc2bb..ab7ba4c3 100644 --- a/include/macis/asci/pt2.hpp +++ b/include/macis/asci/pt2.hpp @@ -9,9 +9,9 @@ #pragma once #include #include -#include #include #include +#include namespace macis { @@ -45,13 +45,19 @@ double asci_pt2_constraint(ASCISettings asci_settings, logger->info("[ASCI PT2 Settings]"); logger->info(" * NDETS = {}", ncdets); logger->info(" * PT2_TOL = {}", asci_settings.pt2_tol); - logger->info(" * PT2_RESERVE_COUNT = {}", asci_settings.pt2_reserve_count); - logger->info(" * PT2_CONSTRAINT_LVL_MAX = {}", asci_settings.pt2_max_constraint_level); - logger->info(" * PT2_CONSTRAINT_LVL_MIN = {}", asci_settings.pt2_min_constraint_level); - logger->info(" * PT2_CNSTRNT_RFNE_FORCE = {}", asci_settings.pt2_constraint_refine_force); + logger->info(" * PT2_RESERVE_COUNT = {}", + asci_settings.pt2_reserve_count); + logger->info(" * PT2_CONSTRAINT_LVL_MAX = {}", + asci_settings.pt2_max_constraint_level); + logger->info(" * PT2_CONSTRAINT_LVL_MIN = {}", + asci_settings.pt2_min_constraint_level); + logger->info(" * PT2_CNSTRNT_RFNE_FORCE = {}", + asci_settings.pt2_constraint_refine_force); logger->info(" * PT2_PRUNE = {}", asci_settings.pt2_prune); - logger->info(" * PT2_PRECOMP_EPS = {}", asci_settings.pt2_precompute_eps); - logger->info(" * PT2_BIGCON_THRESH = {}", asci_settings.pt2_bigcon_thresh); + logger->info(" * PT2_PRECOMP_EPS = {}", + asci_settings.pt2_precompute_eps); + logger->info(" * PT2_BIGCON_THRESH = {}", + asci_settings.pt2_bigcon_thresh); logger->info(" * NXTVAL_BCOUNT_THRESH = {}", asci_settings.nxtval_bcount_thresh); logger->info(" * NXTVAL_BCOUNT_INC = {}", @@ -69,14 +75,16 @@ double asci_pt2_constraint(ASCISettings asci_settings, double h_diag; size_t mem() const { - return sizeof(spin_wfn_type) + + return sizeof(spin_wfn_type) + (occ_beta.capacity() + vir_beta.capacity()) * sizeof(uint8_t) + - (2 + orb_ens_alpha.capacity() + orb_ens_beta.capacity()) * sizeof(double); + (2 + orb_ens_alpha.capacity() + orb_ens_beta.capacity()) * + sizeof(double); } beta_coeff_data(double c, size_t norb, const std::vector& occ_alpha, wfn_t w, - const HamiltonianGenerator>& ham_gen, bool pce, bool pci) { + const HamiltonianGenerator>& ham_gen, bool pce, + bool pci) { coeff = c; beta_string = wfn_traits::beta_string(w); @@ -88,7 +96,7 @@ double asci_pt2_constraint(ASCISettings asci_settings, std::vector o_32, v_32; if(pce or pci) { spin_wfn_traits::state_to_occ_vir(norb, beta_string, o_32, v_32); - occ_beta.resize(o_32.size()); + occ_beta.resize(o_32.size()); std::copy(o_32.begin(), o_32.end(), occ_beta.begin()); vir_beta.resize(v_32.size()); std::copy(v_32.begin(), v_32.end(), vir_beta.begin()); @@ -106,9 +114,9 @@ double asci_pt2_constraint(ASCISettings asci_settings, const size_t nuniq_alpha = uniq_alpha.size(); logger->info(" * NUNIQ_ALPHA = {}", nuniq_alpha); std::vector uniq_alpha_ioff(nuniq_alpha); - std::transform_exclusive_scan(uniq_alpha.begin(), uniq_alpha.end(), - uniq_alpha_ioff.begin(), 0ul, std::plus(), - [](const auto& p){ return p.second; }); + std::transform_exclusive_scan( + uniq_alpha.begin(), uniq_alpha.end(), uniq_alpha_ioff.begin(), 0ul, + std::plus(), [](const auto& p) { return p.second; }); using unique_alpha_data = std::vector; std::vector uad(nuniq_alpha); @@ -121,7 +129,9 @@ double asci_pt2_constraint(ASCISettings asci_settings, uad[i].reserve(nbeta); for(auto j = 0; j < nbeta; ++j, ++iw) { const auto& w = *(cdets_begin + iw); - uad[i].emplace_back(C[iw], norb, occ_alpha, w, ham_gen,asci_settings.pt2_precompute_eps, asci_settings.pt2_precompute_idx); + uad[i].emplace_back(C[iw], norb, occ_alpha, w, ham_gen, + asci_settings.pt2_precompute_eps, + asci_settings.pt2_precompute_idx); } } @@ -130,14 +140,16 @@ double asci_pt2_constraint(ASCISettings asci_settings, logger->info("MEM REQ DETS = {:.2e}", ncdets * sizeof(wfn_t) / gib); logger->info("MEM REQ C = {:.2e}", ncdets * sizeof(double) / gib); size_t mem_alpha = 0; - for( auto i = 0ul; i < nuniq_alpha; ++i) { + for(auto i = 0ul; i < nuniq_alpha; ++i) { mem_alpha += sizeof(spin_wfn_type); for(auto j = 0ul; j < uad[i].size(); ++j) { mem_alpha += uad[i][j].mem(); } } logger->info("MEM REQ ALPH = {:.2e}", mem_alpha / gib); - logger->info("MEM REQ CONT = {:.2e}", asci_settings.pt2_reserve_count * sizeof(asci_contrib>)/ gib); + logger->info( + "MEM REQ CONT = {:.2e}", + asci_settings.pt2_reserve_count * sizeof(asci_contrib>) / gib); } MPI_Barrier(comm); @@ -157,36 +169,37 @@ double asci_pt2_constraint(ASCISettings asci_settings, // auto constraints = dist_constraint_general>( // 5, norb, n_sing_beta, n_doub_beta, uniq_alpha, comm); auto constraints = gen_constraints_general>( - asci_settings.pt2_max_constraint_level, norb, n_sing_beta, - n_doub_beta, uniq_alpha, world_size * omp_get_max_threads(), - asci_settings.pt2_min_constraint_level, asci_settings.pt2_constraint_refine_force ); + asci_settings.pt2_max_constraint_level, norb, n_sing_beta, n_doub_beta, + uniq_alpha, world_size * omp_get_max_threads(), + asci_settings.pt2_min_constraint_level, + asci_settings.pt2_constraint_refine_force); auto gen_c_en = clock_type::now(); duration_type gen_c_dur = gen_c_en - gen_c_st; logger->info(" * GEN_DUR = {:.2e} ms", gen_c_dur.count()); - //if(!world_rank) { - // std::ofstream c_file("constraint_work.txt"); - // std::stringstream ss; - // for(auto [c,s] : constraints) { - // ss << c.C() << " " << s << std::endl; - // } - // auto str = ss.str(); - // c_file.write(str.c_str(), str.size()); - //} - //if(!world_rank) { - // std::ofstream c_file("unique_alpha.txt"); - // std::stringstream ss; - // for(size_t i = 0; i < nuniq_alpha; ++i) { - // ss << uniq_alpha[i].first << " " << uniq_alpha[i].second << std::endl; - // } - // auto str = ss.str(); - // c_file.write(str.c_str(), str.size()); - //} + // if(!world_rank) { + // std::ofstream c_file("constraint_work.txt"); + // std::stringstream ss; + // for(auto [c,s] : constraints) { + // ss << c.C() << " " << s << std::endl; + // } + // auto str = ss.str(); + // c_file.write(str.c_str(), str.size()); + // } + // if(!world_rank) { + // std::ofstream c_file("unique_alpha.txt"); + // std::stringstream ss; + // for(size_t i = 0; i < nuniq_alpha; ++i) { + // ss << uniq_alpha[i].first << " " << uniq_alpha[i].second << std::endl; + // } + // auto str = ss.str(); + // c_file.write(str.c_str(), str.size()); + // } double EPT2 = 0.0; size_t NPT2 = 0; const size_t ncon_total = constraints.size(); - const size_t ncon_big = asci_settings.pt2_bigcon_thresh; + const size_t ncon_big = asci_settings.pt2_bigcon_thresh; const size_t ncon_small = ncon_total - ncon_big; // Global atomic task-id counter @@ -201,91 +214,95 @@ double asci_pt2_constraint(ASCISettings asci_settings, while(ic < ncon_big) { // Atomically get the next task ID and increment for other // MPI ranks - ic = nxtval_big.fetch_and_add(1); + ic = nxtval_big.fetch_and_add(1); if(ic >= ncon_big) continue; if(asci_settings.pt2_print_progress) - printf("[pt2_big rank %4d] %10lu / %10lu\n", world_rank, ic, ncon_total); + printf("[pt2_big rank %4d] %10lu / %10lu\n", world_rank, ic, + ncon_total); const auto& con = constraints[ic].first; asci_contrib_container> asci_pairs_con; - #pragma omp parallel +#pragma omp parallel { - asci_contrib_container> asci_pairs; - #pragma omp for schedule(dynamic) - for(size_t i_alpha = 0; i_alpha < nuniq_alpha; ++i_alpha) { - const size_t old_pair_size = asci_pairs.size(); - const auto& alpha_det = uniq_alpha[i_alpha].first; - const auto ncon_alpha = constraint_histogram(alpha_det,1,1,con); - if(!ncon_alpha) continue; - const auto occ_alpha = bits_to_indices(alpha_det); - const bool alpha_satisfies_con = satisfies_constraint(alpha_det, con); - - const auto& bcd = uad[i_alpha]; - const size_t nbeta = bcd.size(); - for(size_t j_beta = 0; j_beta < nbeta; ++j_beta) { - const size_t iw = uniq_alpha_ioff[i_alpha] + j_beta; - const auto w = *(cdets_begin + iw); - const auto c = C[iw]; - const auto& beta_det = bcd[j_beta].beta_string; - const auto h_diag = bcd[j_beta].h_diag; - - // TODO: These copies are slow - #if 0 + asci_contrib_container> asci_pairs; +#pragma omp for schedule(dynamic) + for(size_t i_alpha = 0; i_alpha < nuniq_alpha; ++i_alpha) { + const size_t old_pair_size = asci_pairs.size(); + const auto& alpha_det = uniq_alpha[i_alpha].first; + const auto ncon_alpha = constraint_histogram(alpha_det, 1, 1, con); + if(!ncon_alpha) continue; + const auto occ_alpha = bits_to_indices(alpha_det); + const bool alpha_satisfies_con = satisfies_constraint(alpha_det, con); + + const auto& bcd = uad[i_alpha]; + const size_t nbeta = bcd.size(); + for(size_t j_beta = 0; j_beta < nbeta; ++j_beta) { + const size_t iw = uniq_alpha_ioff[i_alpha] + j_beta; + const auto w = *(cdets_begin + iw); + const auto c = C[iw]; + const auto& beta_det = bcd[j_beta].beta_string; + const auto h_diag = bcd[j_beta].h_diag; + +// TODO: These copies are slow +#if 0 const auto& occ_beta_8 = bcd[j_beta].occ_beta; const auto& vir_beta_8 = bcd[j_beta].vir_beta; std::vector occ_beta(occ_beta_8.size()), vir_beta(vir_beta_8.size()); std::copy(occ_beta_8.begin(), occ_beta_8.end(), occ_beta.begin()); std::copy(vir_beta_8.begin(), vir_beta_8.end(), vir_beta.begin()); - #else - std::vector occ_beta, vir_beta; - spin_wfn_traits::state_to_occ_vir(norb, beta_det, occ_beta, vir_beta); - #endif - - std::vector orb_ens_alpha, orb_ens_beta; - if(asci_settings.pt2_precompute_eps) { - orb_ens_alpha = bcd[j_beta].orb_ens_alpha; - orb_ens_beta = bcd[j_beta].orb_ens_beta; - } else { - orb_ens_alpha = ham_gen.single_orbital_ens(norb, occ_alpha, occ_beta); - orb_ens_beta = ham_gen.single_orbital_ens(norb, occ_beta, occ_alpha); - } +#else + std::vector occ_beta, vir_beta; + spin_wfn_traits::state_to_occ_vir(norb, beta_det, occ_beta, + vir_beta); +#endif + + std::vector orb_ens_alpha, orb_ens_beta; + if(asci_settings.pt2_precompute_eps) { + orb_ens_alpha = bcd[j_beta].orb_ens_alpha; + orb_ens_beta = bcd[j_beta].orb_ens_beta; + } else { + orb_ens_alpha = + ham_gen.single_orbital_ens(norb, occ_alpha, occ_beta); + orb_ens_beta = + ham_gen.single_orbital_ens(norb, occ_beta, occ_alpha); + } - // AA excitations - generate_constraint_singles_contributions_ss( - c, w, con, occ_alpha, occ_beta, orb_ens_alpha.data(), T_pq, - norb, G_red, norb, V_red, norb, h_el_tol, h_diag, E_ASCI, - ham_gen, asci_pairs); - - // AAAA excitations - generate_constraint_doubles_contributions_ss( - c, w, con, occ_alpha, occ_beta, orb_ens_alpha.data(), G_pqrs, - norb, h_el_tol, h_diag, E_ASCI, ham_gen, asci_pairs); - - // AABB excitations - generate_constraint_doubles_contributions_os( - c, w, con, occ_alpha, occ_beta, vir_beta, orb_ens_alpha.data(), - orb_ens_beta.data(), V_pqrs, norb, h_el_tol, h_diag, E_ASCI, - ham_gen, asci_pairs); - - if(alpha_satisfies_con) { - // BB excitations - append_singles_asci_contributions( - c, w, beta_det, occ_beta, vir_beta, occ_alpha, - orb_ens_beta.data(), T_pq, norb, G_red, norb, V_red, norb, - h_el_tol, h_diag, E_ASCI, ham_gen, asci_pairs); - - // BBBB excitations - append_ss_doubles_asci_contributions( - c, w, beta_det, alpha_det, occ_beta, vir_beta, occ_alpha, - orb_ens_beta.data(), G_pqrs, norb, h_el_tol, h_diag, E_ASCI, + // AA excitations + generate_constraint_singles_contributions_ss( + c, w, con, occ_alpha, occ_beta, orb_ens_alpha.data(), T_pq, + norb, G_red, norb, V_red, norb, h_el_tol, h_diag, E_ASCI, ham_gen, asci_pairs); - // No excitation (push inf to remove from list) - asci_pairs.push_back( - {w, std::numeric_limits::infinity(), 1.0}); + // AAAA excitations + generate_constraint_doubles_contributions_ss( + c, w, con, occ_alpha, occ_beta, orb_ens_alpha.data(), G_pqrs, + norb, h_el_tol, h_diag, E_ASCI, ham_gen, asci_pairs); + + // AABB excitations + generate_constraint_doubles_contributions_os( + c, w, con, occ_alpha, occ_beta, vir_beta, orb_ens_alpha.data(), + orb_ens_beta.data(), V_pqrs, norb, h_el_tol, h_diag, E_ASCI, + ham_gen, asci_pairs); + + if(alpha_satisfies_con) { + // BB excitations + append_singles_asci_contributions( + c, w, beta_det, occ_beta, vir_beta, occ_alpha, + orb_ens_beta.data(), T_pq, norb, G_red, norb, V_red, norb, + h_el_tol, h_diag, E_ASCI, ham_gen, asci_pairs); + + // BBBB excitations + append_ss_doubles_asci_contributions( + c, w, beta_det, alpha_det, occ_beta, vir_beta, occ_alpha, + orb_ens_beta.data(), G_pqrs, norb, h_el_tol, h_diag, E_ASCI, + ham_gen, asci_pairs); + + // No excitation (push inf to remove from list) + asci_pairs.push_back( + {w, std::numeric_limits::infinity(), 1.0}); + } } - } - #if 0 +#if 0 if(asci_settings.pt2_prune and asci_pairs.size() > asci_settings.pt2_reserve_count and asci_pairs.size() != old_pair_size) { // Cleanup auto uit = stable_sort_and_accumulate_asci_pairs(asci_pairs.begin(), @@ -297,26 +314,26 @@ double asci_pt2_constraint(ASCISettings asci_settings, omp_get_thread_num(), ic, ncon_total, i_alpha, nuniq_alpha, asci_pairs.size()); } - #endif - - } // Unique Alpha Loop +#endif - // S&A Thread local pairs - sort_and_accumulate_asci_pairs(asci_pairs); + } // Unique Alpha Loop + // S&A Thread local pairs + sort_and_accumulate_asci_pairs(asci_pairs); - // Insert - #pragma omp critical - { - if(asci_pairs_con.size()) { - asci_pairs_con.reserve(asci_pairs.size() + asci_pairs_con.size()); - asci_pairs_con.insert(asci_pairs_con.end(), asci_pairs.begin(), asci_pairs.end()); - } else { - asci_pairs_con = std::move(asci_pairs); - } - } +// Insert +#pragma omp critical + { + if(asci_pairs_con.size()) { + asci_pairs_con.reserve(asci_pairs.size() + asci_pairs_con.size()); + asci_pairs_con.insert(asci_pairs_con.end(), asci_pairs.begin(), + asci_pairs.end()); + } else { + asci_pairs_con = std::move(asci_pairs); + } + } - } // OpenMP + } // OpenMP double EPT2_local = 0.0; size_t NPT2_local = 0; @@ -334,26 +351,28 @@ double asci_pt2_constraint(ASCISettings asci_settings, } asci_pairs_con.clear(); if(asci_settings.pt2_print_progress) - printf("[pt2_big rank %4d] CAPACITY %lu SZ %lu\n", world_rank, asci_pairs_con.capacity(), pair_size); + printf("[pt2_big rank %4d] CAPACITY %lu SZ %lu\n", world_rank, + asci_pairs_con.capacity(), pair_size); } EPT2 += EPT2_local; NPT2 += NPT2_local; - } // Constraint "loop" - } // "Big constraints" - + } // Constraint "loop" + } // "Big constraints" // Parallelize over both MPI + threads for "small" constraints #pragma omp parallel reduction(+ : EPT2) reduction(+ : NPT2) { // Process ASCI pair contributions for each constraint asci_contrib_container> asci_pairs; - //asci_pairs.reserve(asci_settings.pt2_reserve_count); + // asci_pairs.reserve(asci_settings.pt2_reserve_count); size_t ic = 0; while(ic < ncon_total) { // Atomically get the next task ID and increment for other // MPI ranks and threads - size_t ntake = ic < asci_settings.nxtval_bcount_thresh ? 1 : asci_settings.nxtval_bcount_inc; + size_t ntake = ic < asci_settings.nxtval_bcount_thresh + ? 1 + : asci_settings.nxtval_bcount_inc; ic = nxtval_small.fetch_and_add(ntake); // Loop over assigned tasks @@ -367,7 +386,7 @@ double asci_pt2_constraint(ASCISettings asci_settings, for(size_t i_alpha = 0; i_alpha < nuniq_alpha; ++i_alpha) { const size_t old_pair_size = asci_pairs.size(); const auto& alpha_det = uniq_alpha[i_alpha].first; - const auto ncon_alpha = constraint_histogram(alpha_det,1,1,con); + const auto ncon_alpha = constraint_histogram(alpha_det, 1, 1, con); if(!ncon_alpha) continue; const auto occ_alpha = bits_to_indices(alpha_det); const bool alpha_satisfies_con = satisfies_constraint(alpha_det, con); @@ -381,25 +400,28 @@ double asci_pt2_constraint(ASCISettings asci_settings, const auto& beta_det = bcd[j_beta].beta_string; const auto h_diag = bcd[j_beta].h_diag; - // TODO: These copies are slow - #if 0 +// TODO: These copies are slow +#if 0 const auto& occ_beta_8 = bcd[j_beta].occ_beta; const auto& vir_beta_8 = bcd[j_beta].vir_beta; std::vector occ_beta(occ_beta_8.size()), vir_beta(vir_beta_8.size()); std::copy(occ_beta_8.begin(), occ_beta_8.end(), occ_beta.begin()); std::copy(vir_beta_8.begin(), vir_beta_8.end(), vir_beta.begin()); - #else +#else std::vector occ_beta, vir_beta; - spin_wfn_traits::state_to_occ_vir(norb, beta_det, occ_beta, vir_beta); - #endif + spin_wfn_traits::state_to_occ_vir(norb, beta_det, occ_beta, + vir_beta); +#endif std::vector orb_ens_alpha, orb_ens_beta; if(asci_settings.pt2_precompute_eps) { orb_ens_alpha = bcd[j_beta].orb_ens_alpha; - orb_ens_beta = bcd[j_beta].orb_ens_beta; + orb_ens_beta = bcd[j_beta].orb_ens_beta; } else { - orb_ens_alpha = ham_gen.single_orbital_ens(norb, occ_alpha, occ_beta); - orb_ens_beta = ham_gen.single_orbital_ens(norb, occ_beta, occ_alpha); + orb_ens_alpha = + ham_gen.single_orbital_ens(norb, occ_alpha, occ_beta); + orb_ens_beta = + ham_gen.single_orbital_ens(norb, occ_beta, occ_alpha); } // AA excitations @@ -437,17 +459,22 @@ double asci_pt2_constraint(ASCISettings asci_settings, {w, std::numeric_limits::infinity(), 1.0}); } } - if(asci_settings.pt2_prune and asci_pairs.size() > asci_settings.pt2_reserve_count and asci_pairs.size() != old_pair_size) { - // Cleanup - auto uit = stable_sort_and_accumulate_asci_pairs(asci_pairs.begin(), - asci_pairs.end()); - asci_pairs.erase(uit, asci_pairs.end()); - //uit = std::stable_partition(asci_pairs.begin(), asci_pairs.end(), [&](const auto& p){ return std::abs(p.pt2()) > h_el_tol; }); - //asci_pairs.erase(uit, asci_pairs.end()); + if(asci_settings.pt2_prune and + asci_pairs.size() > asci_settings.pt2_reserve_count and + asci_pairs.size() != old_pair_size) { + // Cleanup + auto uit = stable_sort_and_accumulate_asci_pairs(asci_pairs.begin(), + asci_pairs.end()); + asci_pairs.erase(uit, asci_pairs.end()); + // uit = std::stable_partition(asci_pairs.begin(), asci_pairs.end(), + // [&](const auto& p){ return std::abs(p.pt2()) > h_el_tol; }); + // asci_pairs.erase(uit, asci_pairs.end()); if(asci_settings.pt2_print_progress) - printf("[pt2_prune rank %4d tid:%4d] IC = %lu / %lu IA = %lu / %lu SZ = %lu\n", world_rank, - omp_get_thread_num(), ic, ncon_total, i_alpha, - nuniq_alpha, asci_pairs.size()); + printf( + "[pt2_prune rank %4d tid:%4d] IC = %lu / %lu IA = %lu / %lu " + "SZ = %lu\n", + world_rank, omp_get_thread_num(), ic, ncon_total, i_alpha, + nuniq_alpha, asci_pairs.size()); if(asci_pairs.size() > asci_settings.pt2_reserve_count) { printf("* WARNING: PRUNED SIZE LARGER THAN RESERVE COUNT\n"); @@ -469,8 +496,8 @@ double asci_pt2_constraint(ASCISettings asci_settings, } } asci_pairs.clear(); - // Deallocate - if(asci_pairs.capacity() > asci_settings.pt2_reserve_count) + // Deallocate + if(asci_pairs.capacity() > asci_settings.pt2_reserve_count) asci_contrib_container>().swap(asci_pairs); } diff --git a/include/macis/hamiltonian_generator/sorted_double_loop.hpp b/include/macis/hamiltonian_generator/sorted_double_loop.hpp index e1c6882d..af8760dd 100644 --- a/include/macis/hamiltonian_generator/sorted_double_loop.hpp +++ b/include/macis/hamiltonian_generator/sorted_double_loop.hpp @@ -103,7 +103,9 @@ class SortedDoubleLoopHamiltonianGenerator #pragma omp for schedule(dynamic) for(size_t ia_bra = 0; ia_bra < nuniq_bra; ++ia_bra) { if(unique_alpha_bra[ia_bra].first.any()) { - if(!(ia_bra%100))printf("[ham_gen rank %d] IA_BRA = %lu / %lu\n", world_rank, ia_bra, nuniq_bra); + if(!(ia_bra % 100)) + printf("[ham_gen rank %d] IA_BRA = %lu / %lu\n", world_rank, ia_bra, + nuniq_bra); // Extract alpha bra const auto bra_alpha = unique_alpha_bra[ia_bra].first; const size_t beta_st_bra = unique_alpha_bra_idx[ia_bra]; diff --git a/include/macis/solvers/davidson.hpp b/include/macis/solvers/davidson.hpp index 47e4215b..bde3ef17 100644 --- a/include/macis/solvers/davidson.hpp +++ b/include/macis/solvers/davidson.hpp @@ -11,9 +11,9 @@ #include #include +#include #include #include -#include #include #include #include @@ -97,7 +97,7 @@ void p_diagonal_guess(size_t N_local, const SpMatType& A, double* X) { // Determine min index auto D_min = std::min_element(D.begin(), D.end()); auto min_idx = std::distance(D.begin(), D_min); - //printf("[rank %d] DMIN %lu %.6e\n", world_rank, min_idx, *D_min); + // printf("[rank %d] DMIN %lu %.6e\n", world_rank, min_idx, *D_min); // Zero out guess for(size_t i = 0; i < N_local; ++i) X[i] = 0.; @@ -254,8 +254,8 @@ inline void p_gram_schmidt(int64_t N_local, int64_t K, const double* V_old, double dot = blas::dot(N_local, V_new, 1, V_new, 1); dot = allreduce(dot, MPI_SUM, comm); double nrm = std::sqrt(dot); - //printf("[rank %d] GS DOT %.6e NRM %.6e\n", comm_rank(comm), - // dot, nrm); + // printf("[rank %d] GS DOT %.6e NRM %.6e\n", comm_rank(comm), + // dot, nrm); blas::scal(N_local, 1. / nrm, V_new, 1); } @@ -271,16 +271,16 @@ inline void p_rayleigh_ritz(int64_t N_local, int64_t K, const double* X, // Reduce result if(LDC != K) throw std::runtime_error("DIE DIE DIE RR"); - //allreduce(C, K * K, MPI_SUM, comm); + // allreduce(C, K * K, MPI_SUM, comm); std::allocator alloc; - double* tmp_c = world_rank ? nullptr: alloc.allocate(K*K); - reduce(C, tmp_c, K*K, MPI_SUM, 0, comm); + double* tmp_c = world_rank ? nullptr : alloc.allocate(K * K); + reduce(C, tmp_c, K * K, MPI_SUM, 0, comm); // Do local diagonalization on rank-0 if(!world_rank) { - memcpy(C, tmp_c, K*K*sizeof(double)); + memcpy(C, tmp_c, K * K * sizeof(double)); lapack::syev(lapack::Job::Vec, lapack::Uplo::Lower, K, C, LDC, W); - alloc.deallocate(tmp_c, K*K); + alloc.deallocate(tmp_c, K * K); } // Broadcast results @@ -390,19 +390,17 @@ auto p_davidson(int64_t N_local, int64_t max_m, const Functor& op, double E1_denom = 0, E1_num = 0; for(auto j = 0; j < N_local; ++j) { R_local[j] = -R_local[j] / (D_local[j] - LAM[0]); - E1_num += X_local[j] * R_local[j]; + E1_num += X_local[j] * R_local[j]; E1_denom += X_local[j] * X_local[j] / (D_local[j] - LAM[0]); } E1_denom = allreduce(E1_denom, MPI_SUM, comm); - E1_num = allreduce(E1_num, MPI_SUM, comm); + E1_num = allreduce(E1_num, MPI_SUM, comm); const double E1 = E1_num / E1_denom; for(auto j = 0; j < N_local; ++j) { R_local[j] += E1 * X_local[j] / (D_local[j] - LAM[0]); } - - // Project new vector out form old vectors p_gram_schmidt(N_local, k, V_local.data(), N_local, R_local, comm); diff --git a/include/macis/solvers/selected_ci_diag.hpp b/include/macis/solvers/selected_ci_diag.hpp index ea08676f..9edffb41 100644 --- a/include/macis/solvers/selected_ci_diag.hpp +++ b/include/macis/solvers/selected_ci_diag.hpp @@ -13,9 +13,9 @@ #include #include #include +#include #include #include -#include namespace macis { @@ -149,19 +149,22 @@ double selected_ci_diag(WfnIterator dets_begin, WfnIterator dets_end, auto world_size = comm_size(comm); auto world_rank = comm_rank(comm); //{ - //std::ofstream wfn_file("wfn_" + std::to_string(std::distance(dets_begin,dets_end)) + "_" + std::to_string(world_rank) + "." + std::to_string(world_size) + ".txt"); - //for(auto it = dets_begin; it != dets_end; ++it) { + // std::ofstream wfn_file("wfn_" + + // std::to_string(std::distance(dets_begin,dets_end)) + "_" + + // std::to_string(world_rank) + "." + std::to_string(world_size) + ".txt"); + // for(auto it = dets_begin; it != dets_end; ++it) { // wfn_file << *it << "\n"; //} - //wfn_file << std::flush; + // wfn_file << std::flush; //} #ifdef MACIS_ENABLE_MPI auto H = make_dist_csr_hamiltonian(comm, dets_begin, dets_end, ham_gen, h_el_tol); - //sparsexx::write_dist_mm("ham_" + std::to_string(H.n()) + "." + std::to_string(world_size) + ".mtx", H, 1); - //MACIS_MPI_CODE(MPI_Barrier(comm);) - //if(H.n() >= 10000000) throw "DIE DIE DIE"; + // sparsexx::write_dist_mm("ham_" + std::to_string(H.n()) + "." + + // std::to_string(world_size) + ".mtx", H, 1); + // MACIS_MPI_CODE(MPI_Barrier(comm);) + // if(H.n() >= 10000000) throw "DIE DIE DIE"; #else auto H = make_csr_hamiltonian(dets_begin, dets_end, ham_gen, h_el_tol); diff --git a/include/macis/util/mpi.hpp b/include/macis/util/mpi.hpp index 7c79c6fa..6375dbbc 100644 --- a/include/macis/util/mpi.hpp +++ b/include/macis/util/mpi.hpp @@ -146,18 +146,16 @@ mpi_datatype make_contiguous_mpi_datatype(int n) { return make_managed_mpi_datatype(contig_dtype); } - template -void reduce(const T* send, T* recv, size_t count, MPI_Op op, int root, +void reduce(const T* send, T* recv, size_t count, MPI_Op op, int root, MPI_Comm comm) { auto dtype = mpi_traits::datatype(); size_t intmax = std::numeric_limits::max(); size_t nchunk = count / intmax; if(nchunk) throw std::runtime_error("Msg over INT_MAX not yet tested"); - - MPI_Reduce( send, recv, count, dtype, op, root, comm ); + MPI_Reduce(send, recv, count, dtype, op, root, comm); } /** diff --git a/src/sparsexx/include/sparsexx/matrix_types/conversions.hpp b/src/sparsexx/include/sparsexx/matrix_types/conversions.hpp index 26466e7a..073cffbf 100644 --- a/src/sparsexx/include/sparsexx/matrix_types/conversions.hpp +++ b/src/sparsexx/include/sparsexx/matrix_types/conversions.hpp @@ -74,8 +74,8 @@ csr_matrix::csr_matrix( const auto& colind_coo = other.colind(); const auto& nzval_coo = other.nzval(); - // Compute rowptr - #if 0 +// Compute rowptr +#if 0 rowptr_.at(0) = other.indexing(); auto cur_row = 0; for(size_type i = 0; i < nnz_; ++i) @@ -84,29 +84,30 @@ csr_matrix::csr_matrix( rowptr_.at(cur_row) = i + indexing_; } rowptr_.at(m_) = nnz_ + indexing_; - #else +#else if(indexing_) throw std::runtime_error("NONZERO INDEXING"); for(size_type i = 0; i < nnz_; ++i) { rowptr_[rowind_coo[i] - indexing_ + 1]++; } for(size_type i = 0; i < m_; ++i) { - rowptr_[i+1] += rowptr_[i]; + rowptr_[i + 1] += rowptr_[i]; } if(indexing_) - for(size_type i = 0; i < m_+1; ++i) { - rowptr_[i] += indexing_; - } - #endif - - //for(size_type i = 0; i < m_; ++i) { - // auto row_st = rowptr_[i]; - // auto row_en = rowptr_[i+1]; - // for(size_type j = row_st; j < row_en; ++j) { - // if(rowind_coo[j] != i) throw std::runtime_error("ROWPTR WRONG"); - // } - // if(!std::is_sorted(colind_coo.begin() + row_st, colind_coo.begin() + row_en)) - // throw std::runtime_error("COLIND WRONG"); - //} + for(size_type i = 0; i < m_ + 1; ++i) { + rowptr_[i] += indexing_; + } +#endif + + // for(size_type i = 0; i < m_; ++i) { + // auto row_st = rowptr_[i]; + // auto row_en = rowptr_[i+1]; + // for(size_type j = row_st; j < row_en; ++j) { + // if(rowind_coo[j] != i) throw std::runtime_error("ROWPTR WRONG"); + // } + // if(!std::is_sorted(colind_coo.begin() + row_st, colind_coo.begin() + + // row_en)) + // throw std::runtime_error("COLIND WRONG"); + // } std::copy(colind_coo.begin(), colind_coo.end(), colind_.begin()); std::copy(nzval_coo.begin(), nzval_coo.end(), nzval_.begin()); diff --git a/src/sparsexx/include/sparsexx/spblas/pspmbv.hpp b/src/sparsexx/include/sparsexx/spblas/pspmbv.hpp index e736c144..e18f3ad3 100644 --- a/src/sparsexx/include/sparsexx/spblas/pspmbv.hpp +++ b/src/sparsexx/include/sparsexx/spblas/pspmbv.hpp @@ -50,7 +50,8 @@ struct spmv_info { int comm_size = recv_offsets.size(); for(int i = 0; i < comm_size; ++i) if(recv_counts[i]) { - if(recv_counts[i] > std::numeric_limits::max()) throw "DIE IN RECV"; + if(recv_counts[i] > std::numeric_limits::max()) + throw "DIE IN RECV"; reqs.emplace_back( detail::mpi_irecv(X + recv_offsets[i], recv_counts[i], i, 0, comm)); } @@ -63,7 +64,8 @@ struct spmv_info { int comm_size = send_offsets.size(); for(int i = 0; i < comm_size; ++i) if(send_counts[i]) { - if(send_counts[i] > std::numeric_limits::max()) throw "DIE IN SEND"; + if(send_counts[i] > std::numeric_limits::max()) + throw "DIE IN SEND"; reqs.emplace_back( detail::mpi_isend(X + send_offsets[i], send_counts[i], i, 0, comm)); } diff --git a/tests/standalone_driver.cxx b/tests/standalone_driver.cxx index 2471fb7d..ee424487 100644 --- a/tests/standalone_driver.cxx +++ b/tests/standalone_driver.cxx @@ -221,17 +221,27 @@ int main(int argc, char** argv) { } OPT_KEYWORD("ASCI.PT2", pt2, bool); OPT_KEYWORD("ASCI.PT2_TOL", asci_settings.pt2_tol, double); - OPT_KEYWORD("ASCI.PT2_RESERVE_COUNT", asci_settings.pt2_reserve_count, size_t); - OPT_KEYWORD("ASCI.PT2_CONSTRAINT_LVL_MAX", asci_settings.pt2_max_constraint_level, int); - OPT_KEYWORD("ASCI.PT2_CONSTRAINT_LVL_MIN", asci_settings.pt2_min_constraint_level, int); - OPT_KEYWORD("ASCI.PT2_CNSTRNT_RFNE_FORCE", asci_settings.pt2_constraint_refine_force,int64_t); + OPT_KEYWORD("ASCI.PT2_RESERVE_COUNT", asci_settings.pt2_reserve_count, + size_t); + OPT_KEYWORD("ASCI.PT2_CONSTRAINT_LVL_MAX", + asci_settings.pt2_max_constraint_level, int); + OPT_KEYWORD("ASCI.PT2_CONSTRAINT_LVL_MIN", + asci_settings.pt2_min_constraint_level, int); + OPT_KEYWORD("ASCI.PT2_CNSTRNT_RFNE_FORCE", + asci_settings.pt2_constraint_refine_force, int64_t); OPT_KEYWORD("ASCI.PT2_PRUNE", asci_settings.pt2_prune, bool); - OPT_KEYWORD("ASCI.PT2_PRECOMPUTE_EPS", asci_settings.pt2_precompute_eps, bool); - OPT_KEYWORD("ASCI.PT2_PRECOMPUTE_IDX", asci_settings.pt2_precompute_idx, bool); - OPT_KEYWORD("ASCI.PT2_PRINT_PROGRESS", asci_settings.pt2_print_progress, bool); - OPT_KEYWORD("ASCI.PT2_BIGCON_THRESH", asci_settings.pt2_bigcon_thresh, size_t); - OPT_KEYWORD("ASCI.NXTVAL_BCOUNT_THRESH", asci_settings.nxtval_bcount_thresh, size_t); - OPT_KEYWORD("ASCI.NXTVAL_BCOUNT_INC", asci_settings.nxtval_bcount_inc, size_t); + OPT_KEYWORD("ASCI.PT2_PRECOMPUTE_EPS", asci_settings.pt2_precompute_eps, + bool); + OPT_KEYWORD("ASCI.PT2_PRECOMPUTE_IDX", asci_settings.pt2_precompute_idx, + bool); + OPT_KEYWORD("ASCI.PT2_PRINT_PROGRESS", asci_settings.pt2_print_progress, + bool); + OPT_KEYWORD("ASCI.PT2_BIGCON_THRESH", asci_settings.pt2_bigcon_thresh, + size_t); + OPT_KEYWORD("ASCI.NXTVAL_BCOUNT_THRESH", asci_settings.nxtval_bcount_thresh, + size_t); + OPT_KEYWORD("ASCI.NXTVAL_BCOUNT_INC", asci_settings.nxtval_bcount_inc, + size_t); bool mp2_guess = false; OPT_KEYWORD("MCSCF.MP2_GUESS", mp2_guess, bool); @@ -405,13 +415,13 @@ int main(int argc, char** argv) { auto asci_st = hrt_t::now(); // Growth phase - std::tie(E0, dets, C) = macis::asci_grow( + std::tie(E0, dets, C) = macis::asci_grow( asci_settings, mcscf_settings, E0, std::move(dets), std::move(C), ham_gen, n_active MACIS_MPI_CODE(, MPI_COMM_WORLD)); // Refinement phase if(asci_settings.max_refine_iter) { - std::tie(E0, dets, C) = macis::asci_refine( + std::tie(E0, dets, C) = macis::asci_refine( asci_settings, mcscf_settings, E0, std::move(dets), std::move(C), ham_gen, n_active MACIS_MPI_CODE(, MPI_COMM_WORLD)); } @@ -445,10 +455,11 @@ int main(int argc, char** argv) { if(pt2) { MPI_Barrier(MPI_COMM_WORLD); auto pt2_st = hrt_t::now(); - EPT2 = macis::asci_pt2_constraint(asci_settings, - dets.begin(), dets.end(), E0 - (E_inactive + E_core), C, n_active, - ham_gen.T(), ham_gen.G_red(), ham_gen.V_red(), ham_gen.G(), - ham_gen.V(), ham_gen MACIS_MPI_CODE(, MPI_COMM_WORLD)); + EPT2 = macis::asci_pt2_constraint( + asci_settings, dets.begin(), dets.end(), + E0 - (E_inactive + E_core), C, n_active, ham_gen.T(), + ham_gen.G_red(), ham_gen.V_red(), ham_gen.G(), ham_gen.V(), + ham_gen MACIS_MPI_CODE(, MPI_COMM_WORLD)); MPI_Barrier(MPI_COMM_WORLD); auto pt2_en = hrt_t::now(); dur_t pt2_dur = pt2_en - pt2_st;