From 23575078d45e7b557e522b95d59c60f25dd81813 Mon Sep 17 00:00:00 2001 From: maki49 <1579492865@qq.com> Date: Sat, 9 Nov 2024 19:01:27 +0800 Subject: [PATCH 1/3] fix the initial guess --- source/module_lr/esolver_lrtd_lcao.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/source/module_lr/esolver_lrtd_lcao.cpp b/source/module_lr/esolver_lrtd_lcao.cpp index 5f696d7aa1..1f03dc9f14 100644 --- a/source/module_lr/esolver_lrtd_lcao.cpp +++ b/source/module_lr/esolver_lrtd_lcao.cpp @@ -587,8 +587,9 @@ void LR::ESolver_LR::set_X_initial_guess() const int is_in_x = openshell ? 0 : is; // if openshell, spin-up and spin-down are put together if (px.in_this_processor(virt_global, occ_global)) { + const int xstart_pair = ik * px.get_local_size(); const int ipair_loc = px.global2local_col(occ_global) * px.get_row_size() + px.global2local_row(virt_global); - X[is_in_x].data()[xstart_bs + ipair_loc] = (static_cast(1.0) / static_cast(nk)); + X[is_in_x].data()[xstart_bs + xstart_pair + ipair_loc] = (static_cast(1.0) / static_cast(nk)); } } } From 4c883be9d8b48b76b43cf70caa9e1ac009f01210 Mon Sep 17 00:00:00 2001 From: maki49 <1579492865@qq.com> Date: Tue, 12 Nov 2024 03:01:43 +0800 Subject: [PATCH 2/3] diag-precondition --- source/module_lr/esolver_lrtd_lcao.cpp | 14 ++++++++++++-- source/module_lr/hsolver_lrtd.hpp | 11 +++++------ .../module_lr/operator_casida/operator_lr_diag.h | 3 +-- tests/integrate/291_NO_KP_LR/result.ref | 2 +- tests/integrate/391_NO_GO_LR/result.ref | 2 +- tests/integrate/392_NO_GO_LR_HF/result.ref | 2 +- 6 files changed, 21 insertions(+), 13 deletions(-) diff --git a/source/module_lr/esolver_lrtd_lcao.cpp b/source/module_lr/esolver_lrtd_lcao.cpp index 1f03dc9f14..9e555290f5 100644 --- a/source/module_lr/esolver_lrtd_lcao.cpp +++ b/source/module_lr/esolver_lrtd_lcao.cpp @@ -16,6 +16,7 @@ #include "module_base/scalapack_connector.h" #include "module_parameter/parameter.h" #include "module_lr/ri_benchmark/ri_benchmark.h" +#include "module_lr/operator_casida/operator_lr_diag.h" // for precondition #ifdef __EXX template<> @@ -444,20 +445,29 @@ void LR::ESolver_LR::runner(int istep, UnitCell& cell) if (GlobalV::MY_RANK == 0) { assert(nst == LR_Util::write_value(efile(label), prec, e, nst)); } assert(nst * dim == LR_Util::write_value(vfile(label), prec, v, nst, dim)); }; + std::vector precondition(this->input.lr_solver == "lapack" ? 0 : nloc_per_band, 1.0); // allocate and initialize A matrix and density matrix if (openshell) { + for (int is : {0, 1}) + { + const int offset_is = is * this->paraX_[0].get_local_size(); + OperatorLRDiag pre_op(this->eig_ks.c + is * nk * (nocc[0] + nvirt[0]), this->paraX_[is], this->nk, this->nocc[is], this->nvirt[is]); + if (input.lr_solver != "lapack") { pre_op.act(1, offset_is, 1, precondition.data() + offset_is, precondition.data() + offset_is); } + } std::cout << "Solving spin-conserving excitation for open-shell system." << std::endl; HamiltULR hulr(xc_kernel, nspin, this->nbasis, this->nocc, this->nvirt, this->ucell, orb_cutoff_, GlobalC::GridD, *this->psi_ks, this->eig_ks, #ifdef __EXX this->exx_lri, this->exx_info.info_global.hybrid_alpha, #endif this->gint_, this->pot, this->kv, this->paraX_, this->paraC_, this->paraMat_); - LR::HSolver::solve(hulr, this->X[0].template data(), nloc_per_band, nstates, this->pelec->ekb.c, this->input.lr_solver, this->input.lr_thr); + LR::HSolver::solve(hulr, this->X[0].template data(), nloc_per_band, nstates, this->pelec->ekb.c, this->input.lr_solver, this->input.lr_thr, precondition); if (input.out_wfc_lr) { write_states("openshell", this->pelec->ekb.c, this->X[0].template data(), nloc_per_band, nstates); } } else { + OperatorLRDiag pre_op(this->eig_ks.c, this->paraX_[0], this->nk, this->nocc[0], this->nvirt[0]); + if (input.lr_solver != "lapack") { pre_op.act(1, nloc_per_band, 1, precondition.data(), precondition.data()); } auto spin_types = std::vector({ "singlet", "triplet" }); for (int is = 0;is < nspin;++is) { @@ -470,7 +480,7 @@ void LR::ESolver_LR::runner(int istep, UnitCell& cell) spin_types[is], input.ri_hartree_benchmark, (input.ri_hartree_benchmark == "aims" ? input.aims_nbasis : std::vector({}))); // solve the Casida equation LR::HSolver::solve(hlr, this->X[is].template data(), nloc_per_band, nstates, - this->pelec->ekb.c + is * nstates, this->input.lr_solver, this->input.lr_thr/*, + this->pelec->ekb.c + is * nstates, this->input.lr_solver, this->input.lr_thr, precondition/*, !std::set({ "hf", "hse" }).count(this->xc_kernel)*/); //whether the kernel is Hermitian if (input.out_wfc_lr) { write_states(spin_types[is], this->pelec->ekb.c + is * nstates, this->X[is].template data(), nloc_per_band, nstates); } } diff --git a/source/module_lr/hsolver_lrtd.hpp b/source/module_lr/hsolver_lrtd.hpp index 1777123468..6c8be619eb 100644 --- a/source/module_lr/hsolver_lrtd.hpp +++ b/source/module_lr/hsolver_lrtd.hpp @@ -32,6 +32,7 @@ namespace LR double* eig, const std::string method, const Real& diag_ethr, ///< threshold for diagonalization + const std::vector>& precondition, const bool hermitian = true) { ModuleBase::TITLE("HSolverLR", "solve"); @@ -39,8 +40,7 @@ namespace LR // note: if not TDA, the eigenvalues will be complex // then we will need a new constructor of DiagoDavid - // 1. allocate precondition and eigenvalue - std::vector> precondition(dim); + // 1. allocate eigenvalue std::vector> eigenvalue(nband); //nstates // 2. select the method #ifdef __MPI @@ -67,9 +67,7 @@ namespace LR } else { - // 3. set precondition and diagethr - for (int i = 0; i < dim; ++i) { precondition[i] = static_cast>(1.0); } - + // 3. set maxiter and funcs const int maxiter = hsolver::DiagoIterAssist::PW_DIAG_NMAX; auto hpsi_func = [&hm](T* psi_in, T* hpsi, const int ld_psi, const int nvec) {hm.hPsi(psi_in, hpsi, ld_psi, nvec);}; @@ -139,7 +137,8 @@ namespace LR auto psi_tensor = ct::TensorMap(psi, ct::DataTypeToEnum::value, ct::DeviceType::CpuDevice, ct::TensorShape({ nband, dim })); auto eigen_tensor = ct::TensorMap(eigenvalue.data(), ct::DataTypeToEnum>::value, ct::DeviceType::CpuDevice, ct::TensorShape({ nband })); - auto precon_tensor = ct::TensorMap(precondition.data(), ct::DataTypeToEnum>::value, ct::DeviceType::CpuDevice, ct::TensorShape({ dim })); + std::vector> precondition_(precondition); //since TensorMap does not support const pointer + auto precon_tensor = ct::TensorMap(precondition_.data(), ct::DataTypeToEnum>::value, ct::DeviceType::CpuDevice, ct::TensorShape({ dim })); auto hpsi_func = [&hm](const ct::Tensor& psi_in, ct::Tensor& hpsi) {hm.hPsi(psi_in.data(), hpsi.data(), psi_in.shape().dim_size(0) /*nbasis_local*/, 1/*band-by-band*/);}; auto spsi_func = [&hm](const ct::Tensor& psi_in, ct::Tensor& spsi) { std::memcpy(spsi.data(), psi_in.data(), sizeof(T) * psi_in.NumElements()); }; diff --git a/source/module_lr/operator_casida/operator_lr_diag.h b/source/module_lr/operator_casida/operator_lr_diag.h index d6255caabe..99a61d90df 100644 --- a/source/module_lr/operator_casida/operator_lr_diag.h +++ b/source/module_lr/operator_casida/operator_lr_diag.h @@ -46,9 +46,8 @@ namespace LR const bool is_first_node = false)const override { ModuleBase::TITLE("OperatorLRDiag", "act"); - const int nlocal_ph = nk * pX.get_local_size(); // local size of particle-hole basis hsolver::vector_mul_vector_op()(this->ctx, - nk * pX.get_local_size(), + nk * pX.get_local_size(), // local size of particle-hole basis hpsi, psi_in, this->eig_ks_diff.c); diff --git a/tests/integrate/291_NO_KP_LR/result.ref b/tests/integrate/291_NO_KP_LR/result.ref index 20d8ed5d51..722b3fff61 100644 --- a/tests/integrate/291_NO_KP_LR/result.ref +++ b/tests/integrate/291_NO_KP_LR/result.ref @@ -1 +1 @@ -totexcitationenergyref 0.784471 \ No newline at end of file +totexcitationenergyref 0.784274 \ No newline at end of file diff --git a/tests/integrate/391_NO_GO_LR/result.ref b/tests/integrate/391_NO_GO_LR/result.ref index 8d6a343a51..850f003f46 100644 --- a/tests/integrate/391_NO_GO_LR/result.ref +++ b/tests/integrate/391_NO_GO_LR/result.ref @@ -1 +1 @@ -totexcitationenergyref 2.641295 \ No newline at end of file +totexcitationenergyref 2.641190 \ No newline at end of file diff --git a/tests/integrate/392_NO_GO_LR_HF/result.ref b/tests/integrate/392_NO_GO_LR_HF/result.ref index e5caf54572..cbb7e398a5 100644 --- a/tests/integrate/392_NO_GO_LR_HF/result.ref +++ b/tests/integrate/392_NO_GO_LR_HF/result.ref @@ -1 +1 @@ -totexcitationenergyref 3.067979 \ No newline at end of file +totexcitationenergyref 3.067082 \ No newline at end of file From 2ca6c6c95fb78ecc097329d5492d6157e8cb4c00 Mon Sep 17 00:00:00 2001 From: maki49 <1579492865@qq.com> Date: Wed, 13 Nov 2024 04:07:01 +0800 Subject: [PATCH 3/3] fix segfault and non-hermitian in op_lr_exx --- .../module_lr/dm_trans/dm_trans_parallel.cpp | 2 +- .../operator_casida/operator_lr_exx.cpp | 29 +++++++++++++------ .../operator_casida/operator_lr_exx.h | 20 ++++++++----- tests/integrate/392_NO_GO_LR_HF/result.ref | 2 +- 4 files changed, 34 insertions(+), 19 deletions(-) diff --git a/source/module_lr/dm_trans/dm_trans_parallel.cpp b/source/module_lr/dm_trans/dm_trans_parallel.cpp index a21a95c8ee..e230ddbc00 100644 --- a/source/module_lr/dm_trans/dm_trans_parallel.cpp +++ b/source/module_lr/dm_trans/dm_trans_parallel.cpp @@ -109,7 +109,7 @@ std::vector cal_dm_trans_pblas(const std::complex* X_ // &beta, dm_trans[isk].data>(), &i1, &i1, pmat.desc); // ============== [C_virt * X * C_occ^\dagger]^T============= - // ============== = [C_occ^* * X^T * C_virt^T]^T============= + // ============== = [C_occ^* * X^T * C_virt^T]============= // 1. X*C_occ^\dagger char transa = 'N'; char transb = 'C'; diff --git a/source/module_lr/operator_casida/operator_lr_exx.cpp b/source/module_lr/operator_casida/operator_lr_exx.cpp index 47bdcb2399..c3399a72b4 100644 --- a/source/module_lr/operator_casida/operator_lr_exx.cpp +++ b/source/module_lr/operator_casida/operator_lr_exx.cpp @@ -27,6 +27,9 @@ namespace LR void OperatorLREXX::cal_DM_onebase(const int io, const int iv, const int ik) const { ModuleBase::TITLE("OperatorLREXX", "cal_DM_onebase"); + // NOTICE: DM_onebase will be passed into `cal_energy` interface and conjugated by "zdotc". + // So the formula should be the same as RHS. instead of LHS of the A-matrix, + // i.e. c1v · conj(c2o) · e^{-ik(R2-R1)} assert(ik == 0); for (auto cell : this->BvK_cells) { @@ -42,8 +45,12 @@ namespace LR const int nw2 = aims_nbasis.empty() ? ucell.atoms[it2].nw : aims_nbasis[it2]; for (int iw1 = 0;iw1 < nw1;++iw1) for (int iw2 = 0;iw2 < nw2;++iw2) - if (this->pmat.in_this_processor(ucell.itiaiw2iwt(it1, ia1, iw1), ucell.itiaiw2iwt(it2, ia2, iw2))) - D2d(iw1, iw2) = this->psi_ks_full(ik, io, ucell.itiaiw2iwt(it1, ia1, iw1)) * this->psi_ks_full(ik, nocc + iv, ucell.itiaiw2iwt(it2, ia2, iw2)); + { + const int iwt1 = ucell.itiaiw2iwt(it1, ia1, iw1); + const int iwt2 = ucell.itiaiw2iwt(it2, ia2, iw2); + if (this->pmat.in_this_processor(iwt1, iwt2)) + D2d(iw1, iw2) = this->psi_ks_full(ik, io, iwt1) * this->psi_ks_full(ik, nocc + iv, iwt2); + } } } } @@ -52,6 +59,9 @@ namespace LR void OperatorLREXX>::cal_DM_onebase(const int io, const int iv, const int ik) const { ModuleBase::TITLE("OperatorLREXX", "cal_DM_onebase"); + // NOTICE: DM_onebase will be passed into `cal_energy` interface and conjugated by "zdotc". + // So the formula should be the same as RHS. instead of LHS of the A-matrix, + // i.e. c1v · conj(c2o) · e^{-ik(R2-R1)} for (auto cell : this->BvK_cells) { std::complex frac = RI::Global_Func::convert>(std::exp( @@ -68,8 +78,12 @@ namespace LR const int nw2 = aims_nbasis.empty() ? ucell.atoms[it2].nw : aims_nbasis[it2]; for (int iw1 = 0;iw1 < nw1;++iw1) for (int iw2 = 0;iw2 < nw2;++iw2) - if (this->pmat.in_this_processor(ucell.itiaiw2iwt(it1, ia1, iw1), ucell.itiaiw2iwt(it2, ia2, iw2))) - D2d(iw1, iw2) = frac * std::conj(this->psi_ks_full(ik, io, ucell.itiaiw2iwt(it1, ia1, iw1))) * this->psi_ks_full(ik, nocc + iv, ucell.itiaiw2iwt(it2, ia2, iw2)); + { + const int iwt1 = ucell.itiaiw2iwt(it1, ia1, iw1); + const int iwt2 = ucell.itiaiw2iwt(it2, ia2, iw2); + if (this->pmat.in_this_processor(iwt1, iwt2)) + D2d(iw1, iw2) = frac * std::conj(this->psi_ks_full(ik, io, iwt2)) * this->psi_ks_full(ik, nocc + iv, iwt1); + } } } } @@ -78,9 +92,6 @@ namespace LR void OperatorLREXX::act(const int nbands, const int nbasis, const int npol, const T* psi_in, T* hpsi, const int ngk_ik, const bool is_first_node)const { ModuleBase::TITLE("OperatorLREXX", "act"); - - const int& nk = this->kv.get_nks() / this->nspin; - // convert parallel info to LibRI interfaces std::vector, std::set>> judge = RI_2D_Comm::get_2D_judge(this->pmat); @@ -120,12 +131,12 @@ namespace LR { const int xstart_bk = ik * pX.get_local_size(); this->cal_DM_onebase(io, iv, ik); //set Ds_onebase for all e-h pairs (not only on this processor) - // LR_Util::print_CV(Ds_onebase[is], "Ds_onebase of occ " + std::to_string(io) + ", virtual " + std::to_string(iv) + " in OperatorLREXX", 1e-10); + // LR_Util::print_CV(Ds_onebase, "Ds_onebase of occ " + std::to_string(io) + ", virtual " + std::to_string(iv) + " in OperatorLREXX", 1e-10); const T& ene = 2 * alpha * //minus for exchange(but here plus is right, why?), 2 for Hartree to Ry lri->exx_lri.post_2D.cal_energy(this->Ds_onebase, lri->Hexxs[0]); if (this->pX.in_this_processor(iv, io)) { - hpsi[xstart_bk + ik * pX.get_local_size() + this->pX.global2local_col(io) * this->pX.get_row_size() + this->pX.global2local_row(iv)] += ene; + hpsi[xstart_bk + this->pX.global2local_col(io) * this->pX.get_row_size() + this->pX.global2local_row(iv)] += ene; } } } diff --git a/source/module_lr/operator_casida/operator_lr_exx.h b/source/module_lr/operator_casida/operator_lr_exx.h index 657f4c4af2..c02604ee86 100644 --- a/source/module_lr/operator_casida/operator_lr_exx.h +++ b/source/module_lr/operator_casida/operator_lr_exx.h @@ -32,7 +32,7 @@ namespace LR const Parallel_Orbitals& pmat_in, const double& alpha = 1.0, const std::vector& aims_nbasis = {}) - : nspin(nspin), naos(naos), nocc(nocc), nvirt(nvirt), + : nspin(nspin), naos(naos), nocc(nocc), nvirt(nvirt), nk(kv_in.get_nks() / nspin), psi_ks(psi_ks_in), DM_trans(DM_trans_in), exx_lri(exx_lri_in), kv(kv_in), pX(pX_in), pc(pc_in), pmat(pmat_in), ucell(ucell_in), alpha(alpha), aims_nbasis(aims_nbasis) @@ -42,8 +42,11 @@ namespace LR this->is_first_node = false; // reduce psi_ks for later use - this->psi_ks_full.resize(this->kv.get_nks(), nocc + nvirt, this->naos); - LR_Util::gather_2d_to_full(this->pc, this->psi_ks.get_pointer(), this->psi_ks_full.get_pointer(), false, this->naos, nocc + nvirt); + this->psi_ks_full.resize(this->nk, nocc + nvirt, this->naos); + for (int ik = 0;ik < nk;++ik) + { + LR_Util::gather_2d_to_full(this->pc, &this->psi_ks(ik, 0, 0), &this->psi_ks_full(ik, 0, 0), false, this->naos, nocc + nvirt); + } // get cells in BvK supercell const TC period = RI_Util::get_Born_vonKarmen_period(kv_in); @@ -63,12 +66,13 @@ namespace LR const int ngk_ik = 0, const bool is_first_node = false) const override; - private: + private: //global sizes - const int& nspin; - const int& naos; - const int& nocc; - const int& nvirt; + const int nspin = 1; + const int naos = 1; + const int nocc = 1; + const int nvirt = 1; + const int nk = 1; ///< number of k-points const double alpha = 1.0; //(allow non-ref constant) const bool cal_dm_trans = false; const bool tdm_sym = false; ///< whether transition density matrix is symmetric diff --git a/tests/integrate/392_NO_GO_LR_HF/result.ref b/tests/integrate/392_NO_GO_LR_HF/result.ref index cbb7e398a5..c05e9ea91c 100644 --- a/tests/integrate/392_NO_GO_LR_HF/result.ref +++ b/tests/integrate/392_NO_GO_LR_HF/result.ref @@ -1 +1 @@ -totexcitationenergyref 3.067082 \ No newline at end of file +totexcitationenergyref 3.067058 \ No newline at end of file