From 9bebb83af2d938471572f5cace79bf8ed90af6d9 Mon Sep 17 00:00:00 2001 From: kylebystrom Date: Wed, 22 May 2024 14:07:35 -0400 Subject: [PATCH] start smooth vi, l=1 still not working --- ciderpress/dft/lcao_convolutions.py | 154 ++++++-- ciderpress/dft/lcao_nldf_generator.py | 127 ++++++- ciderpress/lib/mod_cider/convolutions.c | 448 +++++++++++++++++++++++- ciderpress/lib/mod_cider/convolutions.h | 2 + ciderpress/pyscf/descriptors.py | 11 +- ciderpress/pyscf/nldf_convolutions.py | 142 +++++++- ciderpress/pyscf/numint.py | 8 +- ciderpress/pyscf/tests/test_nldf.py | 40 ++- examples/pyscf/compute_ae.py | 4 + 9 files changed, 890 insertions(+), 46 deletions(-) diff --git a/ciderpress/dft/lcao_convolutions.py b/ciderpress/dft/lcao_convolutions.py index f78e9f5..0ecc037 100644 --- a/ciderpress/dft/lcao_convolutions.py +++ b/ciderpress/dft/lcao_convolutions.py @@ -214,6 +214,30 @@ def env(self): libcider.get_atco_env(env.ctypes.data_as(ctypes.c_void_p), self._atco) return env + def _run_checks(self, theta_rlmq, p_uq, loc, rads, offset): + nalpha = theta_rlmq.shape[-1] + stride = p_uq.shape[1] + if offset is None: + offset = 0 + else: + assert offset >= 0 + nrad = rads.size + assert loc.flags.c_contiguous + assert loc.dtype == np.int32 + assert loc.ndim == 1 + assert rads.flags.c_contiguous + assert rads.dtype == np.float64 + assert theta_rlmq.ndim == 3 + assert theta_rlmq.shape[0] == nrad + assert theta_rlmq.flags.c_contiguous + assert theta_rlmq.dtype == np.float64 + assert p_uq.ndim == 2 + assert p_uq.shape[0] == self.nao + assert p_uq.flags.c_contiguous + assert p_uq.dtype == np.float64 + assert offset + nalpha <= stride, "{} {} {}".format(offset, nalpha, stride) + return nalpha, stride, offset, nrad + def convert_rad2orb_( self, theta_rlmq, p_uq, loc, rads, rad2orb=True, offset=None, zero_output=True ): @@ -232,7 +256,7 @@ def convert_rad2orb_( Args: theta_rlmq (np.ndarray): float64, shape (nrad, nlm, nalpha) - p_uq (np.ndarray): float64, shape (ngrids, nalpha) + p_uq (np.ndarray): float64, shape (ngrids, stride) loc (np.ndarray, AtomicGridsIndexer): An int32 array whose shape depends on whether rad2orb is True. rads (np.ndarray): float64 list of radial coordinates with size @@ -251,27 +275,9 @@ def convert_rad2orb_( loc = loc.ra_loc else: loc = loc.ar_loc - nalpha = theta_rlmq.shape[-1] - stride = p_uq.shape[1] - if offset is None: - offset = 0 - else: - assert offset >= 0 - nrad = rads.size - assert loc.flags.c_contiguous - assert loc.dtype == np.int32 - assert loc.ndim == 1 - assert rads.flags.c_contiguous - assert rads.dtype == np.float64 - assert theta_rlmq.ndim == 3 - assert theta_rlmq.shape[0] == nrad - assert theta_rlmq.flags.c_contiguous - assert theta_rlmq.dtype == np.float64 - assert p_uq.ndim == 2 - assert p_uq.shape[0] == self.nao - assert p_uq.flags.c_contiguous - assert p_uq.dtype == np.float64 - assert offset + nalpha <= stride + nalpha, stride, offset, nrad = self._run_checks( + theta_rlmq, p_uq, loc, rads, offset + ) if rad2orb: assert loc.size == self.natm + 1 fn = libcider.contract_rad_to_orb @@ -295,6 +301,95 @@ def convert_rad2orb_( ctypes.c_int(offset), ) + def _solve_coefs_(self, p_uq, nalpha, stride, offset): + assert p_uq.flags.c_contiguous + assert p_uq.shape[-1] == stride + libcider.solve_atc_coefs_for_orbs( + p_uq.ctypes.data_as(ctypes.c_void_p), + self.atco_c_ptr, + ctypes.c_int(nalpha), + ctypes.c_int(stride), + ctypes.c_int(offset), + ) + + def convert_rad2orb_conv_( + self, + theta_rlmq, + p_uq, + loc, + rads, + alphas, + l_add=0, + rad2orb=True, + offset=None, + zero_output=True, + ): + """ + Take a set of functions stored on radial grids and spherical harmonics, + convolve it by the squared-exponentials with exponent parameters `alphas`, + and project it onto the atomic orbital basis. This is an in-place + operation, where theta_rlmq is written if rad2orb is False and + p_uq is written if rad2orb is True. len(alphas) == theta_rlmq.shape[-1] + See convert_rad2orb_ for more notes. + + Args: + theta_rlmq (np.ndarray): float64, shape (nrad, nlm, nalpha) + p_uq (np.ndarray): float64, shape (ngrids, stride) + loc (np.ndarray, AtomicGridsIndexer): + An int32 array whose shape depends on whether rad2orb is True. + rads (np.ndarray): float64 list of radial coordinates with size + theta_rlmq.shape[0]. Corresponds to rad_arr member of + AtomicGridsIndexer. + alphas (np.ndarray): exponents of squared-exponentials for + convolution + rad2orb (bool): Whether to convert radial coordinates to orbital + basis (True) or vice versa (False). + offset (int): starting index in p_uq to add to/from + zero_output (bool): Whether to set output zero before + adding to it. Default to true. Set to False if you + want to add to the existing values in the output + array. + """ + alphas = np.ascontiguousarray(alphas) + if isinstance(loc, AtomicGridsIndexer): + if rad2orb: + loc = loc.ra_loc + else: + loc = loc.ar_loc + nalpha, stride, offset, nrad = self._run_checks( + theta_rlmq, p_uq, loc, rads, offset + ) + if rad2orb: + assert loc.size == self.natm + 1 + fn = libcider.contract_rad_to_orb_conv + if zero_output: + p_uq[:, offset : offset + nalpha] = 0.0 + else: + assert loc.size == nrad + fn = libcider.contract_orb_to_rad_conv + if zero_output: + theta_rlmq[:] = 0.0 + # if not rad2orb: + # # orbital to radial grid conversion + # self._solve_coefs_(p_uq, nalpha, stride, offset) + fn( + theta_rlmq.ctypes.data_as(ctypes.c_void_p), + p_uq.ctypes.data_as(ctypes.c_void_p), + loc.ctypes.data_as(ctypes.c_void_p), + rads.ctypes.data_as(ctypes.c_void_p), + alphas.ctypes.data_as(ctypes.c_void_p), + ctypes.c_int(rads.size), + ctypes.c_int(theta_rlmq.shape[1]), + self.atco_c_ptr, + ctypes.c_int(nalpha), + ctypes.c_int(stride), + ctypes.c_int(offset), + ctypes.c_int(l_add), + ) + # if rad2orb: + # # radial grid to orbital conversion + # self._solve_coefs_(p_uq, nalpha, stride, offset) + class ConvolutionCollection: """ @@ -483,6 +578,21 @@ def multiply_atc_integrals(self, input, output=None, fwd=True): ) return output + def apply_vi_contractions(self, conv_vo, conv_vq, use_r2=False): + libcider.apply_vi_contractions( + conv_vo.ctypes.data_as(ctypes.c_void_p), + conv_vq.ctypes.data_as(ctypes.c_void_p), + self._ccl, + ctypes.c_int(1 if use_r2 else 0), + ) + + def solve_coefs_(self, p_uq): + print(p_uq.shape) + assert p_uq.flags.c_contiguous + libcider.solve_atc_coefs_for_orbs2( + p_uq.ctypes.data_as(ctypes.c_void_p), self._ccl + ) + class ConvolutionCollectionK(ConvolutionCollection): """ diff --git a/ciderpress/dft/lcao_nldf_generator.py b/ciderpress/dft/lcao_nldf_generator.py index a9da8f1..28b3c64 100644 --- a/ciderpress/dft/lcao_nldf_generator.py +++ b/ciderpress/dft/lcao_nldf_generator.py @@ -22,12 +22,22 @@ def __init__(self, plan, ccl, interpolator, grids_indexer): self._uq_buf = np.empty((self.ccl.atco_inp.nao, nalpha)) self._vq_buf = np.empty((self.ccl.atco_out.nao, self.ccl.num_out)) self._rlmq_buf = self.grids_indexer.empty_rlmq(nalpha) + self.timer = None @property def natm(self): return self.interpolator.atom_coords.shape[0] + def _start_timer(self, name): + if self.timer is not None: + self.timer.start(name) + + def _stop_timer(self, name): + if self.timer is not None: + self.timer.stop(name) + def _perform_fwd_convolution(self, theta_gq): + self._start_timer("Part1") theta_rlmq = self._rlmq_buf theta_uq = self._uq_buf conv_vq = self._vq_buf @@ -46,12 +56,18 @@ def _perform_fwd_convolution(self, theta_gq): self.plan.get_transformed_interpolation_terms( theta_uq, i=-1, fwd=True, inplace=True ) + self._stop_timer("Part1") + self._start_timer("Part2") self.ccl.multiply_atc_integrals(theta_uq, output=conv_vq, fwd=True) if self.plan.nldf_settings.nldf_type != "i": self.plan.get_transformed_interpolation_terms( conv_vq[:, : self.plan.nalpha], i=0, fwd=False, inplace=True ) - return self.interpolator.project_orb2grid(conv_vq) + self._stop_timer("Part2") + self._start_timer("Part3") + res = self.interpolator.project_orb2grid(conv_vq) + self._stop_timer("Part3") + return res def _perform_bwd_convolution(self, vf_gq): vtheta_rlmq = self._rlmq_buf @@ -59,12 +75,17 @@ def _perform_bwd_convolution(self, vf_gq): vconv_vq = self._vq_buf vconv_vq[:] = 0.0 vtheta_uq[:] = 0.0 + self._start_timer("Part3") self.interpolator.project_grid2orb(vf_gq, f_uq=vconv_vq) + self._stop_timer("Part3") + self._start_timer("Part2") if self.plan.nldf_settings.nldf_type != "i": self.plan.get_transformed_interpolation_terms( vconv_vq[:, : self.plan.nalpha], i=0, fwd=True, inplace=True ) self.ccl.multiply_atc_integrals(vconv_vq, output=vtheta_uq, fwd=False) + self._stop_timer("Part2") + self._start_timer("Part1") self.plan.get_transformed_interpolation_terms( vtheta_uq, i=-1, fwd=False, inplace=True ) @@ -78,6 +99,7 @@ def _perform_bwd_convolution(self, vf_gq): ) vtheta_gq = self.grids_indexer.empty_gq(self.plan.nalpha) self.grids_indexer.reduce_angc_ylm_(vtheta_rlmq, vtheta_gq, a2y=False) + self._stop_timer("Part1") return vtheta_gq def get_features(self, rho_in, spin=0): @@ -228,3 +250,106 @@ def get_potential(self, vfeat, spin=0): ) # self._cache[spin] = None return vrho_out + + +class VINLDFGen(LCAONLDFGenerator): + def __init__(self, plan, ccl, interpolator, grids_indexer): + """ + + Args: + plan (NLDFAuxiliaryPlan): + ccl (ConvolutionCollection): + interpolator (LCAOInterpolator): + grids_indexer (AtomicGridsIndexer): + """ + super(VINLDFGen, self).__init__(plan, ccl, interpolator, grids_indexer) + self._uq_buf = np.empty((self.ccl.atco_out.nao, self.plan.nalpha)) + self._vq_buf = np.empty((self.ccl.atco_out.nao, self.ccl.num_out)) + self.timer = None + + def _perform_fwd_convolution(self, theta_gq): + self._start_timer("Part1") + theta_rlmq = self._rlmq_buf + theta_uq = self._uq_buf + conv_vq = self._vq_buf + conv_vq[:] = 0.0 + theta_uq[:] = 0.0 + theta_rlmq[:] = 0.0 + self.grids_indexer.reduce_angc_ylm_(theta_rlmq, theta_gq, a2y=True) + shape = theta_rlmq.shape + theta_rlmq.shape = (-1, theta_rlmq.shape[-1]) + self.plan.get_transformed_interpolation_terms( + theta_rlmq, i=-1, fwd=True, inplace=True + ) + theta_rlmq.shape = shape + self._stop_timer("Part1") + self._start_timer("Part2") + self.ccl.atco_out.convert_rad2orb_conv_( + theta_rlmq, + theta_uq, + self.grids_indexer, + self.grids_indexer.rad_arr, + self.plan.alphas, + rad2orb=True, + offset=0, + ) + theta_uq[:] *= self.plan.alpha_norms + self.ccl.apply_vi_contractions(conv_vq, theta_uq) + theta_uq[:] = 0 + self.ccl.atco_out.convert_rad2orb_conv_( + theta_rlmq, + theta_uq, + self.grids_indexer, + self.grids_indexer.rad_arr, + self.plan.alphas, + l_add=2, + rad2orb=True, + offset=0, + ) + theta_uq[:] *= self.plan.alpha_norms + self.ccl.apply_vi_contractions(conv_vq, theta_uq, use_r2=True) + stride = conv_vq.shape[-1] + print(stride, conv_vq.shape) + # self.ccl.atco_out._solve_coefs_(conv_vq, stride, stride, 0) + self.ccl.solve_coefs_(conv_vq) + # print(self.plan.alpha_norms) + # conv_vq[:, 0] = (self.plan.alpha_norms * theta_uq).sum(axis=-1) + # conv_vq[:, 3] = (self.plan.alphas * self.plan.alpha_norms * theta_uq).sum( + # axis=-1 + # ) + # conv_vq[:, :] = (self.plan.alpha_norms * theta_uq).sum(axis=-1)[..., None] + # conv_vq[:, 0] = theta_uq.sum(axis=-1) + self._stop_timer("Part2") + self._start_timer("Part3") + res = self.interpolator.project_orb2grid(conv_vq) + self._stop_timer("Part3") + return res + + def _perform_bwd_convolution(self, vf_gq): + vtheta_rlmq = self._rlmq_buf + # vtheta_uq = self._uq_buf + vconv_vq = self._vq_buf + vconv_vq[:] = 0.0 + # vtheta_uq[:] = 0.0 + self._start_timer("Part3") + self.interpolator.project_grid2orb(vf_gq, f_uq=vconv_vq) + self._stop_timer("Part3") + self._start_timer("Part2") + self.ccl.atco_out.convert_rad2orb_conv_( + vtheta_rlmq, + vconv_vq, + self.grids_indexer, + self.grids_indexer.rad_arr, + self.plan.alphas, + rad2orb=False, + offset=0, + ) + self._stop_timer("Part2") + self._start_timer("Part1") + self.plan.get_transformed_interpolation_terms( + vtheta_rlmq, i=-1, fwd=False, inplace=True + ) + vtheta_gq = self.grids_indexer.empty_gq(self.plan.nalpha) + self.grids_indexer.reduce_angc_ylm_(vtheta_rlmq, vtheta_gq, a2y=False) + self._stop_timer("Part1") + return vtheta_gq diff --git a/ciderpress/lib/mod_cider/convolutions.c b/ciderpress/lib/mod_cider/convolutions.c index fe5d3fb..5a241ef 100644 --- a/ciderpress/lib/mod_cider/convolutions.c +++ b/ciderpress/lib/mod_cider/convolutions.c @@ -9,6 +9,10 @@ inline double gauss_integral(int l, double a) { return 0.5 * pow(a, -1.5 - l) * tgamma(1.5 + l); } +inline double gauss_mlapl_integral(int l, double a, double b) { + return 2 * a * b * pow(a + b, -2.5 - l) * tgamma(2.5 + l); +} + // TODO all gauss integrals below should be inline for efficiency, // TODO but they cannot be made inline and then assigned to function pointers. /** @@ -175,6 +179,7 @@ void generate_atc_basis_set(atc_basis_set **atco_ptr, int *atom2l0, int *lmaxs, atcc->gtrans_0 = (double *)malloc(ngsh2 * sizeof(double)); atcc->gtrans_m = (double *)malloc(ngsh2 * sizeof(double)); atcc->gtrans_p = (double *)malloc(ngsh2 * sizeof(double)); + atcc->gtrans_l = (double *)malloc(ngsh2 * sizeof(double)); for (int g = 0; g < ngsh; g++) { atcc->gammas[g] = all_exps[g + gsh0]; atcc->gcoefs[g] = all_coefs[g + gsh0]; @@ -194,6 +199,8 @@ void generate_atc_basis_set(atc_basis_set **atco_ptr, int *atom2l0, int *lmaxs, coef0 * coef1 * gauss_integral(l - 1, exp0 + exp1); atcc->gtrans_p[atcc->l_loc2[l] + g0 * ngsh_l + g1] = coef0 * coef1 * gauss_integral(l + 1, exp0 + exp1); + atcc->gtrans_l[atcc->l_loc2[l] + g0 * ngsh_l + g1] = + coef0 * coef1 * gauss_mlapl_integral(l, exp0, exp1); } } dpotrf_(&(atco->UPLO), &ngsh_l, atcc->gtrans_0 + atcc->l_loc2[l], @@ -202,6 +209,8 @@ void generate_atc_basis_set(atc_basis_set **atco_ptr, int *atom2l0, int *lmaxs, &ngsh_l, &info); dpotrf_(&(atco->UPLO), &ngsh_l, atcc->gtrans_p + atcc->l_loc2[l], &ngsh_l, &info); + dpotrf_(&(atco->UPLO), &ngsh_l, atcc->gtrans_l + atcc->l_loc2[l], + &ngsh_l, &info); } } atco->bas = (int *)malloc(BAS_SLOTS * atco->nbas * sizeof(int)); @@ -247,6 +256,7 @@ void free_atc_atom(atc_atom atcc) { free(atcc.gtrans_0); free(atcc.gtrans_m); free(atcc.gtrans_p); + free(atcc.gtrans_l); } /** @@ -577,7 +587,7 @@ void generate_atc_integrals_vi(convolution_collection *ccl, int featid, // k^8_alpha(r, r') = alpha^2 * (r-r')^2 * exp(-alpha*(r-r')^2) else if (featid == 8) integral_func = &gauss_a2dida; - // NOTE: k^9 = 4 * k^8 - 2 * k^7 should given the Laplacian of the k^0 + // NOTE: k^9 = 4 * k^8 - 2 * k^7 should give the Laplacian of the k^0 // feature. k^9_alpha(r, r') = \nabla^2 exp(-alpha*(r-r')^2) else if (featid == 9) integral_func = &gauss_lapli0; @@ -713,6 +723,114 @@ void solve_atc_coefs(convolution_collection *ccl) { } } +void solve_atc_coefs_for_orbs(double *p_uq, atc_basis_set *atco, int nalpha, + int stride, int offset) { +#pragma omp parallel + { + double *pp_uq; + int ia, dish; + int ish0; + int one = 1; + atc_atom atcc; + double *chomat; + int aq, q, m; + int info; + int mstride, u0; + int max_dish = 0; + for (ia = 0; ia < atco->natm; ia++) { + atcc = atco->atc_convs[ia]; + for (int l = 0; l < atcc.lmax + 1; l++) { + ish0 = atcc.global_l_loc[l]; + dish = atcc.global_l_loc[l + 1] - ish0; + max_dish = MAX(dish, max_dish); + } + } + double *buf = (double *)malloc(max_dish * sizeof(double)); +#pragma omp for schedule(dynamic, 4) + for (aq = 0; aq < nalpha * atco->natm; aq++) { + ia = aq / nalpha; + q = aq % nalpha; + atcc = atco->atc_convs[ia]; + for (int l = 0; l < atcc.lmax + 1; l++) { + ish0 = atcc.global_l_loc[l]; + dish = atcc.global_l_loc[l + 1] - ish0; + u0 = atco->ao_loc[ish0]; + mstride = (2 * l + 1) * stride; + chomat = atcc.gtrans_0 + atcc.l_loc2[l]; + for (m = 0; m < 2 * l + 1; m++) { + pp_uq = p_uq + (u0 + m) * stride + q + offset; + for (int sh = 0; sh < dish; sh++) { + buf[sh] = pp_uq[sh * mstride]; + } + dpotrs_(&(atco->UPLO), &dish, &one, chomat, &dish, buf, + &dish, &info); + for (int sh = 0; sh < dish; sh++) { + pp_uq[sh * mstride] = buf[sh]; + } + } + } + } + free(buf); + } +} + +void solve_atc_coefs_for_orbs2(double *p_uq, convolution_collection *ccl) { +#pragma omp parallel + { + double *pp_uq; + int ia, dish; + int ish0; + int one = 1; + atc_basis_set *atco = ccl->atco_out; + atc_atom atcc; + double *chomat; + int aq, q, m; + int info; + int mstride, u0; + int max_dish = 0; + int ifeat; + for (ia = 0; ia < atco->natm; ia++) { + atcc = atco->atc_convs[ia]; + for (int l = 0; l < atcc.lmax + 1; l++) { + ish0 = atcc.global_l_loc[l]; + dish = atcc.global_l_loc[l + 1] - ish0; + max_dish = MAX(dish, max_dish); + } + } + double *buf = (double *)malloc(max_dish * sizeof(double)); +#pragma omp for schedule(dynamic, 4) + for (aq = 0; aq < ccl->nfeat_i * atco->natm; aq++) { + ia = aq / ccl->nfeat_i; + q = aq % ccl->nfeat_i; + atcc = atco->atc_convs[ia]; + for (int l = 0; l < atcc.lmax + 1; l++) { + ish0 = atcc.global_l_loc[l]; + dish = atcc.global_l_loc[l + 1] - ish0; + u0 = atco->ao_loc[ish0]; + mstride = (2 * l + 1) * ccl->nfeat_i; + ifeat = ccl->icontrib_ids[q]; + if (ifeat == 3 || ifeat == 4 || ifeat == 5 || ifeat == 6) { + chomat = atcc.gtrans_l + atcc.l_loc2[l]; + } else { + chomat = atcc.gtrans_0 + atcc.l_loc2[l]; + } + for (m = 0; m < 2 * l + 1; m++) { + pp_uq = p_uq + (u0 + m) * ccl->nfeat_i + q; + for (int sh = 0; sh < dish; sh++) { + buf[sh] = pp_uq[sh * mstride]; + } + dpotrs_(&(atco->UPLO), &dish, &one, chomat, &dish, buf, + &dish, &info); + for (int sh = 0; sh < dish; sh++) { + pp_uq[sh * mstride] = buf[sh]; + } + } + } + } + free(buf); + } +} + /** * Convolve the input inp_uq to out_vq using the convolution_collection ccl. * NOTE this is for version i, j, and ij. Use multiply_atc_integrals_vk for @@ -986,6 +1104,334 @@ void contract_orb_to_rad(double *theta_rlmq, double *p_uq, int *ar_loc, } } +/** + * Given a radial distribution of functions for each spherical harmonic + * lm on each atom, with dimension nalpha, project onto the orbital + * basis set given by atco. Note: this computes projections onto each + * (non-orthogonal) basis function, not expansion coefficients + * theta_rlmq (nrad x nlm x nalpha) : input functions to project onto atco basis + * p_uq (atco->nao x nalpha) : output projections + * ra_loc (length natm + 1) : Range of rad indices that correspond + * to each atom + * rads (length nrad) : radial coordinates for each radial index + * nrad : number of radial coordinates over all atoms + * nlm : number of spherical harmonics (lmax + 1)^2 + * atco : stores the atomic basis set. + * nalpha : number of functions stored in the rlm space. + */ +void contract_rad_to_orb_conv(double *theta_rlmq, double *p_uq, int *ra_loc, + double *rads, double *alphas, int nrad, int nlm, + atc_basis_set *atco, int nalpha, int stride, + int offset, int l_add) { + p_uq = p_uq + offset; +#pragma omp parallel + { + double PI = 4.0 * atan(1.0); + int ish, i0, L0, nm, l, l_eff, at; + double *p_q, *theta_mq; + int *bas = atco->bas; + int *ao_loc = atco->ao_loc; + double *env = atco->env; + int nbas = atco->nbas; + int *ibas; + double coef, beta; + int r, m, q, mq; + double *exp_list = (double *)malloc(sizeof(double) * nalpha); + double *coef_list = (double *)malloc(sizeof(double) * nalpha); + double *val_list = (double *)malloc(sizeof(double) * nalpha); +#pragma omp for schedule(dynamic, 4) + for (ish = 0; ish < nbas; ish++) { + ibas = bas + ish * BAS_SLOTS; + at = ibas[ATOM_OF]; + l = ibas[ANG_OF]; + l_eff = l + l_add; + coef = env[ibas[PTR_COEFF]]; + beta = env[ibas[PTR_EXP]]; + i0 = ao_loc[ish]; + nm = 2 * l + 1; + L0 = l * l; + for (q = 0; q < nalpha; q++) { + exp_list[q] = beta * alphas[q] / (beta + alphas[q]); + coef_list[q] = coef * pow(PI / alphas[q], 1.5) * + pow(alphas[q] / (beta + alphas[q]), 1.5 + l); + } + for (r = ra_loc[at]; r < ra_loc[at + 1]; r++) { + theta_mq = theta_rlmq + nalpha * (r * nlm + L0); + p_q = p_uq + i0 * stride; + mq = 0; + for (q = 0; q < nalpha; q++) { + val_list[q] = coef_list[q] * pow(rads[r], l_eff) * + exp(-exp_list[q] * rads[r] * rads[r]); + } + for (m = 0; m < nm; m++) { + for (q = 0; q < nalpha; q++, mq++) { + p_q[q] += val_list[q] * theta_mq[mq]; + } + p_q += stride; + } + } + } + free(exp_list); + free(coef_list); + free(val_list); + } +} + +/** + * Backwards version of contract_rad_to_orb_conv + * (i.e. takes p_uq as input and projects the convolved basis + * functions out onto the radial coordinates and spherical harmonics). + * See contract_rad_to_orb_conv for variable definitions and details. + */ +void contract_orb_to_rad_conv(double *theta_rlmq, double *p_uq, int *ar_loc, + double *rads, double *alphas, int nrad, int nlm, + atc_basis_set *atco, int nalpha, int stride, + int offset, int l_add) { + p_uq = p_uq + offset; + int nbas = atco->nbas; + double *exp_list = (double *)malloc(sizeof(double) * nbas * nalpha); + double *coef_list = (double *)malloc(sizeof(double) * nbas * nalpha); +#pragma omp parallel + { + double PI = 4.0 * atan(1.0); + int ish, i0, L0, nm, l, at; + double *p_q, *theta_mq; + int *bas = atco->bas; + int *ao_loc = atco->ao_loc; + double *env = atco->env; + int *ibas; + double coef, beta; + int r, m, q, mq; + double *val_list = (double *)malloc(sizeof(double) * nalpha); +#pragma omp for schedule(dynamic, 4) + for (ish = 0; ish < nbas; ish++) { + ibas = bas + ish * BAS_SLOTS; + at = ibas[ATOM_OF]; + l = ibas[ANG_OF]; + coef = env[ibas[PTR_COEFF]]; + beta = env[ibas[PTR_EXP]]; + for (q = 0; q < nalpha; q++) { + exp_list[ish * nalpha + q] = + beta * alphas[q] / (beta + alphas[q]); + coef_list[ish * nalpha + q] = + coef * pow(PI / alphas[q], 1.5) * + pow(alphas[q] / (beta + alphas[q]), 1.5 + l); + } + } +#pragma omp for schedule(dynamic, 4) + for (r = 0; r < nrad; r++) { + at = ar_loc[r]; + for (ish = atco->atom_loc_ao[at]; ish < atco->atom_loc_ao[at + 1]; + ish++) { + ibas = bas + ish * BAS_SLOTS; + l = ibas[ANG_OF]; + coef = env[ibas[PTR_COEFF]]; + beta = env[ibas[PTR_EXP]]; + i0 = ao_loc[ish]; + nm = 2 * l + 1; + L0 = l * l; + for (q = 0; q < nalpha; q++) { + val_list[q] = + coef_list[ish * nalpha + q] * pow(rads[r], l) * + exp(-exp_list[ish * nalpha + q] * rads[r] * rads[r]); + } + theta_mq = theta_rlmq + nalpha * (r * nlm + L0); + p_q = p_uq + i0 * stride; + mq = 0; + for (m = 0; m < nm; m++) { + for (q = 0; q < nalpha; q++, mq++) { + theta_mq[mq] += val_list[q] * p_q[q]; + } + p_q += stride; + } + } + } + } +} + +double real_se_prefac(int l, double alpha, double beta) { return 1.0; } + +double real_se_prefac_r2(int l, double alpha, double beta) { return 0.0; } + +double real_se_ap_prefac(int l, double alpha, double beta) { return alpha; } + +double real_se_ap_prefac_r2(int l, double alpha, double beta) { return 0.0; } + +double real_se_r2_prefac(int l, double alpha, double beta) { + return (3 * alpha - 2 * beta * l) / (2 * alpha * (beta + alpha)); +} + +double real_se_r2_prefac_r2(int l, double alpha, double beta) { + return beta * beta / ((beta + alpha) * (beta + alpha)); +} + +double real_se_apr2_prefac(int l, double alpha, double beta) { + return alpha * real_se_r2_prefac(l, alpha, beta); +} + +double real_se_apr2_prefac_r2(int l, double alpha, double beta) { + return alpha * real_se_r2_prefac_r2(l, alpha, beta); +} + +double real_se_ap2r2_prefac(int l, double alpha, double beta) { + return alpha * alpha * real_se_r2_prefac(l, alpha, beta); +} + +double real_se_ap2r2_prefac_r2(int l, double alpha, double beta) { + return alpha * alpha * real_se_r2_prefac_r2(l, alpha, beta); +} + +double real_se_lapl_prefac(int l, double alpha, double beta) { + return 4 * real_se_ap2r2_prefac(l, alpha, beta) - + 2 * real_se_ap_prefac(l, alpha, beta); +} + +double real_se_lapl_prefac_r2(int l, double alpha, double beta) { + return 4 * real_se_ap2r2_prefac_r2(l, alpha, beta) - + 2 * real_se_ap_prefac_r2(l, alpha, beta); +} + +double real_se_grad_prefac_m1(int l, double alpha, double beta) { + return -0.0 * real_se_lapl_prefac(l, alpha, beta); +} + +double real_se_grad_prefac_r2_m1(int l, double alpha, double beta) { + return -0.0 * real_se_lapl_prefac_r2(l, alpha, beta); +} + +double real_se_grad_prefac_p1(int l, double alpha, double beta) { + return pow(-1.0, l) * alpha * beta / (alpha + beta) * + real_se_lapl_prefac(l, alpha, beta); +} + +double real_se_grad_prefac_r2_p1(int l, double alpha, double beta) { + return pow(-1.0, l) * alpha * beta / (alpha + beta) * + real_se_lapl_prefac_r2(l, alpha, beta); +} + +double real_se_rvec_prefac_m1(int l, double alpha, double beta) { + return real_se_grad_prefac_m1(l, alpha, beta) / alpha; +} + +double real_se_rvec_prefac_p1(int l, double alpha, double beta) { + return real_se_grad_prefac_p1(l, alpha, beta) / alpha; +} + +double real_se_rvec_prefac_r2_m1(int l, double alpha, double beta) { + return real_se_grad_prefac_r2_m1(l, alpha, beta) / alpha; +} + +double real_se_rvec_prefac_r2_p1(int l, double alpha, double beta) { + return real_se_grad_prefac_r2_p1(l, alpha, beta) / alpha; +} + +void get_real_conv_prefacs_vi(double *out, atc_basis_set *atco, double *alphas, + int use_r2, int featid, int num_out, int nalpha) { +#pragma omp parallel + { + int sh, q, l; + double beta; + int *ibas; + double (*prefac_func)(int, double, double); + // NOTE: This messy if statement can be replaced by + // subclassing when converting to C++. + if (use_r2) { + if (featid == 0) + prefac_func = &real_se_prefac_r2; + else if (featid == 1) + prefac_func = &real_se_r2_prefac_r2; + else if (featid == 2) + prefac_func = &real_se_apr2_prefac_r2; + else if (featid == 3) + prefac_func = &real_se_rvec_prefac_r2_m1; + else if (featid == 4) + prefac_func = &real_se_grad_prefac_r2_m1; + else if (featid == 5) + prefac_func = &real_se_rvec_prefac_r2_p1; + else if (featid == 6) + prefac_func = &real_se_grad_prefac_r2_p1; + else if (featid == 7) + prefac_func = &real_se_ap_prefac_r2; + else if (featid == 8) + prefac_func = &real_se_ap2r2_prefac_r2; + else if (featid == 9) + prefac_func = &real_se_lapl_prefac_r2; + else { + printf("Unsupported featid\n"); + exit(-1); + } + } else { + if (featid == 0) + prefac_func = &real_se_prefac; + else if (featid == 1) + prefac_func = &real_se_r2_prefac; + else if (featid == 2) + prefac_func = &real_se_apr2_prefac; + else if (featid == 3) + prefac_func = &real_se_rvec_prefac_m1; + else if (featid == 4) + prefac_func = &real_se_grad_prefac_m1; + else if (featid == 5) + prefac_func = &real_se_rvec_prefac_p1; + else if (featid == 6) + prefac_func = &real_se_grad_prefac_p1; + else if (featid == 7) + prefac_func = &real_se_ap_prefac; + else if (featid == 8) + prefac_func = &real_se_ap2r2_prefac; + else if (featid == 9) + prefac_func = &real_se_lapl_prefac; + else { + printf("Unsupported featid\n"); + exit(-1); + } + } +#pragma omp for + for (sh = 0; sh < atco->nbas; sh++) { + for (q = 0; q < nalpha; q++) { + ibas = atco->bas + sh * BAS_SLOTS; + l = ibas[ANG_OF]; + beta = atco->env[ibas[PTR_EXP]]; + out[sh * nalpha * num_out + q] = + prefac_func(l, alphas[q], beta); + } + } + } +} + +void apply_vi_contractions(double *conv_vo, double *conv_vq, + convolution_collection *ccl, int use_r2) { + int ifeat; + atc_basis_set *atco = ccl->atco_out; + int nalpha = ccl->nalpha; + int nbeta = ccl->nfeat_i; + int oqstride = nalpha * nbeta; + double *ints_soq = + (double *)malloc(nalpha * nbeta * atco->nbas * sizeof(double)); + for (ifeat = 0; ifeat < nbeta; ifeat++) { + get_real_conv_prefacs_vi(ints_soq + ifeat * nalpha, atco, ccl->alphas, + use_r2, ccl->icontrib_ids[ifeat], nbeta, + nalpha); + } +#pragma omp parallel + { + int sh, v, b, q; +#pragma omp for schedule(dynamic, 4) + for (sh = 0; sh < atco->nbas; sh++) { + for (v = atco->ao_loc[sh]; v < atco->ao_loc[sh + 1]; v++) { + for (b = 0; b < ccl->nfeat_i; b++) { + for (q = 0; q < ccl->nalpha; q++) { + conv_vo[v * nbeta + b] += + ints_soq[sh * oqstride + b * nalpha + q] * + conv_vq[v * nalpha + q]; + } + } + } + } + } + free(ints_soq); +} + // TODO implementing these functions could be helpful for PAW implementation /** * Given a radial distribution of functions for each spherical harmonic diff --git a/ciderpress/lib/mod_cider/convolutions.h b/ciderpress/lib/mod_cider/convolutions.h index 0baf5ee..db01700 100644 --- a/ciderpress/lib/mod_cider/convolutions.h +++ b/ciderpress/lib/mod_cider/convolutions.h @@ -31,6 +31,8 @@ typedef struct { // with l-1 used for the principal angular momentum number double *gtrans_p; // transformation (inverted overlap) matrix for gammas // with l+1 used for the principal angular momentum number + double *gtrans_l; // transformation (inverted overlap) matrix for gammas + // using -laplacian norm } atc_atom; /** diff --git a/ciderpress/pyscf/descriptors.py b/ciderpress/pyscf/descriptors.py index 1ab81fe..9fe0d10 100644 --- a/ciderpress/pyscf/descriptors.py +++ b/ciderpress/pyscf/descriptors.py @@ -11,6 +11,7 @@ from ciderpress.dft.settings import ( FracLaplSettings, NLDFSettings, + NLDFSettingsVI, SDMXBaseSettings, SemilocalSettings, dalpha, @@ -21,7 +22,11 @@ from ciderpress.pyscf.analyzers import UHFAnalyzer from ciderpress.pyscf.frac_lapl import FLNumInt from ciderpress.pyscf.gen_cider_grid import CiderGrids -from ciderpress.pyscf.nldf_convolutions import DEFAULT_CIDER_LMAX, PyscfNLDFGenerator +from ciderpress.pyscf.nldf_convolutions import ( + DEFAULT_CIDER_LMAX, + PyscfNLDFGenerator, + PyscfVIGenerator, +) from ciderpress.pyscf.sdmx_slow import EXXSphGenerator NLDF_VERSION_LIST = ["i", "j", "ij", "k"] @@ -405,6 +410,10 @@ def _nldf_desc_getter( nldf_generator = PyscfNLDFGenerator.from_mol_and_settings( mol, grids.grids_indexer, 1, nldf_settings, **kwargs ) + if isinstance(nldf_settings, NLDFSettingsVI): + nldf_generator = PyscfVIGenerator.from_mol_and_settings( + mol, grids.grids_indexer, 1, nldf_settings, **kwargs + ) nldf_generator.interpolator.set_coords(pgrids.coords) ni = NumInt() xctype = "MGGA" diff --git a/ciderpress/pyscf/nldf_convolutions.py b/ciderpress/pyscf/nldf_convolutions.py index ffe68fd..d0630fd 100644 --- a/ciderpress/pyscf/nldf_convolutions.py +++ b/ciderpress/pyscf/nldf_convolutions.py @@ -12,7 +12,7 @@ get_gamma_lists_from_bas_and_env, ) from ciderpress.dft.lcao_interpolation import LCAOInterpolator, LCAOInterpolatorDirect -from ciderpress.dft.lcao_nldf_generator import LCAONLDFGenerator +from ciderpress.dft.lcao_nldf_generator import LCAONLDFGenerator, VINLDFGen from ciderpress.dft.plans import VI_ID_MAP, NLDFGaussianPlan, NLDFSplinePlan @@ -128,7 +128,8 @@ def __init__(self, settings, **kwargs): self.settings = settings self.kwargs = kwargs - def initialize_nldf_generator(self, mol, grids_indexer, nspin): + def initialize_nldf_generator(self, mol, grids_indexer, nspin, timer=None): + self.kwargs["timer"] = timer return PyscfNLDFGenerator.from_mol_and_settings( mol, grids_indexer, nspin, self.settings, **self.kwargs ) @@ -160,6 +161,7 @@ def from_mol_and_settings( aparam=0.03, dparam=0.04, nrad=200, + timer=None, ): if lmax is None: lmax = grids_indexer.lmax @@ -241,9 +243,143 @@ def from_mol_and_settings( nrad=nrad, onsite_direct=onsite_direct, ) + if timer is not None: + timer.start("Compute integrals") ccl.compute_integrals_() + if timer is not None: + timer.stop() + timer.start("Projection coefs") ccl.solve_projection_coefficients() - return cls(plan, ccl, interpolator, grids_indexer) + if timer is not None: + timer.stop("Projection coefs") + output = cls(plan, ccl, interpolator, grids_indexer) + output.timer = timer + return output + + def get_extra_ao(self): + return (self.interpolator.nlm + 4) * self.natm + + +class PyscfVIGenerator(VINLDFGen): + """ + A PySCF-specific wrapper for the NLDF feature generator. + """ + + @classmethod + def from_mol_and_settings( + cls, + mol, + grids_indexer, + nspin, + nldf_settings, + plan_type="gaussian", + lmax=None, + aux_lambd=1.6, + aug_beta=None, + alpha_max=10000, + alpha_min=None, + alpha_formula=None, + rhocut=1e-10, + expcut=1e-10, + gbuf=2.0, + interpolator_type="onsite_direct", + aparam=0.03, + dparam=0.04, + nrad=200, + timer=None, + ): + if lmax is None: + lmax = grids_indexer.lmax + if lmax > grids_indexer.lmax: + raise ValueError("lmax cannot be larger than grids_indexer.lmax") + if interpolator_type not in ["onsite_direct", "onsite_spline", "train_gen"]: + raise ValueError + if aug_beta is None: + aug_beta = aux_lambd + if alpha_min is None: + alpha_min = nldf_settings.theta_params[0] / 256 # sensible default + if plan_type not in ["gaussian", "spline"]: + raise ValueError("plan_type must be gaussian or spline") + if alpha_formula is None: + alpha_formula = "etb" if plan_type == "gaussian" else "zexp" + nalpha = int(np.ceil(np.log(alpha_max / alpha_min) / np.log(aux_lambd))) + 1 + plan_class = NLDFGaussianPlan if plan_type == "gaussian" else NLDFSplinePlan + plan = plan_class( + nldf_settings, + nspin, + alpha_min, + aux_lambd, + nalpha, + coef_order="gq", + alpha_formula=alpha_formula, + proc_inds=None, + rhocut=rhocut, + expcut=expcut, + ) + basis = aug_etb_for_cider(mol, lmax=lmax, beta=aug_beta)[0] + mol = gto.M( + atom=mol.atom, + basis=basis, + spin=mol.spin, + charge=mol.charge, + unit=mol.unit, + ) + dat = get_gamma_lists_from_mol(mol) + atco_inp = ATCBasis(*dat) + dat = get_convolution_expnts_from_expnts( + plan.alphas, dat[0], dat[1], dat[2], dat[4], gbuf=gbuf + ) + atco_out = ATCBasis(*dat) + if plan.nldf_settings.nldf_type == "k": + ccl = ConvolutionCollectionK( + atco_inp, atco_out, plan.alphas, plan.alpha_norms + ) + else: + has_vj = "j" in plan.nldf_settings.nldf_type + ifeat_ids = [] + for spec in plan.nldf_settings.l0_feat_specs: + ifeat_ids.append(VI_ID_MAP[spec]) + for spec in plan.nldf_settings.l1_feat_specs: + ifeat_ids.append(VI_ID_MAP[spec]) + ccl = ConvolutionCollection( + atco_inp, + atco_out, + plan.alphas, + plan.alpha_norms, + has_vj=has_vj, + ifeat_ids=ifeat_ids, + ) + if interpolator_type == "train_gen": + interpolator = LCAOInterpolator.from_ccl( + mol.atom_coords(unit="Bohr"), + ccl, + aparam=aparam, + dparam=dparam, + nrad=nrad, + ) + else: + onsite_direct = interpolator_type == "onsite_direct" + interpolator = LCAOInterpolatorDirect.from_ccl( + grids_indexer, + mol.atom_coords(unit="Bohr"), + ccl, + aparam=aparam, + dparam=dparam, + nrad=nrad, + onsite_direct=onsite_direct, + ) + if timer is not None: + timer.start("Compute integrals") + ccl.compute_integrals_() + if timer is not None: + timer.stop() + timer.start("Projection coefs") + ccl.solve_projection_coefficients() + if timer is not None: + timer.stop("Projection coefs") + output = cls(plan, ccl, interpolator, grids_indexer) + output.timer = timer + return output def get_extra_ao(self): return (self.interpolator.nlm + 4) * self.natm diff --git a/ciderpress/pyscf/numint.py b/ciderpress/pyscf/numint.py index 30d695b..80efd15 100644 --- a/ciderpress/pyscf/numint.py +++ b/ciderpress/pyscf/numint.py @@ -317,6 +317,7 @@ def nr_rks_nldf( Returns: """ + ni.timer.start("nr_rks_nldf") if not hasattr(grids, "grids_indexer"): raise ValueError("Grids object must have indexer for NLDF evaluation") @@ -362,8 +363,10 @@ def block_loop(ao_deriv): if any(x in xc_code.upper() for x in ("CC06", "CS", "BR89", "MK00")): raise NotImplementedError("laplacian in meta-GGA method") ao_deriv = 1 + ni.timer.start("density") for i, ip0, ip1, ao, mask, weight, coords in block_loop(ao_deriv): rho_full[i, :, ip0:ip1] = make_rho(i, ao, mask, xctype) + ni.timer.stop("density") par_atom = False extra_ao = ni.nldfgen.get_extra_ao() @@ -408,6 +411,7 @@ def block_loop(ao_deriv): for idm in range(nset): wv_full[idm, :, :] += ni.nldfgen.get_potential(vxc_nldf_full[idm]) + ni.timer.start("potential") buffers = None pair_mask = mol.get_overlap_cond() < -np.log(ni.cutoff) v1 = np.zeros_like(vmat) @@ -436,6 +440,8 @@ def block_loop(ao_deriv): dtype = np.result_type(*dms) if vmat.dtype != dtype: vmat = np.asarray(vmat, dtype=dtype) + ni.timer.stop("potential") + ni.timer.stop("nr_rks_nldf") return nelec, excsum, vmat @@ -1109,7 +1115,7 @@ def initialize_feature_generators(self, mol, grids, nspin): cond = cond or self.nldfgen.plan.nspin != nspin if cond: self.nldfgen = self.nldf_init.initialize_nldf_generator( - mol, grids.grids_indexer, nspin + mol, grids.grids_indexer, nspin, timer=self.timer ) self.nldfgen.interpolator.set_coords(grids.coords) super().initialize_feature_generators(mol, grids, nspin) diff --git a/ciderpress/pyscf/tests/test_nldf.py b/ciderpress/pyscf/tests/test_nldf.py index 1ecc179..c1dfaa2 100644 --- a/ciderpress/pyscf/tests/test_nldf.py +++ b/ciderpress/pyscf/tests/test_nldf.py @@ -169,20 +169,24 @@ def _check_nldf_equivalence( aug_beta=beta, ) # TODO uncomment after fixing vi stability - # import traceback as tb - # errs = {} - # for i in range(ifeat_pred.shape[1]): - # print(i) - # try: - # assert_allclose(ifeat_pred[:, i], ifeat[:, i], rtol=rtol, atol=atol) - # except AssertionError as e: - # errs[i] = ''.join(tb.format_exception(None, e, e.__traceback__)) - # if len(errs) > 0: - # estr = '' - # for i, err in errs.items(): - # print(i, err) - # estr = estr + 'FEATURE {}\n{}\n'.format(i, err) - # raise AssertionError(estr) + import traceback as tb + + errs = {} + for i in range(ifeat_pred.shape[1]): + print(i) + try: + assert_allclose( + ifeat_pred[:, i], ifeat[:, i], rtol=0.1 * rtol, atol=0.1 * atol + ) + except AssertionError as e: + # raise e + errs[i] = "".join(tb.format_exception(None, e, e.__traceback__)) + if len(errs) > 0: + estr = "" + for i, err in errs.items(): + print(i, err) + estr = estr + "FEATURE {}\n{}\n".format(i, err) + raise AssertionError(estr) jfeat_pred = get_descriptors( analyzer, self.vj_settings, @@ -294,7 +298,7 @@ def _check_nldf_occ_derivs( ) assert_almost_equal(rho_pred, rho_ref, 12) # TODO uncomment after fixing vi stability - # assert_almost_equal(ifeat_pred, ifeat_ref, 12) + assert_almost_equal(ifeat_pred, ifeat_ref, 12) assert_almost_equal(jfeat_pred, jfeat_ref, 12) # TODO uncomment after fixing vi stability # assert_almost_equal(ijfeat_pred, ijfeat_ref, 12) @@ -339,8 +343,9 @@ def _check_nldf_occ_derivs( ) occd_ifeat_fd = (ifeat_pert - ifeat_ref)[spin] / delta for i in range(self.vi_settings.nfeat): - # TODO uncomment after fixing vi stability - continue + # TODO run this check + # continue + print(i) assert_allclose( occd_pred[i], occd_ifeat_fd[i], rtol=rtol, atol=atol ) @@ -726,6 +731,7 @@ def test_nldf_equivalence(self): def test_nldf_occ_derivs(self): mols = self.mols tols = [(2e-3, 2e-3), (2e-3, 1e-2), (2e-3, 5e-3), (2e-3, 2e-3)] + tols = [(1e-3, 1e-3), (1e-3, 1e-3), (1e-3, 1e-3), (1e-3, 1e-3)] for mol, (atol, rtol) in zip(mols, tols): print(mol.atom) ks = dft.RKS(mol) if mol.spin == 0 else dft.UKS(mol) diff --git a/examples/pyscf/compute_ae.py b/examples/pyscf/compute_ae.py index a7af460..7726422 100644 --- a/examples/pyscf/compute_ae.py +++ b/examples/pyscf/compute_ae.py @@ -102,9 +102,13 @@ def run_calc(mol, spinpol): ckernel="GGA_C_PBE", ) ks.small_rho_cutoff = 0.0 + else: + ks.xc = functional ks.grids.level = 3 ks = ks.apply(scf.addons.remove_linear_dep_) etot = ks.kernel() + if is_cider: + ks._numint.timer.write() return etot