make some progress on fixing l=1 single-basis convolutions

mir-group · Jun 5, 2024 · 27826d7 · 27826d7
1 parent 9bebb83
commit 27826d7
Show file tree

Hide file tree

Showing 10 changed files with 548 additions and 35 deletions.
diff --git a/ciderpress/dft/grids_indexer.py b/ciderpress/dft/grids_indexer.py
@@ -190,6 +190,16 @@ def empty_rlmq(self, nalpha=1):
         """
         return np.empty((self.nrad, self.nlm, nalpha), order="C", dtype=np.float64)
 
+    def empty_xrlmq(self, nalpha=1):
+        """
+        Args:
+            nalpha (int): Number of interpolation points for CIDER kernel.
+
+        Returns:
+            np.ndarray with shape (3, nrad, nlm, nalpha)
+        """
+        return np.empty((3, self.nrad, self.nlm, nalpha), order="C", dtype=np.float64)
+
     def empty_gq(self, nalpha=1):
         """
         Get an empty array for storing theta_gq.

diff --git a/ciderpress/dft/lcao_convolutions.py b/ciderpress/dft/lcao_convolutions.py
@@ -5,6 +5,7 @@
 
 from ciderpress import lib
 from ciderpress.dft.grids_indexer import AtomicGridsIndexer
+from ciderpress.dft.sph_harm_coeff import get_deriv_ylm_coeff, get_ylm1_coeff
 from ciderpress.lib import load_library as load_cider_library
 
 libcider = load_cider_library("libmcider")
@@ -312,6 +313,9 @@ def _solve_coefs_(self, p_uq, nalpha, stride, offset):
             ctypes.c_int(offset),
         )
 
+    def solve_atc_coefs_(self, p_uq):
+        self._solve_coefs_(p_uq, p_uq.shape[-1], p_uq.shape[-1], 0)
+
     def convert_rad2orb_conv_(
         self,
         theta_rlmq,
@@ -586,13 +590,90 @@ def apply_vi_contractions(self, conv_vo, conv_vq, use_r2=False):
             ctypes.c_int(1 if use_r2 else 0),
         )
 
+    def apply_vi_contractions_l0(self, conv_vq, use_r2=False, conv_vo=None):
+        ifeats = np.asarray(self._icontrib_ids[: self.n0], dtype=np.int32, order="C")
+        if conv_vo is None:
+            conv_vo = np.zeros((conv_vq.shape[0], self.n0), dtype=np.float64)
+        assert conv_vo.flags.c_contiguous
+        assert conv_vq.flags.c_contiguous
+        libcider.apply_vi_contractions2(
+            conv_vo.ctypes.data_as(ctypes.c_void_p),
+            conv_vq.ctypes.data_as(ctypes.c_void_p),
+            self._ccl,
+            ctypes.c_int(1 if use_r2 else 0),
+            ifeats.ctypes.data_as(ctypes.c_void_p),
+            ctypes.c_int(ifeats.size),
+        )
+        return conv_vo
+
+    def apply_vi_contractions_l1(self, conv_vq, use_r2=False, conv_vo=None):
+        all_l1_ifeats = []
+        for i, ifeat_id in enumerate(self._icontrib1m_ids):
+            all_l1_ifeats.append(ifeat_id + 7)
+        ifeats = np.asarray(all_l1_ifeats, dtype=np.int32, order="C")
+        if conv_vo is None:
+            conv_vo = np.zeros((conv_vq.shape[0], len(ifeats)), dtype=np.float64)
+        assert conv_vo.flags.c_contiguous
+        assert conv_vq.flags.c_contiguous
+        libcider.apply_vi_contractions2(
+            conv_vo.ctypes.data_as(ctypes.c_void_p),
+            conv_vq.ctypes.data_as(ctypes.c_void_p),
+            self._ccl,
+            ctypes.c_int(1 if use_r2 else 0),
+            ifeats.ctypes.data_as(ctypes.c_void_p),
+            ctypes.c_int(ifeats.size),
+        )
+        return conv_vo
+
     def solve_coefs_(self, p_uq):
-        print(p_uq.shape)
         assert p_uq.flags.c_contiguous
         libcider.solve_atc_coefs_for_orbs2(
             p_uq.ctypes.data_as(ctypes.c_void_p), self._ccl
         )
 
+    def multiply_coefs_for_l1(self, p_uq):
+        assert p_uq.flags.c_contiguous
+        libcider.multiply_coefs_for_l1(p_uq.ctypes.data_as(ctypes.c_void_p), self._ccl)
+
+    def fill_l1_part(self, theta_rlmq, theta_xrlmq, plus=True):
+        nrad, nlm, nalpha = theta_rlmq.shape
+        lmax = int(np.sqrt(nlm + 1e-7)) - 1
+        gaunt_vl = get_ylm1_coeff(lmax, plus=True)
+        if plus:
+            fn = libcider.fill_lp1_orb_fwd
+        else:
+            fn = libcider.fill_lm1_orb_fwd
+        fn(
+            theta_rlmq.ctypes.data_as(ctypes.c_void_p),
+            theta_xrlmq.ctypes.data_as(ctypes.c_void_p),
+            gaunt_vl.ctypes.data_as(ctypes.c_void_p),
+            ctypes.c_int(nlm),
+            ctypes.c_int(nalpha),
+            ctypes.c_int(nalpha),
+            ctypes.c_int(nalpha),
+            ctypes.c_int(0),
+            ctypes.c_int(0),
+            ctypes.c_int(nrad),
+        )
+
+    def fill_l1_part_deriv(self, theta_rlmq, theta_xrlmq):
+        nrad, nlm, nalpha = theta_rlmq.shape
+        lmax = int(np.sqrt(nlm + 1e-7)) - 1
+        gaunt_vl = get_deriv_ylm_coeff(lmax)
+        fn = libcider.fill_lp1_orb_fwd
+        fn(
+            theta_rlmq.ctypes.data_as(ctypes.c_void_p),
+            theta_xrlmq.ctypes.data_as(ctypes.c_void_p),
+            gaunt_vl.ctypes.data_as(ctypes.c_void_p),
+            ctypes.c_int(nlm),
+            ctypes.c_int(nalpha),
+            ctypes.c_int(nalpha),
+            ctypes.c_int(nalpha),
+            ctypes.c_int(0),
+            ctypes.c_int(0),
+            ctypes.c_int(nrad),
+        )
+
 
 class ConvolutionCollectionK(ConvolutionCollection):
     """

diff --git a/ciderpress/dft/lcao_interpolation.py b/ciderpress/dft/lcao_interpolation.py
@@ -51,6 +51,7 @@ def __init__(
         aparam=0.03,
         dparam=0.04,
         nrad=200,
+        onsite_direct=False,
     ):
         """
         Args:
@@ -123,6 +124,7 @@ def _call_spline_(w_rsp, bas, env):
             _call_spline_(self.wm_rsp, self._l1bas, self._l1env)
 
         self.is_num_ai_setup = False
+        self.onsite_direct = onsite_direct
 
     @property
     def num_in(self):
@@ -508,7 +510,6 @@ def _interpolate_nopar_atom(self, f_arlpq, f_gq, fwd):
                     ngrids -= self._ga_loc[a + 1] - self._ga_loc[a]
                 if not fwd:
                     self._call_l1_fill(f_gq, self.atom_coords[a], fwd)
-                self._loc_ai[a][-1]
                 fn(
                     f_gq.ctypes.data_as(ctypes.c_void_p),
                     f_arlpq[a].ctypes.data_as(ctypes.c_void_p),

diff --git a/ciderpress/dft/lcao_nldf_generator.py b/ciderpress/dft/lcao_nldf_generator.py
@@ -308,17 +308,7 @@ def _perform_fwd_convolution(self, theta_gq):
         )
         theta_uq[:] *= self.plan.alpha_norms
         self.ccl.apply_vi_contractions(conv_vq, theta_uq, use_r2=True)
-        stride = conv_vq.shape[-1]
-        print(stride, conv_vq.shape)
-        # self.ccl.atco_out._solve_coefs_(conv_vq, stride, stride, 0)
         self.ccl.solve_coefs_(conv_vq)
-        # print(self.plan.alpha_norms)
-        # conv_vq[:, 0] = (self.plan.alpha_norms * theta_uq).sum(axis=-1)
-        # conv_vq[:, 3] = (self.plan.alphas * self.plan.alpha_norms * theta_uq).sum(
-        #     axis=-1
-        # )
-        # conv_vq[:, :] = (self.plan.alpha_norms * theta_uq).sum(axis=-1)[..., None]
-        # conv_vq[:, 0] = theta_uq.sum(axis=-1)
         self._stop_timer("Part2")
         self._start_timer("Part3")
         res = self.interpolator.project_orb2grid(conv_vq)
@@ -353,3 +343,112 @@ def _perform_bwd_convolution(self, vf_gq):
         self.grids_indexer.reduce_angc_ylm_(vtheta_rlmq, vtheta_gq, a2y=False)
         self._stop_timer("Part1")
         return vtheta_gq
+
+
+class VINLDFGen2(VINLDFGen):
+    def __init__(self, plan, ccl, interpolator, grids_indexer):
+        super(VINLDFGen2, self).__init__(plan, ccl, interpolator, grids_indexer)
+        interp = self.interpolator
+        from ciderpress.dft.lcao_interpolation import LCAOInterpolator
+
+        self.interpolator = LCAOInterpolator(
+            interp.atom_coords,
+            ccl.atco_out,
+            self.ccl.n0 + 3 * self.ccl.n1,
+            0,
+            aparam=interp._aparam,
+            dparam=interp._dparam,
+            nrad=interp._nrad,
+            onsite_direct=interp.onsite_direct,
+        )
+        self._xrlmq_buf = self.grids_indexer.empty_xrlmq(self.ccl.nalpha)
+
+    def _perform_fwd_convolution(self, theta_gq):
+        self._start_timer("Part1")
+        theta_rlmq = self._rlmq_buf
+        theta_xrlmq = self._xrlmq_buf
+        theta_uq = self._uq_buf
+        conv_vq = self._vq_buf
+        conv_vq[:] = 0.0
+        theta_uq[:] = 0.0
+        theta_rlmq[:] = 0.0
+        theta_xrlmq[:] = 0.0
+        self.grids_indexer.reduce_angc_ylm_(theta_rlmq, theta_gq, a2y=True)
+        shape = theta_rlmq.shape
+        theta_rlmq.shape = (-1, theta_rlmq.shape[-1])
+        self.plan.get_transformed_interpolation_terms(
+            theta_rlmq, i=-1, fwd=True, inplace=True
+        )
+        theta_rlmq.shape = shape
+        self._stop_timer("Part1")
+        self._start_timer("Part2")
+        args = [
+            theta_rlmq,
+            theta_uq,
+            self.grids_indexer,
+            self.grids_indexer.rad_arr,
+            self.plan.alphas,
+        ]
+        kwargs = {"rad2orb": True, "offset": 0, "l_add": 0}
+        self.ccl.atco_out.convert_rad2orb_conv_(*args, **kwargs)
+        theta_uq[:] *= self.plan.alpha_norms
+        l0_conv_vq = self.ccl.apply_vi_contractions_l0(theta_uq)
+        theta_uq[:] = 0
+        kwargs["l_add"] = 2
+        self.ccl.atco_out.convert_rad2orb_conv_(*args, **kwargs)
+        theta_uq[:] *= self.plan.alpha_norms
+        self.ccl.apply_vi_contractions_l0(theta_uq, use_r2=True, conv_vo=l0_conv_vq)
+
+        theta_xrlmq[:] = 0
+        self.ccl.fill_l1_part(theta_rlmq, theta_xrlmq, plus=True)
+        # self.ccl.fill_l1_part_deriv(theta_rlmq, theta_xrlmq)
+        theta_xrlmq[:] *= -0.5 * np.sqrt(4 * np.pi / 3)
+        l1_conv_vox = np.zeros((l0_conv_vq.shape[0], 2, 3), order="C")
+        for x in range(3):
+            args[0] = theta_xrlmq[x]
+            kwargs["l_add"] = -1
+            theta_uq[:] = 0.0
+            self.ccl.atco_out.convert_rad2orb_conv_(*args, **kwargs)
+            theta_uq[:] *= self.plan.alpha_norms
+            l1_conv_vox[..., x] = self.ccl.apply_vi_contractions_l1(
+                theta_uq, use_r2=False
+            )
+
+        theta_xrlmq[:] = 0.0
+        self.ccl.fill_l1_part(theta_rlmq, theta_xrlmq, plus=True)
+        theta_xrlmq[:] *= 0.5 * np.sqrt(4 * np.pi / 3)
+        for x in range(3):
+            args[0] = theta_xrlmq[x]
+            kwargs["l_add"] = 1
+            theta_uq[:] = 0.0
+            self.ccl.atco_out.convert_rad2orb_conv_(*args, **kwargs)
+            theta_uq[:] *= self.plan.alpha_norms
+            l1_conv_vox[..., x] += self.ccl.apply_vi_contractions_l1(
+                theta_uq, use_r2=True
+            )
+
+        theta_xrlmq[:] = 0.0
+        self.ccl.fill_l1_part(theta_rlmq, theta_xrlmq, plus=False)
+        theta_xrlmq[:] *= 0.5 * np.sqrt(4 * np.pi / 3)
+        for x in range(3):
+            args[0] = theta_xrlmq[x]
+            kwargs["l_add"] = 1
+            theta_uq[:] = 0.0
+            self.ccl.atco_out.convert_rad2orb_conv_(*args, **kwargs)
+            theta_uq[:] *= self.plan.alpha_norms
+            l1_conv_vox[..., x] += self.ccl.apply_vi_contractions_l1(
+                theta_uq, use_r2=True
+            )
+
+        conv_vq = np.ascontiguousarray(
+            np.concatenate(
+                [l0_conv_vq, l1_conv_vox.reshape(l1_conv_vox.shape[0], -1)], axis=-1
+            )
+        )
+
+        self.ccl.atco_out.solve_atc_coefs_(conv_vq)
+        self._stop_timer("Part2")
+        self._start_timer("Part3")
+        res = self.interpolator.project_orb2grid(conv_vq)
+        self._stop_timer("Part3")
+        return res
diff --git a/ciderpress/dft/plans.py b/ciderpress/dft/plans.py
@@ -865,7 +865,6 @@ def __init__(
                         + 0.25 * _get_int_d(n, prod, asum)
                         + 0.25 * _get_int_d(n, prod, bsum)
                     )
-                # print(ratio, r, n, rdr, np.linalg.cond(l0mats[-1]))
             for n, rdr in self.settings.iterate_l1_terms(ratio):
                 # num = 0.25 * (ratio ** (0.5 * n) + ratio ** (-0.5 * n))
                 num = 0.5
@@ -881,7 +880,6 @@ def __init__(
                         + 0.25 * _get_int_1d(n, prod, asum)
                         + 0.25 * _get_int_1d(n, prod, bsum)
                     )
-                # print(ratio, r, n, rdr, np.linalg.cond(l1mats[-1]))
         self._num_l0_feat = len(l0mats)
         self._num_l1_feat = len(l1mats)
         coul_list = [m for m in l0mats + l1mats]
@@ -1380,6 +1378,7 @@ def eval_rho_vi_(self, f_qg, drho, feat, spin):
             i += 1
         for j, k in self._l1_dots:
             feat[i] = np.einsum("xg,xg->g", l1cache[j], l1cache[k])
+            # feat[i] = l1cache[j][2] * l1cache[k][2]
             feat[i] *= self.nspin
             i += 1
         return feat

diff --git a/ciderpress/dft/sph_harm_coeff.py b/ciderpress/dft/sph_harm_coeff.py
@@ -1,7 +1,7 @@
 import ctypes
 
 import numpy as np
-from sympy.physics.wigner import clebsch_gordan
+from sympy.physics.wigner import clebsch_gordan, real_gaunt
 
 from ciderpress.lib import load_library as load_cider_library
 
@@ -149,6 +149,26 @@ def get_deriv_ylm_coeff(lmax):
     return gaunt_coeff
 
 
+def get_ylm1_coeff(lmax, plus=True):
+    nlm = (lmax + 1) * (lmax + 1)
+    gaunt_coeff = np.zeros((5, nlm))
+    if plus:
+        sign = 1
+    else:
+        sign = -1
+    for l in range(lmax + 1):
+        for im in range(2 * l + 1):
+            lm = l * l + im
+            m = im - l
+            lp1 = l + sign * 1
+            gaunt_coeff[0, lm] = real_gaunt(1, l, lp1, 1, m, m - 1)
+            gaunt_coeff[1, lm] = real_gaunt(1, l, lp1, 1, m, m + 1)
+            gaunt_coeff[2, lm] = real_gaunt(1, l, lp1, -1, m, m - 1)
+            gaunt_coeff[3, lm] = real_gaunt(1, l, lp1, -1, m, m + 1)
+            gaunt_coeff[4, lm] = real_gaunt(1, l, lp1, 0, m, m)
+    return gaunt_coeff
+
+
 def get_deriv_ylm_coeff_v2(lmax):
     nlm = (lmax + 1) * (lmax + 1)
     gaunt_coeff = np.zeros((5, nlm))