start smooth vi, l=1 still not working

mir-group · May 22, 2024 · 9bebb83 · 9bebb83
1 parent 5c232d5
commit 9bebb83
Show file tree

Hide file tree

Showing 9 changed files with 890 additions and 46 deletions.
diff --git a/ciderpress/dft/lcao_convolutions.py b/ciderpress/dft/lcao_convolutions.py
@@ -214,6 +214,30 @@ def env(self):
         libcider.get_atco_env(env.ctypes.data_as(ctypes.c_void_p), self._atco)
         return env
 
+    def _run_checks(self, theta_rlmq, p_uq, loc, rads, offset):
+        nalpha = theta_rlmq.shape[-1]
+        stride = p_uq.shape[1]
+        if offset is None:
+            offset = 0
+        else:
+            assert offset >= 0
+        nrad = rads.size
+        assert loc.flags.c_contiguous
+        assert loc.dtype == np.int32
+        assert loc.ndim == 1
+        assert rads.flags.c_contiguous
+        assert rads.dtype == np.float64
+        assert theta_rlmq.ndim == 3
+        assert theta_rlmq.shape[0] == nrad
+        assert theta_rlmq.flags.c_contiguous
+        assert theta_rlmq.dtype == np.float64
+        assert p_uq.ndim == 2
+        assert p_uq.shape[0] == self.nao
+        assert p_uq.flags.c_contiguous
+        assert p_uq.dtype == np.float64
+        assert offset + nalpha <= stride, "{} {} {}".format(offset, nalpha, stride)
+        return nalpha, stride, offset, nrad
+
     def convert_rad2orb_(
         self, theta_rlmq, p_uq, loc, rads, rad2orb=True, offset=None, zero_output=True
     ):
@@ -232,7 +256,7 @@ def convert_rad2orb_(
 
         Args:
             theta_rlmq (np.ndarray): float64, shape (nrad, nlm, nalpha)
-            p_uq (np.ndarray): float64, shape (ngrids, nalpha)
+            p_uq (np.ndarray): float64, shape (ngrids, stride)
             loc (np.ndarray, AtomicGridsIndexer):
                 An int32 array whose shape depends on whether rad2orb is True.
             rads (np.ndarray): float64 list of radial coordinates with size
@@ -251,27 +275,9 @@ def convert_rad2orb_(
                 loc = loc.ra_loc
             else:
                 loc = loc.ar_loc
-        nalpha = theta_rlmq.shape[-1]
-        stride = p_uq.shape[1]
-        if offset is None:
-            offset = 0
-        else:
-            assert offset >= 0
-        nrad = rads.size
-        assert loc.flags.c_contiguous
-        assert loc.dtype == np.int32
-        assert loc.ndim == 1
-        assert rads.flags.c_contiguous
-        assert rads.dtype == np.float64
-        assert theta_rlmq.ndim == 3
-        assert theta_rlmq.shape[0] == nrad
-        assert theta_rlmq.flags.c_contiguous
-        assert theta_rlmq.dtype == np.float64
-        assert p_uq.ndim == 2
-        assert p_uq.shape[0] == self.nao
-        assert p_uq.flags.c_contiguous
-        assert p_uq.dtype == np.float64
-        assert offset + nalpha <= stride
+        nalpha, stride, offset, nrad = self._run_checks(
+            theta_rlmq, p_uq, loc, rads, offset
+        )
         if rad2orb:
             assert loc.size == self.natm + 1
             fn = libcider.contract_rad_to_orb
@@ -295,6 +301,95 @@ def convert_rad2orb_(
             ctypes.c_int(offset),
         )
 
+    def _solve_coefs_(self, p_uq, nalpha, stride, offset):
+        assert p_uq.flags.c_contiguous
+        assert p_uq.shape[-1] == stride
+        libcider.solve_atc_coefs_for_orbs(
+            p_uq.ctypes.data_as(ctypes.c_void_p),
+            self.atco_c_ptr,
+            ctypes.c_int(nalpha),
+            ctypes.c_int(stride),
+            ctypes.c_int(offset),
+        )
+
+    def convert_rad2orb_conv_(
+        self,
+        theta_rlmq,
+        p_uq,
+        loc,
+        rads,
+        alphas,
+        l_add=0,
+        rad2orb=True,
+        offset=None,
+        zero_output=True,
+    ):
+        """
+        Take a set of functions stored on radial grids and spherical harmonics,
+        convolve it by the squared-exponentials with exponent parameters `alphas`,
+        and project it onto the atomic orbital basis. This is an in-place
+        operation, where theta_rlmq is written if rad2orb is False and
+        p_uq is written if rad2orb is True. len(alphas) == theta_rlmq.shape[-1]
+        See convert_rad2orb_ for more notes.
+
+        Args:
+            theta_rlmq (np.ndarray): float64, shape (nrad, nlm, nalpha)
+            p_uq (np.ndarray): float64, shape (ngrids, stride)
+            loc (np.ndarray, AtomicGridsIndexer):
+                An int32 array whose shape depends on whether rad2orb is True.
+            rads (np.ndarray): float64 list of radial coordinates with size
+                theta_rlmq.shape[0]. Corresponds to rad_arr member of
+                AtomicGridsIndexer.
+            alphas (np.ndarray): exponents of squared-exponentials for
+                convolution
+            rad2orb (bool): Whether to convert radial coordinates to orbital
+                basis (True) or vice versa (False).
+            offset (int): starting index in p_uq to add to/from
+            zero_output (bool): Whether to set output zero before
+                adding to it. Default to true. Set to False if you
+                want to add to the existing values in the output
+                array.
+        """
+        alphas = np.ascontiguousarray(alphas)
+        if isinstance(loc, AtomicGridsIndexer):
+            if rad2orb:
+                loc = loc.ra_loc
+            else:
+                loc = loc.ar_loc
+        nalpha, stride, offset, nrad = self._run_checks(
+            theta_rlmq, p_uq, loc, rads, offset
+        )
+        if rad2orb:
+            assert loc.size == self.natm + 1
+            fn = libcider.contract_rad_to_orb_conv
+            if zero_output:
+                p_uq[:, offset : offset + nalpha] = 0.0
+        else:
+            assert loc.size == nrad
+            fn = libcider.contract_orb_to_rad_conv
+            if zero_output:
+                theta_rlmq[:] = 0.0
+        # if not rad2orb:
+        #     # orbital to radial grid conversion
+        #     self._solve_coefs_(p_uq, nalpha, stride, offset)
+        fn(
+            theta_rlmq.ctypes.data_as(ctypes.c_void_p),
+            p_uq.ctypes.data_as(ctypes.c_void_p),
+            loc.ctypes.data_as(ctypes.c_void_p),
+            rads.ctypes.data_as(ctypes.c_void_p),
+            alphas.ctypes.data_as(ctypes.c_void_p),
+            ctypes.c_int(rads.size),
+            ctypes.c_int(theta_rlmq.shape[1]),
+            self.atco_c_ptr,
+            ctypes.c_int(nalpha),
+            ctypes.c_int(stride),
+            ctypes.c_int(offset),
+            ctypes.c_int(l_add),
+        )
+        # if rad2orb:
+        #     # radial grid to orbital conversion
+        #     self._solve_coefs_(p_uq, nalpha, stride, offset)
+
 
 class ConvolutionCollection:
     """
@@ -483,6 +578,21 @@ def multiply_atc_integrals(self, input, output=None, fwd=True):
         )
         return output
 
+    def apply_vi_contractions(self, conv_vo, conv_vq, use_r2=False):
+        libcider.apply_vi_contractions(
+            conv_vo.ctypes.data_as(ctypes.c_void_p),
+            conv_vq.ctypes.data_as(ctypes.c_void_p),
+            self._ccl,
+            ctypes.c_int(1 if use_r2 else 0),
+        )
+
+    def solve_coefs_(self, p_uq):
+        print(p_uq.shape)
+        assert p_uq.flags.c_contiguous
+        libcider.solve_atc_coefs_for_orbs2(
+            p_uq.ctypes.data_as(ctypes.c_void_p), self._ccl
+        )
+
 
 class ConvolutionCollectionK(ConvolutionCollection):
     """

diff --git a/ciderpress/dft/lcao_nldf_generator.py b/ciderpress/dft/lcao_nldf_generator.py
@@ -22,12 +22,22 @@ def __init__(self, plan, ccl, interpolator, grids_indexer):
         self._uq_buf = np.empty((self.ccl.atco_inp.nao, nalpha))
         self._vq_buf = np.empty((self.ccl.atco_out.nao, self.ccl.num_out))
         self._rlmq_buf = self.grids_indexer.empty_rlmq(nalpha)
+        self.timer = None
 
     @property
     def natm(self):
         return self.interpolator.atom_coords.shape[0]
 
+    def _start_timer(self, name):
+        if self.timer is not None:
+            self.timer.start(name)
+
+    def _stop_timer(self, name):
+        if self.timer is not None:
+            self.timer.stop(name)
+
     def _perform_fwd_convolution(self, theta_gq):
+        self._start_timer("Part1")
         theta_rlmq = self._rlmq_buf
         theta_uq = self._uq_buf
         conv_vq = self._vq_buf
@@ -46,25 +56,36 @@ def _perform_fwd_convolution(self, theta_gq):
         self.plan.get_transformed_interpolation_terms(
             theta_uq, i=-1, fwd=True, inplace=True
         )
+        self._stop_timer("Part1")
+        self._start_timer("Part2")
         self.ccl.multiply_atc_integrals(theta_uq, output=conv_vq, fwd=True)
         if self.plan.nldf_settings.nldf_type != "i":
             self.plan.get_transformed_interpolation_terms(
                 conv_vq[:, : self.plan.nalpha], i=0, fwd=False, inplace=True
             )
-        return self.interpolator.project_orb2grid(conv_vq)
+        self._stop_timer("Part2")
+        self._start_timer("Part3")
+        res = self.interpolator.project_orb2grid(conv_vq)
+        self._stop_timer("Part3")
+        return res
 
     def _perform_bwd_convolution(self, vf_gq):
         vtheta_rlmq = self._rlmq_buf
         vtheta_uq = self._uq_buf
         vconv_vq = self._vq_buf
         vconv_vq[:] = 0.0
         vtheta_uq[:] = 0.0
+        self._start_timer("Part3")
         self.interpolator.project_grid2orb(vf_gq, f_uq=vconv_vq)
+        self._stop_timer("Part3")
+        self._start_timer("Part2")
         if self.plan.nldf_settings.nldf_type != "i":
             self.plan.get_transformed_interpolation_terms(
                 vconv_vq[:, : self.plan.nalpha], i=0, fwd=True, inplace=True
             )
         self.ccl.multiply_atc_integrals(vconv_vq, output=vtheta_uq, fwd=False)
+        self._stop_timer("Part2")
+        self._start_timer("Part1")
         self.plan.get_transformed_interpolation_terms(
             vtheta_uq, i=-1, fwd=False, inplace=True
         )
@@ -78,6 +99,7 @@ def _perform_bwd_convolution(self, vf_gq):
         )
         vtheta_gq = self.grids_indexer.empty_gq(self.plan.nalpha)
         self.grids_indexer.reduce_angc_ylm_(vtheta_rlmq, vtheta_gq, a2y=False)
+        self._stop_timer("Part1")
         return vtheta_gq
 
     def get_features(self, rho_in, spin=0):
@@ -228,3 +250,106 @@ def get_potential(self, vfeat, spin=0):
             )
         # self._cache[spin] = None
         return vrho_out
+
+
+class VINLDFGen(LCAONLDFGenerator):
+    def __init__(self, plan, ccl, interpolator, grids_indexer):
+        """
+
+        Args:
+            plan (NLDFAuxiliaryPlan):
+            ccl (ConvolutionCollection):
+            interpolator (LCAOInterpolator):
+            grids_indexer (AtomicGridsIndexer):
+        """
+        super(VINLDFGen, self).__init__(plan, ccl, interpolator, grids_indexer)
+        self._uq_buf = np.empty((self.ccl.atco_out.nao, self.plan.nalpha))
+        self._vq_buf = np.empty((self.ccl.atco_out.nao, self.ccl.num_out))
+        self.timer = None
+
+    def _perform_fwd_convolution(self, theta_gq):
+        self._start_timer("Part1")
+        theta_rlmq = self._rlmq_buf
+        theta_uq = self._uq_buf
+        conv_vq = self._vq_buf
+        conv_vq[:] = 0.0
+        theta_uq[:] = 0.0
+        theta_rlmq[:] = 0.0
+        self.grids_indexer.reduce_angc_ylm_(theta_rlmq, theta_gq, a2y=True)
+        shape = theta_rlmq.shape
+        theta_rlmq.shape = (-1, theta_rlmq.shape[-1])
+        self.plan.get_transformed_interpolation_terms(
+            theta_rlmq, i=-1, fwd=True, inplace=True
+        )
+        theta_rlmq.shape = shape
+        self._stop_timer("Part1")
+        self._start_timer("Part2")
+        self.ccl.atco_out.convert_rad2orb_conv_(
+            theta_rlmq,
+            theta_uq,
+            self.grids_indexer,
+            self.grids_indexer.rad_arr,
+            self.plan.alphas,
+            rad2orb=True,
+            offset=0,
+        )
+        theta_uq[:] *= self.plan.alpha_norms
+        self.ccl.apply_vi_contractions(conv_vq, theta_uq)
+        theta_uq[:] = 0
+        self.ccl.atco_out.convert_rad2orb_conv_(
+            theta_rlmq,
+            theta_uq,
+            self.grids_indexer,
+            self.grids_indexer.rad_arr,
+            self.plan.alphas,
+            l_add=2,
+            rad2orb=True,
+            offset=0,
+        )
+        theta_uq[:] *= self.plan.alpha_norms
+        self.ccl.apply_vi_contractions(conv_vq, theta_uq, use_r2=True)
+        stride = conv_vq.shape[-1]
+        print(stride, conv_vq.shape)
+        # self.ccl.atco_out._solve_coefs_(conv_vq, stride, stride, 0)
+        self.ccl.solve_coefs_(conv_vq)
+        # print(self.plan.alpha_norms)
+        # conv_vq[:, 0] = (self.plan.alpha_norms * theta_uq).sum(axis=-1)
+        # conv_vq[:, 3] = (self.plan.alphas * self.plan.alpha_norms * theta_uq).sum(
+        #     axis=-1
+        # )
+        # conv_vq[:, :] = (self.plan.alpha_norms * theta_uq).sum(axis=-1)[..., None]
+        # conv_vq[:, 0] = theta_uq.sum(axis=-1)
+        self._stop_timer("Part2")
+        self._start_timer("Part3")
+        res = self.interpolator.project_orb2grid(conv_vq)
+        self._stop_timer("Part3")
+        return res
+
+    def _perform_bwd_convolution(self, vf_gq):
+        vtheta_rlmq = self._rlmq_buf
+        # vtheta_uq = self._uq_buf
+        vconv_vq = self._vq_buf
+        vconv_vq[:] = 0.0
+        # vtheta_uq[:] = 0.0
+        self._start_timer("Part3")
+        self.interpolator.project_grid2orb(vf_gq, f_uq=vconv_vq)
+        self._stop_timer("Part3")
+        self._start_timer("Part2")
+        self.ccl.atco_out.convert_rad2orb_conv_(
+            vtheta_rlmq,
+            vconv_vq,
+            self.grids_indexer,
+            self.grids_indexer.rad_arr,
+            self.plan.alphas,
+            rad2orb=False,
+            offset=0,
+        )
+        self._stop_timer("Part2")
+        self._start_timer("Part1")
+        self.plan.get_transformed_interpolation_terms(
+            vtheta_rlmq, i=-1, fwd=False, inplace=True
+        )
+        vtheta_gq = self.grids_indexer.empty_gq(self.plan.nalpha)
+        self.grids_indexer.reduce_angc_ylm_(vtheta_rlmq, vtheta_gq, a2y=False)
+        self._stop_timer("Part1")
+        return vtheta_gq