start transition away from fortran for GPAW

mir-group · Jun 21, 2024 · fe02c76 · fe02c76
1 parent 26e01e7
commit fe02c76
Show file tree

Hide file tree

Showing 10 changed files with 289 additions and 84 deletions.
diff --git a/ciderpress/dft/pwutil.py b/ciderpress/dft/pwutil.py
@@ -0,0 +1,96 @@
+import ctypes
+
+import numpy as np
+
+from ciderpress.dft.futil import sph_nlxc_mod as fnlxc
+from ciderpress.lib import load_library
+
+pw_cutil = load_library("libpwutil.so")
+
+
+def eval_pasdw_funcs(radfuncs_ng, ylm_lg, nlist_i, lmlist_i):
+    assert radfuncs_ng.flags.c_contiguous
+    assert ylm_lg.flags.c_contiguous
+    assert nlist_i.flags.c_contiguous
+    assert lmlist_i.flags.c_contiguous
+    nlm, ng = ylm_lg.shape
+    assert radfuncs_ng.shape[1] == ng
+    ni = len(nlist_i)
+    assert len(nlist_i) == len(lmlist_i)
+    funcs_ig = np.empty((ni, ng), order="C")
+    pw_cutil.eval_pasdw_funcs(
+        radfuncs_ng.ctypes.data_as(ctypes.c_void_p),
+        ylm_lg.ctypes.data_as(ctypes.c_void_p),
+        funcs_ig.ctypes.data_as(ctypes.c_void_p),
+        nlist_i.ctypes.data_as(ctypes.c_void_p),
+        lmlist_i.ctypes.data_as(ctypes.c_void_p),
+        ctypes.c_int(ni),
+        ctypes.c_int(ng),
+        ctypes.c_int(nlm),
+    )
+    return funcs_ig.T
+
+
+def pasdw_reduce_i(*args):
+    return fnlxc.pasdw_reduce_i(*args)
+
+
+def pasdw_reduce_g(*args):
+    return fnlxc.pasdw_reduce_g(*args)
+
+
+def eval_cubic_spline(*args):
+    return fnlxc.eval_cubic_spline(*args)
+
+
+def eval_cubic_spline_deriv(*args):
+    return fnlxc.eval_cubic_spline_deriv(*args)
+
+
+def eval_cubic_interp(*args):
+    return fnlxc.eval_cubic_interp(*args)
+
+
+def eval_cubic_interp_noderiv(*args):
+    return fnlxc.eval_cubic_interp_noderiv(*args)
+
+
+def recursive_sph_harm_t2(nlm, rhat_gv):
+    n = rhat_gv.shape[0]
+    res = np.empty((n, nlm), order="C")
+    assert rhat_gv.flags.c_contiguous
+    pw_cutil.recursive_sph_harm_vec(
+        ctypes.c_int(nlm),
+        ctypes.c_int(n),
+        rhat_gv.ctypes.data_as(ctypes.c_void_p),
+        res.ctypes.data_as(ctypes.c_void_p),
+    )
+    return res
+
+
+def recursive_sph_harm_t2_deriv(nlm, rhat_gv):
+    n = rhat_gv.shape[0]
+    res = np.empty((n, nlm), order="C")
+    dres = np.empty((n, 3, nlm), order="C")
+    assert rhat_gv.flags.c_contiguous
+    pw_cutil.recursive_sph_harm_vec(
+        ctypes.c_int(nlm),
+        ctypes.c_int(n),
+        rhat_gv.ctypes.data_as(ctypes.c_void_p),
+        res.ctypes.data_as(ctypes.c_void_p),
+        dres.ctypes.data_as(ctypes.c_void_p),
+    )
+    return res, dres
+
+
+def mulexp(F_k, theta_k, k2_k, a, b):
+    nk = F_k.size
+    assert F_k.size == theta_k.size == k2_k.size
+    return pw_cutil.mulexp(
+        F_k.ctypes.data_as(ctypes.c_void_p),
+        theta_k.ctypes.data_as(ctypes.c_void_p),
+        k2_k.ctypes.data_as(ctypes.c_void_p),
+        ctypes.c_double(a),
+        ctypes.c_double(b),
+        ctypes.c_int(nk),
+    )
diff --git a/ciderpress/gpaw/atom_utils.py b/ciderpress/gpaw/atom_utils.py
@@ -5,8 +5,7 @@
 from scipy.interpolate import interp1d
 from scipy.linalg import cho_factor, cho_solve
 
-from ciderpress.dft.futil import fast_sph_harm as fsh
-from ciderpress.dft.futil import sph_nlxc_mod as fnlxc
+from ciderpress.dft import pwutil
 from ciderpress.gpaw.etb_util import ETBProjector
 from ciderpress.gpaw.fit_paw_gauss_pot import (
     construct_full_p_matrices,
@@ -1175,17 +1174,17 @@ def get_funcs(self, pfunc=True):
             funcs_xtp = self.psetup.ffuncs_jtp
             xlist_i = self.psetup.jlist_i
 
-        radfuncs_gn = fnlxc.eval_cubic_spline(
+        radfuncs_gn = pwutil.eval_cubic_spline(
             funcs_xtp.T,
             self.t_g,
             self.dt_g,
         )
         lmax = self.psetup.lmax
         Lmax = (lmax + 1) * (lmax + 1)
-        ylm = fsh.recursive_sph_harm_t2(Lmax, self.rhat_gv.T)
-        funcs_gi = fnlxc.eval_pasdw_funcs(
-            radfuncs_gn,
-            ylm.T,
+        ylm = pwutil.recursive_sph_harm_t2(Lmax, self.rhat_gv)
+        funcs_gi = pwutil.eval_pasdw_funcs(
+            radfuncs_gn.T,
+            np.ascontiguousarray(ylm.T),
             xlist_i,
             self.psetup.lmlist_i,
         )
@@ -1199,13 +1198,13 @@ def get_grads(self, pfunc=True):
             funcs_xtp = self.psetup.ffuncs_jtp
             xlist_i = self.psetup.jlist_i
 
-        radfuncs_gn = fnlxc.eval_cubic_spline(
+        radfuncs_gn = pwutil.eval_cubic_spline(
             funcs_xtp.T,
             self.t_g,
             self.dt_g,
         )
         radderivs_gn = (
-            fnlxc.eval_cubic_spline_deriv(
+            pwutil.eval_cubic_spline_deriv(
                 funcs_xtp.T,
                 self.t_g,
                 self.dt_g,
@@ -1214,55 +1213,55 @@ def get_grads(self, pfunc=True):
         )
         lmax = self.psetup.lmax
         Lmax = (lmax + 1) * (lmax + 1)
-        ylm, dylm = fsh.recursive_sph_harm_t2_deriv(Lmax, self.rhat_gv.T)
+        ylm, dylm = pwutil.recursive_sph_harm_t2_deriv(Lmax, self.rhat_gv.T)
         dylm /= self.rad_g + 1e-8  # TODO right amount of regularization?
-        rodylm = np.einsum("gv,gLv->gL", self.rhat_gv, dylm.T)
+        rodylm = np.einsum("gv,gvL->Lg", self.rhat_gv, dylm)
         # dylm = np.dot(dylm, drhat_g.T)
-        funcs_gi = fnlxc.eval_pasdw_funcs(
-            radderivs_gn,
-            ylm.T,
+        funcs_gi = pwutil.eval_pasdw_funcs(
+            radderivs_gn.T,
+            np.ascontiguousarray(ylm.T),
             xlist_i,
             self.psetup.lmlist_i,
         )
-        funcs_gi -= fnlxc.eval_pasdw_funcs(
-            radfuncs_gn,
+        funcs_gi -= pwutil.eval_pasdw_funcs(
+            radfuncs_gn.T,
             rodylm,
             xlist_i,
             self.psetup.lmlist_i,
         )
         funcs_vgi = self.rhat_gv.T[:, :, None] * funcs_gi
         for v in range(3):
-            funcs_vgi[v] += fnlxc.eval_pasdw_funcs(
-                radfuncs_gn,
-                dylm[v].T,
+            funcs_vgi[v] += pwutil.eval_pasdw_funcs(
+                radfuncs_gn.T,
+                np.ascontiguousarray(dylm[:, v]),
                 xlist_i,
                 self.psetup.lmlist_i,
             )
         return funcs_vgi
 
     def setup_ovlpt(self):
-        rad_pfuncs_gn = fnlxc.eval_cubic_spline(
+        rad_pfuncs_gn = pwutil.eval_cubic_spline(
             self.psetup.pfuncs_ntp.T,
             self.t_g,
             self.dt_g,
         )
-        rad_ffuncs_jn = fnlxc.eval_cubic_spline(
+        rad_ffuncs_jn = pwutil.eval_cubic_spline(
             self.psetup.ffuncs_jtp.T,
             self.t_g,
             self.dt_g,
         )
         lmax = self.psetup.lmax
         Lmax = (lmax + 1) * (lmax + 1)
-        ylm = fsh.recursive_sph_harm_t2(Lmax, self.rhat_gv.T)
-        pfuncs_gi = fnlxc.eval_pasdw_funcs(
-            rad_pfuncs_gn,
-            ylm.T,
+        ylm = pwutil.recursive_sph_harm_t2(Lmax, self.rhat_gv)
+        pfuncs_gi = pwutil.eval_pasdw_funcs(
+            rad_pfuncs_gn.T,
+            np.ascontiguousarray(ylm.T),
             self.psetup.nlist_i,
             self.psetup.lmlist_i,
         )
-        ffuncs_gi = fnlxc.eval_pasdw_funcs(
-            rad_ffuncs_jn,
-            ylm.T,
+        ffuncs_gi = pwutil.eval_pasdw_funcs(
+            rad_ffuncs_jn.T,
+            np.ascontiguousarray(ylm.T),
             self.psetup.jlist_i,
             self.psetup.lmlist_i,
         )

diff --git a/ciderpress/gpaw/cider_fft.py b/ciderpress/gpaw/cider_fft.py
@@ -32,7 +32,7 @@
 from gpaw.xc.mgga import MGGA
 from scipy.linalg import cho_factor, cho_solve
 
-from ciderpress.dft.futil import sph_nlxc_mod as fnlxc
+from ciderpress.dft import pwutil
 from ciderpress.dft.plans import SemilocalPlan2
 from ciderpress.gpaw.mp_cider import CiderParallelization
 
@@ -826,7 +826,7 @@ def calculate_6d_integral(self):
                 F_k = np.zeros(self.k2_k.shape, complex)
             for b in self.alphas:
                 aexp = self.bas_exp[a] + self.bas_exp[b]
-                fnlxc.mulexp(
+                pwutil.mulexp(
                     F_k.ravel(),
                     self.theta_ak[b].ravel(),
                     self.k2_k.ravel(),
@@ -872,7 +872,7 @@ def calculate_6d_stress_integral(self):
             for b in self.alphas:
                 aexp = self.bas_exp[a] + self.bas_exp[b]
                 invexp = 1.0 / (4 * aexp)
-                fnlxc.mulexp(
+                pwutil.mulexp(
                     F_k.ravel(),
                     self.theta_ak[b].ravel(),
                     self.k2_k.ravel(),
@@ -939,7 +939,7 @@ def calculate_6d_integral_fwd(self, n_g, cider_exp, c_abi=None):
                 self.rbuf_ag[a][:] = 0.0
         self.timer.start("COEFS")
         for ind, a in enumerate(self.alphas):
-            pa_g, dpa_g = fnlxc.eval_cubic_interp(
+            pa_g, dpa_g = pwutil.eval_cubic_interp(
                 i_g.ravel(), dq0_g.ravel(), self.C_aip[ind].T
             )
             pa_g = pa_g.reshape(i_g.shape)
@@ -975,7 +975,7 @@ def calculate_6d_integral_fwd(self, n_g, cider_exp, c_abi=None):
                 dq0_g = None
             for ind, a in enumerate(self.alphas):
                 self.timer.start("COEFS")
-                pa_g, dpa_g = fnlxc.eval_cubic_interp(
+                pa_g, dpa_g = pwutil.eval_cubic_interp(
                     i_g.ravel(), dq0_g.ravel(), self.C_aip[ind].T
                 )
                 pa_g = pa_g.reshape(i_g.shape)

diff --git a/ciderpress/gpaw/cider_paw.py b/ciderpress/gpaw/cider_paw.py
@@ -3,7 +3,7 @@
 import yaml
 from gpaw.xc.mgga import MGGA
 
-from ciderpress.dft.futil import sph_nlxc_mod as fnlxc
+from ciderpress.dft import pwutil
 from ciderpress.gpaw.atom_utils import AtomPASDWSlice, PASDWCiderKernel
 from ciderpress.gpaw.cider_fft import (
     CiderGGA,
@@ -226,7 +226,7 @@ def write_augfeat_to_rbuf(self, c_abi, pot=False):
                 coefs_bi = coefs_abi[a]
                 if atom_slice.sinv_pf is not None:
                     coefs_bi = coefs_bi.dot(atom_slice.sinv_pf.T)
-                fnlxc.pasdw_reduce_i(
+                pwutil.pasdw_reduce_i(
                     coefs_bi[0],
                     funcs_gi,
                     feat_g.T,
@@ -243,7 +243,7 @@ def write_augfeat_to_rbuf(self, c_abi, pot=False):
                     coefs_bi = coefs_bi.dot(atom_slice.sinv_pf.T)
                 for ib, b in enumerate(self.alphas):
                     assert self.rbuf_ag[b].flags.c_contiguous
-                    fnlxc.pasdw_reduce_i(
+                    pwutil.pasdw_reduce_i(
                         coefs_bi[ib],
                         funcs_gi,
                         self.rbuf_ag[b].T,
@@ -278,7 +278,7 @@ def interpolate_rbuf_onto_atoms(self, pot=False):
                 funcs_gi = atom_slice.get_funcs()
                 ni = funcs_gi.shape[1]
                 coefs_bi = np.zeros((1, ni))
-                fnlxc.pasdw_reduce_g(
+                pwutil.pasdw_reduce_g(
                     coefs_bi[0],
                     funcs_gi,
                     feat_g.T,
@@ -295,7 +295,7 @@ def interpolate_rbuf_onto_atoms(self, pot=False):
                 ni = funcs_gi.shape[1]
                 coefs_bi = np.zeros((Nalpha_r, ni))
                 for ib, b in enumerate(self.alphas):
-                    fnlxc.pasdw_reduce_g(
+                    pwutil.pasdw_reduce_g(
                         coefs_bi[ib],
                         funcs_gi,
                         self.rbuf_ag[b].T,
@@ -369,7 +369,7 @@ def construct_grad_terms(self, c_ag, c_abi, aug=False, stress=False):
                 if stress:
                     if aug:
                         tmp_i = np.zeros(ni)
-                        fnlxc.pasdw_reduce_g(
+                        pwutil.pasdw_reduce_g(
                             tmp_i,
                             funcs_gi,
                             feat_g.T,
@@ -378,28 +378,28 @@ def construct_grad_terms(self, c_ag, c_abi, aug=False, stress=False):
                         P += self.gd.dv * tmp_i.dot(coefs_bi[0])
                     for v in range(3):
                         dx_g = atom_slice.rad_g * atom_slice.rhat_gv[:, v]
-                        fnlxc.pasdw_reduce_g(
+                        pwutil.pasdw_reduce_g(
                             stress_vv[v],
                             (dx_g * intb_vg).T,
                             feat_g.T,
                             atom_slice.indset,
                         )
                         if has_ofit:
-                            fnlxc.pasdw_reduce_g(
+                            pwutil.pasdw_reduce_g(
                                 stress_vv[v],
                                 ft_xbg[v, :, 0].T,
                                 feat_g.T,
                                 atom_slice.indset,
                             )
                 else:
-                    fnlxc.pasdw_reduce_g(
+                    pwutil.pasdw_reduce_g(
                         F_av[a],
                         intb_vg.T,
                         feat_g.T,
                         atom_slice.indset,
                     )
                     if has_ofit:
-                        fnlxc.pasdw_reduce_g(
+                        pwutil.pasdw_reduce_g(
                             F_av[a],
                             ft_xbg[:, 0].T,
                             feat_g.T,
@@ -427,7 +427,7 @@ def construct_grad_terms(self, c_ag, c_abi, aug=False, stress=False):
                     if stress:
                         if aug:
                             tmp_i = np.zeros(ni)
-                            fnlxc.pasdw_reduce_g(
+                            pwutil.pasdw_reduce_g(
                                 tmp_i,
                                 funcs_gi,
                                 c_ag[b].T,
@@ -436,28 +436,28 @@ def construct_grad_terms(self, c_ag, c_abi, aug=False, stress=False):
                             P += self.gd.dv * tmp_i.dot(coefs_bi[ib])
                         for v in range(3):
                             dx_g = atom_slice.rad_g * atom_slice.rhat_gv[:, v]
-                            fnlxc.pasdw_reduce_g(
+                            pwutil.pasdw_reduce_g(
                                 stress_vv[v],
                                 (dx_g * intb_vg).T,
                                 c_ag[b].T,
                                 atom_slice.indset,
                             )
                             if has_ofit:
-                                fnlxc.pasdw_reduce_g(
+                                pwutil.pasdw_reduce_g(
                                     stress_vv[v],
                                     ft_xbg[v, :, ib].T,
                                     c_ag[b].T,
                                     atom_slice.indset,
                                 )
                     else:
-                        fnlxc.pasdw_reduce_g(
+                        pwutil.pasdw_reduce_g(
                             F_av[a],
                             intb_vg.T,
                             c_ag[b].T,
                             atom_slice.indset,
                         )
                         if has_ofit:
-                            fnlxc.pasdw_reduce_g(
+                            pwutil.pasdw_reduce_g(
                                 F_av[a],
                                 ft_xbg[:, ib].T,
                                 c_ag[b].T,